942 files changed, 30489 insertions, 26149 deletions
diff --git a/.gitignore b/.gitignore
index e213b27..ce57b79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,3 +96,6 @@ x509.genkey
 
 # Kconfig presets
 all.config
+
+# Kdevelop4
+*.kdev4
diff --git a/.mailmap b/.mailmap
index 1ad6873..ada8ad6 100644
--- a/.mailmap
+++ b/.mailmap
@@ -17,7 +17,7 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com>
 Al Viro <viro@ftp.linux.org.uk>
 Al Viro <viro@zenIV.linux.org.uk>
 Andreas Herrmann <aherrman@de.ibm.com>
-Andrew Morton <akpm@osdl.org>
+Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
 Archit Taneja <archit@ti.com>
@@ -102,6 +102,8 @@ Rudolf Marek <R.Marek@sh.cvut.cz>
 Rui Saraiva <rmps@joel.ist.utl.pt>
 Sachin P Sant <ssant@in.ibm.com>
 Sam Ravnborg <sam@mars.ravnborg.org>
+Santosh Shilimkar <ssantosh@kernel.org>
+Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
diff --git a/CREDITS b/CREDITS
index c56d8aa..96935df 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1734,14 +1734,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@redhat.com
+E: davej@codemonkey.org.uk
 W: http://www.codemonkey.org.uk
 D: Assorted VIA x86 support.
 D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Fedora kernel maintenance.
+D: Fedora kernel maintenance (2003-2014).
+D: 'Trinity' and similar fuzz testing work.
 D: Misc/Other.
-S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
new file mode 100644
index 0000000..7969443
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@@ -0,0 +1,60 @@
+What:		/sys/class/leds/dell::kbd_backlight/als_setting
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file allows to control the automatic keyboard
+		illumination mode on some systems that have an ambient
+		light sensor. Write 1 to this file to enable the auto
+		mode, 0 to disable it.
+
+What:		/sys/class/leds/dell::kbd_backlight/start_triggers
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file allows to control the input triggers that
+		turn on the keyboard backlight illumination that is
+		disabled because of inactivity.
+		Read the file to see the triggers available. The ones
+		enabled are preceded by '+', those disabled by '-'.
+
+		To enable a trigger, write its name preceded by '+' to
+		this file. To disable a trigger, write its name preceded
+		by '-' instead.
+
+		For example, to enable the keyboard as trigger run:
+		    echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+		To disable it:
+		    echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+
+		Note that not all the available triggers can be configured.
+
+What:		/sys/class/leds/dell::kbd_backlight/stop_timeout
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file allows to specify the interval after which the
+		keyboard illumination is disabled because of inactivity.
+		The timeouts are expressed in seconds, minutes, hours and
+		days, for which the symbols are 's', 'm', 'h' and 'd'
+		respectively.
+
+		To configure the timeout, write to this file a value along
+		with any of the above units. If no unit is specified, the value
+		is assumed to be expressed in seconds.
+
+		For example, to set the timeout to 10 minutes run:
+		    echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
+
+		Note that when this file is read, the returned value might be
+		expressed in a different unit than the one used when the timeout
+		was set.
+
+		Also note that only some timeouts are supported and that
+		some systems might fall back to a specific timeout in case
+		an invalid timeout is written to this file.
diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index 0a2debf..350dfb3 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2579,6 +2579,18 @@ fields changed from _s32 to _u32.
       </orderedlist>
     </section>
 
+    <section>
+      <title>V4L2 in Linux 3.19</title>
+      <orderedlist>
+	<listitem>
+	  <para>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding;
+and &v4l2-quantization; fields to &v4l2-pix-format;, &v4l2-pix-format-mplane;
+and &v4l2-mbus-framefmt;.
+	  </para>
+	</listitem>
+      </orderedlist>
+    </section>
+
     <section id="other">
       <title>Relation of V4L2 to other Linux multimedia APIs</title>
 
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index ccf6053..d5eca4b 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -138,9 +138,25 @@ applicable values.</para></entry>
 	<row>
 	  <entry>__u32</entry>
 	  <entry><structfield>flags</structfield></entry>
-	    <entry>Flags set by the application or driver, see <xref
+	  <entry>Flags set by the application or driver, see <xref
 linkend="format-flags" />.</entry>
 	</row>
+	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
       </tbody>
     </tgroup>
   </table>
@@ -232,9 +248,25 @@ codes can be used.</entry>
 	  <entry>Flags set by the application or driver, see <xref
 linkend="format-flags" />.</entry>
 	</row>
+	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
         <row>
           <entry>__u8</entry>
-          <entry><structfield>reserved[10]</structfield></entry>
+          <entry><structfield>reserved[8]</structfield></entry>
           <entry>Reserved for future extensions. Should be zeroed by the
            application.</entry>
         </row>
diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 18730b9..c5ea868 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -34,8 +34,24 @@
 	  <xref linkend="colorspaces" /> for details.</entry>
 	</row>
 	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
 	  <entry>__u32</entry>
-	  <entry><structfield>reserved</structfield>[7]</entry>
+	  <entry><structfield>reserved</structfield>[6]</entry>
 	  <entry>Reserved for future extensions. Applications and drivers must
 	  set the array to zero.</entry>
 	</row>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
index 7cfe618..ac0f8d9 100644
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -152,6 +152,15 @@ structs, ioctls) must be noted in more detail in the history chapter
 applications. -->
 
       <revision>
+	<revnumber>3.19</revnumber>
+	<date>2014-12-05</date>
+	<authorinitials>hv</authorinitials>
+	<revremark>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding; and &v4l2-quantization; fields
+to &v4l2-pix-format;, &v4l2-pix-format-mplane; and &v4l2-mbus-framefmt;.
+	</revremark>
+      </revision>
+
+      <revision>
 	<revnumber>3.17</revnumber>
 	<date>2014-08-04</date>
 	<authorinitials>lp, hv</authorinitials>
@@ -539,7 +548,7 @@ and discussions on the V4L mailing list.</revremark>
 </partinfo>
 
 <title>Video for Linux Two API Specification</title>
- <subtitle>Revision 3.17</subtitle>
+ <subtitle>Revision 3.19</subtitle>
 
   <chapter id="common">
     &sub-common;
diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
new file mode 100644
index 0000000..12bc614
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
@@ -0,0 +1,37 @@
+Device-tree bindings for I2C OPAL driver
+----------------------------------------
+
+Most of the device node and properties layout is specific to the firmware and
+used by the firmware itself for configuring the port. From the linux
+perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
+
+Required properties:
+
+- reg: Port-id within a given master
+- compatible: must be "ibm,opal-i2c"
+- ibm,opal-id: Refers to a specific bus and used to identify it when calling
+	       the relevant OPAL functions.
+- bus-frequency: Operating frequency of the i2c bus (in Hz). Informational for
+		 linux, used by the FW though.
+
+Optional properties:
+- ibm,port-name: Firmware provides this name that uniquely identifies the i2c
+		 port.
+
+The node contains a number of other properties that are used by the FW itself
+and depend on the specific hardware implementation. The example below depicts
+a P8 on-chip bus.
+
+Example:
+
+i2c-bus@0 {
+	reg = <0x0>;
+	bus-frequency = <0x61a80>;
+	compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
+	ibm,opal-id = <0x1>;
+	ibm,port-name = "p8_00000000_e1p0";
+	#address-cells = <0x1>;
+	phandle = <0x10000006>;
+	#size-cells = <0x0>;
+	linux,phandle = <0x10000006>;
+};
diff --git a/Documentation/devicetree/bindings/input/cap1106.txt b/Documentation/devicetree/bindings/input/cap11xx.txt
index 4b46390..7d0a300 100644
--- a/Documentation/devicetree/bindings/input/cap1106.txt
+++ b/Documentation/devicetree/bindings/input/cap11xx.txt
@@ -1,14 +1,16 @@
-Device tree bindings for Microchip CAP1106, 6 channel capacitive touch sensor
+Device tree bindings for Microchip CAP11xx based capacitive touch sensors
 
-The node for this driver must be a child of a I2C controller node, as the
+The node for this device must be a child of an I2C controller node, as the
 device communication via I2C only.
 
 Required properties:
 
-	compatible:		Must be "microchip,cap1106"
+	compatible:		Must contain one of:
+					"microchip,cap1106"
+					"microchip,cap1126"
+					"microchip,cap1188"
 
 	reg:			The I2C slave address of the device.
-				Only 0x28 is valid.
 
 	interrupts:		Property describing the interrupt line the
 				device's ALERT#/CM_IRQ# pin is connected to.
@@ -26,6 +28,10 @@ Optional properties:
 				Valid values are 1, 2, 4, and 8.
 				By default, a gain of 1 is set.
 
+	microchip,irq-active-high:	By default the interrupt pin is active low
+				open drain. This property allows using the active
+				high push-pull output.
+
 	linux,keycodes:		Specifies an array of numeric keycode values to
 				be used for the channels. If this property is
 				omitted, KEY_A, KEY_B, etc are used as
@@ -43,11 +49,11 @@ i2c_controller {
 		autorepeat;
 		microchip,sensor-gain = <2>;
 
-		linux,keycodes = <103		/* KEY_UP */
-				  106		/* KEY_RIGHT */
-				  108		/* KEY_DOWN */
-				  105		/* KEY_LEFT */
-				  109		/* KEY_PAGEDOWN */
-				  104>;		/* KEY_PAGEUP */
+		linux,keycodes = <103>,		/* KEY_UP */
+				 <106>,		/* KEY_RIGHT */
+				 <108>,		/* KEY_DOWN */
+				 <105>,		/* KEY_LEFT */
+				 <109>,		/* KEY_PAGEDOWN */
+				 <104>;		/* KEY_PAGEUP */
 	};
 }
diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt
new file mode 100644
index 0000000..ee3242c
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/elan_i2c.txt
@@ -0,0 +1,34 @@
+Elantech I2C Touchpad
+
+Required properties:
+- compatible: must be "elan,ekth3000".
+- reg: I2C address of the chip.
+- interrupt-parent: a phandle for the interrupt controller (see interrupt
+  binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt
+  binding[0]).
+
+Optional properties:
+- wakeup-source: touchpad can be used as a wakeup source.
+- pinctrl-names: should be "default" (see pinctrl binding [1]).
+- pinctrl-0: a phandle pointing to the pin settings for the device (see
+  pinctrl binding [1]).
+- vcc-supply: a phandle for the regulator supplying 3.3V power.
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Example:
+	&i2c1 {
+		/* ... */
+
+		touchpad@15 {
+			compatible = "elan,ekth3000";
+			reg = <0x15>;
+			interrupt-parent = <&gpio4>;
+			interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>;
+			wakeup-source;
+		};
+
+		/* ... */
+	};
diff --git a/Documentation/devicetree/bindings/input/elants_i2c.txt b/Documentation/devicetree/bindings/input/elants_i2c.txt
new file mode 100644
index 0000000..a765232
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/elants_i2c.txt
@@ -0,0 +1,33 @@
+Elantech I2C Touchscreen
+
+Required properties:
+- compatible: must be "elan,ekth3500".
+- reg: I2C address of the chip.
+- interrupt-parent: a phandle for the interrupt controller (see interrupt
+  binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt
+  binding[0]).
+
+Optional properties:
+- wakeup-source: touchscreen can be used as a wakeup source.
+- pinctrl-names: should be "default" (see pinctrl binding [1]).
+- pinctrl-0: a phandle pointing to the pin settings for the device (see
+  pinctrl binding [1]).
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Example:
+	&i2c1 {
+		/* ... */
+
+		touchscreen@10 {
+			compatible = "elan,ekth3500";
+			reg = <0x10>;
+			interrupt-parent = <&gpio4>;
+			interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>;
+			wakeup-source;
+		};
+
+		/* ... */
+	};
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt
index 5c2c021..a4a38fc 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.txt
+++ b/Documentation/devicetree/bindings/input/gpio-keys.txt
@@ -10,10 +10,13 @@ Optional properties:
 Each button (key) is represented as a sub-node of "gpio-keys":
 Subnode properties:
 
-	- gpios: OF device-tree gpio specification.
 	- label: Descriptive name of the key.
 	- linux,code: Keycode to emit.
 
+Required mutually exclusive subnode-properties:
+	- gpios: OF device-tree gpio specification.
+	- interrupts: the interrupt line for that input
+
 Optional subnode-properties:
 	- linux,input-type: Specify event type this button/key generates.
 	  If not specified defaults to <1> == EV_KEY.
@@ -33,4 +36,9 @@ Example nodes:
 				linux,code = <103>;
 				gpios = <&gpio1 0 1>;
 			};
+			button@22 {
+				label = "GPIO Key DOWN";
+				linux,code = <108>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+			};
 			...
diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt
index ba61782..9dafe6b 100644
--- a/Documentation/devicetree/bindings/media/rcar_vin.txt
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt
@@ -6,6 +6,8 @@ family of devices. The current blocks are always slaves and suppot one input
 channel which can be either RGB, YUYV or BT656.
 
  - compatible: Must be one of the following
+   - "renesas,vin-r8a7794" for the R8A7794 device
+   - "renesas,vin-r8a7793" for the R8A7793 device
    - "renesas,vin-r8a7791" for the R8A7791 device
    - "renesas,vin-r8a7790" for the R8A7790 device
    - "renesas,vin-r8a7779" for the R8A7779 device
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index 6edc3b6..1fe6dde 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -5,7 +5,9 @@ Required properties:
 - reg : should specify localbus address and size used for the chip,
 	and hardware ECC controller if available.
 	If the hardware ECC is PMECC, it should contain address and size for
-	PMECC, PMECC Error Location controller and ROM which has lookup tables.
+	PMECC and PMECC Error Location controller.
+	The PMECC lookup table address and size in ROM is optional. If not
+	specified, the driver will build it at runtime.
 - atmel,nand-addr-offset : offset for the address latch.
 - atmel,nand-cmd-offset : offset for the command latch.
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
@@ -27,7 +29,7 @@ Optional properties:
   are: 512, 1024.
 - atmel,pmecc-lookup-table-offset : includes two offsets of lookup table in ROM
   for different sector size. First one is for sector size 512, the next is for
-  sector size 1024.
+  sector size 1024. If not specified, driver will build the table at runtime.
 - nand-bus-width : 8 or 16 bus width if not present 8
 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
 - Nand Flash Controller(NFC) is a slave driver under Atmel nand flash
diff --git a/Documentation/devicetree/bindings/mtd/diskonchip.txt b/Documentation/devicetree/bindings/mtd/diskonchip.txt
new file mode 100644
index 0000000..3e13bfd
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/diskonchip.txt
@@ -0,0 +1,15 @@
+M-Systems and Sandisk DiskOnChip devices
+
+M-Systems DiskOnChip G3
+=======================
+The Sandisk (formerly M-Systems) docg3 is a NAND device of 64MB to 256MB.
+
+Required properties:
+ - compatible: should be "m-systems,diskonchip-g3"
+ - reg: register base and size
+
+Example:
+	docg3: flash@0 {
+		compatible = "m-systems,diskonchip-g3";
+		reg = <0x0 0x2000>;
+	};
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
index 36ef07d..af8915b 100644
--- a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -11,8 +11,8 @@ Required properties:
   are made in native endianness.
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
-- gpios : specifies the gpio pins to control the NAND device.  nwp is an
-  optional gpio and may be set to 0 if not present.
+- gpios : Specifies the GPIO pins to control the NAND device.  The order of
+  GPIO references is:  RDY, nCE, ALE, CLE, and an optional nWP.
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -35,11 +35,11 @@ gpio-nand@1,0 {
 	reg = <1 0x0000 0x2>;
 	#address-cells = <1>;
 	#size-cells = <1>;
-	gpios = <&banka 1 0	/* rdy */
-		 &banka 2 0 	/* nce */
-		 &banka 3 0 	/* ale */
-		 &banka 4 0 	/* cle */
-		 0		/* nwp */>;
+	gpios = <&banka 1 0>,	/* RDY */
+		<&banka 2 0>, 	/* nCE */
+		<&banka 3 0>, 	/* ALE */
+		<&banka 4 0>, 	/* CLE */
+		<0>;		/* nWP */
 
 	partition@0 {
 	...
diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
new file mode 100644
index 0000000..0273adb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
@@ -0,0 +1,45 @@
+Allwinner NAND Flash Controller (NFC)
+
+Required properties:
+- compatible : "allwinner,sun4i-a10-nand".
+- reg : shall contain registers location and length for data and reg.
+- interrupts : shall define the nand controller interrupt.
+- #address-cells: shall be set to 1. Encode the nand CS.
+- #size-cells : shall be set to 0.
+- clocks : shall reference nand controller clocks.
+- clock-names : nand controller internal clock names. Shall contain :
+    * "ahb" : AHB gating clock
+    * "mod" : nand controller clock
+
+Optional children nodes:
+Children nodes represent the available nand chips.
+
+Optional properties:
+- allwinner,rb : shall contain the native Ready/Busy ids.
+ or
+- rb-gpios : shall contain the gpios used as R/B pins.
+- nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft",
+  "soft_bch" or "none")
+
+see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings.
+
+
+Examples:
+nfc: nand@01c03000 {
+	compatible = "allwinner,sun4i-a10-nand";
+	reg = <0x01c03000 0x1000>;
+	interrupts = <0 37 1>;
+	clocks = <&ahb_gates 13>, <&nand_clk>;
+	clock-names = "ahb", "mod";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&nand_pins_a &nand_cs0_pins_a &nand_rb0_pins_a>;
+	status = "okay";
+
+	nand@0 {
+		reg = <0>;
+		allwinner,rb = <0>;
+		nand-ecc-mode = "soft_bch";
+	};
+};
diff --git a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
new file mode 100644
index 0000000..cfda0d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for Atmel's HLCDC (High-end LCD Controller) PWM driver
+
+The Atmel HLCDC PWM is a subdevice of the HLCDC MFD device.
+See ../mfd/atmel-hlcdc.txt for more details.
+
+Required properties:
+ - compatible: value should be one of the following:
+   "atmel,hlcdc-pwm"
+ - pinctrl-names: the pin control state names. Should contain "default".
+ - pinctrl-0: should contain the pinctrl states described by pinctrl
+   default.
+ - #pwm-cells: should be set to 3. This PWM chip uses the default 3 cells
+   bindings defined in pwm.txt in this directory.
+
+Example:
+
+	hlcdc: hlcdc@f0030000 {
+		compatible = "atmel,sama5d3-hlcdc";
+		reg = <0xf0030000 0x2000>;
+		clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
+		clock-names = "periph_clk","sys_clk", "slow_clk";
+
+		hlcdc_pwm: hlcdc-pwm {
+			compatible = "atmel,hlcdc-pwm";
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_lcd_pwm>;
+			#pwm-cells = <3>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
new file mode 100644
index 0000000..fb6fb31
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
@@ -0,0 +1,30 @@
+BCM2835 PWM controller (Raspberry Pi controller)
+
+Required properties:
+- compatible: should be "brcm,bcm2835-pwm"
+- reg: physical base address and length of the controller's registers
+- clocks: This clock defines the base clock frequency of the PWM hardware
+  system, the period and the duty_cycle of the PWM signal is a multiple of
+  the base period.
+- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+  the cells format.
+
+Examples:
+
+pwm@2020c000 {
+	compatible = "brcm,bcm2835-pwm";
+	reg = <0x2020c000 0x28>;
+	clocks = <&clk_pwm>;
+	#pwm-cells = <2>;
+};
+
+clocks {
+	....
+		clk_pwm: pwm {
+			compatible = "fixed-clock";
+			reg = <3>;
+			#clock-cells = <0>;
+			clock-frequency = <9200000>;
+		};
+	....
+};
diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
index 4cf0249..4698e0e 100644
--- a/Documentation/devicetree/bindings/thermal/armada-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
@@ -5,17 +5,9 @@ Required properties:
 - compatible:	Should be set to one of the following:
 		marvell,armada370-thermal
 		marvell,armada375-thermal
-		marvell,armada375-z1-thermal
 		marvell,armada380-thermal
 		marvell,armadaxp-thermal
 
-		Note: As the name suggests, "marvell,armada375-z1-thermal"
-		applies for the SoC Z1 stepping only. On such stepping
-		some quirks need to be done and the register offset differs
-		from the one in the A0 stepping.
-		The operating system may auto-detect the SoC stepping and
-		update the compatible and register offsets at runtime.
-
 - reg:		Device's register space.
 		Two entries are expected, see the examples below.
 		The first one is required for the sensor register;
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
new file mode 100644
index 0000000..ef802de
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -0,0 +1,68 @@
+* Temperature Sensor ADC (TSADC) on rockchip SoCs
+
+Required properties:
+- compatible : "rockchip,rk3288-tsadc"
+- reg : physical base address of the controller and length of memory mapped
+	region.
+- interrupts : The interrupt number to the cpu. The interrupt specifier format
+	       depends on the interrupt controller.
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Shall be "tsadc" for the converter-clock, and "apb_pclk" for
+		the peripheral clock.
+- resets : Must contain an entry for each entry in reset-names.
+	   See ../reset/reset.txt for details.
+- reset-names : Must include the name "tsadc-apb".
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description.
+- rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value.
+- rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO.
+- rockchip,hw-tshut-polarity : The hardware-controlled active polarity 0:LOW
+			       1:HIGH.
+
+Example:
+tsadc: tsadc@ff280000 {
+	compatible = "rockchip,rk3288-tsadc";
+	reg = <0xff280000 0x100>;
+	interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
+	clock-names = "tsadc", "apb_pclk";
+	resets = <&cru SRST_TSADC>;
+	reset-names = "tsadc-apb";
+	pinctrl-names = "default";
+	pinctrl-0 = <&otp_out>;
+	#thermal-sensor-cells = <1>;
+	rockchip,hw-tshut-temp = <95000>;
+	rockchip,hw-tshut-mode = <0>;
+	rockchip,hw-tshut-polarity = <0>;
+};
+
+Example: referring to thermal sensors:
+thermal-zones {
+	cpu_thermal: cpu_thermal {
+		polling-delay-passive = <1000>; /* milliseconds */
+		polling-delay = <5000>; /* milliseconds */
+
+		/* sensor	ID */
+		thermal-sensors = <&tsadc	1>;
+
+		trips {
+			cpu_alert0: cpu_alert {
+				temperature = <70000>; /* millicelsius */
+				hysteresis = <2000>; /* millicelsius */
+				type = "passive";
+			};
+			cpu_crit: cpu_crit {
+				temperature = <90000>; /* millicelsius */
+				hysteresis = <2000>; /* millicelsius */
+				type = "critical";
+			};
+		};
+
+		cooling-maps {
+			map0 {
+				trip = <&cpu_alert0>;
+				cooling-device =
+				    <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+			};
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt
new file mode 100644
index 0000000..ecf3ed7
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt
@@ -0,0 +1,53 @@
+Tegra124 SOCTHERM thermal management system
+
+The SOCTHERM IP block contains thermal sensors, support for polled
+or interrupt-based thermal monitoring, CPU and GPU throttling based
+on temperature trip points, and handling external overcurrent
+notifications. It is also used to manage emergency shutdown in an
+overheating situation.
+
+Required properties :
+- compatible : "nvidia,tegra124-soctherm".
+- reg : Should contain 1 entry:
+  - SOCTHERM register set
+- interrupts : Defines the interrupt used by SOCTHERM
+- clocks : Must contain an entry for each entry in clock-names.
+  See ../clock/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+  - tsensor
+  - soctherm
+- resets : Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+  - soctherm
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description
+    of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a
+    list of valid values when referring to thermal sensors.
+
+
+Example :
+
+	soctherm@0,700e2000 {
+		compatible = "nvidia,tegra124-soctherm";
+		reg = <0x0 0x700e2000 0x0 0x1000>;
+		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+			<&tegra_car TEGRA124_CLK_SOC_THERM>;
+		clock-names = "tsensor", "soctherm";
+		resets = <&tegra_car 78>;
+		reset-names = "soctherm";
+
+		#thermal-sensor-cells = <1>;
+	};
+
+Example: referring to thermal sensors :
+
+       thermal-zones {
+                cpu {
+                        polling-delay-passive = <1000>;
+                        polling-delay = <1000>;
+
+                        thermal-sensors =
+                                <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
+                };
+	};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 423d474..b1df0ad 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -47,6 +47,7 @@ dlink	D-Link Corporation
 dmo	Data Modul AG
 ebv	EBV Elektronik
 edt	Emerging Display Technologies
+elan	Elan Microelectronic Corp.
 emmicro	EM Microelectronic
 energymicro	Silicon Laboratories (formerly Energy Micro AS)
 epcos	EPCOS AG
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
deleted file mode 100644
index ffb5c80..0000000
--- a/Documentation/ia64/kvm.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
-interfaces are not stable enough to use. So, please don't run critical
-applications in virtual machine.
-We will try our best to improve it in future versions!
-
-				Guide: How to boot up guests on kvm/ia64
-
-This guide is to describe how to enable kvm support for IA-64 systems.
-
-1. Get the kvm source from git.kernel.org.
-	Userspace source:
-		git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
-	Kernel Source:
-		git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
-
-2. Compile the source code.
-	2.1 Compile userspace code:
-		(1)cd ./kvm-userspace
-		(2)./configure
-		(3)cd kernel
-		(4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
-		(5)cd ..
-		(6)make qemu
-		(7)cd qemu; make install
-
-	2.2 Compile kernel source code:
-		(1) cd ./$kernel_dir
-		(2) Make menuconfig
-		(3) Enter into virtualization option, and choose kvm.
-		(4) make
-		(5) Once (4) done, make modules_install
-		(6) Make initrd, and use new kernel to reboot up host machine.
-		(7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
-		(8) insmod kvm.ko; insmod kvm-intel.ko
-
-Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail.
-
-3. Get Guest Firmware named as Flash.fd, and put it under right place:
-	(1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
-
-	(2) If you have no firmware at hand, Please download its source from
-		hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
-	    you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
-
-	(3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
-
-4. Boot up Linux or Windows guests:
-	4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
-
-	4.2 Boot up guests use the following command.
-		/usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
-		(xx is the number of virtual processors for the guest, now the maximum value is 4)
-
-5. Known possible issue on some platforms with old Firmware.
-
-In the event of strange host crash issues, try to solve it through either of the following ways:
-
-(1): Upgrade your Firmware to the latest one.
-
-(2): Applying the below patch to kernel source.
-diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
-index 0b53344..f02b0f7 100644
---- a/arch/ia64/kernel/pal.S
-+++ b/arch/ia64/kernel/pal.S
-@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
--	srlz.d				// serialize restoration of psr.l
-+	srlz.i			// serialize restoration of psr.l
-+	;;
-	br.ret.sptk.many b0
- END(ia64_pal_call_static)
-
-6. Bug report:
-	If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list.
-	https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
-
-Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
-
-
-								Xiantao Zhang <xiantao.zhang@intel.com>
-											2008.3.10
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bda85f1..4df73da 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1457,6 +1457,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		       disable
 		         Do not enable intel_pstate as the default
 		         scaling driver for the supported processors
+		       force
+			 Enable intel_pstate on systems that prohibit it by default
+			 in favor of acpi-cpufreq. Forcing the intel_pstate driver
+			 instead of acpi-cpufreq may disable platform features, such
+			 as thermal controls and power capping, that rely on ACPI
+			 P-States information being indicated to OSPM and therefore
+			 should be used with caution. This option does not work with
+			 processors that aren't supported by the intel_pstate driver
+			 or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
 		       no_hwp
 		         Do not enable hardware P state control (HWP)
 			 if available.
diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt
index 0723db7..fe71938 100644
--- a/Documentation/networking/fib_trie.txt
+++ b/Documentation/networking/fib_trie.txt
@@ -73,8 +73,8 @@ trie_leaf_remove()
 
 trie_rebalance()
 	The key function for the dynamic trie after any change in the trie
-	it is run to optimize and reorganize. Tt will walk the trie upwards 
-	towards the root from a given tnode, doing a resize() at each step 
+	it is run to optimize and reorganize. It will walk the trie upwards
+	towards the root from a given tnode, doing a resize() at each step
 	to implement level compression.
 
 resize()
diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt
index e5a940e..6cfc854 100644
--- a/Documentation/video4linux/vivid.txt
+++ b/Documentation/video4linux/vivid.txt
@@ -640,6 +640,21 @@ Colorspace: selects which colorspace should be used when generating the image.
 	Changing the colorspace will result in the V4L2_EVENT_SOURCE_CHANGE
 	to be sent since it emulates a detected colorspace change.
 
+Y'CbCr Encoding: selects which Y'CbCr encoding should be used when generating
+	a Y'CbCr image. This only applies if the CSC Colorbar test pattern is
+	selected, and if the format is set to a Y'CbCr format as opposed to an
+	RGB format.
+
+	Changing the Y'CbCr encoding will result in the V4L2_EVENT_SOURCE_CHANGE
+	to be sent since it emulates a detected colorspace change.
+
+Quantization: selects which quantization should be used for the RGB or Y'CbCr
+	encoding when generating the test pattern. This only applies if the CSC
+	Colorbar test pattern is selected.
+
+	Changing the quantization will result in the V4L2_EVENT_SOURCE_CHANGE
+	to be sent since it emulates a detected colorspace change.
+
 Limited RGB Range (16-235): selects if the RGB range of the HDMI source should
 	be limited or full range. This combines with the Digital Video 'Rx RGB
 	Quantization Range' control and can be used to test what happens if
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7610eaa..0007fef 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -68,9 +68,12 @@ description:
 
   Capability: which KVM extension provides this ioctl.  Can be 'basic',
       which means that is will be provided by any kernel that supports
-      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
       means availability needs to be checked with KVM_CHECK_EXTENSION
-      (see section 4.4).
+      (see section 4.4), or 'none' which means that while not all kernels
+      support this ioctl, there's no capability bit to check its
+      availability: for kernels that don't support the ioctl,
+      the ioctl returns -ENOTTY.
 
   Architectures: which instruction set architectures provide this ioctl.
       x86 includes both i386 and x86_64.
@@ -604,7 +607,7 @@ struct kvm_fpu {
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ia64, ARM, arm64, s390
+Architectures: x86, ARM, arm64, s390
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
+only go to the IOAPIC.  On ARM/arm64, a GIC is
 created. On s390, a dummy irq routing table is created.
 
 Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
@@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used.
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm, arm64
+Architectures: x86, arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
@@ -676,7 +679,7 @@ struct kvm_irq_level {
 4.26 KVM_GET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in/out)
 Returns: 0 on success, -1 on error
@@ -698,7 +701,7 @@ struct kvm_irqchip {
 4.27 KVM_SET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in)
 Returns: 0 on success, -1 on error
@@ -991,7 +994,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1005,16 +1008,15 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal [x86,
-                                 ia64]
+                                 which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI [x86, ia64]
+                                 now ready for a SIPI [x86]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt [x86, ia64]
+                                 is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
  - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
@@ -1022,7 +1024,7 @@ Possible values are:
  - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
                                  [s390]
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
@@ -1030,7 +1032,7 @@ these architectures.
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
@@ -1065,7 +1067,7 @@ documentation when it pops into existence).
 4.41 KVM_SET_BOOT_CPU_ID
 
 Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: unsigned long vcpu_id
 Returns: 0 on success, -1 on error
@@ -1257,8 +1259,8 @@ The flags bitmap is defined as:
 
 4.48 KVM_ASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_ASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
@@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
 device assignment.  The user requesting this ioctl must have read/write
 access to the PCI sysfs resource files associated with the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.49 KVM_DEASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_DEASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
 
 Ends PCI device assignment, releasing all associated resources.
 
-See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
 used in kvm_assigned_pci_dev to identify the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
 
 4.50 KVM_ASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1346,11 +1359,17 @@ The following flags are defined:
 It is not valid to specify multiple types per host or guest IRQ. However, the
 IRQ type of host and guest can differ or can even be null.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.51 KVM_DEASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
 4.52 KVM_SET_GSI_ROUTING
 
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64 s390
+Architectures: x86 s390
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
@@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter {
 
 4.53 KVM_ASSIGN_SET_MSIX_NR
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_nr (in)
 Returns: 0 on success, -1 on error
@@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr {
 
 4.54 KVM_ASSIGN_SET_MSIX_ENTRY
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_entry (in)
 Returns: 0 on success, -1 on error
@@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.55 KVM_SET_TSC_KHZ
 
@@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu.
 Note that because some registers reflect machine topology, all vcpus
 should be created before this ioctl is invoked.
 
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
 Possible features:
 	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-	  Depends on KVM_CAP_ARM_PSCI.
+	  Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+	  and execute guest code when KVM_RUN is called.
 	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
 	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
@@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes
 the system-level event type. The 'flags' field describes architecture
 specific flags for the system-level event.
 
+Valid values for 'type' are:
+  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+   VM. Userspace is not obliged to honour this, and if it does honour
+   this, it does not need to destroy the VM synchronously (i.e. it may call
+   KVM_RUN again before shutdown finally occurs).
+  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+   As with SHUTDOWN, userspace can choose to ignore the request, or
+   to schedule the reset to occur in the future and may call KVM_RUN again.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 0d16f96..d426fc8 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -12,14 +12,14 @@ specific.
 1. GROUP: KVM_S390_VM_MEM_CTRL
 Architectures: s390
 
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
 Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
 
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
 
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
 Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 6d470ae..2a71c8f 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
 	64 byte memory area which must be in guest RAM and must be
 	zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
 	when asynchronous page faults are enabled on the vcpu 0 when
-	disabled. Bit 2 is 1 if asynchronous page faults can be injected
+	disabled. Bit 1 is 1 if asynchronous page faults can be injected
 	when vcpu is in cpl == 0.
 
 	First 4 byte of 64 byte memory location will be written to by
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
index 4472ed2..818518a 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.txt
@@ -7,11 +7,15 @@ that can be used in conjunction with compiler changes to check memory
 references, for those references whose compile-time normal intentions are
 usurped at runtime due to buffer overflow or underflow.
 
+You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
+
+	cat /proc/cpuinfo  | grep ' mpx '
+
 For more information, please refer to Intel(R) Architecture Instruction
 Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
 Extensions.
 
-Note: Currently no hardware with MPX ISA is available but it is always
+Note: As of December 2014, no hardware with MPX is available but it is
 possible to use SDE (Intel(R) Software Development Emulator) instead, which
 can be downloaded from
 http://software.intel.com/en-us/articles/intel-software-development-emulator
@@ -30,9 +34,15 @@ is how we expect the compiler, application and kernel to work together.
    instrumentation as well as some setup code called early after the app
    starts. New instruction prefixes are noops for old CPUs.
 2) That setup code allocates (virtual) space for the "bounds directory",
-   points the "bndcfgu" register to the directory and notifies the kernel
-   (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using
-   MPX.
+   points the "bndcfgu" register to the directory (must also set the valid
+   bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT))
+   that the app will be using MPX.  The app must be careful not to access
+   the bounds tables between the time when it populates "bndcfgu" and
+   when it calls the prctl().  This might be hard to guarantee if the app
+   is compiled with MPX.  You can add "__attribute__((bnd_legacy))" to
+   the function to disable MPX instrumentation to help guarantee this.
+   Also be careful not to call out to any other code which might be
+   MPX-instrumented.
 3) The kernel detects that the CPU has MPX, allows the new prctl() to
    succeed, and notes the location of the bounds directory. Userspace is
    expected to keep the bounds directory at that locationWe note it
diff --git a/MAINTAINERS b/MAINTAINERS
index 4aac6c8..08f671d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4255,6 +4255,12 @@ L:	linux-media@vger.kernel.org
 S:	Maintained
 F:	drivers/media/usb/go7007/
 
+GOODIX TOUCHSCREEN
+M:	Bastien Nocera <hadess@hadess.net>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/touchscreen/goodix.c
+
 GPIO SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
 M:	Alexandre Courbot <gnurou@gmail.com>
@@ -4957,6 +4963,12 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
 S:	Supported
 F:	drivers/idle/intel_idle.c
 
+INTEL PSTATE DRIVER
+M:	Kristen Carlson Accardi <kristen@linux.intel.com>
+L:	linux-pm@vger.kernel.org
+S:	Supported
+F:	drivers/cpufreq/intel_pstate.c
+
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 M:	Maik Broemme <mbroemme@plusserver.de>
 L:	linux-fbdev@vger.kernel.org
@@ -5489,15 +5501,6 @@ S:	Supported
 F:	arch/powerpc/include/asm/kvm*
 F:	arch/powerpc/kvm/
 
-KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-M:	Xiantao Zhang <xiantao.zhang@intel.com>
-L:	kvm-ia64@vger.kernel.org
-W:	http://kvm.qumranet.com
-S:	Supported
-F:	Documentation/ia64/kvm.txt
-F:	arch/ia64/include/asm/kvm*
-F:	arch/ia64/kvm/
-
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:	Christian Borntraeger <borntraeger@de.ibm.com>
 M:	Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -6613,19 +6616,8 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 
 NETWORKING [WIRELESS]
-M:	"John W. Linville" <linville@tuxdriver.com>
 L:	linux-wireless@vger.kernel.org
 Q:	http://patchwork.kernel.org/project/linux-wireless/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git
-S:	Maintained
-F:	net/mac80211/
-F:	net/rfkill/
-F:	net/wireless/
-F:	include/net/ieee80211*
-F:	include/linux/wireless.h
-F:	include/uapi/linux/wireless.h
-F:	include/net/iw_handler.h
-F:	drivers/net/wireless/
 
 NETWORKING DRIVERS
 L:	netdev@vger.kernel.org
@@ -6646,6 +6638,14 @@ F:	include/linux/inetdevice.h
 F:	include/uapi/linux/if_*
 F:	include/uapi/linux/netdevice.h
 
+NETWORKING DRIVERS (WIRELESS)
+M:	Kalle Valo <kvalo@codeaurora.org>
+L:	linux-wireless@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-wireless/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git/
+S:	Maintained
+F:	drivers/net/wireless/
+
 NETXEN (1/10) GbE SUPPORT
 M:	Manish Chopra <manish.chopra@qlogic.com>
 M:	Sony Chacko <sony.chacko@qlogic.com>
@@ -9510,6 +9510,7 @@ Q:	https://patchwork.kernel.org/project/linux-pm/list/
 S:	Supported
 F:	drivers/thermal/
 F:	include/linux/thermal.h
+F:	include/uapi/linux/thermal.h
 F:	include/linux/cpu_cooling.h
 F:	Documentation/devicetree/bindings/thermal/
 
@@ -10242,13 +10243,13 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/via/via-velocity.*
 
-VIVI VIRTUAL VIDEO DRIVER
+VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 W:	http://linuxtv.org
 S:	Maintained
-F:	drivers/media/platform/vivi*
+F:	drivers/media/platform/vivid/*
 
 VLAN (802.1Q)
 M:	Patrick McHardy <kaber@trash.net>
diff --git a/Makefile b/Makefile
index fd80c6e..fa9604d 100644
--- a/Makefile
+++ b/Makefile
@@ -481,9 +481,10 @@ asm-generic:
 # of make so .config is not included in this case either (for *config).
 
 version_h := include/generated/uapi/linux/version.h
+old_version_h := include/linux/version.h
 
 no-dot-config-targets := clean mrproper distclean \
-			 cscope gtags TAGS tags help %docs check% coccicheck \
+			 cscope gtags TAGS tags help% %docs check% coccicheck \
 			 $(version_h) headers_% archheaders archscripts \
 			 kernelversion %src-pkg
 
@@ -1005,6 +1006,7 @@ endef
 
 $(version_h): $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
+	$(Q)rm -f $(old_version_h)
 
 include/generated/utsrelease.h: include/config/kernel.release FORCE
 	$(call filechk,utsrelease.h)
@@ -1036,8 +1038,6 @@ firmware_install: FORCE
 #Default location for installed headers
 export INSTALL_HDR_PATH = $(objtree)/usr
 
-hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
-
 # If we do an all arch process set dst to asm-$(hdr-arch)
 hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm)
 
@@ -1175,7 +1175,7 @@ MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \
 		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
 		  signing_key.priv signing_key.x509 x509.genkey		\
 		  extra_certificates signing_key.x509.keyid		\
-		  signing_key.x509.signer include/linux/version.h
+		  signing_key.x509.signer
 
 # clean - Delete most, but leave enough to build external modules
 #
@@ -1235,7 +1235,7 @@ rpm: include/config/kernel.release FORCE
 # ---------------------------------------------------------------------------
 
 boards := $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*_defconfig)
-boards := $(notdir $(boards))
+boards := $(sort $(notdir $(boards)))
 board-dirs := $(dir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*/*_defconfig))
 board-dirs := $(sort $(notdir $(board-dirs:/=)))
 
@@ -1326,7 +1326,7 @@ help-board-dirs := $(addprefix help-,$(board-dirs))
 
 help-boards: $(help-board-dirs)
 
-boards-per-dir = $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig))
+boards-per-dir = $(sort $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig)))
 
 $(help-board-dirs): help-%:
 	@echo  'Architecture specific targets ($(SRCARCH) $*):'
@@ -1581,11 +1581,6 @@ ifneq ($(cmd_files),)
   include $(cmd_files)
 endif
 
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
-
 endif	# skip-makefile
 
 PHONY += FORCE
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index fe44b24..df94ac1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -428,3 +428,4 @@ source "arch/arc/Kconfig.debug"
 source "security/Kconfig"
 source "crypto/Kconfig"
 source "lib/Kconfig"
+source "kernel/power/Kconfig"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 10bc3d4..db72fec 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -12,7 +12,7 @@ ifeq ($(CROSS_COMPILE),)
 CROSS_COMPILE := arc-linux-uclibc-
 endif
 
-KBUILD_DEFCONFIG := fpga_defconfig
+KBUILD_DEFCONFIG := nsim_700_defconfig
 
 cflags-y	+= -mA7 -fno-common -pipe -fno-builtin -D__linux__
 
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index cfaedd9..1c169dc 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
 	};
 
 	aliases {
@@ -41,9 +41,9 @@
 			#interrupt-cells = <1>;
 		};
 
-		uart0: serial@c0000000 {
+		uart0: serial@f0000000 {
 			compatible = "ns8250";
-			reg = <0xc0000000 0x2000>;
+			reg = <0xf0000000 0x2000>;
 			interrupts = <11>;
 			clock-frequency = <3686400>;
 			baud = <115200>;
@@ -52,21 +52,21 @@
 			no-loopback-test = <1>;
 		};
 
-		pgu0: pgu@c9000000 {
+		pgu0: pgu@f9000000 {
 			compatible = "snps,arcpgufb";
-			reg = <0xc9000000 0x400>;
+			reg = <0xf9000000 0x400>;
 		};
 
-		ps2: ps2@c9001000 {
+		ps2: ps2@f9001000 {
 			compatible = "snps,arc_ps2";
-			reg = <0xc9000400 0x14>;
+			reg = <0xf9000400 0x14>;
 			interrupts = <13>;
 			interrupt-names = "arc_ps2_irq";
 		};
 
-		eth0: ethernet@c0003000 {
+		eth0: ethernet@f0003000 {
 			compatible = "snps,oscilan";
-			reg = <0xc0003000 0x44>;
+			reg = <0xf0003000 0x44>;
 			interrupts = <7>, <8>;
 			interrupt-names = "rx", "tx";
 		};
diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
deleted file mode 100644
index 49c9301..0000000
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ /dev/null
@@ -1,63 +0,0 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-CONFIG_ARC_EMAC=y
-CONFIG_LXT_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/nsim_700_defconfig
index ef4d3bc..ef4d3bc 100644
--- a/arch/arc/configs/fpga_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 742816f..27ecc69 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -41,6 +41,15 @@
 
 /******************************************************************
  * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ * 	Orig Bug + Rejected solution	: https://lkml.org/lkml/2013/3/29/67
+ * 	Reasoning			: https://lkml.org/lkml/2013/4/8/15
+ *
  ******************************************************************/
 
 /*
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index d01df0c..20ebb60 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -26,8 +26,10 @@
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
 
+#ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+#endif
 
 struct plat_smp_ops  plat_smp_ops;
 
diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
index 51b373f..4eb540b 100644
--- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
@@ -1942,4 +1942,48 @@
 			 <&tegra_car TEGRA124_CLK_EXTERN1>;
 		clock-names = "pll_a", "pll_a_out0", "mclk";
 	};
+
+	thermal-zones {
+		cpu {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+
+		mem {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+
+		gpu {
+			trips {
+				trip@0 {
+					temperature = <101000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				/* There are currently no cooling maps because there are no cooling devices */
+			};
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 3ad2e3c..4be06c6 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -4,6 +4,7 @@
 #include <dt-bindings/pinctrl/pinctrl-tegra.h>
 #include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/thermal/tegra124-soctherm.h>
 
 #include "skeleton.dtsi"
 
@@ -657,6 +658,18 @@
 		status = "disabled";
 	};
 
+	soctherm: thermal-sensor@0,700e2000 {
+		compatible = "nvidia,tegra124-soctherm";
+		reg = <0x0 0x700e2000 0x0 0x1000>;
+		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+			<&tegra_car TEGRA124_CLK_SOC_THERM>;
+		clock-names = "tsensor", "soctherm";
+		resets = <&tegra_car 78>;
+		reset-names = "soctherm";
+		#thermal-sensor-cells = <1>;
+	};
+
 	ahub@0,70300000 {
 		compatible = "nvidia,tegra124-ahub";
 		reg = <0x0 0x70300000 0x0 0x200>,
@@ -898,6 +911,40 @@
 		};
 	};
 
+	thermal-zones {
+		cpu {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
+		};
+
+		mem {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_MEM>;
+		};
+
+		gpu {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_GPU>;
+		};
+
+		pllx {
+			polling-delay-passive = <1000>;
+			polling-delay = <1000>;
+
+			thermal-sensors =
+				<&soctherm TEGRA124_SOCTHERM_SENSOR_PLLX>;
+		};
+	};
+
 	timer {
 		compatible = "arm,armv7-timer";
 		interrupts = <GIC_PPI 13
diff --git a/arch/arm/configs/ape6evm_defconfig b/arch/arm/configs/ape6evm_defconfig
index db81d8c..9e9a72e 100644
--- a/arch/arm/configs/ape6evm_defconfig
+++ b/arch/arm/configs/ape6evm_defconfig
@@ -33,7 +33,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig
index d9675c68..5666e37 100644
--- a/arch/arm/configs/armadillo800eva_defconfig
+++ b/arch/arm/configs/armadillo800eva_defconfig
@@ -43,7 +43,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig
index 83a87e4..7117662 100644
--- a/arch/arm/configs/bcm_defconfig
+++ b/arch/arm/configs/bcm_defconfig
@@ -39,7 +39,7 @@ CONFIG_CPU_IDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig
index 1dde5da..3125e00 100644
--- a/arch/arm/configs/bockw_defconfig
+++ b/arch/arm/configs/bockw_defconfig
@@ -29,7 +29,7 @@ CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 759f9b0..235842c 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -49,7 +49,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=m
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_ONDEMAND=m
 CONFIG_CPU_IDLE=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c419907..5ef14de 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -27,7 +27,7 @@ CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index eb440aa..ea316c4 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -39,7 +39,6 @@ CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig
index 1fe3621f..1125436 100644
--- a/arch/arm/configs/hisi_defconfig
+++ b/arch/arm/configs/hisi_defconfig
@@ -18,7 +18,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_NEON=y
 CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 182e546..18e59fe 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -31,7 +31,6 @@ CONFIG_BINFMT_AOUT=m
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index f707cd2..7c2075a0 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -54,7 +54,7 @@ CONFIG_ARM_IMX6Q_CPUFREQ=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TEST_SUSPEND=y
 CONFIG_NET=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 20a3ff9..a2067cb 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -30,7 +30,7 @@ CONFIG_HIGHMEM=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/kzm9g_defconfig b/arch/arm/configs/kzm9g_defconfig
index 8cb115d..5d63fc5 100644
--- a/arch/arm/configs/kzm9g_defconfig
+++ b/arch/arm/configs/kzm9g_defconfig
@@ -43,7 +43,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/lager_defconfig b/arch/arm/configs/lager_defconfig
index 929c571..a82afc9 100644
--- a/arch/arm/configs/lager_defconfig
+++ b/arch/arm/configs/lager_defconfig
@@ -37,7 +37,7 @@ CONFIG_AUTO_ZRELADDR=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/mackerel_defconfig b/arch/arm/configs/mackerel_defconfig
index 57ececb..05a5293 100644
--- a/arch/arm/configs/mackerel_defconfig
+++ b/arch/arm/configs/mackerel_defconfig
@@ -28,7 +28,6 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_PM=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/marzen_defconfig b/arch/arm/configs/marzen_defconfig
index ff91630..3c8b6d8 100644
--- a/arch/arm/configs/marzen_defconfig
+++ b/arch/arm/configs/marzen_defconfig
@@ -33,7 +33,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 CONFIG_KEXEC=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 115cda9..a7dce67 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -63,7 +63,6 @@ CONFIG_FPE_NWFPE=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/prima2_defconfig b/arch/arm/configs/prima2_defconfig
index 23591db..f610230 100644
--- a/arch/arm/configs/prima2_defconfig
+++ b/arch/arm/configs/prima2_defconfig
@@ -18,7 +18,7 @@ CONFIG_PREEMPT=y
 CONFIG_AEABI=y
 CONFIG_KEXEC=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index b58fb32..afa2479 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -32,7 +32,7 @@ CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_KERNEL_MODE_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_ADVANCED_DEBUG=y
 CONFIG_NET=y
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index df2c0f5..3df6ca0 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -39,7 +39,7 @@ CONFIG_KEXEC=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index f7ac037..7a342d2 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -11,7 +11,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 40750f9..3ea9c33 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -46,7 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig
index d219d6a..6a1c989 100644
--- a/arch/arm/configs/u8500_defconfig
+++ b/arch/arm/configs/u8500_defconfig
@@ -25,7 +25,7 @@ CONFIG_CPU_IDLE=y
 CONFIG_ARM_U8500_CPUIDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig
index 9e7a256..1bfaa7b 100644
--- a/arch/arm/configs/vt8500_v6_v7_defconfig
+++ b/arch/arm/configs/vt8500_v6_v7_defconfig
@@ -16,7 +16,7 @@ CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269..66ce176 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 53036e21..254e065 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d57..63e0ecc 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 	
 	/*
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index e34934f..f7c65ad 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -484,7 +484,7 @@ static void armpmu_disable(struct pmu *pmu)
 	armpmu->stop(armpmu);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int armpmu_runtime_resume(struct device *dev)
 {
 	struct arm_pmu_platdata *plat = dev_get_platdata(dev);
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8..2d6d910 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 					 struct kvm_vcpu_init *init)
 {
@@ -659,10 +716,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 		return ret;
 
 	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b787..384bab6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	}
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-
-	/* We can only cope with guest==host and only on A15/A7 (for now). */
-	if (init->target != kvm_target_cpu())
-		return -EINVAL;
-
-	vcpu->arch.target = init->target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (test_bit(i, (void *)init->features)) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93..5d3bfc0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
-	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-					 KVM_TRACE_MMIO_READ_UNSATISFIED,
-			mmio.len, fault_ipa,
-			(mmio.is_write) ? data : 0);
+	if (mmio.is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+					       mmio.len);
 
-	if (mmio.is_write)
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+			       fault_ipa, data);
 		mmio_write_buf(mmio.data, mmio.len, data);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+			       fault_ipa, 0);
+	}
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff1..1dc9778 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any regular RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf377..58cb324 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time than when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c
index 6b98413..641edc3 100644
--- a/arch/arm/mach-davinci/pm_domain.c
+++ b/arch/arm/mach-davinci/pm_domain.c
@@ -14,7 +14,7 @@
 #include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int davinci_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index e4a00ba..603820e 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -21,7 +21,7 @@ menuconfig ARCH_EXYNOS
 	select HAVE_S3C_RTC if RTC_CLASS
 	select PINCTRL
 	select PINCTRL_EXYNOS
-	select PM_GENERIC_DOMAINS if PM_RUNTIME
+	select PM_GENERIC_DOMAINS if PM
 	select S5P_DEV_MFC
 	select SRAM
 	select MFD_SYSCON
diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index ca79dda..ef6041e 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c
@@ -19,7 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int keystone_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 3f2d396..c40e209 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -21,7 +21,7 @@
 
 #include "soc.h"
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
@@ -59,7 +59,7 @@ static struct dev_pm_domain default_pm_domain = {
 #define OMAP1_PM_DOMAIN (&default_pm_domain)
 #else
 #define OMAP1_PM_DOMAIN NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static struct pm_clk_notifier_block platform_bus_notifier = {
 	.pm_domain = OMAP1_PM_DOMAIN,
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index f0edec1..6ab656c 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -15,7 +15,7 @@ config ARCH_OMAP3
 	select ARM_CPU_SUSPEND if PM
 	select OMAP_INTERCONNECT
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select SOC_HAS_OMAP2_SDRC
 
 config ARCH_OMAP4
@@ -32,7 +32,7 @@ config ARCH_OMAP4
 	select PL310_ERRATA_588369 if CACHE_L2X0
 	select PL310_ERRATA_727915 if CACHE_L2X0
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_775420
 
@@ -103,7 +103,7 @@ config ARCH_OMAP2PLUS_TYPICAL
 	select I2C_OMAP
 	select MENELAUS if ARCH_OMAP2
 	select NEON if CPU_V7
-	select PM_RUNTIME
+	select PM
 	select REGULATOR
 	select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
 	select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 4fc8383..a1bd6af 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -361,7 +361,7 @@ static void __init omap_hwmod_init_postsetup(void)
 	u8 postsetup_state;
 
 	/* Set the default postsetup state for all hwmods */
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	postsetup_state = _HWMOD_STATE_IDLE;
 #else
 	postsetup_state = _HWMOD_STATE_ENABLED;
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 8c58b71..be9541e 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -588,7 +588,7 @@ odbs_exit:
 	return ERR_PTR(ret);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int _od_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55..8127e45 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4b..0b7dfdb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a5..14a74f1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 
 	if (!icache_is_aliasing()) {		/* PIPT */
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 7679469..9535bd5 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	return 0;
 }
 
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	return -EINVAL;
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-	int phys_target = kvm_target_cpu();
-
-	if (init->target != phys_target)
-		return -EINVAL;
-
-	vcpu->arch.target = phys_target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (init->features[i / 32] & (1 << (i % 32))) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b0a608d..b964c66 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -30,8 +30,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -187,13 +186,14 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
 
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -369,11 +369,10 @@ copy_exception_bytes:
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -521,3 +520,4 @@ __do_clear_user (void __user *pto, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 15a9ed1..4fc16b4 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -108,6 +108,7 @@ config ETRAX_AXISFLASHMAP
 	select MTD_JEDECPROBE
 	select MTD_BLOCK
 	select MTD_COMPLEX_MAPPINGS
+	select MTD_MTDRAM
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use
 	  flash memories.  If unsure, say Y.
@@ -358,13 +359,6 @@ config ETRAX_SPI_MMC
 	default MMC
 	select SPI
 	select MMC_SPI
-	select ETRAX_SPI_MMC_BOARD
-
-# For the parts that can't be a module (due to restrictions in
-# framework elsewhere).
-config ETRAX_SPI_MMC_BOARD
-       boolean
-       default n
 
 # While the board info is MMC_SPI only, the drivers are written to be
 # independent of MMC_SPI, so we'll keep SPI non-dependent on the
diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
index 39aa3c1..15fbfef 100644
--- a/arch/cris/arch-v32/drivers/Makefile
+++ b/arch/cris/arch-v32/drivers/Makefile
@@ -10,4 +10,3 @@ obj-$(CONFIG_ETRAX_IOP_FW_LOAD)         += iop_fw_load.o
 obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 obj-$(CONFIG_PCI)			+= pci/
-obj-$(CONFIG_ETRAX_SPI_MMC_BOARD)	+= board_mmcspi.o
diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h
index c073cf4..d9cc856 100644
--- a/arch/cris/arch-v32/drivers/i2c.h
+++ b/arch/cris/arch-v32/drivers/i2c.h
@@ -2,7 +2,6 @@
 #include <linux/init.h>
 
 /* High level I2C actions */
-int __init i2c_init(void);
 int i2c_write(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_read(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue);
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 5a14913..08a313f 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -1,8 +1,7 @@
 /*
- * Simple synchronous serial port driver for ETRAX FS and Artpec-3.
- *
- * Copyright (c) 2005 Axis Communications AB
+ * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3.
  *
+ * Copyright (c) 2005, 2008 Axis Communications AB
  * Author: Mikael Starvik
  *
  */
@@ -16,16 +15,17 @@
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
 #include <linux/wait.h>
 
 #include <asm/io.h>
-#include <dma.h>
+#include <mach/dma.h>
 #include <pinmux.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/sser_defs.h>
+#include <hwregs/timer_defs.h>
 #include <hwregs/dma_defs.h>
 #include <hwregs/dma.h>
 #include <hwregs/intr_vect_defs.h>
@@ -59,22 +59,23 @@
 /* the rest of the data pointed out by Descr1 and set readp to the start */
 /* of Descr2                                                             */
 
-#define SYNC_SERIAL_MAJOR 125
-
 /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */
 /* words can be handled */
-#define IN_BUFFER_SIZE 12288
-#define IN_DESCR_SIZE 256
-#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE)
+#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE
+#define NBR_IN_DESCR (8*6)
+#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR)
 
-#define OUT_BUFFER_SIZE 1024*8
 #define NBR_OUT_DESCR 8
+#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR)
 
 #define DEFAULT_FRAME_RATE 0
 #define DEFAULT_WORD_RATE 7
 
+/* To be removed when we move to pure udev. */
+#define SYNC_SERIAL_MAJOR 125
+
 /* NOTE: Enabling some debug will likely cause overrun or underrun,
- * especially if manual mode is use.
+ * especially if manual mode is used.
  */
 #define DEBUG(x)
 #define DEBUGREAD(x)
@@ -85,11 +86,28 @@
 #define DEBUGTRDMA(x)
 #define DEBUGOUTBUF(x)
 
-typedef struct sync_port
-{
-	reg_scope_instances regi_sser;
-	reg_scope_instances regi_dmain;
-	reg_scope_instances regi_dmaout;
+enum syncser_irq_setup {
+	no_irq_setup = 0,
+	dma_irq_setup = 1,
+	manual_irq_setup = 2,
+};
+
+struct sync_port {
+	unsigned long regi_sser;
+	unsigned long regi_dmain;
+	unsigned long regi_dmaout;
+
+	/* Interrupt vectors. */
+	unsigned long dma_in_intr_vect; /* Used for DMA in. */
+	unsigned long dma_out_intr_vect; /* Used for DMA out. */
+	unsigned long syncser_intr_vect; /* Used when no DMA. */
+
+	/* DMA number for in and out. */
+	unsigned int dma_in_nbr;
+	unsigned int dma_out_nbr;
+
+	/* DMA owner. */
+	enum dma_owner req_dma;
 
 	char started; /* 1 if port has been started */
 	char port_nbr; /* Port 0 or 1 */
@@ -99,22 +117,29 @@ typedef struct sync_port
 	char use_dma;  /* 1 if port uses dma */
 	char tr_running;
 
-	char init_irqs;
+	enum syncser_irq_setup init_irqs;
 	int output;
 	int input;
 
 	/* Next byte to be read by application */
-	volatile unsigned char *volatile readp;
+	unsigned char *readp;
 	/* Next byte to be written by etrax */
-	volatile unsigned char *volatile writep;
+	unsigned char *writep;
 
 	unsigned int in_buffer_size;
+	unsigned int in_buffer_len;
 	unsigned int inbufchunk;
-	unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32)));
-	unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	struct dma_descr_data* next_rx_desc;
-	struct dma_descr_data* prev_rx_desc;
+	/* Data buffers for in and output. */
+	unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32);
+	unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32);
+	unsigned char flip[IN_BUFFER_SIZE] __aligned(32);
+	struct timespec timestamp[NBR_IN_DESCR];
+	struct dma_descr_data *next_rx_desc;
+	struct dma_descr_data *prev_rx_desc;
+
+	struct timeval last_timestamp;
+	int read_ts_idx;
+	int write_ts_idx;
 
 	/* Pointer to the first available descriptor in the ring,
 	 * unless active_tr_descr == catch_tr_descr and a dma
@@ -135,114 +160,138 @@ typedef struct sync_port
 	/* Number of bytes currently locked for being read by DMA */
 	int out_buf_count;
 
-	dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16)));
-	dma_descr_context in_context __attribute__ ((__aligned__(32)));
-	dma_descr_data out_descr[NBR_OUT_DESCR]
-		__attribute__ ((__aligned__(16)));
-	dma_descr_context out_context __attribute__ ((__aligned__(32)));
+	dma_descr_context in_context __aligned(32);
+	dma_descr_context out_context __aligned(32);
+	dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16);
+	dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16);
+
 	wait_queue_head_t out_wait_q;
 	wait_queue_head_t in_wait_q;
 
 	spinlock_t lock;
-} sync_port;
+};
 
 static DEFINE_MUTEX(sync_serial_mutex);
 static int etrax_sync_serial_init(void);
 static void initialize_port(int portnbr);
 static inline int sync_data_avail(struct sync_port *port);
 
-static int sync_serial_open(struct inode *, struct file*);
-static int sync_serial_release(struct inode*, struct file*);
+static int sync_serial_open(struct inode *, struct file *);
+static int sync_serial_release(struct inode *, struct file *);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *,
-			     unsigned int cmd, unsigned long arg);
-static ssize_t sync_serial_write(struct file * file, const char * buf,
+static long sync_serial_ioctl(struct file *file,
+			      unsigned int cmd, unsigned long arg);
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg);
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos);
-static ssize_t sync_serial_read(struct file *file, char *buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos);
 
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
+#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
+	(defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)))
 #define SYNC_SER_DMA
+#else
+#define SYNC_SER_MANUAL
 #endif
 
-static void send_word(sync_port* port);
-static void start_dma_out(struct sync_port *port, const char *data, int count);
-static void start_dma_in(sync_port* port);
 #ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count);
+static void start_dma_in(struct sync_port *port);
 static irqreturn_t tr_interrupt(int irq, void *dev_id);
 static irqreturn_t rx_interrupt(int irq, void *dev_id);
 #endif
-
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
-#define SYNC_SER_MANUAL
-#endif
 #ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port);
 static irqreturn_t manual_interrupt(int irq, void *dev_id);
 #endif
 
-#ifdef CONFIG_ETRAXFS	/* ETRAX FS */
-#define OUT_DMA_NBR 4
-#define IN_DMA_NBR 5
-#define PINMUX_SSER pinmux_sser0
-#define SYNCSER_INST regi_sser0
-#define SYNCSER_INTR_VECT SSER0_INTR_VECT
-#define OUT_DMA_INST regi_dma4
-#define IN_DMA_INST regi_dma5
-#define DMA_OUT_INTR_VECT DMA4_INTR_VECT
-#define DMA_IN_INTR_VECT DMA5_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser0
-#else			/* Artpec-3 */
-#define OUT_DMA_NBR 6
-#define IN_DMA_NBR 7
-#define PINMUX_SSER pinmux_sser
-#define SYNCSER_INST regi_sser
-#define SYNCSER_INTR_VECT SSER_INTR_VECT
-#define OUT_DMA_INST regi_dma6
-#define IN_DMA_INST regi_dma7
-#define DMA_OUT_INTR_VECT DMA6_INTR_VECT
-#define DMA_IN_INTR_VECT DMA7_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser
+#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed
+#define artpec_request_dma crisv32_request_dma
+#define artpec_free_dma crisv32_free_dma
+
+#ifdef CONFIG_ETRAXFS
+/* ETRAX FS */
+#define DMA_OUT_NBR0		SYNC_SER0_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER0_RX_DMA_NBR
+#define DMA_OUT_NBR1		SYNC_SER1_TX_DMA_NBR
+#define DMA_IN_NBR1		SYNC_SER1_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser0
+#define PINMUX_SSER1		pinmux_sser1
+#define SYNCSER_INST0		regi_sser0
+#define SYNCSER_INST1		regi_sser1
+#define SYNCSER_INTR_VECT0	SSER0_INTR_VECT
+#define SYNCSER_INTR_VECT1	SSER1_INTR_VECT
+#define OUT_DMA_INST0		regi_dma4
+#define IN_DMA_INST0		regi_dma5
+#define DMA_OUT_INTR_VECT0	DMA4_INTR_VECT
+#define DMA_OUT_INTR_VECT1	DMA7_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA5_INTR_VECT
+#define DMA_IN_INTR_VECT1	DMA6_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser0
+#define REQ_DMA_SYNCSER1	dma_sser1
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
+#define PORT1_DMA 1
+#else
+#define PORT1_DMA 0
+#endif
+#elif defined(CONFIG_CRIS_MACH_ARTPEC3)
+/* ARTPEC-3 */
+#define DMA_OUT_NBR0		SYNC_SER_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser
+#define SYNCSER_INST0		regi_sser
+#define SYNCSER_INTR_VECT0	SSER_INTR_VECT
+#define OUT_DMA_INST0		regi_dma6
+#define IN_DMA_INST0		regi_dma7
+#define DMA_OUT_INTR_VECT0	DMA6_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA7_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser
+#define REQ_DMA_SYNCSER1	dma_sser
 #endif
 
-/* The ports */
-static struct sync_port ports[]=
-{
-	{
-		.regi_sser             = SYNCSER_INST,
-		.regi_dmaout           = OUT_DMA_INST,
-		.regi_dmain            = IN_DMA_INST,
 #if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
-                .use_dma               = 1,
+#define PORT0_DMA 1
 #else
-                .use_dma               = 0,
+#define PORT0_DMA 0
 #endif
-	}
-#ifdef CONFIG_ETRAXFS
-	,
 
+/* The ports */
+static struct sync_port ports[] = {
 	{
-		.regi_sser             = regi_sser1,
-		.regi_dmaout           = regi_dma6,
-		.regi_dmain            = regi_dma7,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST0,
+		.regi_dmaout		= OUT_DMA_INST0,
+		.regi_dmain		= IN_DMA_INST0,
+		.use_dma		= PORT0_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT0,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT0,
+		.dma_in_nbr		= DMA_IN_NBR0,
+		.dma_out_nbr		= DMA_OUT_NBR0,
+		.req_dma		= REQ_DMA_SYNCSER0,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT0,
+	},
+#ifdef CONFIG_ETRAXFS
+	{
+		.regi_sser		= SYNCSER_INST1,
+		.regi_dmaout		= regi_dma6,
+		.regi_dmain		= regi_dma7,
+		.use_dma		= PORT1_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT1,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT1,
+		.dma_in_nbr		= DMA_IN_NBR1,
+		.dma_out_nbr		= DMA_OUT_NBR1,
+		.req_dma		= REQ_DMA_SYNCSER1,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT1,
+	},
 #endif
 };
 
 #define NBR_PORTS ARRAY_SIZE(ports)
 
-static const struct file_operations sync_serial_fops = {
+static const struct file_operations syncser_fops = {
 	.owner		= THIS_MODULE,
 	.write		= sync_serial_write,
 	.read		= sync_serial_read,
@@ -253,61 +302,40 @@ static const struct file_operations sync_serial_fops = {
 	.llseek		= noop_llseek,
 };
 
-static int __init etrax_sync_serial_init(void)
-{
-	ports[0].enabled = 0;
-#ifdef CONFIG_ETRAXFS
-	ports[1].enabled = 0;
-#endif
-	if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial",
-			&sync_serial_fops) < 0) {
-		printk(KERN_WARNING
-			"Unable to get major for synchronous serial port\n");
-		return -EBUSY;
-	}
-
-	/* Initialize Ports */
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
-	if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[0].enabled = 1;
-	initialize_port(0);
-#endif
-
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
-	if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[1].enabled = 1;
-	initialize_port(1);
-#endif
+static dev_t syncser_first;
+static int minor_count = NBR_PORTS;
+#define SYNCSER_NAME "syncser"
+static struct cdev *syncser_cdev;
+static struct class *syncser_class;
 
-#ifdef CONFIG_ETRAXFS
-	printk(KERN_INFO "ETRAX FS synchronous serial port driver\n");
-#else
-	printk(KERN_INFO "Artpec-3 synchronous serial port driver\n");
-#endif
-	return 0;
+static void sync_serial_start_port(struct sync_port *port)
+{
+	reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	reg_sser_rw_tr_cfg tr_cfg =
+		REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_rec_cfg rec_cfg =
+		REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	cfg.en = regk_sser_yes;
+	tr_cfg.tr_en = regk_sser_yes;
+	rec_cfg.rec_en = regk_sser_yes;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	port->started = 1;
 }
 
 static void __init initialize_port(int portnbr)
 {
-	int __attribute__((unused)) i;
 	struct sync_port *port = &ports[portnbr];
-	reg_sser_rw_cfg cfg = {0};
-	reg_sser_rw_frm_cfg frm_cfg = {0};
-	reg_sser_rw_tr_cfg tr_cfg = {0};
-	reg_sser_rw_rec_cfg rec_cfg = {0};
+	reg_sser_rw_cfg cfg = { 0 };
+	reg_sser_rw_frm_cfg frm_cfg = { 0 };
+	reg_sser_rw_tr_cfg tr_cfg = { 0 };
+	reg_sser_rw_rec_cfg rec_cfg = { 0 };
 
-	DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr));
+	DEBUG(pr_info("Init sync serial port %d\n", portnbr));
 
 	port->port_nbr = portnbr;
-	port->init_irqs = 1;
+	port->init_irqs = no_irq_setup;
 
 	port->out_rd_ptr = port->out_buffer;
 	port->out_buf_count = 0;
@@ -318,10 +346,11 @@ static void __init initialize_port(int portnbr)
 	port->readp = port->flip;
 	port->writep = port->flip;
 	port->in_buffer_size = IN_BUFFER_SIZE;
+	port->in_buffer_len = 0;
 	port->inbufchunk = IN_DESCR_SIZE;
-	port->next_rx_desc = &port->in_descr[0];
-	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1];
-	port->prev_rx_desc->eol = 1;
+
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
 	init_waitqueue_head(&port->out_wait_q);
 	init_waitqueue_head(&port->in_wait_q);
@@ -368,14 +397,18 @@ static void __init initialize_port(int portnbr)
 	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
 
 #ifdef SYNC_SER_DMA
-	/* Setup the descriptor ring for dma out/transmit. */
-	for (i = 0; i < NBR_OUT_DESCR; i++) {
-		port->out_descr[i].wait = 0;
-		port->out_descr[i].intr = 1;
-		port->out_descr[i].eol = 0;
-		port->out_descr[i].out_eop = 0;
-		port->out_descr[i].next =
-			(dma_descr_data *)virt_to_phys(&port->out_descr[i+1]);
+	{
+		int i;
+		/* Setup the descriptor ring for dma out/transmit. */
+		for (i = 0; i < NBR_OUT_DESCR; i++) {
+			dma_descr_data *descr = &port->out_descr[i];
+			descr->wait = 0;
+			descr->intr = 1;
+			descr->eol = 0;
+			descr->out_eop = 0;
+			/* descr is entry i; link it to entry i + 1 */
+			descr->next = (dma_descr_data *)virt_to_phys(descr + 1);
+		}
 	}
 
 	/* Create a ring from the list. */
@@ -391,201 +424,116 @@ static void __init initialize_port(int portnbr)
 
 static inline int sync_data_avail(struct sync_port *port)
 {
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----      -    -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->in_buffer_size - (start - end);
-	return avail;
-}
-
-static inline int sync_data_avail_to_end(struct sync_port *port)
-{
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----           -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-	return avail;
+	return port->in_buffer_len;
 }
 
 static int sync_serial_open(struct inode *inode, struct file *file)
 {
+	int ret = 0;
 	int dev = iminor(inode);
-	int ret = -EBUSY;
-	sync_port *port;
-	reg_dma_rw_cfg cfg = {.en = regk_dma_yes};
-	reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes};
+	struct sync_port *port;
+#ifdef SYNC_SER_DMA
+	reg_dma_rw_cfg cfg = { .en = regk_dma_yes };
+	reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes };
+#endif
 
-	mutex_lock(&sync_serial_mutex);
-	DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev));
+	DEBUG(pr_debug("Open sync serial port %d\n", dev));
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev));
-		ret = -ENODEV;
-		goto out;
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
 	}
 	port = &ports[dev];
 	/* Allow open this device twice (assuming one reader and one writer) */
-	if (port->busy == 2)
-	{
-		DEBUG(printk(KERN_DEBUG "Device is busy.. \n"));
-		goto out;
+	if (port->busy == 2) {
+		DEBUG(pr_info("syncser%d is busy\n", dev));
+		return -EBUSY;
 	}
 
+	mutex_lock(&sync_serial_mutex);
 
-	if (port->init_irqs) {
-		if (port->use_dma) {
-			if (port == &ports[0]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA_OUT_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 0 dma tr",
-						&ports[0])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (request_irq(DMA_IN_INTR_VECT,
-						rx_interrupt,
-						0,
-						"synchronous serial 1 dma rx",
-						&ports[0])) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(OUT_DMA_NBR,
-						"synchronous serial 0 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(IN_DMA_NBR,
-						"synchronous serial 0 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					crisv32_free_dma(OUT_DMA_NBR);
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
+	/* Clear any stale data left in the flip buffer */
+	port->readp = port->writep = port->flip;
+	port->in_buffer_len = 0;
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
+
+	if (port->init_irqs != no_irq_setup) {
+		/* Init only on first call. */
+		port->busy++;
+		mutex_unlock(&sync_serial_mutex);
+		return 0;
+	}
+	if (port->use_dma) {
 #ifdef SYNC_SER_DMA
-				if (request_irq(DMA6_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 1 dma tr",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ");
-					goto out;
-				} else if (request_irq(DMA7_INTR_VECT,
-						       rx_interrupt,
-						       0,
-						       "synchronous serial 1 dma rx",
-						       &ports[1])) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_TX_DMA_NBR,
-						"synchronous serial 1 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_RX_DMA_NBR,
-						"synchronous serial 3 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					crisv32_free_dma(SYNC_SER1_TX_DMA_NBR);
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
+		const char *tmp;
+		DEBUG(pr_info("Using DMA for syncser%d\n", dev));
+
+		tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx";
+		if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0,
+				tmp, port)) {
+			pr_err("Can't alloc syncser%d TX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_out_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d TX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		tmp = dev == 0 ? "syncser0 rx" : "syncser1 rx";
+		if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0,
+				tmp, port)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_in_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		/* Enable DMAs */
+		REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
+		REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
+		/* Enable DMA IRQs */
+		REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
+		REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
+		/* Set up wordsize = 1 for DMAs. */
+		DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1);
+		DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1);
+
+		start_dma_in(port);
+		port->init_irqs = dma_irq_setup;
 #endif
-                        /* Enable DMAs */
-			REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
-			REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
-			/* Enable DMA IRQs */
-			REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
-			REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
-			/* Set up wordsize = 1 for DMAs. */
-			DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1);
-			DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1);
-
-			start_dma_in(port);
-			port->init_irqs = 0;
-		} else { /* !port->use_dma */
+	} else { /* !port->use_dma */
 #ifdef SYNC_SER_MANUAL
-			if (port == &ports[0]) {
-				if (request_irq(SYNCSER_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[0])) {
-					printk("Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-				if (request_irq(SSER1_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#endif
-			port->init_irqs = 0;
+		const char *tmp = dev == 0 ? "syncser0 manual irq" :
+					     "syncser1 manual irq";
+		if (request_irq(port->syncser_intr_vect, manual_interrupt,
+				0, tmp, port)) {
+			pr_err("Can't alloc syncser%d manual irq",
+				dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		port->init_irqs = manual_irq_setup;
 #else
-			panic("sync_serial: Manual mode not supported.\n");
+		panic("sync_serial: Manual mode not supported\n");
 #endif /* SYNC_SER_MANUAL */
-		}
-
-	} /* port->init_irqs */
-
+	}
 	port->busy++;
 	ret = 0;
-out:
+
+unlock_and_exit:
 	mutex_unlock(&sync_serial_mutex);
 	return ret;
 }
@@ -593,18 +541,17 @@ out:
 static int sync_serial_release(struct inode *inode, struct file *file)
 {
 	int dev = iminor(inode);
-	sync_port *port;
+	struct sync_port *port;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
 	if (port->busy)
 		port->busy--;
 	if (!port->busy)
-          /* XXX */ ;
+		/* XXX */;
 	return 0;
 }
 
@@ -612,21 +559,15 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 {
 	int dev = iminor(file_inode(file));
 	unsigned int mask = 0;
-	sync_port *port;
-	DEBUGPOLL( static unsigned int prev_mask = 0; );
+	struct sync_port *port;
+	DEBUGPOLL(
+	static unsigned int prev_mask;
+	);
 
 	port = &ports[dev];
 
-	if (!port->started) {
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg =
-			REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		rec_cfg.rec_en = port->input;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
+	if (!port->started)
+		sync_serial_start_port(port);
 
 	poll_wait(file, &port->out_wait_q, wait);
 	poll_wait(file, &port->in_wait_q, wait);
@@ -645,33 +586,175 @@ static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 	if (port->input && sync_data_avail(port) >= port->inbufchunk)
 		mask |= POLLIN | POLLRDNORM;
 
-	DEBUGPOLL(if (mask != prev_mask)
-	      printk("sync_serial_poll: mask 0x%08X %s %s\n", mask,
-		     mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":"");
-	      prev_mask = mask;
-	      );
+	DEBUGPOLL(
+	if (mask != prev_mask)
+		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
+			mask,
+			mask & POLLOUT ? "POLLOUT" : "",
+			mask & POLLIN ? "POLLIN" : "");
+		prev_mask = mask;
+	);
 	return mask;
 }
 
-static int sync_serial_ioctl(struct file *file,
-		  unsigned int cmd, unsigned long arg)
+/*
+ * Common read path for read() and the SSP_INPUT ioctl: copies at most
+ * 'count' bytes to 'buf', never past the end of the flip buffer. Blocks
+ * unless O_NONBLOCK is set. If 'ts' is non-NULL it receives the timestamp
+ * of the first returned chunk. Returns bytes copied or a negative errno.
+ */
+static ssize_t __sync_serial_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos,
+				  struct timespec *ts)
+{
+	unsigned long flags;
+	int dev = iminor(file_inode(file));
+	int avail;
+	struct sync_port *port;
+	unsigned char *start;
+	unsigned char *end;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	/* Snapshot the buffer pointers under the lock; the irq moves them. */
+	spin_lock_irqsave(&port->lock, flags);
+	start = port->readp;
+	end = port->writep;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while ((start == end) && !port->in_buffer_len) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		/* Test live state: the snapshots cannot change while asleep */
+		wait_event_interruptible(port->in_wait_q, port->full ||
+			port->in_buffer_len || port->readp != port->writep);
+		if (signal_pending(current))
+			return -EINTR;
+
+		spin_lock_irqsave(&port->lock, flags);
+		start = port->readp;
+		end = port->writep;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n", dev, count,
+			  start - port->flip, end - port->flip,
+			  port->in_buffer_size));
+
+	/* Lazy read, never return wrapped data. */
+	if (end > start)
+		avail = end - start;
+	else
+		avail = port->flip + port->in_buffer_size - start;
+
+	count = count > avail ? avail : count;
+	if (copy_to_user(buf, start, count))
+		return -EFAULT;
+
+	/* If a timestamp is requested, copy that of the first returned byte.
+	 * N.B: Applications that request timestamps MUST read in multiples
+	 * of IN_DESCR_SIZE, otherwise timestamps and data get misaligned.
+	 */
+	if (ts != NULL) {
+		int idx = port->read_ts_idx;
+		memcpy(ts, &port->timestamp[idx], sizeof(struct timespec));
+		port->read_ts_idx += count / IN_DESCR_SIZE;
+		if (port->read_ts_idx >= NBR_IN_DESCR)
+			port->read_ts_idx = 0;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->readp += count;
+	/* Check for wrap */
+	if (port->readp >= port->flip + port->in_buffer_size)
+		port->readp = port->flip;
+	port->in_buffer_len -= count;
+	port->full = 0;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	DEBUGREAD(pr_info("r %d\n", count));
+
+	return count;
+}
+
+/* SSP_INPUT ioctl helper: performs a timestamped read described by the
+ * user-space struct ssp_request at 'arg' (members buf, len and ts used
+ * here). Returns the number of bytes read or a negative errno.
+ */
+static ssize_t sync_serial_input(struct file *file, unsigned long arg)
+{
+	struct ssp_request req;
+	int count;
+	int ret;
+
+	/* Copy the request structure from user-mode. */
+	ret = copy_from_user(&req, (struct ssp_request __user *)arg,
+		sizeof(struct ssp_request));
+	if (ret) {
+		DEBUG(pr_info("sync_serial_input copy from user failed\n"));
+		return -EFAULT;
+	}
+
+	/* To get the timestamps aligned, 'len' must be a multiple of
+	 * IN_DESCR_SIZE. NOTE(review): -EINVAL may fit better than -EFAULT.
+	 */
+	if ((req.len % IN_DESCR_SIZE) != 0) {
+		DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n",
+			      req.len, IN_DESCR_SIZE));
+		return -EFAULT;
+	}
+
+	/* Do the actual read; req.buf is a pointer to user space. */
+	count = __sync_serial_read(file, req.buf, req.len,
+				   NULL, &req.ts);
+	if (count < 0) {
+		DEBUG(pr_info("sync_serial_input read failed\n"));
+		return count;
+	}
+
+	/* Copy the request, now holding the timestamp, back to user-mode. */
+	ret = copy_to_user((struct ssp_request __user *)arg, &req,
+		sizeof(struct ssp_request));
+	if (ret) {
+		DEBUG(pr_info("syncser input copy2user failed\n"));
+		return -EFAULT;
+	}
+
+	/* Return the number of bytes read. */
+	return count;
+}
+
+
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg)
 {
 	int return_val = 0;
 	int dma_w_size = regk_dma_set_w_size1;
 	int dev = iminor(file_inode(file));
-	sync_port *port;
+	struct sync_port *port;
 	reg_sser_rw_tr_cfg tr_cfg;
 	reg_sser_rw_rec_cfg rec_cfg;
 	reg_sser_rw_frm_cfg frm_cfg;
 	reg_sser_rw_cfg gen_cfg;
 	reg_sser_rw_intr_mask intr_mask;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -1;
 	}
-        port = &ports[dev];
+
+	if (cmd == SSP_INPUT)
+		return sync_serial_input(file, arg);
+
+	port = &ports[dev];
 	spin_lock_irq(&port->lock);
 
 	tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
@@ -680,11 +763,9 @@ static int sync_serial_ioctl(struct file *file,
 	gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg);
 	intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 
-	switch(cmd)
-	{
+	switch (cmd) {
 	case SSP_SPEED:
-		if (GET_SPEED(arg) == CODEC)
-		{
+		if (GET_SPEED(arg) == CODEC) {
 			unsigned int freq;
 
 			gen_cfg.base_freq = regk_sser_f32;
@@ -701,15 +782,25 @@ static int sync_serial_ioctl(struct file *file,
 			case FREQ_256kHz:
 				gen_cfg.clk_div = 125 *
 					(1 << (freq - FREQ_256kHz)) - 1;
-			break;
+				break;
 			case FREQ_512kHz:
 				gen_cfg.clk_div = 62;
-			break;
+				break;
 			case FREQ_1MHz:
 			case FREQ_2MHz:
 			case FREQ_4MHz:
 				gen_cfg.clk_div = 8 * (1 << freq) - 1;
-			break;
+				break;
+			}
+		} else if (GET_SPEED(arg) == CODEC_f32768) {
+			gen_cfg.base_freq = regk_sser_f32_768;
+			switch (GET_FREQ(arg)) {
+			case FREQ_4096kHz:
+				gen_cfg.clk_div = 7;
+				break;
+			default:
+				spin_unlock_irq(&port->lock);
+				return -EINVAL;
 			}
 		} else {
 			gen_cfg.base_freq = regk_sser_f29_493;
@@ -767,62 +858,64 @@ static int sync_serial_ioctl(struct file *file,
 
 		break;
 	case SSP_MODE:
-		switch(arg)
-		{
-			case MASTER_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.out_on = regk_sser_tr;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			default:
-				spin_unlock_irq(&port->lock);
-				return -EINVAL;
+		switch (arg) {
+		case MASTER_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.out_on = regk_sser_tr;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		default:
+			spin_unlock_irq(&port->lock);
+			return -EINVAL;
 		}
-		if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT))
+		if (!port->use_dma || arg == MASTER_OUTPUT ||
+				arg == SLAVE_OUTPUT)
 			intr_mask.rdav = regk_sser_yes;
 		break;
 	case SSP_FRAME_SYNC:
 		if (arg & NORMAL_SYNC) {
 			frm_cfg.rec_delay = 1;
 			frm_cfg.tr_delay = 1;
-		}
-		else if (arg & EARLY_SYNC)
+		} else if (arg & EARLY_SYNC)
 			frm_cfg.rec_delay = frm_cfg.tr_delay = 0;
-		else if (arg & SECOND_WORD_SYNC) {
+		else if (arg & LATE_SYNC) {
+			frm_cfg.tr_delay = 2;
+			frm_cfg.rec_delay = 2;
+		} else if (arg & SECOND_WORD_SYNC) {
 			frm_cfg.rec_delay = 7;
 			frm_cfg.tr_delay = 1;
 		}
@@ -914,15 +1007,12 @@ static int sync_serial_ioctl(struct file *file,
 		frm_cfg.type = regk_sser_level;
 		frm_cfg.tr_delay = 1;
 		frm_cfg.level = regk_sser_neg_lo;
-		if (arg & SPI_SLAVE)
-		{
+		if (arg & SPI_SLAVE) {
 			rec_cfg.clk_pol = regk_sser_neg;
 			gen_cfg.clk_dir = regk_sser_in;
 			port->input = 1;
 			port->output = 0;
-		}
-		else
-		{
+		} else {
 			gen_cfg.out_clk_pol = regk_sser_pos;
 			port->input = 0;
 			port->output = 1;
@@ -965,19 +1055,19 @@ static int sync_serial_ioctl(struct file *file,
 }
 
 static long sync_serial_ioctl(struct file *file,
-                             unsigned int cmd, unsigned long arg)
+		unsigned int cmd, unsigned long arg)
 {
-       long ret;
+	long ret;
 
-       mutex_lock(&sync_serial_mutex);
-       ret = sync_serial_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&sync_serial_mutex);
+	mutex_lock(&sync_serial_mutex);
+	ret = sync_serial_ioctl_unlocked(file, cmd, arg);
+	mutex_unlock(&sync_serial_mutex);
 
-       return ret;
+	return ret;
 }
 
 /* NOTE: sync_serial_write does not support concurrency */
-static ssize_t sync_serial_write(struct file *file, const char *buf,
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	int dev = iminor(file_inode(file));
@@ -993,7 +1083,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	unsigned char *buf_stop_ptr; /* Last byte + 1 */
 
 	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
-		DEBUG(printk("Invalid minor %d\n", dev));
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
@@ -1006,9 +1096,9 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	 * |_________|___________________|________________________|
 	 *           ^ rd_ptr            ^ wr_ptr
 	 */
-	DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n",
-			  port->port_nbr, count, port->active_tr_descr,
-			  port->catch_tr_descr));
+	DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n",
+			   port->port_nbr, count, port->active_tr_descr,
+			   port->catch_tr_descr));
 
 	/* Read variables that may be updated by interrupts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -1020,7 +1110,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (port->tr_running &&
 	    ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) ||
 	     out_buf_count >= OUT_BUFFER_SIZE)) {
-		DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev));
+		DEBUGWRITE(pr_info("sser%d full\n", dev));
 		return -EAGAIN;
 	}
 
@@ -1043,15 +1133,16 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (copy_from_user(wr_ptr, buf, trunc_count))
 		return -EFAULT;
 
-	DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d     %p %p %p\n",
-			   out_buf_count, trunc_count,
-			   port->out_buf_count, port->out_buffer,
-			   wr_ptr, buf_stop_ptr));
+	DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d     %p %p %p\n",
+			    out_buf_count, trunc_count,
+			    port->out_buf_count, port->out_buffer,
+			    wr_ptr, buf_stop_ptr));
 
 	/* Make sure transmitter/receiver is running */
 	if (!port->started) {
 		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
+		reg_sser_rw_rec_cfg rec_cfg =
+			REG_RD(sser, port->regi_sser, rw_rec_cfg);
 		cfg.en = regk_sser_yes;
 		rec_cfg.rec_en = port->input;
 		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
@@ -1068,8 +1159,11 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	spin_lock_irqsave(&port->lock, flags);
 	port->out_buf_count += trunc_count;
 	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
 		start_dma_out(port, wr_ptr, trunc_count);
+#endif
 	} else if (!port->tr_running) {
+#ifdef SYNC_SER_MANUAL
 		reg_sser_rw_intr_mask intr_mask;
 		intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 		/* Start sender by writing data */
@@ -1077,14 +1171,15 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 		/* and enable transmitter ready IRQ */
 		intr_mask.trdy = 1;
 		REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+#endif
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	/* Exit if non blocking */
 	if (file->f_flags & O_NONBLOCK) {
-		DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu  %08x\n",
-				  port->port_nbr, trunc_count,
-				  REG_RD_INT(dma, port->regi_dmaout, r_intr)));
+		DEBUGWRITE(pr_info("w d%d c %u  %08x\n",
+				   port->port_nbr, trunc_count,
+				   REG_RD_INT(dma, port->regi_dmaout, r_intr)));
 		return trunc_count;
 	}
 
@@ -1094,105 +1189,32 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 	if (signal_pending(current))
 		return -EINTR;
 
-	DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n",
-			  port->port_nbr, trunc_count));
+	DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count));
 	return trunc_count;
 }
 
-static ssize_t sync_serial_read(struct file * file, char * buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = iminor(file_inode(file));
-	int avail;
-	sync_port *port;
-	unsigned char* start;
-	unsigned char* end;
-	unsigned long flags;
-
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
-		return -ENODEV;
-	}
-	port = &ports[dev];
-
-	DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size));
-
-	if (!port->started)
-	{
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		tr_cfg.tr_en = regk_sser_yes;
-		rec_cfg.rec_en = regk_sser_yes;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
-
-	/* Calculate number of available bytes */
-	/* Save pointers to avoid that they are modified by interrupt */
-	spin_lock_irqsave(&port->lock, flags);
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	spin_unlock_irqrestore(&port->lock, flags);
-	while ((start == end) && !port->full) /* No data */
-	{
-		DEBUGREAD(printk(KERN_DEBUG "&"));
-		if (file->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		wait_event_interruptible(port->in_wait_q,
-					 !(start == end && !port->full));
-		if (signal_pending(current))
-			return -EINTR;
-
-		spin_lock_irqsave(&port->lock, flags);
-		start = (unsigned char*)port->readp; /* cast away volatile */
-		end = (unsigned char*)port->writep;  /* cast away volatile */
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	/* Lazy read, never return wrapped data. */
-	if (port->full)
-		avail = port->in_buffer_size;
-	else if (end > start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-
-	count = count > avail ? avail : count;
-	if (copy_to_user(buf, start, count))
-		return -EFAULT;
-	/* Disable interrupts while updating readp */
-	spin_lock_irqsave(&port->lock, flags);
-	port->readp += count;
-	if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
-		port->readp = port->flip;
-	port->full = 0;
-	spin_unlock_irqrestore(&port->lock, flags);
-	DEBUGREAD(printk("r %d\n", count));
-	return count;
+	return __sync_serial_read(file, buf, count, ppos, NULL);
 }
 
-static void send_word(sync_port* port)
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port)
 {
 	reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
 	reg_sser_rw_tr_data tr_data =  {0};
 
-	switch(tr_cfg.sample_size)
+	switch (tr_cfg.sample_size) {
+	case 8:
+		port->out_buf_count--;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 12:
 	{
-	 case 8:
-		 port->out_buf_count--;
-		 tr_data.data = *port->out_rd_ptr++;
-		 REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
-		 if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
-			 port->out_rd_ptr = port->out_buffer;
-		 break;
-	 case 12:
-	 {
 		int data = (*port->out_rd_ptr++) << 8;
 		data |= *port->out_rd_ptr++;
 		port->out_buf_count -= 2;
@@ -1200,8 +1222,8 @@ static void send_word(sync_port* port)
 		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
 		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
 			port->out_rd_ptr = port->out_buffer;
+		break;
 	}
-	break;
 	case 16:
 		port->out_buf_count -= 2;
 		tr_data.data = *(unsigned short *)port->out_rd_ptr;
@@ -1233,27 +1255,28 @@ static void send_word(sync_port* port)
 		break;
 	}
 }
+#endif
 
-static void start_dma_out(struct sync_port *port,
-			  const char *data, int count)
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count)
 {
-	port->active_tr_descr->buf = (char *) virt_to_phys((char *) data);
+	port->active_tr_descr->buf = (char *)virt_to_phys((char *)data);
 	port->active_tr_descr->after = port->active_tr_descr->buf + count;
 	port->active_tr_descr->intr = 1;
 
 	port->active_tr_descr->eol = 1;
 	port->prev_tr_descr->eol = 0;
 
-	DEBUGTRDMA(printk(KERN_DEBUG "Inserting eolr:%p eol@:%p\n",
+	DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n",
 		port->prev_tr_descr, port->active_tr_descr));
 	port->prev_tr_descr = port->active_tr_descr;
-	port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next);
+	port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next);
 
 	if (!port->tr_running) {
 		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser,
 			rw_tr_cfg);
 
-		port->out_context.next = 0;
+		port->out_context.next = NULL;
 		port->out_context.saved_data =
 			(dma_descr_data *)virt_to_phys(port->prev_tr_descr);
 		port->out_context.saved_data_buf = port->prev_tr_descr->buf;
@@ -1263,57 +1286,58 @@ static void start_dma_out(struct sync_port *port,
 
 		tr_cfg.tr_en = regk_sser_yes;
 		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma s\n"););
+		DEBUGTRDMA(pr_info(KERN_INFO "dma s\n"););
 	} else {
 		DMA_CONTINUE_DATA(port->regi_dmaout);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma c\n"););
+		DEBUGTRDMA(pr_info("dma c\n"););
 	}
 
 	port->tr_running = 1;
 }
 
-static void start_dma_in(sync_port *port)
+static void start_dma_in(struct sync_port *port)
 {
 	int i;
 	char *buf;
+	unsigned long flags;
+	spin_lock_irqsave(&port->lock, flags);
 	port->writep = port->flip;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	if (port->writep > port->flip + port->in_buffer_size) {
-		panic("Offset too large in sync serial driver\n");
-		return;
-	}
-	buf = (char*)virt_to_phys(port->in_buffer);
+	buf = (char *)virt_to_phys(port->in_buffer);
 	for (i = 0; i < NBR_IN_DESCR; i++) {
 		port->in_descr[i].buf = buf;
 		port->in_descr[i].after = buf + port->inbufchunk;
 		port->in_descr[i].intr = 1;
-		port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]);
+		port->in_descr[i].next =
+			(dma_descr_data *)virt_to_phys(&port->in_descr[i+1]);
 		port->in_descr[i].buf = buf;
 		buf += port->inbufchunk;
 	}
 	/* Link the last descriptor to the first */
-	port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_descr[i-1].next =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_descr[i-1].eol = regk_sser_yes;
 	port->next_rx_desc = &port->in_descr[0];
 	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1];
-	port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_context.saved_data =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_context.saved_data_buf = port->in_descr[0].buf;
 	DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context));
 }
 
-#ifdef SYNC_SER_DMA
 static irqreturn_t tr_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
 	reg_dma_rw_stat stat;
 	int i;
 	int found = 0;
 	int stop_sser = 0;
 
 	for (i = 0; i < NBR_PORTS; i++) {
-		sync_port *port = &ports[i];
-		if (!port->enabled  || !port->use_dma)
+		struct sync_port *port = &ports[i];
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		/* IRQ active for the port? */
@@ -1338,19 +1362,20 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 			int sent;
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t"
-					  "in descr %p (ac: %p)\n",
-					  port->out_buf_count, sent,
-					  port->out_buf_count - sent,
-					  port->catch_tr_descr,
-					  port->active_tr_descr););
+			DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t"
+					   "in descr %p (ac: %p)\n",
+					   port->out_buf_count, sent,
+					   port->out_buf_count - sent,
+					   port->catch_tr_descr,
+					   port->active_tr_descr););
 			port->out_buf_count -= sent;
 			port->catch_tr_descr =
 				phys_to_virt((int) port->catch_tr_descr->next);
 			port->out_rd_ptr =
 				phys_to_virt((int) port->catch_tr_descr->buf);
 		} else {
-			int i, sent;
+			reg_sser_rw_tr_cfg tr_cfg;
+			int j, sent;
 			/* EOL handler.
 			 * Note that if an EOL was encountered during the irq
 			 * locked section of sync_ser_write the DMA will be
@@ -1358,11 +1383,11 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 			 * The remaining descriptors will be traversed by
 			 * the descriptor interrupts as usual.
 			 */
-			i = 0;
+			j = 0;
 			while (!port->catch_tr_descr->eol) {
 				sent = port->catch_tr_descr->after -
 					port->catch_tr_descr->buf;
-				DEBUGOUTBUF(printk(KERN_DEBUG
+				DEBUGOUTBUF(pr_info(
 					"traversing descr %p -%d (%d)\n",
 					port->catch_tr_descr,
 					sent,
@@ -1370,16 +1395,15 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 				port->out_buf_count -= sent;
 				port->catch_tr_descr = phys_to_virt(
 					(int)port->catch_tr_descr->next);
-				i++;
-				if (i >= NBR_OUT_DESCR) {
+				j++;
+				if (j >= NBR_OUT_DESCR) {
 					/* TODO: Reset and recover */
 					panic("sync_serial: missing eol");
 				}
 			}
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGOUTBUF(printk(KERN_DEBUG
-				"eol at descr %p -%d (%d)\n",
+			DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n",
 				port->catch_tr_descr,
 				sent,
 				port->out_buf_count));
@@ -1394,15 +1418,13 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 					OUT_BUFFER_SIZE)
 				port->out_rd_ptr = port->out_buffer;
 
-			reg_sser_rw_tr_cfg tr_cfg =
-				REG_RD(sser, port->regi_sser, rw_tr_cfg);
-			DEBUGTXINT(printk(KERN_DEBUG
+			tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+			DEBUGTXINT(pr_info(
 				"tr_int DMA stop %d, set catch @ %p\n",
 				port->out_buf_count,
 				port->active_tr_descr));
 			if (port->out_buf_count != 0)
-				printk(KERN_CRIT "sync_ser: buffer not "
-					"empty after eol.\n");
+				pr_err("sync_ser: buf not empty after eol\n");
 			port->catch_tr_descr = port->active_tr_descr;
 			port->tr_running = 0;
 			tr_cfg.tr_en = regk_sser_no;
@@ -1414,62 +1436,79 @@ static irqreturn_t tr_interrupt(int irq, void *dev_id)
 	return IRQ_RETVAL(found);
 } /* tr_interrupt */
 
+
+/* Handle one received DMA chunk: store it in the flip buffer unless that
+ * would overrun, then recycle the descriptor and wake any waiting reader.
+ */
+static inline void handle_rx_packet(struct sync_port *port)
+{
+	int idx;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	unsigned long flags;
+
+	DEBUGRXINT(pr_info("!"));
+	spin_lock_irqsave(&port->lock, flags);
+	/* If we overrun the user experience is crap regardless if we
+	 * drop new or old data. It's much easier to get it right when
+	 * dropping new data so let's do that.
+	 */
+	if ((port->writep + port->inbufchunk <=
+	     port->flip + port->in_buffer_size) &&
+	    (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) {
+		memcpy(port->writep,
+		       phys_to_virt((unsigned)port->next_rx_desc->buf),
+		       port->inbufchunk);
+		port->writep += port->inbufchunk;
+		if (port->writep >= port->flip + port->in_buffer_size)
+			port->writep = port->flip;
+		/* Timestamp the new data chunk. */
+		if (port->write_ts_idx == NBR_IN_DESCR)
+			port->write_ts_idx = 0;
+		idx = port->write_ts_idx++;
+		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		port->in_buffer_len += port->inbufchunk;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	port->next_rx_desc->eol = 1;
+	port->prev_rx_desc->eol = 0;
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 0);
+	port->prev_rx_desc = port->next_rx_desc;
+	port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 1);
+	/* wake up the waiting process */
+	wake_up_interruptible(&port->in_wait_q);
+	DMA_CONTINUE(port->regi_dmain);
+	REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+}
+
 static irqreturn_t rx_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
 
 	int i;
 	int found = 0;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	DEBUG(pr_info("rx_interrupt\n"));
+
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
-		if (!port->enabled || !port->use_dma )
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		masked = REG_RD(dma, port->regi_dmain, r_masked_intr);
 
-		if (masked.data) /* Descriptor interrupt */
-		{
-			found = 1;
-			while (REG_RD(dma, port->regi_dmain, rw_data) !=
-			       virt_to_phys(port->next_rx_desc)) {
-				DEBUGRXINT(printk(KERN_DEBUG "!"));
-				if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) {
-					int first_size = port->flip + port->in_buffer_size - port->writep;
-					memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size);
-					memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size);
-					port->writep = port->flip + port->inbufchunk - first_size;
-				} else {
-					memcpy((char*)port->writep,
-					       phys_to_virt((unsigned)port->next_rx_desc->buf),
-					       port->inbufchunk);
-					port->writep += port->inbufchunk;
-					if (port->writep >= port->flip + port->in_buffer_size)
-						port->writep = port->flip;
-				}
-                                if (port->writep == port->readp)
-                                {
-				  port->full = 1;
-                                }
-
-				port->next_rx_desc->eol = 1;
-				port->prev_rx_desc->eol = 0;
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 0);
-				port->prev_rx_desc = port->next_rx_desc;
-				port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 1);
-				/* wake up the waiting process */
-				wake_up_interruptible(&port->in_wait_q);
-				DMA_CONTINUE(port->regi_dmain);
-				REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+		if (!masked.data)
+			continue;
 
-			}
-		}
+		/* Descriptor interrupt */
+		found = 1;
+		while (REG_RD(dma, port->regi_dmain, rw_data) !=
+				virt_to_phys(port->next_rx_desc))
+			handle_rx_packet(port);
 	}
 	return IRQ_RETVAL(found);
 } /* rx_interrupt */
@@ -1478,75 +1517,83 @@ static irqreturn_t rx_interrupt(int irq, void *dev_id)
 #ifdef SYNC_SER_MANUAL
 static irqreturn_t manual_interrupt(int irq, void *dev_id)
 {
+	unsigned long flags;
 	int i;
 	int found = 0;
 	reg_sser_r_masked_intr masked;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
 		if (!port->enabled || port->use_dma)
-		{
 			continue;
-		}
 
 		masked = REG_RD(sser, port->regi_sser, r_masked_intr);
-		if (masked.rdav)	/* Data received? */
-		{
-			reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-			reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data);
+		/* Data received? */
+		if (masked.rdav) {
+			reg_sser_rw_rec_cfg rec_cfg =
+				REG_RD(sser, port->regi_sser, rw_rec_cfg);
+			reg_sser_r_rec_data data = REG_RD(sser,
+				port->regi_sser, r_rec_data);
 			found = 1;
 			/* Read data */
-			switch(rec_cfg.sample_size)
-			{
+			spin_lock_irqsave(&port->lock, flags);
+			switch (rec_cfg.sample_size) {
 			case 8:
 				*port->writep++ = data.data & 0xff;
 				break;
 			case 12:
 				*port->writep = (data.data & 0x0ff0) >> 4;
 				*(port->writep + 1) = data.data & 0x0f;
-				port->writep+=2;
+				port->writep += 2;
 				break;
 			case 16:
-				*(unsigned short*)port->writep = data.data;
-				port->writep+=2;
+				*(unsigned short *)port->writep = data.data;
+				port->writep += 2;
 				break;
 			case 24:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=3;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 3;
 				break;
 			case 32:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=4;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 4;
 				break;
 			}
 
-			if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */
+			/* Wrap? */
+			if (port->writep >= port->flip + port->in_buffer_size)
 				port->writep = port->flip;
 			if (port->writep == port->readp) {
-				/* receive buffer overrun, discard oldest data
-				 */
+				/* Receive buf overrun, discard oldest data */
 				port->readp++;
-				if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
+				/* Wrap? */
+				if (port->readp >= port->flip +
+						port->in_buffer_size)
 					port->readp = port->flip;
 			}
+			spin_unlock_irqrestore(&port->lock, flags);
 			if (sync_data_avail(port) >= port->inbufchunk)
-				wake_up_interruptible(&port->in_wait_q); /* Wake up application */
+				/* Wake up application */
+				wake_up_interruptible(&port->in_wait_q);
 		}
 
-		if (masked.trdy) /* Transmitter ready? */
-		{
+		/* Transmitter ready? */
+		if (masked.trdy) {
 			found = 1;
-			if (port->out_buf_count > 0) /* More data to send */
+			/* More data to send */
+			if (port->out_buf_count > 0)
 				send_word(port);
-			else /* transmission finished */
-			{
+			else {
+				/* Transmission finished */
 				reg_sser_rw_intr_mask intr_mask;
-				intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+				intr_mask = REG_RD(sser, port->regi_sser,
+					rw_intr_mask);
 				intr_mask.trdy = 0;
-				REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
-				wake_up_interruptible(&port->out_wait_q); /* Wake up application */
+				REG_WR(sser, port->regi_sser,
+					rw_intr_mask, intr_mask);
+				/* Wake up application */
+				wake_up_interruptible(&port->out_wait_q);
 			}
 		}
 	}
@@ -1554,4 +1601,139 @@ static irqreturn_t manual_interrupt(int irq, void *dev_id)
 }
 #endif
 
+/*
+ * Module init: reserve the char device region, allocate the cdev and the
+ * sysfs class, set up each port enabled in Kconfig and finally add the
+ * cdev. Returns 0 on success or a negative errno. On failure the region,
+ * cdev, class and device nodes set up so far are released again; pins
+ * already claimed from the pinmux are not handed back.
+ */
+static int __init etrax_sync_serial_init(void)
+{
+	int ret;
+	int i;
+
+#if 1
+	/* This code will be removed when we move to udev for all devices. */
+	syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0);
+	ret = register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME);
+	if (ret) {
+		pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR);
+		return ret;
+	}
+#else
+	/* Allocate dynamic major number. */
+	ret = alloc_chrdev_region(&syncser_first, 0, minor_count, SYNCSER_NAME);
+	if (ret) {
+		pr_err("Failed to allocate character device region\n");
+		return ret;
+	}
+#endif
+	syncser_cdev = cdev_alloc();
+	if (!syncser_cdev) {
+		pr_err("Failed to allocate cdev for syncser\n");
+		ret = -ENOMEM;
+		goto err_region;
+	}
+	cdev_init(syncser_cdev, &syncser_fops);
+
+	/* Create a sysfs class for syncser */
+	syncser_class = class_create(THIS_MODULE, "syncser_class");
+	if (IS_ERR(syncser_class)) {
+		pr_err("Failed to create sysfs class for syncser\n");
+		ret = PTR_ERR(syncser_class);
+		goto err_cdev;
+	}
+
+	/* Initialize Ports */
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 0\n");
+		ret = -EIO;
+		goto err_devices;
+	}
+	initialize_port(0);
+	ports[0].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 1\n");
+		ret = -EIO;
+		goto err_devices;
+	}
+	initialize_port(1);
+	ports[1].enabled = 1;
+	/*
+	 * Register with sysfs so udev can pick it up. Port 1 needs its own
+	 * minor and node name, otherwise it collides with port 0.
+	 * NOTE(review): assumes minor N maps to ports[N] - confirm in open().
+	 */
+	device_create(syncser_class, NULL, syncser_first + 1, NULL,
+		      "%s%d", SYNCSER_NAME, 1);
+#endif
+
+	/* Add it to system */
+	ret = cdev_add(syncser_cdev, syncser_first, minor_count);
+	if (ret < 0) {
+		pr_err("Failed to add syncser as char device\n");
+		goto err_devices;
+	}
+
+	pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n",
+		SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first));
+
+	return 0;
+
+err_devices:
+	/* device_destroy() ignores minors that never got a node. */
+	for (i = 0; i < NBR_PORTS; i++)
+		device_destroy(syncser_class, syncser_first + i);
+	class_destroy(syncser_class);
+err_cdev:
+	cdev_del(syncser_cdev);
+err_region:
+	unregister_chrdev_region(syncser_first, minor_count);
+	return ret;
+}
+
+/* Module exit: undo etrax_sync_serial_init() and release port irqs/DMA. */
+static void __exit etrax_sync_serial_exit(void)
+{
+	int i;
+
+	for (i = 0; i < NBR_PORTS; i++)
+		device_destroy(syncser_class, syncser_first + i);
+	class_destroy(syncser_class);
+
+	if (syncser_cdev) {
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+	}
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (port->init_irqs == dma_irq_setup) {
+			/* Free dma irqs and dma channels. */
+#ifdef SYNC_SER_DMA
+			artpec_free_dma(port->dma_in_nbr);
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+#endif
+		} else if (port->init_irqs == manual_irq_setup) {
+			/* Free manual irq. */
+			free_irq(port->syncser_intr_vect, port);
+		}
+	}
+
+	pr_info("ARTPEC synchronous serial port unregistered\n");
+}
+
 module_init(etrax_sync_serial_init);
+module_exit(etrax_sync_serial_exit);
+
+MODULE_LICENSE("GPL");
+
diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
index 610909b..02e33eb 100644
--- a/arch/cris/arch-v32/kernel/debugport.c
+++ b/arch/cris/arch-v32/kernel/debugport.c
@@ -3,7 +3,9 @@
  */
 
 #include <linux/console.h>
+#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/reg_map.h>
 #include <hwregs/ser_defs.h>
@@ -65,6 +67,7 @@ struct dbg_port ports[] =
   },
 #endif
 };
+
 static struct dbg_port *port =
 #if defined(CONFIG_ETRAX_DEBUG_PORT0)
 	&ports[0];
@@ -97,14 +100,19 @@ static struct dbg_port *kgdb_port =
 #endif
 #endif
 
-static void
-start_port(struct dbg_port* p)
+static void start_port(struct dbg_port *p)
 {
-	if (!p)
-		return;
+	/* Set up serial port registers */
+	reg_ser_rw_tr_ctrl tr_ctrl = {0};
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
 
-	if (p->started)
+	reg_ser_rw_rec_ctrl rec_ctrl = {0};
+	reg_ser_rw_tr_baud_div tr_baud_div = {0};
+	reg_ser_rw_rec_baud_div rec_baud_div = {0};
+
+	if (!p || p->started)
 		return;
+
 	p->started = 1;
 
 	if (p->nbr == 1)
@@ -118,36 +126,24 @@ start_port(struct dbg_port* p)
 		crisv32_pinmux_alloc_fixed(pinmux_ser4);
 #endif
 
-	/* Set up serial port registers */
-	reg_ser_rw_tr_ctrl tr_ctrl = {0};
-	reg_ser_rw_tr_dma_en tr_dma_en = {0};
-
-	reg_ser_rw_rec_ctrl rec_ctrl = {0};
-	reg_ser_rw_tr_baud_div tr_baud_div = {0};
-	reg_ser_rw_rec_baud_div rec_baud_div = {0};
-
 	tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493;
 	tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no;
 	tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8;
 	tr_ctrl.en = rec_ctrl.en = 1;
 
-	if (p->parity == 'O')
-	{
+	if (p->parity == 'O') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_odd;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
-	}
-	else if (p->parity == 'E')
-	{
+	} else if (p->parity == 'E') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_even;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
 	}
 
-	if (p->bits == 7)
-	{
+	if (p->bits == 7) {
 		tr_ctrl.data_bits = regk_ser_bits7;
 		rec_ctrl.data_bits = regk_ser_bits7;
 	}
@@ -161,8 +157,7 @@ start_port(struct dbg_port* p)
 
 #ifdef CONFIG_ETRAX_KGDB
 /* Use polling to get a single character from the kernel debug port */
-int
-getDebugChar(void)
+int getDebugChar(void)
 {
 	reg_ser_rs_stat_din stat;
 	reg_ser_rw_ack_intr ack_intr = { 0 };
@@ -179,8 +174,7 @@ getDebugChar(void)
 }
 
 /* Use polling to put a single character to the kernel debug port */
-void
-putDebugChar(int val)
+void putDebugChar(int val)
 {
 	reg_ser_r_stat_din stat;
 	do {
@@ -190,12 +184,48 @@ putDebugChar(int val)
 }
 #endif /* CONFIG_ETRAX_KGDB */
 
+/* Busy-wait until the transmitter is ready, then send c. */
+static void __init early_putch(int c)
+{
+	reg_ser_r_stat_din status;
+	do {
+		status = REG_RD(ser, port->instance, r_stat_din);
+	} while (!status.tr_rdy);
+	REG_WR_INT(ser, port->instance, rw_dout, c);
+}
+
+/* Boot console writer: emit n bytes of s, sending '\r' before each '\n'. */
+static void __init
+early_console_write(struct console *con, const char *s, unsigned n)
+{
+	extern void reset_watchdog(void);
+	unsigned int i;	/* same signedness as n, so the compare is exact */
+
+	for (i = 0; i < n; i++) {
+		/* TODO: the '\n' -> '\r\n' translation should be done at the
+		   receiver. Remove it when the serial driver removes it.   */
+		if (s[i] == '\n')
+			early_putch('\r');
+		early_putch(s[i]);
+		reset_watchdog();
+	}
+}
+
+static struct console early_console_dev __initdata = {
+	.name   = "early",
+	.write  = early_console_write,	/* polled, one byte at a time */
+	.flags  = CON_PRINTBUFFER | CON_BOOT,
+	.index  = -1
+};
+
 /* Register console for printk's, etc. */
-int __init
-init_etrax_debug(void)
+int __init init_etrax_debug(void)
 {
         start_port(port);
 
+	/* Register an early console if a debug port was chosen.  */
+	register_console(&early_console_dev);
+
 #ifdef CONFIG_ETRAX_KGDB
 	start_port(kgdb_port);
 #endif /* CONFIG_ETRAX_KGDB */
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index ee66866..eb74dab 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
 #include <asm/io.h>
@@ -56,7 +57,6 @@ static int __init etrax_init_cont_rotime(void)
 }
 arch_initcall(etrax_init_cont_rotime);
 
-
 unsigned long timer_regs[NR_CPUS] =
 {
 	regi_timer0,
@@ -68,9 +68,8 @@ unsigned long timer_regs[NR_CPUS] =
 extern int set_rtc_mmss(unsigned long nowtime);
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data);
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data);
 
 static struct notifier_block cris_time_freq_notifier_block = {
 	.notifier_call = cris_time_freq_notifier,
@@ -87,7 +86,6 @@ unsigned long get_ns_in_jiffie(void)
 	return ns;
 }
 
-
 /* From timer MDS describing the hardware watchdog:
  * 4.3.1 Watchdog Operation
  * The watchdog timer is an 8-bit timer with a configurable start value.
@@ -109,11 +107,18 @@ static short int watchdog_key = 42;  /* arbitrary 7 bit number */
  * is used though, so set this really low. */
 #define WATCHDOG_MIN_FREE_PAGES 8
 
+/* for reliable NICE_DOGGY behaviour */
+static int bite_in_progress;
+
 void reset_watchdog(void)
 {
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
 
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	if (unlikely(bite_in_progress))
+		return;
+#endif
 	/* Only keep watchdog happy as long as we have memory left! */
 	if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) {
 		/* Reset the watchdog with the inverse of the old key */
@@ -148,7 +153,9 @@ void handle_watchdog_bite(struct pt_regs *regs)
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	extern int cause_of_death;
 
+	nmi_enter();
 	oops_in_progress = 1;
+	bite_in_progress = 1;
 	printk(KERN_WARNING "Watchdog bite\n");
 
 	/* Check if forced restart or unexpected watchdog */
@@ -170,6 +177,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
 	printk(KERN_WARNING "Oops: bitten by watchdog\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	printk("\n"); /* Flush mtdoops.  */
 #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
 #endif
@@ -202,7 +210,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
 	/* Reset watchdog otherwise it resets us! */
 	reset_watchdog();
 
-        /* Update statistics. */
+	/* Update statistics. */
 	update_process_times(user_mode(regs));
 
 	cris_do_profile(regs); /* Save profiling information */
@@ -213,7 +221,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
 
 	/* Call the real timer interrupt handler */
 	xtime_update(1);
-        return IRQ_HANDLED;
+	return IRQ_HANDLED;
 }
 
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
@@ -293,14 +301,13 @@ void __init time_init(void)
 
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_register_notifier(&cris_time_freq_notifier_block,
-		CPUFREQ_TRANSITION_NOTIFIER);
+				  CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 }
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data)
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct cpufreq_freqs *freqs = data;
 	if (val == CPUFREQ_POSTCHANGE) {
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index 0b5b70d..f0f335d 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -26,8 +26,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -155,13 +154,13 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
-
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -321,11 +320,10 @@ copy_exception_bytes:
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -468,3 +466,4 @@ __do_clear_user (void __user *pto, unsigned long pn)
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);
diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
index 38f29ee..05a0470 100644
--- a/arch/cris/arch-v32/mach-fs/pinmux.c
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c
@@ -26,7 +26,29 @@ static DEFINE_SPINLOCK(pinmux_lock);
 
 static void crisv32_pinmux_set(int port);
 
-int crisv32_pinmux_init(void)
+/* Set pins first_pin..last_pin of port to mode; -EPERM if one is in use. */
+static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+				 enum pin_mode mode)
+{
+	int i;
+
+	for (i = first_pin; i <= last_pin; i++) {
+		if ((pins[port][i] != pinmux_none)
+		    && (pins[port][i] != pinmux_gpio)
+		    && (pins[port][i] != mode)) {
+#ifdef DEBUG
+			panic("Pinmux alloc failed!\n");
+#endif
+			return -EPERM;
+		}
+	}
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = mode;
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+static int crisv32_pinmux_init(void)
 {
 	static int initialized;
 
@@ -37,20 +59,20 @@ int crisv32_pinmux_init(void)
 		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
 		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
 		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
-		crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
 	}
 
 	return 0;
 }
 
-int
-crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
+int crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+			 enum pin_mode mode)
 {
-	int i;
 	unsigned long flags;
+	int ret;
 
 	crisv32_pinmux_init();
 
@@ -59,26 +81,11 @@ crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
 
 	spin_lock_irqsave(&pinmux_lock, flags);
 
-	for (i = first_pin; i <= last_pin; i++) {
-		if ((pins[port][i] != pinmux_none)
-		    && (pins[port][i] != pinmux_gpio)
-		    && (pins[port][i] != mode)) {
-			spin_unlock_irqrestore(&pinmux_lock, flags);
-#ifdef DEBUG
-			panic("Pinmux alloc failed!\n");
-#endif
-			return -EPERM;
-		}
-	}
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = mode;
-
-	crisv32_pinmux_set(port);
+	ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode);
 
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 int crisv32_pinmux_alloc_fixed(enum fixed_function function)
@@ -98,58 +105,58 @@ int crisv32_pinmux_alloc_fixed(enum fixed_function function)
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
 		hwprot.ser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
 		hwprot.ser2 = regk_pinmux_yes;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
 		hwprot.ser3 = regk_pinmux_yes;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.sser0 = regk_pinmux_yes;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
 		hwprot.sser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
 		hwprot.ata0 = regk_pinmux_yes;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
 		hwprot.ata1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
 		hwprot.ata = regk_pinmux_yes;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
 		hwprot.eth1 = regk_pinmux_yes;
 		hwprot.eth1_mgm = regk_pinmux_yes;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.timer = regk_pinmux_yes;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -188,9 +195,19 @@ void crisv32_pinmux_set(int port)
 #endif
 }
 
-int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 {
 	int i;
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
 	unsigned long flags;
 
 	crisv32_pinmux_init();
@@ -199,11 +216,7 @@ int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 		return -EINVAL;
 
 	spin_lock_irqsave(&pinmux_lock, flags);
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = pinmux_none;
-
-	crisv32_pinmux_set(port);
+	__crisv32_pinmux_dealloc(port, first_pin, last_pin);
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
 	return 0;
@@ -226,58 +239,58 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function)
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_dealloc(PORT_C, 4, 7);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7);
 		hwprot.ser1 = regk_pinmux_no;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 11);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11);
 		hwprot.ser2 = regk_pinmux_no;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 12, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15);
 		hwprot.ser3 = regk_pinmux_no;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_dealloc(PORT_C, 0, 3);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.sser0 = regk_pinmux_no;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
 		hwprot.sser1 = regk_pinmux_no;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_dealloc(PORT_D, 5, 7);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17);
 		hwprot.ata0 = regk_pinmux_no;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17);
 		hwprot.ata1 = regk_pinmux_no;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 11, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 3, 3);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 10);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_dealloc(PORT_B, 0, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15);
 		hwprot.ata = regk_pinmux_no;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_dealloc(PORT_E, 0, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17);
 		hwprot.eth1 = regk_pinmux_no;
 		hwprot.eth1_mgm = regk_pinmux_no;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.timer = regk_pinmux_no;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -293,7 +306,8 @@ int crisv32_pinmux_dealloc_fixed(enum fixed_function function)
 	return ret;
 }
 
-void crisv32_pinmux_dump(void)
+#ifdef DEBUG
+static void crisv32_pinmux_dump(void)
 {
 	int i, j;
 
@@ -305,5 +319,5 @@ void crisv32_pinmux_dump(void)
 			printk(KERN_DEBUG "  Pin %d = %d\n", j, pins[i][j]);
 	}
 }
-
+#endif
 __initcall(crisv32_pinmux_init);
diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
index c2b3036..09bf0c9 100644
--- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
+++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
@@ -28,11 +28,9 @@ enum fixed_function {
   pinmux_timer
 };
 
-int crisv32_pinmux_init(void);
 int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode);
 int crisv32_pinmux_alloc_fixed(enum fixed_function function);
 int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin);
 int crisv32_pinmux_dealloc_fixed(enum fixed_function function);
-void crisv32_pinmux_dump(void);
 
 #endif
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d5f1248..889f2de 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,8 +1,4 @@
 
-header-y += arch-v10/
-header-y += arch-v32/
-
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 7d47b36..01f66b8 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -1,8 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += arch-v10/
-header-y += arch-v32/
+header-y += ../arch-v10/arch/
+header-y += ../arch-v32/arch/
 header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += byteorder.h
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 5868cee..3908b94 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -47,16 +47,16 @@ EXPORT_SYMBOL(__negdi2);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Userspace access functions */
-EXPORT_SYMBOL(__copy_user_zeroing);
-EXPORT_SYMBOL(__copy_user);
-
 #undef memcpy
 #undef memset
 extern void * memset(void *, int, __kernel_size_t);
 extern void * memcpy(void *, const void *, __kernel_size_t);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
+#ifdef CONFIG_ETRAX_ARCH_V32
+#undef strcmp
+EXPORT_SYMBOL(strcmp);
+#endif
 
 #ifdef CONFIG_ETRAX_FAST_TIMER
 /* Fast timer functions */
@@ -66,3 +66,4 @@ EXPORT_SYMBOL(del_fast_timer);
 EXPORT_SYMBOL(schedule_usleep);
 #endif
 EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_from_user);
diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index 0ffda73..da4c724 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c
@@ -14,6 +14,10 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/utsname.h>
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#endif
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -34,25 +38,24 @@ static int kstack_depth_to_print = 24;
 
 void (*nmi_handler)(struct pt_regs *);
 
-void
-show_trace(unsigned long *stack)
+void show_trace(unsigned long *stack)
 {
 	unsigned long addr, module_start, module_end;
 	extern char _stext, _etext;
 	int i;
 
-	printk("\nCall Trace: ");
+	pr_err("\nCall Trace: ");
 
 	i = 1;
 	module_start = VMALLOC_START;
 	module_end = VMALLOC_END;
 
-	while (((long)stack & (THREAD_SIZE-1)) != 0) {
+	while (((long)stack & (THREAD_SIZE - 1)) != 0) {
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
@@ -68,10 +71,14 @@ show_trace(unsigned long *stack)
 		if (((addr >= (unsigned long)&_stext) &&
 		     (addr <= (unsigned long)&_etext)) ||
 		    ((addr >= module_start) && (addr <= module_end))) {
+#ifdef CONFIG_KALLSYMS
+			print_ip_sym(addr);
+#else
 			if (i && ((i % 8) == 0))
-				printk("\n       ");
-			printk("[<%08lx>] ", addr);
+				pr_err("\n       ");
+			pr_err("[<%08lx>] ", addr);
 			i++;
+#endif
 		}
 	}
 }
@@ -111,21 +118,21 @@ show_stack(struct task_struct *task, unsigned long *sp)
 
 	stack = sp;
 
-	printk("\nStack from %08lx:\n       ", (unsigned long)stack);
+	pr_err("\nStack from %08lx:\n       ", (unsigned long)stack);
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (((long)stack & (THREAD_SIZE-1)) == 0)
 			break;
 		if (i && ((i % 8) == 0))
-			printk("\n       ");
+			pr_err("\n       ");
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
-		printk("%08lx ", addr);
+		pr_err("%08lx ", addr);
 	}
 	show_trace(sp);
 }
@@ -139,33 +146,32 @@ show_stack(void)
 	unsigned long *sp = (unsigned long *)rdusp();
 	int i;
 
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)sp);
+	pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp);
 	for (i = 0; i < 16; i++)
-		printk("sp + %d: 0x%08lx\n", i*4, sp[i]);
+		pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]);
 	return 0;
 }
 #endif
 
-void
-set_nmi_handler(void (*handler)(struct pt_regs *))
+void set_nmi_handler(void (*handler)(struct pt_regs *))
 {
 	nmi_handler = handler;
 	arch_enable_nmi();
 }
 
 #ifdef CONFIG_DEBUG_NMI_OOPS
-void
-oops_nmi_handler(struct pt_regs *regs)
+void oops_nmi_handler(struct pt_regs *regs)
 {
 	stop_watchdog();
 	oops_in_progress = 1;
-	printk("NMI!\n");
+	pr_err("NMI!\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	oops_exit();
+	pr_err("\n"); /* Flush mtdoops.  */
 }
 
-static int __init
-oops_nmi_register(void)
+static int __init oops_nmi_register(void)
 {
 	set_nmi_handler(oops_nmi_handler);
 	return 0;
@@ -180,8 +186,7 @@ __initcall(oops_nmi_register);
  * similar to an Oops dump, and if the kernel is configured to be a nice
  * doggy, then halt instead of reboot.
  */
-void
-watchdog_bite_hook(struct pt_regs *regs)
+void watchdog_bite_hook(struct pt_regs *regs)
 {
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	local_irq_disable();
@@ -196,8 +201,7 @@ watchdog_bite_hook(struct pt_regs *regs)
 }
 
 /* This is normally the Oops function. */
-void
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
+void die_if_kernel(const char *str, struct pt_regs *regs, long err)
 {
 	if (user_mode(regs))
 		return;
@@ -211,13 +215,17 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
 	stop_watchdog();
 #endif
 
+	oops_enter();
 	handle_BUG(regs);
 
-	printk("%s: %04lx\n", str, err & 0xffff);
+	pr_err("Linux %s %s\n", utsname()->release, utsname()->version);
+	pr_err("%s: %04lx\n", str, err & 0xffff);
 
 	show_registers(regs);
 
+	oops_exit();
 	oops_in_progress = 0;
+	pr_err("\n"); /* Flush mtdoops.  */
 
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
@@ -225,8 +233,7 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
 	do_exit(SIGSEGV);
 }
 
-void __init
-trap_init(void)
+void __init trap_init(void)
 {
 	/* Nothing needs to be done */
 }
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index c81af5b..1e7fd45 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -11,13 +11,15 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
 
 unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 
-void __init
-mem_init(void)
+void __init mem_init(void)
 {
 	BUG_ON(!mem_map);
 
@@ -31,10 +33,36 @@ mem_init(void)
 	mem_init_print_info(NULL);
 }
 
-/* free the pages occupied by initialization code */
+/* Free a range of init pages. Virtual addresses. */
 
-void 
-free_initmem(void)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	/*
+	 * free_reserved_page() clears PG_reserved, resets the refcount, frees
+	 * the page and keeps the managed/total page counters in step.
+	 */
+	for (addr = begin; addr < end; addr += PAGE_SIZE)
+		free_reserved_page(virt_to_page(addr));
+
+	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+/* Free the pages occupied by initialization code. */
+
+void free_initmem(void)
 {
 	free_initmem_default(-1);
 }
+
+/* Free the pages occupied by the initrd. */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/* start and end are virtual addresses, as free_init_pages() expects. */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	/* The label is what free_init_pages() prints in its log line. */
+	free_init_pages("initrd memory", start, end);
+}
+#endif
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index f9ca44b..80fdb99 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -76,10 +76,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
  * Must be freed with iounmap.
  */
 
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
         return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0);
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
index 2635117..69952c1 100644
--- a/arch/hexagon/include/asm/cache.h
+++ b/arch/hexagon/include/asm/cache.h
@@ -1,7 +1,7 @@
 /*
  * Cache definitions for the Hexagon architecture
  *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,8 @@
 #define L1_CACHE_SHIFT		(5)
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+
 #define __cacheline_aligned	__aligned(L1_CACHE_BYTES)
 #define ____cacheline_aligned	__aligned(L1_CACHE_BYTES)
 
diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index 49e0896..b86f9f3 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -21,10 +21,7 @@
 #ifndef _ASM_CACHEFLUSH_H
 #define _ASM_CACHEFLUSH_H
 
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-#include <asm-generic/cacheflush.h>
+#include <linux/mm_types.h>
 
 /* Cache flushing:
  *
@@ -41,6 +38,20 @@
 #define LINESIZE	32
 #define LINEBITS	5
 
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
 /*
  * Flush Dcache range through current map.
  */
@@ -49,7 +60,6 @@ extern void flush_dcache_range(unsigned long start, unsigned long end);
 /*
  * Flush Icache range through current map.
  */
-#undef flush_icache_range
 extern void flush_icache_range(unsigned long start, unsigned long end);
 
 /*
@@ -79,19 +89,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
 }
 
-#undef copy_to_user_page
-static inline void copy_to_user_page(struct vm_area_struct *vma,
-					     struct page *page,
-					     unsigned long vaddr,
-					     void *dst, void *src, int len)
-{
-	memcpy(dst, src, len);
-	if (vma->vm_flags & VM_EXEC) {
-		flush_icache_range((unsigned long) dst,
-		(unsigned long) dst + len);
-	}
-}
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len);
 
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
 
 extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
 extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 7029899..66f5e9a 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -24,14 +24,9 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/vmalloc.h>
-#include <asm/string.h>
-#include <asm/mem-layout.h>
 #include <asm/iomap.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 
 /*
  * We don't have PCI yet.
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 0e7c1db..6981949 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/mmzone.h>
 #include <linux/mm.h>
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 7858663..110dab1 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -1,7 +1,7 @@
 /*
  * Kernel traps/events for Hexagon processor
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -423,7 +423,7 @@ void do_trap0(struct pt_regs *regs)
 			 */
 			info.si_code = TRAP_BRKPT;
 			info.si_addr = (void __user *) pt_elr(regs);
-			send_sig_info(SIGTRAP, &info, current);
+			force_sig_info(SIGTRAP, &info, current);
 		} else {
 #ifdef CONFIG_KGDB
 			kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 44d8c47..5f268c1 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -1,7 +1,7 @@
 /*
  * Linker script for Hexagon kernel
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -59,7 +59,7 @@ SECTIONS
 	INIT_DATA_SECTION(PAGE_SIZE)
 
 	_sdata = .;
-		RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE)
+		RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
 		RO_DATA_SECTION(PAGE_SIZE)
 	_edata = .;
 
diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c
index 0c76c80..a7c6d82 100644
--- a/arch/hexagon/mm/cache.c
+++ b/arch/hexagon/mm/cache.c
@@ -127,3 +127,13 @@ void flush_cache_all_hexagon(void)
 	local_irq_restore(flags);
 	mb();
 }
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC) {
+		flush_icache_range((unsigned long) dst,
+		(unsigned long) dst + len);
+	}
+}
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
index 5905fd5..d27d672 100644
--- a/arch/hexagon/mm/ioremap.c
+++ b/arch/hexagon/mm/ioremap.c
@@ -20,6 +20,7 @@
 
 #include <linux/io.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 536d13b..074e52b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -20,7 +20,6 @@ config IA64
 	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
-	select HAVE_KVM
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
@@ -232,7 +231,7 @@ config IA64_SGI_UV
 config IA64_HP_SIM
 	bool "Ski-simulator"
 	select SWIOTLB
-	depends on !PM_RUNTIME
+	depends on !PM
 
 endchoice
 
@@ -640,8 +639,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
-source "arch/ia64/kvm/Kconfig"
-
 source "lib/Kconfig"
 
 config IOMMU_HELPER
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 5441b14..970d0bd 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
-core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
deleted file mode 100644
index 4729752..0000000
--- a/arch/ia64/include/asm/kvm_host.h
+++ /dev/null
@@ -1,609 +0,0 @@
-/*
- * kvm_host.h: used for kvm module, and hold ia64-specific sections.
- *
- * Copyright (C) 2007, Intel Corporation.
- *
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_KVM_HOST_H
-#define __ASM_KVM_HOST_H
-
-#define KVM_USER_MEM_SLOTS 32
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
-
-/* define exit reasons from vmm to kvm*/
-#define EXIT_REASON_VM_PANIC		0
-#define EXIT_REASON_MMIO_INSTRUCTION	1
-#define EXIT_REASON_PAL_CALL		2
-#define EXIT_REASON_SAL_CALL		3
-#define EXIT_REASON_SWITCH_RR6		4
-#define EXIT_REASON_VM_DESTROY		5
-#define EXIT_REASON_EXTERNAL_INTERRUPT	6
-#define EXIT_REASON_IPI			7
-#define EXIT_REASON_PTC_G		8
-#define EXIT_REASON_DEBUG		20
-
-/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
-#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
- */
-#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+	-------	KVM_VM_DATA_SIZE
- *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
- *     	      |			     |	 |    /			  |
- *     	      |	       ..........    |	 |   /vcpu's struct&stack |
- *     	      |	       ..........    |	 |  /---------------------|---- 0
- *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
- *	      |	    vcpu[4]'s data   |	 |/-----------------------|
- *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
- *	      |	    vcpu[2]'s data   |	/|------------------------|
- *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
- *	      |	    vcpu[0]'s data   |____________________________|
- *            +----------------------+	 |
- *	      |	   memory dirty log  |	 |
- *            +----------------------+	 |
- *	      |	   vm's data struct  |	 |
- *            +----------------------+	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |	  vm's p2m table  |	 |
- *	      |			     |	 |
- *            |			     |	 |
- *	      |			     |	 |  |
- * vm's data->|			     |   |  |
- *	      +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT	26
-#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE		KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT		16
-#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT		16
-#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT		16
-#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT	16
-#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-/*
- * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
- */
-#define KVM_STK_SHIFT		16
-#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
-
-#define KVM_VM_STRUCT_SHIFT	19
-#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT	19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
-	char vcpu_vhpt[VHPT_SIZE];
-	char vcpu_vtlb[VTLB_SIZE];
-	char vcpu_vpd[VPD_SIZE];
-	char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-	char kvm_p2m[KVM_P2M_SIZE];
-	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, vcpu_data[n]))
-#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_vm_struct))
-#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
-				offsetof(struct kvm_vcpu_data, vcpu_struct))
-
-/*IO section definitions*/
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_IOREQ_NONE        0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-
-/*Guest Physical address layout.*/
-#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
-#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
-#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
-#define GPFN_PIB        (3UL << 60) /* PIB base */
-#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
-#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
-#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
-#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
-
-#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
-#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
-#define INVALID_MFN       (~0UL)
-#define MEM_G   (1UL << 30)
-#define MEM_M   (1UL << 20)
-#define MMIO_START       (3 * MEM_G)
-#define MMIO_SIZE        (512 * MEM_M)
-#define VGA_IO_START     0xA0000UL
-#define VGA_IO_SIZE      0x20000
-#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
-#define LEGACY_IO_SIZE   (64 * MEM_M)
-#define IO_SAPIC_START   0xfec00000UL
-#define IO_SAPIC_SIZE    0x100000
-#define PIB_START 0xfee00000UL
-#define PIB_SIZE 0x200000
-#define GFW_START        (4 * MEM_G - 16 * MEM_M)
-#define GFW_SIZE         (16 * MEM_M)
-
-/*Deliver mode, defined for ioapic.c*/
-#define dest_Fixed IOSAPIC_FIXED
-#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
-
-#define NMI_VECTOR      		2
-#define ExtINT_VECTOR       		0
-#define NULL_VECTOR     		(-1)
-#define IA64_SPURIOUS_INT_VECTOR    	0x0f
-
-#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
-
-/*
- *Delivery mode
- */
-#define SAPIC_DELIV_SHIFT      8
-#define SAPIC_FIXED            0x0
-#define SAPIC_LOWEST_PRIORITY  0x1
-#define SAPIC_PMI              0x2
-#define SAPIC_NMI              0x4
-#define SAPIC_INIT             0x5
-#define SAPIC_EXTINT           0x7
-
-/*
- * vcpu->requests bit members for arch
- */
-#define KVM_REQ_PTC_G		32
-#define KVM_REQ_RESUME		33
-
-struct kvm_mmio_req {
-	uint64_t addr;          /*  physical address		*/
-	uint64_t size;          /*  size in bytes		*/
-	uint64_t data;          /*  data (or paddr of data)     */
-	uint8_t state:4;
-	uint8_t dir:1;          /*  1=read, 0=write             */
-};
-
-/*Pal data struct */
-struct kvm_pal_call{
-	/*In area*/
-	uint64_t gr28;
-	uint64_t gr29;
-	uint64_t gr30;
-	uint64_t gr31;
-	/*Out area*/
-	struct ia64_pal_retval ret;
-};
-
-/* Sal data structure */
-struct kvm_sal_call{
-	/*In area*/
-	uint64_t in0;
-	uint64_t in1;
-	uint64_t in2;
-	uint64_t in3;
-	uint64_t in4;
-	uint64_t in5;
-	uint64_t in6;
-	uint64_t in7;
-	struct sal_ret_values ret;
-};
-
-/*Guest change rr6*/
-struct kvm_switch_rr6 {
-	uint64_t old_rr;
-	uint64_t new_rr;
-};
-
-union ia64_ipi_a{
-	unsigned long val;
-	struct {
-		unsigned long rv  : 3;
-		unsigned long ir  : 1;
-		unsigned long eid : 8;
-		unsigned long id  : 8;
-		unsigned long ib_base : 44;
-	};
-};
-
-union ia64_ipi_d {
-	unsigned long val;
-	struct {
-		unsigned long vector : 8;
-		unsigned long dm  : 3;
-		unsigned long ig  : 53;
-	};
-};
-
-/*ipi check exit data*/
-struct kvm_ipi_data{
-	union ia64_ipi_a addr;
-	union ia64_ipi_d data;
-};
-
-/*global purge data*/
-struct kvm_ptc_g {
-	unsigned long vaddr;
-	unsigned long rr;
-	unsigned long ps;
-	struct kvm_vcpu *vcpu;
-};
-
-/*Exit control data */
-struct exit_ctl_data{
-	uint32_t exit_reason;
-	uint32_t vm_status;
-	union {
-		struct kvm_mmio_req	ioreq;
-		struct kvm_pal_call	pal_data;
-		struct kvm_sal_call	sal_data;
-		struct kvm_switch_rr6	rr_data;
-		struct kvm_ipi_data	ipi_data;
-		struct kvm_ptc_g	ptc_g_data;
-	} u;
-};
-
-union pte_flags {
-	unsigned long val;
-	struct {
-		unsigned long p    :  1; /*0      */
-		unsigned long      :  1; /* 1     */
-		unsigned long ma   :  3; /* 2-4   */
-		unsigned long a    :  1; /* 5     */
-		unsigned long d    :  1; /* 6     */
-		unsigned long pl   :  2; /* 7-8   */
-		unsigned long ar   :  3; /* 9-11  */
-		unsigned long ppn  : 38; /* 12-49 */
-		unsigned long      :  2; /* 50-51 */
-		unsigned long ed   :  1; /* 52    */
-	};
-};
-
-union ia64_pta {
-	unsigned long val;
-	struct {
-		unsigned long ve : 1;
-		unsigned long reserved0 : 1;
-		unsigned long size : 6;
-		unsigned long vf : 1;
-		unsigned long reserved1 : 6;
-		unsigned long base : 49;
-	};
-};
-
-struct thash_cb {
-	/* THASH base information */
-	struct thash_data	*hash; /* hash table pointer */
-	union ia64_pta		pta;
-	int           num;
-};
-
-struct kvm_vcpu_stat {
-	u32 halt_wakeup;
-};
-
-struct kvm_vcpu_arch {
-	int launched;
-	int last_exit;
-	int last_run_cpu;
-	int vmm_tr_slot;
-	int vm_tr_slot;
-	int sn_rtc_tr_slot;
-
-#define KVM_MP_STATE_RUNNABLE          0
-#define KVM_MP_STATE_UNINITIALIZED     1
-#define KVM_MP_STATE_INIT_RECEIVED     2
-#define KVM_MP_STATE_HALTED            3
-	int mp_state;
-
-#define MAX_PTC_G_NUM			3
-	int ptc_g_count;
-	struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
-
-	/*halt timer to wake up sleepy vcpus*/
-	struct hrtimer hlt_timer;
-	long ht_active;
-
-	struct kvm_lapic *apic;    /* kernel irqchip context */
-	struct vpd *vpd;
-
-	/* Exit data for vmm_transition*/
-	struct exit_ctl_data exit_data;
-
-	cpumask_t cache_coherent_map;
-
-	unsigned long vmm_rr;
-	unsigned long host_rr6;
-	unsigned long psbits[8];
-	unsigned long cr_iipa;
-	unsigned long cr_isr;
-	unsigned long vsa_base;
-	unsigned long dirty_log_lock_pa;
-	unsigned long __gp;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-	/* purge all */
-	unsigned long ptce_base;
-	unsigned long ptce_count[2];
-	unsigned long ptce_stride[2];
-	/* itc/itm */
-	unsigned long last_itc;
-	long itc_offset;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned int timer_pending;
-	unsigned int timer_fired;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-	int mode_flags;
-	struct thash_cb vtlb;
-	struct thash_cb vhpt;
-	char irq_check;
-	char irq_new_pending;
-
-	unsigned long opcode;
-	unsigned long cause;
-	char log_buf[VMM_LOG_LEN];
-	union context host;
-	union context guest;
-
-	char mmio_data[8];
-};
-
-struct kvm_vm_stat {
-	u64 remote_tlb_flush;
-};
-
-struct kvm_sal_data {
-	unsigned long boot_ip;
-	unsigned long boot_gp;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-struct kvm_arch {
-	spinlock_t dirty_log_lock;
-
-	unsigned long	vm_base;
-	unsigned long	metaphysical_rr0;
-	unsigned long	metaphysical_rr4;
-	unsigned long	vmm_init_rr;
-
-	int		is_sn2;
-
-	struct kvm_ioapic *vioapic;
-	struct kvm_vm_stat stat;
-	struct kvm_sal_data rdv_sal_data;
-
-	struct list_head assigned_dev_head;
-	struct iommu_domain *iommu_domain;
-	bool iommu_noncoherent;
-
-	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
-};
-
-union cpuid3_t {
-	u64 value;
-	struct {
-		u64 number : 8;
-		u64 revision : 8;
-		u64 model : 8;
-		u64 family : 8;
-		u64 archrev : 8;
-		u64 rv : 24;
-	};
-};
-
-struct kvm_pt_regs {
-	/* The following registers are saved by SAVE_MIN: */
-	unsigned long b6;  /* scratch */
-	unsigned long b7;  /* scratch */
-
-	unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
-	unsigned long ar_ssd; /* reserved for future use (scratch) */
-
-	unsigned long r8;  /* scratch (return value register 0) */
-	unsigned long r9;  /* scratch (return value register 1) */
-	unsigned long r10; /* scratch (return value register 2) */
-	unsigned long r11; /* scratch (return value register 3) */
-
-	unsigned long cr_ipsr; /* interrupted task's psr */
-	unsigned long cr_iip;  /* interrupted task's instruction pointer */
-	unsigned long cr_ifs;  /* interrupted task's function state */
-
-	unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
-	unsigned long ar_pfs;  /* prev function state  */
-	unsigned long ar_rsc;  /* RSE configuration */
-	/* The following two are valid only if cr_ipsr.cpl > 0: */
-	unsigned long ar_rnat;  /* RSE NaT */
-	unsigned long ar_bspstore; /* RSE bspstore */
-
-	unsigned long pr;  /* 64 predicate registers (1 bit each) */
-	unsigned long b0;  /* return pointer (bp) */
-	unsigned long loadrs;  /* size of dirty partition << 16 */
-
-	unsigned long r1;  /* the gp pointer */
-	unsigned long r12; /* interrupted task's memory stack pointer */
-	unsigned long r13; /* thread pointer */
-
-	unsigned long ar_fpsr;  /* floating point status (preserved) */
-	unsigned long r15;  /* scratch */
-
-	/* The remaining registers are NOT saved for system calls.  */
-	unsigned long r14;  /* scratch */
-	unsigned long r2;  /* scratch */
-	unsigned long r3;  /* scratch */
-	unsigned long r16;  /* scratch */
-	unsigned long r17;  /* scratch */
-	unsigned long r18;  /* scratch */
-	unsigned long r19;  /* scratch */
-	unsigned long r20;  /* scratch */
-	unsigned long r21;  /* scratch */
-	unsigned long r22;  /* scratch */
-	unsigned long r23;  /* scratch */
-	unsigned long r24;  /* scratch */
-	unsigned long r25;  /* scratch */
-	unsigned long r26;  /* scratch */
-	unsigned long r27;  /* scratch */
-	unsigned long r28;  /* scratch */
-	unsigned long r29;  /* scratch */
-	unsigned long r30;  /* scratch */
-	unsigned long r31;  /* scratch */
-	unsigned long ar_ccv;  /* compare/exchange value (scratch) */
-
-	/*
-	 * Floating point registers that the kernel considers scratch:
-	 */
-	struct ia64_fpreg f6;  /* scratch */
-	struct ia64_fpreg f7;  /* scratch */
-	struct ia64_fpreg f8;  /* scratch */
-	struct ia64_fpreg f9;  /* scratch */
-	struct ia64_fpreg f10;  /* scratch */
-	struct ia64_fpreg f11;  /* scratch */
-
-	unsigned long r4;  /* preserved */
-	unsigned long r5;  /* preserved */
-	unsigned long r6;  /* preserved */
-	unsigned long r7;  /* preserved */
-	unsigned long eml_unat;    /* used for emulating instruction */
-	unsigned long pad0;     /* alignment pad */
-};
-
-static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
-{
-	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
-}
-
-typedef int kvm_vmm_entry(void);
-typedef void kvm_tramp_entry(union context *host, union context *guest);
-
-struct kvm_vmm_info{
-	struct module	*module;
-	kvm_vmm_entry 	*vmm_entry;
-	kvm_tramp_entry *tramp_entry;
-	unsigned long 	vmm_ivt;
-	unsigned long	patch_mov_ar;
-	unsigned long	patch_mov_ar_sn2;
-};
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
-void kvm_sal_emul(struct kvm_vcpu *vcpu);
-
-#define __KVM_HAVE_ARCH_VM_ALLOC 1
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_free_memslot(struct kvm *kvm,
-		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
-static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
-		const struct kvm_memory_slot *old,
-		enum kvm_mr_change change) {}
-static inline void kvm_arch_hardware_unsetup(void) {}
-
-#endif /* __ASSEMBLY__*/
-
-#endif
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 14aa1c5..0ec484d 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -35,8 +35,8 @@ extern void *per_cpu_init(void);
 
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
- * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
- * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly
  * more efficient.
  */
 #define __ia64_per_cpu_var(var) (*({					\
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
deleted file mode 100644
index 42b233b..0000000
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * same structure to x86's
- * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
- * For now, define same duplicated definitions.
- */
-
-#ifndef _ASM_IA64__PVCLOCK_ABI_H
-#define _ASM_IA64__PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
-
-/*
- * These structs MUST NOT be changed.
- * They are the ABI between hypervisor and guest OS.
- * KVM is using this.
- *
- * pvclock_vcpu_time_info holds the system time and the tsc timestamp
- * of the last update. So the guest can use the tsc delta to get a
- * more precise system time.  There is one per virtual cpu.
- *
- * pvclock_wall_clock references the point in time when the system
- * time was zero (usually boot time), thus the guest calculates the
- * current wall clock by adding the system time.
- *
- * Protocol for the "version" fields is: hypervisor raises it (making
- * it uneven) before it starts updating the fields and raises it again
- * (making it even) when it is done.  Thus the guest can make sure the
- * time values it got are consistent by checking the version before
- * and after reading them.
- */
-
-struct pvclock_vcpu_time_info {
-	u32   version;
-	u32   pad0;
-	u64   tsc_timestamp;
-	u64   system_time;
-	u32   tsc_to_system_mul;
-	s8    tsc_shift;
-	u8    pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-	u32   version;
-	u32   sec;
-	u32   nsec;
-} __attribute__((__packed__));
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_IA64__PVCLOCK_ABI_H */
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
deleted file mode 100644
index 99503c2..0000000
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef __ASM_IA64_KVM_H
-#define __ASM_IA64_KVM_H
-
-/*
- * kvm structure definitions  for ia64
- *
- * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/* Select x86 specific features in <linux/kvm.h> */
-#define __KVM_HAVE_IOAPIC
-#define __KVM_HAVE_IRQ_LINE
-
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
-#define KVM_IOAPIC_NUM_PINS  48
-
-struct kvm_ioapic_state {
-	__u64 base_address;
-	__u32 ioregsel;
-	__u32 id;
-	__u32 irr;
-	__u32 pad;
-	union {
-		__u64 bits;
-		struct {
-			__u8 vector;
-			__u8 delivery_mode:3;
-			__u8 dest_mode:1;
-			__u8 delivery_status:1;
-			__u8 polarity:1;
-			__u8 remote_irr:1;
-			__u8 trig_mode:1;
-			__u8 mask:1;
-			__u8 reserve:7;
-			__u8 reserved[4];
-			__u8 dest_id;
-		} fields;
-	} redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
-#define KVM_NR_IRQCHIPS          3
-
-#define KVM_CONTEXT_SIZE	8*1024
-
-struct kvm_fpreg {
-	union {
-		unsigned long bits[2];
-		long double __dummy;	/* force 16-byte alignment */
-	} u;
-};
-
-union context {
-	/* 8K size */
-	char	dummy[KVM_CONTEXT_SIZE];
-	struct {
-		unsigned long       psr;
-		unsigned long       pr;
-		unsigned long       caller_unat;
-		unsigned long       pad;
-		unsigned long       gr[32];
-		unsigned long       ar[128];
-		unsigned long       br[8];
-		unsigned long       cr[128];
-		unsigned long       rr[8];
-		unsigned long       ibr[8];
-		unsigned long       dbr[8];
-		unsigned long       pkr[8];
-		struct kvm_fpreg   fr[128];
-	};
-};
-
-struct thash_data {
-	union {
-		struct {
-			unsigned long p    :  1; /* 0 */
-			unsigned long rv1  :  1; /* 1 */
-			unsigned long ma   :  3; /* 2-4 */
-			unsigned long a    :  1; /* 5 */
-			unsigned long d    :  1; /* 6 */
-			unsigned long pl   :  2; /* 7-8 */
-			unsigned long ar   :  3; /* 9-11 */
-			unsigned long ppn  : 38; /* 12-49 */
-			unsigned long rv2  :  2; /* 50-51 */
-			unsigned long ed   :  1; /* 52 */
-			unsigned long ig1  : 11; /* 53-63 */
-		};
-		struct {
-			unsigned long __rv1 : 53;     /* 0-52 */
-			unsigned long contiguous : 1; /*53 */
-			unsigned long tc : 1;         /* 54 TR or TC */
-			unsigned long cl : 1;
-			/* 55 I side or D side cache line */
-			unsigned long len  :  4;      /* 56-59 */
-			unsigned long io  : 1;	/* 60 entry is for io or not */
-			unsigned long nomap : 1;
-			/* 61 entry cann't be inserted into machine TLB.*/
-			unsigned long checked : 1;
-			/* 62 for VTLB/VHPT sanity check */
-			unsigned long invalid : 1;
-			/* 63 invalid entry */
-		};
-		unsigned long page_flags;
-	};                  /* same for VHPT and TLB */
-
-	union {
-		struct {
-			unsigned long rv3  :  2;
-			unsigned long ps   :  6;
-			unsigned long key  : 24;
-			unsigned long rv4  : 32;
-		};
-		unsigned long itir;
-	};
-	union {
-		struct {
-			unsigned long ig2  :  12;
-			unsigned long vpn  :  49;
-			unsigned long vrn  :   3;
-		};
-		unsigned long ifa;
-		unsigned long vadr;
-		struct {
-			unsigned long tag  :  63;
-			unsigned long ti   :  1;
-		};
-		unsigned long etag;
-	};
-	union {
-		struct thash_data *next;
-		unsigned long rid;
-		unsigned long gpaddr;
-	};
-};
-
-#define	NITRS	8
-#define NDTRS	8
-
-struct saved_vpd {
-	unsigned long  vhpi;
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-};
-
-struct kvm_regs {
-	struct saved_vpd vpd;
-	/*Arch-regs*/
-	int mp_state;
-	unsigned long vmm_rr;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-
-	char irq_check;
-	unsigned long saved_itc;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned long timer_pending;
-	unsigned long last_itc;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-
-	union context saved_guest;
-
-	unsigned long reserved[64];	/* for future use */
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-#define KVM_IA64_VCPU_STACK_SHIFT	16
-#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
-
-struct kvm_ia64_vcpu_stack {
-	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
-};
-
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
-/* definition of registers in kvm_run */
-struct kvm_sync_regs {
-};
-
-#endif
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
deleted file mode 100644
index 3d50ea9..0000000
--- a/arch/ia64/kvm/Kconfig
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
-	bool "Virtualization"
-	depends on HAVE_KVM || IA64
-	default y
-	---help---
-	  Say Y here to get to see options for using your Linux host to run other
-	  operating systems inside virtual machines (guests).
-	  This option alone does not add any kernel code.
-
-	  If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
-	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on BROKEN
-	depends on HAVE_KVM && MODULES
-	depends on BROKEN
-	select PREEMPT_NOTIFIERS
-	select ANON_INODES
-	select HAVE_KVM_IRQCHIP
-	select HAVE_KVM_IRQFD
-	select HAVE_KVM_IRQ_ROUTING
-	select KVM_APIC_ARCHITECTURE
-	select KVM_MMIO
-	---help---
-	  Support hosting fully virtualized guest machines using hardware
-	  virtualization extensions.  You will need a fairly recent
-	  processor equipped with virtualization extensions. You will also
-	  need to select one or more of the processor modules below.
-
-	  This module provides access to the hardware capabilities through
-	  a character device node named /dev/kvm.
-
-	  To compile this as a module, choose M here: the module
-	  will be called kvm.
-
-	  If unsure, say N.
-
-config KVM_INTEL
-	tristate "KVM for Intel Itanium 2 processors support"
-	depends on KVM && m
-	---help---
-	  Provides support for KVM on Itanium 2 processors equipped with the VT
-	  extensions.
-
-config KVM_DEVICE_ASSIGNMENT
-	bool "KVM legacy PCI device assignment support"
-	depends on KVM && PCI && IOMMU_API
-	default y
-	---help---
-	  Provide support for legacy PCI device assignment through KVM.  The
-	  kernel now also supports a full featured userspace device driver
-	  framework through VFIO, which supersedes much of this support.
-
-	  If unsure, say Y.
-
-source drivers/vhost/Kconfig
-
-endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
deleted file mode 100644
index 18e45ec..0000000
--- a/arch/ia64/kvm/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#This Make file is to generate asm-offsets.h and build source.
-#
-
-#Generate asm-offsets.h for vmm module build
-offsets-file := asm-offsets.h
-
-always  := $(offsets-file)
-targets := $(offsets-file)
-targets += arch/ia64/kvm/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-	(set -e; \
-	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
-	 echo "#define __ASM_KVM_OFFSETS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Makefile"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
-# We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
-			$(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
-			$(wildcard $(srctree)/include/linux/*.h)
-	$(call if_changed_dep,cc_s_c)
-
-$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
-	$(call cmd,offsets)
-
-FORCE : $(obj)/$(offsets-file)
-
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-KVM := ../../../virt/kvm
-
-common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
-
-ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
-endif
-
-kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
-obj-$(CONFIG_KVM) += kvm.o
-
-CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
-kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o kvm_lib.o
-#Add link memcpy and memset to avoid possible structure assignment error
-kvm-intel-objs += memcpy.o memset.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
deleted file mode 100644
index 9324c87..0000000
--- a/arch/ia64/kvm/asm-offsets.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * asm-offsets.c Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- *
- * Anthony Xu    <anthony.xu@intel.com>
- * Xiantao Zhang <xiantao.zhang@intel.com>
- * Copyright (c) 2007 Intel Corporation  KVM support.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
-
-#include "vcpu.h"
-
-void foo(void)
-{
-	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
-
-	BLANK();
-
-	DEFINE(VMM_VCPU_META_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu,
-				arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_VRR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vrr[0]));
-	DEFINE(VMM_VPD_IRR0_OFFSET,
-			offsetof(struct vpd, irr[0]));
-	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_check));
-	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VPD_VHPI_OFFSET,
-			offsetof(struct vpd, vhpi));
-	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vsa_base));
-	DEFINE(VMM_VCPU_VPD_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VCPU_IRQ_CHECK,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VCPU_TIMER_PENDING,
-			offsetof(struct kvm_vcpu, arch.timer_pending));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_offset));
-	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
-			offsetof(struct kvm_vcpu, arch.last_itc));
-	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
-			offsetof(struct kvm_vcpu, arch.saved_gp));
-
-	BLANK();
-
-	DEFINE(VMM_PT_REGS_B6_OFFSET,
-				offsetof(struct kvm_pt_regs, b6));
-	DEFINE(VMM_PT_REGS_B7_OFFSET,
-				offsetof(struct kvm_pt_regs, b7));
-	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_csd));
-	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ssd));
-	DEFINE(VMM_PT_REGS_R8_OFFSET,
-				offsetof(struct kvm_pt_regs, r8));
-	DEFINE(VMM_PT_REGS_R9_OFFSET,
-				offsetof(struct kvm_pt_regs, r9));
-	DEFINE(VMM_PT_REGS_R10_OFFSET,
-				offsetof(struct kvm_pt_regs, r10));
-	DEFINE(VMM_PT_REGS_R11_OFFSET,
-				offsetof(struct kvm_pt_regs, r11));
-	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ipsr));
-	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_iip));
-	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ifs));
-	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_unat));
-	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_pfs));
-	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rsc));
-	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rnat));
-
-	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_bspstore));
-	DEFINE(VMM_PT_REGS_PR_OFFSET,
-				offsetof(struct kvm_pt_regs, pr));
-	DEFINE(VMM_PT_REGS_B0_OFFSET,
-				offsetof(struct kvm_pt_regs, b0));
-	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
-				offsetof(struct kvm_pt_regs, loadrs));
-	DEFINE(VMM_PT_REGS_R1_OFFSET,
-				offsetof(struct kvm_pt_regs, r1));
-	DEFINE(VMM_PT_REGS_R12_OFFSET,
-				offsetof(struct kvm_pt_regs, r12));
-	DEFINE(VMM_PT_REGS_R13_OFFSET,
-				offsetof(struct kvm_pt_regs, r13));
-	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_fpsr));
-	DEFINE(VMM_PT_REGS_R15_OFFSET,
-				offsetof(struct kvm_pt_regs, r15));
-	DEFINE(VMM_PT_REGS_R14_OFFSET,
-				offsetof(struct kvm_pt_regs, r14));
-	DEFINE(VMM_PT_REGS_R2_OFFSET,
-				offsetof(struct kvm_pt_regs, r2));
-	DEFINE(VMM_PT_REGS_R3_OFFSET,
-				offsetof(struct kvm_pt_regs, r3));
-	DEFINE(VMM_PT_REGS_R16_OFFSET,
-				offsetof(struct kvm_pt_regs, r16));
-	DEFINE(VMM_PT_REGS_R17_OFFSET,
-				offsetof(struct kvm_pt_regs, r17));
-	DEFINE(VMM_PT_REGS_R18_OFFSET,
-				offsetof(struct kvm_pt_regs, r18));
-	DEFINE(VMM_PT_REGS_R19_OFFSET,
-				offsetof(struct kvm_pt_regs, r19));
-	DEFINE(VMM_PT_REGS_R20_OFFSET,
-				offsetof(struct kvm_pt_regs, r20));
-	DEFINE(VMM_PT_REGS_R21_OFFSET,
-				offsetof(struct kvm_pt_regs, r21));
-	DEFINE(VMM_PT_REGS_R22_OFFSET,
-				offsetof(struct kvm_pt_regs, r22));
-	DEFINE(VMM_PT_REGS_R23_OFFSET,
-				offsetof(struct kvm_pt_regs, r23));
-	DEFINE(VMM_PT_REGS_R24_OFFSET,
-				offsetof(struct kvm_pt_regs, r24));
-	DEFINE(VMM_PT_REGS_R25_OFFSET,
-				offsetof(struct kvm_pt_regs, r25));
-	DEFINE(VMM_PT_REGS_R26_OFFSET,
-				offsetof(struct kvm_pt_regs, r26));
-	DEFINE(VMM_PT_REGS_R27_OFFSET,
-				offsetof(struct kvm_pt_regs, r27));
-	DEFINE(VMM_PT_REGS_R28_OFFSET,
-				offsetof(struct kvm_pt_regs, r28));
-	DEFINE(VMM_PT_REGS_R29_OFFSET,
-				offsetof(struct kvm_pt_regs, r29));
-	DEFINE(VMM_PT_REGS_R30_OFFSET,
-				offsetof(struct kvm_pt_regs, r30));
-	DEFINE(VMM_PT_REGS_R31_OFFSET,
-				offsetof(struct kvm_pt_regs, r31));
-	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ccv));
-	DEFINE(VMM_PT_REGS_F6_OFFSET,
-				offsetof(struct kvm_pt_regs, f6));
-	DEFINE(VMM_PT_REGS_F7_OFFSET,
-				offsetof(struct kvm_pt_regs, f7));
-	DEFINE(VMM_PT_REGS_F8_OFFSET,
-				offsetof(struct kvm_pt_regs, f8));
-	DEFINE(VMM_PT_REGS_F9_OFFSET,
-				offsetof(struct kvm_pt_regs, f9));
-	DEFINE(VMM_PT_REGS_F10_OFFSET,
-				offsetof(struct kvm_pt_regs, f10));
-	DEFINE(VMM_PT_REGS_F11_OFFSET,
-				offsetof(struct kvm_pt_regs, f11));
-	DEFINE(VMM_PT_REGS_R4_OFFSET,
-				offsetof(struct kvm_pt_regs, r4));
-	DEFINE(VMM_PT_REGS_R5_OFFSET,
-				offsetof(struct kvm_pt_regs, r5));
-	DEFINE(VMM_PT_REGS_R6_OFFSET,
-				offsetof(struct kvm_pt_regs, r6));
-	DEFINE(VMM_PT_REGS_R7_OFFSET,
-				offsetof(struct kvm_pt_regs, r7));
-	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, eml_unat));
-	DEFINE(VMM_VCPU_IIPA_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_iipa));
-	DEFINE(VMM_VCPU_OPCODE_OFFSET,
-				offsetof(struct kvm_vcpu, arch.opcode));
-	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
-	DEFINE(VMM_VCPU_ISR_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_isr));
-	DEFINE(VMM_PT_REGS_R16_SLOT,
-				(((offsetof(struct kvm_pt_regs, r16)
-				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-				offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
-	BLANK();
-
-	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
-	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.insvc[0]));
-	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
-	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
-
-	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
-	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
-	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
-	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
-	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
-	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
-	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
-	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
-	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
-	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
-	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
-	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
-	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
-	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
-	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
-	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
-	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
-	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
-	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
-	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
-	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
-	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
-	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
-	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
-	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
-	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
-	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
-	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
-	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
-	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
-	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
-	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
-	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
-	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
-	BLANK();
-}
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
deleted file mode 100644
index c0785a7..0000000
--- a/arch/ia64/kvm/irq.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * irq.h: In-kernel interrupt controller related definitions
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Authors:
- *   Xiantao Zhang <xiantao.zhang@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "lapic.h"
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-
-#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
deleted file mode 100644
index dbe46f4..0000000
--- a/arch/ia64/kvm/kvm-ia64.c
+++ /dev/null
@@ -1,1942 +0,0 @@
-/*
- * kvm_ia64.c: Basic KVM support On Itanium series processors
- *
- *
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/bitops.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/gcc_intrin.h>
-#include <asm/pal.h>
-#include <asm/cacheflush.h>
-#include <asm/div64.h>
-#include <asm/tlb.h>
-#include <asm/elf.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "misc.h"
-#include "vti.h"
-#include "iodev.h"
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-static unsigned long kvm_vmm_base;
-static unsigned long kvm_vsa_base;
-static unsigned long kvm_vm_buffer;
-static unsigned long kvm_vm_buffer_size;
-unsigned long kvm_vmm_gp;
-
-static long vp_env_info;
-
-static struct kvm_vmm_info *kvm_vmm_info;
-
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ NULL }
-};
-
-static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (vcpu->kvm->arch.is_sn2)
-		return rtc_time();
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-static void kvm_flush_icache(unsigned long start, unsigned long len)
-{
-	int l;
-
-	for (l = 0; l < (len + 32); l += 32)
-		ia64_fc((void *)(start + l));
-
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void kvm_flush_tlb_all(void)
-{
-	unsigned long i, j, count0, count1, stride0, stride1, addr;
-	long flags;
-
-	addr    = local_cpu_data->ptce_base;
-	count0  = local_cpu_data->ptce_count[0];
-	count1  = local_cpu_data->ptce_count[1];
-	stride0 = local_cpu_data->ptce_stride[0];
-	stride1 = local_cpu_data->ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();			/* srlz.i implies srlz.d */
-}
-
-long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
-			(u64)opt_handler);
-
-	return iprv.status;
-}
-
-static  DEFINE_SPINLOCK(vp_lock);
-
-int kvm_arch_hardware_enable(void)
-{
-	long  status;
-	long  tmp_base;
-	unsigned long pte;
-	unsigned long saved_psr;
-	int slot;
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return -EINVAL;
-
-	spin_lock(&vp_lock);
-	status = ia64_pal_vp_init_env(kvm_vsa_base ?
-				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
-			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
-	if (status != 0) {
-		spin_unlock(&vp_lock);
-		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return -EINVAL;
-	}
-
-	if (!kvm_vsa_base) {
-		kvm_vsa_base = tmp_base;
-		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
-	}
-	spin_unlock(&vp_lock);
-	ia64_ptr_entry(0x3, slot);
-
-	return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-
-	long status;
-	int slot;
-	unsigned long pte;
-	unsigned long saved_psr;
-	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-				PAGE_KERNEL));
-
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return;
-
-	status = ia64_pal_vp_exit_env(host_iva);
-	if (status)
-		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
-				status);
-	ia64_ptr_entry(0x3, slot);
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-	*(int *)rtn = 0;
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-
-	int r;
-
-	switch (ext) {
-	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_MP_STATE:
-	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
-		r = 1;
-		break;
-	case KVM_CAP_COALESCED_MMIO:
-		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-		break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-	case KVM_CAP_IOMMU:
-		r = iommu_present(&pci_bus_type);
-		break;
-#endif
-	default:
-		r = 0;
-	}
-	return r;
-
-}
-
-static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	kvm_run->hw.hardware_exit_reason = 1;
-	return 0;
-}
-
-static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct kvm_mmio_req *p;
-	struct kvm_io_device *mmio_dev;
-	int r;
-
-	p = kvm_get_vcpu_ioreq(vcpu);
-
-	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
-		goto mmio;
-	vcpu->mmio_needed = 1;
-	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
-	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
-	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
-
-	if (vcpu->mmio_is_write)
-		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
-	memcpy(kvm_run->mmio.data, &p->data, p->size);
-	kvm_run->exit_reason = KVM_EXIT_MMIO;
-	return 0;
-mmio:
-	if (p->dir)
-		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				    p->size, &p->data);
-	else
-		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				     p->size, &p->data);
-	if (r)
-		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
-	p->state = STATE_IORESP_READY;
-
-	return 1;
-}
-
-static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		return kvm_pal_emul(vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 2;
-		return 0;
-	}
-}
-
-static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		kvm_sal_emul(vcpu);
-		return 1;
-	} else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 3;
-		return 0;
-	}
-
-}
-
-static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vector, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- *  offset: address offset to IPI space.
- *  value:  deliver value.
- */
-static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
-				uint64_t vector)
-{
-	switch (dm) {
-	case SAPIC_FIXED:
-		break;
-	case SAPIC_NMI:
-		vector = 2;
-		break;
-	case SAPIC_EXTINT:
-		vector = 0;
-		break;
-	case SAPIC_INIT:
-	case SAPIC_PMI:
-	default:
-		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		return;
-	}
-	__apic_accept_irq(vcpu, vector);
-}
-
-static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
-			unsigned long eid)
-{
-	union ia64_lid lid;
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		lid.val = VCPU_LID(vcpu);
-		if (lid.id == id && lid.eid == eid)
-			return vcpu;
-	}
-
-	return NULL;
-}
-
-static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm_vcpu *target_vcpu;
-	struct kvm_pt_regs *regs;
-	union ia64_ipi_a addr = p->u.ipi_data.addr;
-	union ia64_ipi_d data = p->u.ipi_data.data;
-
-	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
-	if (!target_vcpu)
-		return handle_vm_error(vcpu, kvm_run);
-
-	if (!target_vcpu->arch.launched) {
-		regs = vcpu_regs(target_vcpu);
-
-		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
-		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
-
-		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		if (waitqueue_active(&target_vcpu->wq))
-			wake_up_interruptible(&target_vcpu->wq);
-	} else {
-		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
-		if (target_vcpu != vcpu)
-			kvm_vcpu_kick(target_vcpu);
-	}
-
-	return 1;
-}
-
-struct call_data {
-	struct kvm_ptc_g ptc_g_data;
-	struct kvm_vcpu *vcpu;
-};
-
-static void vcpu_global_purge(void *info)
-{
-	struct call_data *p = (struct call_data *)info;
-	struct kvm_vcpu *vcpu = p->vcpu;
-
-	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-		return;
-
-	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
-	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
-		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
-							p->ptc_g_data;
-	} else {
-		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
-		vcpu->arch.ptc_g_count = 0;
-		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-	}
-}
-
-static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm *kvm = vcpu->kvm;
-	struct call_data call_data;
-	int i;
-	struct kvm_vcpu *vcpui;
-
-	call_data.ptc_g_data = p->u.ptc_g_data;
-
-	kvm_for_each_vcpu(i, vcpui, kvm) {
-		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
-				vcpu == vcpui)
-			continue;
-
-		if (waitqueue_active(&vcpui->wq))
-			wake_up_interruptible(&vcpui->wq);
-
-		if (vcpui->cpu != -1) {
-			call_data.vcpu = vcpui;
-			smp_call_function_single(vcpui->cpu,
-					vcpu_global_purge, &call_data, 1);
-		} else
-			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
-
-	}
-	return 1;
-}
-
-static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte, rtc_phys_addr, map_addr;
-	int slot;
-
-	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
-	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
-	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
-	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
-	vcpu->arch.sn_rtc_tr_slot = slot;
-	if (slot < 0) {
-		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
-		slot = 0;
-	}
-	return slot;
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-
-	ktime_t kt;
-	long itc_diff;
-	unsigned long vcpu_now_itc;
-	unsigned long expires;
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (irqchip_in_kernel(vcpu->kvm)) {
-
-		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
-
-		if (time_after(vcpu_now_itc, vpd->itm)) {
-			vcpu->arch.timer_check = 1;
-			return 1;
-		}
-		itc_diff = vpd->itm - vcpu_now_itc;
-		if (itc_diff < 0)
-			itc_diff = -itc_diff;
-
-		expires = div64_u64(itc_diff, cyc_per_usec);
-		kt = ktime_set(0, 1000 * expires);
-
-		vcpu->arch.ht_active = 1;
-		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		kvm_vcpu_block(vcpu);
-		hrtimer_cancel(p_ht);
-		vcpu->arch.ht_active = 0;
-
-		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
-				kvm_cpu_has_pending_timer(vcpu))
-			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
-		return 1;
-	} else {
-		printk(KERN_ERR"kvm: Unsupported userspace halt!");
-		return 0;
-	}
-}
-
-static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
-	return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-				struct kvm_run *kvm_run)
-{
-	printk("VMM: %s", vcpu->arch.log_buf);
-	return 1;
-}
-
-static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run) = {
-	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
-	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
-	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
-	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
-	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
-	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
-	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
-	[EXIT_REASON_IPI]		    = handle_ipi,
-	[EXIT_REASON_PTC_G]		    = handle_global_purge,
-	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
-
-};
-
-static const int kvm_vti_max_exit_handlers =
-		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_exit_data;
-
-	p_exit_data = kvm_get_exit_data(vcpu);
-	return p_exit_data->exit_reason;
-}
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-	u32 exit_reason = kvm_get_exit_reason(vcpu);
-	vcpu->arch.last_exit = exit_reason;
-
-	if (exit_reason < kvm_vti_max_exit_handlers
-			&& kvm_vti_exit_handlers[exit_reason])
-		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_reason;
-	}
-	return 0;
-}
-
-static inline void vti_set_rr6(unsigned long rr6)
-{
-	ia64_set_rr(RR6, rr6);
-	ia64_srlz_i();
-}
-
-static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte;
-	struct kvm *kvm = vcpu->kvm;
-	int r;
-
-	/*Insert a pair of tr to map vmm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vmm_tr_slot = r;
-	/*Insert a pairt of tr to map data of vm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
-					pte, KVM_VM_DATA_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vm_tr_slot = r;
-
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2) {
-		r = kvm_sn2_setup_mappings(vcpu);
-		if (r < 0)
-			goto out;
-	}
-#endif
-
-	r = 0;
-out:
-	return r;
-}
-
-static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
-	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2)
-		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
-#endif
-}
-
-static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-	int r;
-	int cpu = smp_processor_id();
-
-	if (vcpu->arch.last_run_cpu != cpu ||
-			per_cpu(last_vcpu, cpu) != vcpu) {
-		per_cpu(last_vcpu, cpu) = vcpu;
-		vcpu->arch.last_run_cpu = cpu;
-		kvm_flush_tlb_all();
-	}
-
-	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
-	vti_set_rr6(vcpu->arch.vmm_rr);
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	return r;
-}
-
-static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-{
-	kvm_purge_vmm_mapping(vcpu);
-	vti_set_rr6(vcpu->arch.host_rr6);
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	union context *host_ctx, *guest_ctx;
-	int r, idx;
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-again:
-	if (signal_pending(current)) {
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		goto out;
-	}
-
-	preempt_disable();
-	local_irq_disable();
-
-	/*Get host and guest context with guest address space.*/
-	host_ctx = kvm_get_host_context(vcpu);
-	guest_ctx = kvm_get_guest_context(vcpu);
-
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
-	r = kvm_vcpu_pre_transition(vcpu);
-	if (r < 0)
-		goto vcpu_run_fail;
-
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	vcpu->mode = IN_GUEST_MODE;
-	kvm_guest_enter();
-
-	/*
-	 * Transition to the guest
-	 */
-	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-
-	kvm_vcpu_post_transition(vcpu);
-
-	vcpu->arch.launched = 1;
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
-	local_irq_enable();
-
-	/*
-	 * We must have an instruction between local_irq_enable() and
-	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
-	 * the interrupt shadow.  The stat.exits increment will do nicely.
-	 * But we need to prevent reordering, hence this barrier():
-	 */
-	barrier();
-	kvm_guest_exit();
-	vcpu->mode = OUTSIDE_GUEST_MODE;
-	preempt_enable();
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-	r = kvm_handle_exit(kvm_run, vcpu);
-
-	if (r > 0) {
-		if (!need_resched())
-			goto again;
-	}
-
-out:
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	if (r > 0) {
-		cond_resched();
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		goto again;
-	}
-
-	return r;
-
-vcpu_run_fail:
-	local_irq_enable();
-	preempt_enable();
-	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-	goto out;
-}
-
-static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
-
-	if (!vcpu->mmio_is_write)
-		memcpy(&p->data, vcpu->arch.mmio_data, 8);
-	p->state = STATE_IORESP_READY;
-}
-
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	int r;
-	sigset_t sigsaved;
-
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-		kvm_vcpu_block(vcpu);
-		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-		r = -EAGAIN;
-		goto out;
-	}
-
-	if (vcpu->mmio_needed) {
-		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
-		kvm_set_mmio_data(vcpu);
-		vcpu->mmio_read_completed = 1;
-		vcpu->mmio_needed = 0;
-	}
-	r = __vcpu_run(vcpu, kvm_run);
-out:
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return r;
-}
-
-struct kvm *kvm_arch_alloc_vm(void)
-{
-
-	struct kvm *kvm;
-	uint64_t  vm_base;
-
-	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
-	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
-
-	if (!vm_base)
-		return NULL;
-
-	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-	kvm = (struct kvm *)(vm_base +
-			offsetof(struct kvm_vm_data, kvm_vm_struct));
-	kvm->arch.vm_base = vm_base;
-	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
-
-	return kvm;
-}
-
-struct kvm_ia64_io_range {
-	unsigned long start;
-	unsigned long size;
-	unsigned long type;
-};
-
-static const struct kvm_ia64_io_range io_ranges[] = {
-	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
-	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
-	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
-	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
-	{PIB_START, PIB_SIZE, GPFN_PIB},
-};
-
-static void kvm_build_io_pmt(struct kvm *kvm)
-{
-	unsigned long i, j;
-
-	/* Mark I/O ranges */
-	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
-							i++) {
-		for (j = io_ranges[i].start;
-				j < io_ranges[i].start + io_ranges[i].size;
-				j += PAGE_SIZE)
-			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
-					io_ranges[i].type, 0);
-	}
-
-}
-
-/*Use unused rids to virtualize guest rid.*/
-#define GUEST_PHYSICAL_RR0	0x1739
-#define GUEST_PHYSICAL_RR4	0x2739
-#define VMM_INIT_RR		0x1660
-
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-	BUG_ON(!kvm);
-
-	if (type)
-		return -EINVAL;
-
-	kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
-	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
-	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
-	kvm->arch.vmm_init_rr = VMM_INIT_RR;
-
-	/*
-	 *Fill P2M entries for MMIO/IO ranges
-	 */
-	kvm_build_io_pmt(kvm);
-
-	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
-
-	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-
-	return 0;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
-					struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		vpd->vgr[i] = regs->vpd.vgr[i];
-		vpd->vbgr[i] = regs->vpd.vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		vpd->vcr[i] = regs->vpd.vcr[i];
-	vpd->vhpi = regs->vpd.vhpi;
-	vpd->vnat = regs->vpd.vnat;
-	vpd->vbnat = regs->vpd.vbnat;
-	vpd->vpsr = regs->vpd.vpsr;
-
-	vpd->vpr = regs->vpd.vpr;
-
-	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
-
-	RESTORE_REGS(mp_state);
-	RESTORE_REGS(vmm_rr);
-	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
-	RESTORE_REGS(itr_regions);
-	RESTORE_REGS(dtr_regions);
-	RESTORE_REGS(tc_regions);
-	RESTORE_REGS(irq_check);
-	RESTORE_REGS(itc_check);
-	RESTORE_REGS(timer_check);
-	RESTORE_REGS(timer_pending);
-	RESTORE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		vcpu->arch.vrr[i] = regs->vrr[i];
-		vcpu->arch.ibr[i] = regs->ibr[i];
-		vcpu->arch.dbr[i] = regs->dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		vcpu->arch.insvc[i] = regs->insvc[i];
-	RESTORE_REGS(xtp);
-	RESTORE_REGS(metaphysical_rr0);
-	RESTORE_REGS(metaphysical_rr4);
-	RESTORE_REGS(metaphysical_saved_rr0);
-	RESTORE_REGS(metaphysical_saved_rr4);
-	RESTORE_REGS(fp_psr);
-	RESTORE_REGS(saved_gp);
-
-	vcpu->arch.irq_new_pending = 1;
-	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
-	set_bit(KVM_REQ_RESUME, &vcpu->requests);
-
-	return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-		bool line_status)
-{
-	if (!irqchip_in_kernel(kvm))
-		return -ENXIO;
-
-	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level,
-					line_status);
-	return 0;
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
-{
-	struct kvm *kvm = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	int r = -ENOTTY;
-
-	switch (ioctl) {
-	case KVM_CREATE_IRQCHIP:
-		r = -EFAULT;
-		r = kvm_ioapic_init(kvm);
-		if (r)
-			goto out;
-		r = kvm_setup_default_irq_routing(kvm);
-		if (r) {
-			mutex_lock(&kvm->slots_lock);
-			kvm_ioapic_destroy(kvm);
-			mutex_unlock(&kvm->slots_lock);
-			goto out;
-		}
-		break;
-	case KVM_GET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = -EFAULT;
-		if (copy_to_user(argp, &chip, sizeof chip))
-				goto out;
-		r = 0;
-		break;
-		}
-	case KVM_SET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = 0;
-		break;
-		}
-	default:
-		;
-	}
-out:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-
-}
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-		struct kvm_translation *tr)
-{
-
-	return -EINVAL;
-}
-
-static int kvm_alloc_vmm_area(void)
-{
-	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
-		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
-				get_order(KVM_VMM_SIZE));
-		if (!kvm_vmm_base)
-			return -ENOMEM;
-
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
-
-		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
-				kvm_vmm_base, kvm_vm_buffer);
-	}
-
-	return 0;
-}
-
-static void kvm_free_vmm_area(void)
-{
-	if (kvm_vmm_base) {
-		/*Zero this area before free to avoid bits leak!!*/
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
-		kvm_vmm_base  = 0;
-		kvm_vm_buffer = 0;
-		kvm_vsa_base = 0;
-	}
-}
-
-static int vti_init_vpd(struct kvm_vcpu *vcpu)
-{
-	int i;
-	union cpuid3_t cpuid3;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (IS_ERR(vpd))
-		return PTR_ERR(vpd);
-
-	/* CPUID init */
-	for (i = 0; i < 5; i++)
-		vpd->vcpuid[i] = ia64_get_cpuid(i);
-
-	/* Limit the CPUID number to 5 */
-	cpuid3.value = vpd->vcpuid[3];
-	cpuid3.number = 4;	/* 5 - 1 */
-	vpd->vcpuid[3] = cpuid3.value;
-
-	/*Set vac and vdc fields*/
-	vpd->vac.a_from_int_cr = 1;
-	vpd->vac.a_to_int_cr = 1;
-	vpd->vac.a_from_psr = 1;
-	vpd->vac.a_from_cpuid = 1;
-	vpd->vac.a_cover = 1;
-	vpd->vac.a_bsw = 1;
-	vpd->vac.a_int = 1;
-	vpd->vdc.d_vmsw = 1;
-
-	/*Set virtual buffer*/
-	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
-
-	return 0;
-}
-
-static int vti_create_vp(struct kvm_vcpu *vcpu)
-{
-	long ret;
-	struct vpd *vpd = vcpu->arch.vpd;
-	unsigned long  vmm_ivt;
-
-	vmm_ivt = kvm_vmm_info->vmm_ivt;
-
-	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
-
-	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
-
-	if (ret) {
-		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void init_ptce_info(struct kvm_vcpu *vcpu)
-{
-	ia64_ptce_info_t ptce = {0};
-
-	ia64_get_ptce(&ptce);
-	vcpu->arch.ptce_base = ptce.base;
-	vcpu->arch.ptce_count[0] = ptce.count[0];
-	vcpu->arch.ptce_count[1] = ptce.count[1];
-	vcpu->arch.ptce_stride[0] = ptce.stride[0];
-	vcpu->arch.ptce_stride[1] = ptce.stride[1];
-}
-
-static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
-{
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-
-	if (hrtimer_cancel(p_ht))
-		hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
-}
-
-static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
-{
-	struct kvm_vcpu *vcpu;
-	wait_queue_head_t *q;
-
-	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
-	q = &vcpu->wq;
-
-	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
-		goto out;
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-out:
-	vcpu->arch.timer_fired = 1;
-	vcpu->arch.timer_check = 1;
-	return HRTIMER_NORESTART;
-}
-
-#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
-
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct kvm_vcpu *v;
-	int r;
-	int i;
-	long itc_offset;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	union context *p_ctx = &vcpu->arch.guest;
-	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
-
-	/*Init vcpu context for first run.*/
-	if (IS_ERR(vmm_vcpu))
-		return PTR_ERR(vmm_vcpu);
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		/*Set entry address for first run.*/
-		regs->cr_iip = PALE_RESET_ENTRY;
-
-		/*Initialize itc offset for vcpus*/
-		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			v->arch.itc_offset = itc_offset;
-			v->arch.last_itc = 0;
-		}
-	} else
-		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
-	r = -ENOMEM;
-	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
-	if (!vcpu->arch.apic)
-		goto out;
-	vcpu->arch.apic->vcpu = vcpu;
-
-	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
-	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
-	p_ctx->psr = 0x1008522000UL;
-	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
-	p_ctx->caller_unat = 0;
-	p_ctx->pr = 0x0;
-	p_ctx->ar[36] = 0x0; /*unat*/
-	p_ctx->ar[19] = 0x0; /*rnat*/
-	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
-				((sizeof(struct kvm_vcpu)+15) & ~15);
-	p_ctx->ar[64] = 0x0; /*pfs*/
-	p_ctx->cr[0] = 0x7e04UL;
-	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
-	p_ctx->cr[8] = 0x3c;
-
-	/*Initialize region register*/
-	p_ctx->rr[0] = 0x30;
-	p_ctx->rr[1] = 0x30;
-	p_ctx->rr[2] = 0x30;
-	p_ctx->rr[3] = 0x30;
-	p_ctx->rr[4] = 0x30;
-	p_ctx->rr[5] = 0x30;
-	p_ctx->rr[7] = 0x30;
-
-	/*Initialize branch register 0*/
-	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
-
-	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
-	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
-	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
-
-	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	vcpu->arch.hlt_timer.function = hlt_timer_fn;
-
-	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
-	vcpu->arch.vsa_base = kvm_vsa_base;
-	vcpu->arch.__gp = kvm_vmm_gp;
-	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
-	init_ptce_info(vcpu);
-
-	r = 0;
-out:
-	return r;
-}
-
-static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
-{
-	unsigned long psr;
-	int r;
-
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
-	if (r)
-		goto fail;
-
-	r = vti_init_vpd(vcpu);
-	if (r) {
-		printk(KERN_DEBUG"kvm: vpd init error!!\n");
-		goto uninit;
-	}
-
-	r = vti_create_vp(vcpu);
-	if (r)
-		goto uninit;
-
-	kvm_purge_vmm_mapping(vcpu);
-
-	return 0;
-uninit:
-	kvm_vcpu_uninit(vcpu);
-fail:
-	return r;
-}
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-		unsigned int id)
-{
-	struct kvm_vcpu *vcpu;
-	unsigned long vm_base = kvm->arch.vm_base;
-	int r;
-	int cpu;
-
-	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-	r = -EINVAL;
-	if (id >= KVM_MAX_VCPUS) {
-		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-				KVM_MAX_VCPUS);
-		goto fail;
-	}
-
-	r = -ENOMEM;
-	if (!vm_base) {
-		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
-		goto fail;
-	}
-	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-					vcpu_data[id].vcpu_struct));
-	vcpu->kvm = kvm;
-
-	cpu = get_cpu();
-	r = vti_vcpu_setup(vcpu, id);
-	put_cpu();
-
-	if (r) {
-		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
-		goto fail;
-	}
-
-	return vcpu;
-fail:
-	return ERR_PTR(r);
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-	unsigned long vm_base = kvm->arch.vm_base;
-
-	if (vm_base) {
-		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
-	}
-
-}
-
-static void kvm_release_vm_pages(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int j;
-
-	slots = kvm_memslots(kvm);
-	kvm_for_each_memslot(memslot, slots) {
-		for (j = 0; j < memslot->npages; j++) {
-			if (memslot->rmap[j])
-				put_page((struct page *)memslot->rmap[j]);
-		}
-	}
-}
-
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-	kvm_iommu_unmap_guest(kvm);
-	kvm_free_all_assigned_devices(kvm);
-	kfree(kvm->arch.vioapic);
-	kvm_release_vm_pages(kvm);
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-	if (cpu != vcpu->cpu) {
-		vcpu->cpu = cpu;
-		if (vcpu->arch.ht_active)
-			kvm_migrate_hlt_timer(vcpu);
-	}
-}
-
-#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	vcpu_load(vcpu);
-
-	for (i = 0; i < 16; i++) {
-		regs->vpd.vgr[i] = vpd->vgr[i];
-		regs->vpd.vbgr[i] = vpd->vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		regs->vpd.vcr[i] = vpd->vcr[i];
-	regs->vpd.vhpi = vpd->vhpi;
-	regs->vpd.vnat = vpd->vnat;
-	regs->vpd.vbnat = vpd->vbnat;
-	regs->vpd.vpsr = vpd->vpsr;
-	regs->vpd.vpr = vpd->vpr;
-
-	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
-	SAVE_REGS(mp_state);
-	SAVE_REGS(vmm_rr);
-	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
-	SAVE_REGS(itr_regions);
-	SAVE_REGS(dtr_regions);
-	SAVE_REGS(tc_regions);
-	SAVE_REGS(irq_check);
-	SAVE_REGS(itc_check);
-	SAVE_REGS(timer_check);
-	SAVE_REGS(timer_pending);
-	SAVE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		regs->vrr[i] = vcpu->arch.vrr[i];
-		regs->ibr[i] = vcpu->arch.ibr[i];
-		regs->dbr[i] = vcpu->arch.dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		regs->insvc[i] = vcpu->arch.insvc[i];
-	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
-	SAVE_REGS(xtp);
-	SAVE_REGS(metaphysical_rr0);
-	SAVE_REGS(metaphysical_rr4);
-	SAVE_REGS(metaphysical_saved_rr0);
-	SAVE_REGS(metaphysical_saved_rr4);
-	SAVE_REGS(fp_psr);
-	SAVE_REGS(saved_gp);
-
-	vcpu_put(vcpu);
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
-	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
-
-	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
-	return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-
-	hrtimer_cancel(&vcpu->arch.hlt_timer);
-	kfree(vcpu->arch.apic);
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-			 unsigned int ioctl, unsigned long arg)
-{
-	struct kvm_vcpu *vcpu = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	struct kvm_ia64_vcpu_stack *stack = NULL;
-	long r;
-
-	switch (ioctl) {
-	case KVM_IA64_VCPU_GET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_WRITE, user_stack,
-			       sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
-			       "Illegal user destination address for stack\n");
-			goto out;
-		}
-		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-
-		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
-		if (r)
-			goto out;
-
-		if (copy_to_user(user_stack, stack,
-				 sizeof(struct kvm_ia64_vcpu_stack))) {
-			r = -EFAULT;
-			goto out;
-		}
-
-		break;
-	}
-	case KVM_IA64_VCPU_SET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_READ, user_stack,
-			    sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
-			       "Illegal user address for stack\n");
-			goto out;
-		}
-		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-		if (copy_from_user(stack, user_stack,
-				   sizeof(struct kvm_ia64_vcpu_stack)))
-			goto out;
-
-		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
-		break;
-	}
-
-	default:
-		r = -EINVAL;
-	}
-
-out:
-	kfree(stack);
-	return r;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-	return VM_FAULT_SIGBUS;
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-		struct kvm_memory_slot *memslot,
-		struct kvm_userspace_memory_region *mem,
-		enum kvm_mr_change change)
-{
-	unsigned long i;
-	unsigned long pfn;
-	int npages = memslot->npages;
-	unsigned long base_gfn = memslot->base_gfn;
-
-	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-		return -ENOMEM;
-
-	for (i = 0; i < npages; i++) {
-		pfn = gfn_to_pfn(kvm, base_gfn + i);
-		if (!kvm_is_reserved_pfn(pfn)) {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					pfn << PAGE_SHIFT,
-				_PAGE_AR_RWX | _PAGE_MA_WB);
-			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
-		} else {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
-					_PAGE_MA_UC);
-			memslot->rmap[i] = 0;
-			}
-	}
-
-	return 0;
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-	kvm_arch_flush_shadow_all();
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-			unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	kvm_vcpu_uninit(vcpu);
-}
-
-static int vti_cpu_has_kvm_support(void)
-{
-	long  avail = 1, status = 1, control = 1;
-	long ret;
-
-	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
-	if (ret)
-		goto out;
-
-	if (!(avail & PAL_PROC_VM_BIT))
-		goto out;
-
-	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
-
-	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
-	if (ret)
-		goto out;
-	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
-
-	if (!(vp_env_info & VP_OPCODE)) {
-		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
-				"vm_env_info:0x%lx\n", vp_env_info);
-	}
-
-	return 1;
-out:
-	return 0;
-}
-
-
-/*
- * On SN2, the ITC isn't stable, so copy in fast path code to use the
- * SN2 RTC, replacing the ITC based default verion.
- */
-static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
-			  struct module *module)
-{
-	unsigned long new_ar, new_ar_sn2;
-	unsigned long module_base;
-
-	if (!ia64_platform_is("sn2"))
-		return;
-
-	module_base = (unsigned long)module->module_core;
-
-	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
-	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
-
-	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
-	       "as source\n");
-
-	/*
-	 * Copy the SN2 version of mov_ar into place. They are both
-	 * the same size, so 6 bundles is sufficient (6 * 0x10).
-	 */
-	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
-}
-
-static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-			    struct module *module)
-{
-	unsigned long module_base;
-	unsigned long vmm_size;
-
-	unsigned long vmm_offset, func_offset, fdesc_offset;
-	struct fdesc *p_fdesc;
-
-	BUG_ON(!module);
-
-	if (!kvm_vmm_base) {
-		printk("kvm: kvm area hasn't been initialized yet!!\n");
-		return -EFAULT;
-	}
-
-	/*Calculate new position of relocated vmm module.*/
-	module_base = (unsigned long)module->module_core;
-	vmm_size = module->core_size;
-	if (unlikely(vmm_size > KVM_VMM_SIZE))
-		return -EFAULT;
-
-	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
-	kvm_patch_vmm(vmm_info, module);
-	kvm_flush_icache(kvm_vmm_base, vmm_size);
-
-	/*Recalculate kvm_vmm_info based on new VMM*/
-	vmm_offset = vmm_info->vmm_ivt - module_base;
-	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
-	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
-			kvm_vmm_info->vmm_ivt);
-
-	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
-	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
-							fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
-			KVM_VMM_BASE+func_offset);
-
-	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
-	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
-			fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
-
-	kvm_vmm_gp = p_fdesc->gp;
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
-						kvm_vmm_info->vmm_entry);
-	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
-						KVM_VMM_BASE + func_offset);
-
-	return 0;
-}
-
-int kvm_arch_init(void *opaque)
-{
-	int r;
-	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
-
-	if (!vti_cpu_has_kvm_support()) {
-		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
-		r = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (kvm_vmm_info) {
-		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
-		r = -EEXIST;
-		goto out;
-	}
-
-	r = -ENOMEM;
-	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
-	if (!kvm_vmm_info)
-		goto out;
-
-	if (kvm_alloc_vmm_area())
-		goto out_free0;
-
-	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
-	if (r)
-		goto out_free1;
-
-	return 0;
-
-out_free1:
-	kvm_free_vmm_area();
-out_free0:
-	kfree(kvm_vmm_info);
-out:
-	return r;
-}
-
-void kvm_arch_exit(void)
-{
-	kvm_free_vmm_area();
-	kfree(kvm_vmm_info);
-	kvm_vmm_info = NULL;
-}
-
-static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
-				    struct kvm_memory_slot *memslot)
-{
-	int i;
-	long base;
-	unsigned long n;
-	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-	base = memslot->base_gfn / BITS_PER_LONG;
-
-	spin_lock(&kvm->arch.dirty_log_lock);
-	for (i = 0; i < n/sizeof(long); ++i) {
-		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
-		dirty_bitmap[base + i] = 0;
-	}
-	spin_unlock(&kvm->arch.dirty_log_lock);
-}
-
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-		struct kvm_dirty_log *log)
-{
-	int r;
-	unsigned long n;
-	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
-		goto out;
-
-	kvm_ia64_sync_dirty_log(kvm, memslot);
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
-		goto out;
-
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
-		kvm_flush_remote_tlbs(kvm);
-		n = kvm_dirty_bitmap_bytes(memslot);
-		memset(memslot->dirty_bitmap, 0, n);
-	}
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-	return 0;
-}
-
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
-{
-	return __apic_accept_irq(vcpu, irq->vector);
-}
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
-{
-	return apic->vcpu->vcpu_id == dest;
-}
-
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
-{
-	return 0;
-}
-
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
-	return vcpu1->arch.xtp - vcpu2->arch.xtp;
-}
-
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode)
-{
-	struct kvm_lapic *target = vcpu->arch.apic;
-	return (dest_mode == 0) ?
-		kvm_apic_match_physical_addr(target, dest) :
-		kvm_apic_match_logical_addr(target, dest);
-}
-
-static int find_highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-
-	return -1;
-}
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-    if (vpd->irr[0] & (1UL << NMI_VECTOR))
-		return NMI_VECTOR;
-    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-    return find_highest_bits((int *)&vpd->irr[0]);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.timer_fired;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
-	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
-		(kvm_highest_pending_irq(vcpu) != -1);
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-	return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	mp_state->mp_state = vcpu->arch.mp_state;
-	return 0;
-}
-
-static int vcpu_reset(struct kvm_vcpu *vcpu)
-{
-	int r;
-	long psr;
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-
-	vcpu->arch.launched = 0;
-	kvm_arch_vcpu_uninit(vcpu);
-	r = kvm_arch_vcpu_init(vcpu);
-	if (r)
-		goto fail;
-
-	kvm_purge_vmm_mapping(vcpu);
-	r = 0;
-fail:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	int r = 0;
-
-	vcpu->arch.mp_state = mp_state->mp_state;
-	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
-		r = vcpu_reset(vcpu);
-	return r;
-}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
deleted file mode 100644
index cb548ee..0000000
--- a/arch/ia64/kvm/kvm_fw.c
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@intel.com>
- * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
- * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/smp.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "vti.h"
-#include "misc.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/tlb.h>
-
-/*
- * Handy macros to make sure that the PAL return values start out
- * as something meaningful.
- */
-#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
-	{						\
-		x.status = PAL_STATUS_UNIMPLEMENTED;	\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-	}
-
-#define INIT_PAL_STATUS_SUCCESS(x)			\
-	{						\
-		x.status = PAL_STATUS_SUCCESS;		\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-    }
-
-static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
-		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
-	struct exit_ctl_data *p;
-
-	if (vcpu) {
-		p = &vcpu->arch.exit_data;
-		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-			*gr28 = p->u.pal_data.gr28;
-			*gr29 = p->u.pal_data.gr29;
-			*gr30 = p->u.pal_data.gr30;
-			*gr31 = p->u.pal_data.gr31;
-			return ;
-		}
-	}
-	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
-}
-
-static void set_pal_result(struct kvm_vcpu *vcpu,
-		struct ia64_pal_retval result) {
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		p->u.pal_data.ret = result;
-		return ;
-	}
-	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
-}
-
-static void set_sal_result(struct kvm_vcpu *vcpu,
-		struct sal_ret_values result) {
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		p->u.sal_data.ret = result;
-		return ;
-	}
-	printk(KERN_WARNING"Failed to set sal result!!\n");
-}
-
-struct cache_flush_args {
-	u64 cache_type;
-	u64 operation;
-	u64 progress;
-	long status;
-};
-
-cpumask_t cpu_cache_coherent_map;
-
-static void remote_pal_cache_flush(void *data)
-{
-	struct cache_flush_args *args = data;
-	long status;
-	u64 progress = args->progress;
-
-	status = ia64_pal_cache_flush(args->cache_type, args->operation,
-					&progress, NULL);
-	if (status != 0)
-	args->status = status;
-}
-
-static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
-{
-	u64 gr28, gr29, gr30, gr31;
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	struct cache_flush_args args = {0, 0, 0, 0};
-	long psr;
-
-	gr28 = gr29 = gr30 = gr31 = 0;
-	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
-
-	if (gr31 != 0)
-		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
-
-	/* Always call Host Pal in int=1 */
-	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
-	args.cache_type = gr29;
-	args.operation = gr30;
-	smp_call_function(remote_pal_cache_flush,
-				(void *)&args, 1);
-	if (args.status != 0)
-		printk(KERN_ERR"pal_cache_flush error!,"
-				"status:0x%lx\n", args.status);
-	/*
-	 * Call Host PAL cache flush
-	 * Clear psr.ic when call PAL_CACHE_FLUSH
-	 */
-	local_irq_save(psr);
-	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
-						&result.v0);
-	local_irq_restore(psr);
-	if (result.status != 0)
-		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
-				"in1:%lx,in2:%lx\n",
-				vcpu, result.status, gr29, gr30);
-
-#if 0
-	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
-		cpus_setall(vcpu->arch.cache_coherent_map);
-		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
-		cpus_setall(cpu_cache_coherent_map);
-		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
-	}
-#endif
-	return result;
-}
-
-struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
-	return result;
-}
-
-static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
-
-	/*
-	 * PAL_FREQ_BASE may not be implemented in some platforms,
-	 * call SAL instead.
-	 */
-	if (result.v0 == 0) {
-		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-							&result.v0,
-							&result.v1);
-		result.v2 = 0;
-	}
-
-	return result;
-}
-
-/*
- * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
- * RTC is used instead. This function patches the ratios from SAL
- * to match the RTC before providing them to the guest.
- */
-static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
-{
-	struct pal_freq_ratio *ratio;
-	unsigned long sal_freq, sal_drift, factor;
-
-	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-					    &sal_freq, &sal_drift);
-	ratio = (struct pal_freq_ratio *)&result->v2;
-	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
-		sn_rtc_cycles_per_second;
-
-	ratio->num = 3;
-	ratio->den = factor;
-}
-
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
-
-	if (vcpu->kvm->arch.is_sn2)
-		sn2_patch_itc_freq_ratios(&result);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_UNIMPLEMENTED(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_SUCCESS(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
-			&result.v2, in2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
-{
-
-	pal_cache_config_info_t ci;
-	long status;
-	unsigned long in0, in1, in2, in3, r9, r10;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_cache_config_info(in1, in2, &ci);
-	r9 = ci.pcci_info_1.pcci1_data;
-	r10 = ci.pcci_info_2.pcci2_data;
-	return ((struct ia64_pal_retval){status, r9, r10, 0});
-}
-
-#define GUEST_IMPL_VA_MSB	59
-#define GUEST_RID_BITS		18
-
-static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
-{
-
-	pal_vm_info_1_u_t vminfo1;
-	pal_vm_info_2_u_t vminfo2;
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
-	if (!result.status) {
-		vminfo1.pvi1_val = result.v0;
-		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
-		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
-		result.v0 = vminfo1.pvi1_val;
-		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
-		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
-		result.v1 = vminfo2.pvi2_val;
-	}
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	result.status = ia64_pal_vm_info(in1, in2,
-			(pal_tc_info_u_t *)&result.v1, &result.v2);
-
-	return result;
-}
-
-static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
-{
-	u64 index = 0;
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		index = p->u.pal_data.gr28;
-
-	return index;
-}
-
-static void prepare_for_halt(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.timer_pending = 1;
-	vcpu->arch.timer_fired = 0;
-}
-
-static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
-{
-	long status;
-	unsigned long in0, in1, in2, in3, r9;
-	unsigned long pm_buffer[16];
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_perf_mon_info(pm_buffer,
-				(pal_perf_mon_info_u_t *) &r9);
-	if (status != 0) {
-		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
-	} else {
-		if (in1)
-			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
-		else {
-			status = PAL_STATUS_EINVAL;
-			printk(KERN_WARNING"Invalid parameters "
-						"for PAL call:0x%lx!\n", in0);
-		}
-	}
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
-{
-	unsigned long in0, in1, in2, in3;
-	long status;
-	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
-					| (1UL << 61) | (1UL << 60);
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	if (in1) {
-		memcpy((void *)in1, &res, sizeof(res));
-		status = 0;
-	} else{
-		status = PAL_STATUS_EINVAL;
-		printk(KERN_WARNING"Invalid parameters "
-					"for PAL call:0x%lx!\n", in0);
-	}
-
-	return (struct ia64_pal_retval){status, 0, 0, 0};
-}
-
-static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
-{
-	unsigned long r9;
-	long status;
-
-	status = ia64_pal_mem_attrib(&r9);
-
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static void remote_pal_prefetch_visibility(void *v)
-{
-	s64 trans_type = (s64)v;
-	ia64_pal_prefetch_visibility(trans_type);
-}
-
-static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_prefetch_visibility(in1);
-	if (result.status == 0) {
-		/* Must be performed on all remote processors
-		in the coherence domain. */
-		smp_call_function(remote_pal_prefetch_visibility,
-					(void *)in1, 1);
-		/* Unnecessary on remote processor for other vcpus!*/
-		result.status = 1;
-	}
-	return result;
-}
-
-static void remote_pal_mc_drain(void *v)
-{
-	ia64_pal_mc_drain();
-}
-
-static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	if (in1 == 0 && in2) {
-		char brand_info[128];
-		result.status = ia64_pal_get_brand_info(brand_info);
-		if (result.status == PAL_STATUS_SUCCESS)
-			memcpy((void *)in2, brand_info, 128);
-	} else {
-		result.status = PAL_STATUS_REQUIRES_MEMORY;
-		printk(KERN_WARNING"Invalid parameters for "
-					"PAL call:0x%lx!\n", in0);
-	}
-
-	return result;
-}
-
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-
-	u64 gr28;
-	struct ia64_pal_retval result;
-	int ret = 1;
-
-	gr28 = kvm_get_pal_call_index(vcpu);
-	switch (gr28) {
-	case PAL_CACHE_FLUSH:
-		result = pal_cache_flush(vcpu);
-		break;
-	case PAL_MEM_ATTRIB:
-		result = pal_mem_attrib(vcpu);
-		break;
-	case PAL_CACHE_SUMMARY:
-		result = pal_cache_summary(vcpu);
-		break;
-	case PAL_PERF_MON_INFO:
-		result = pal_perf_mon_info(vcpu);
-		break;
-	case PAL_HALT_INFO:
-		result = pal_halt_info(vcpu);
-		break;
-	case PAL_HALT_LIGHT:
-	{
-		INIT_PAL_STATUS_SUCCESS(result);
-		prepare_for_halt(vcpu);
-		if (kvm_highest_pending_irq(vcpu) == -1)
-			ret = kvm_emulate_halt(vcpu);
-	}
-		break;
-
-	case PAL_PREFETCH_VISIBILITY:
-		result = pal_prefetch_visibility(vcpu);
-		break;
-	case PAL_MC_DRAIN:
-		result.status = ia64_pal_mc_drain();
-		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
-		   That causes the congestion. */
-		smp_call_function(remote_pal_mc_drain, NULL, 1);
-		break;
-
-	case PAL_FREQ_RATIOS:
-		result = pal_freq_ratios(vcpu);
-		break;
-
-	case PAL_FREQ_BASE:
-		result = pal_freq_base(vcpu);
-		break;
-
-	case PAL_LOGICAL_TO_PHYSICAL :
-		result = pal_logical_to_physica(vcpu);
-		break;
-
-	case PAL_VM_SUMMARY :
-		result = pal_vm_summary(vcpu);
-		break;
-
-	case PAL_VM_INFO :
-		result = pal_vm_info(vcpu);
-		break;
-	case PAL_PLATFORM_ADDR :
-		result = pal_platform_addr(vcpu);
-		break;
-	case PAL_CACHE_INFO:
-		result = pal_cache_info(vcpu);
-		break;
-	case PAL_PTCE_INFO:
-		INIT_PAL_STATUS_SUCCESS(result);
-		result.v1 = (1L << 32) | 1L;
-		break;
-	case PAL_REGISTER_INFO:
-		result = pal_register_info(vcpu);
-		break;
-	case PAL_VM_PAGE_SIZE:
-		result.status = ia64_pal_vm_page_size(&result.v0,
-							&result.v1);
-		break;
-	case PAL_RSE_INFO:
-		result.status = ia64_pal_rse_info(&result.v0,
-					(pal_hints_u_t *)&result.v1);
-		break;
-	case PAL_PROC_GET_FEATURES:
-		result = pal_proc_get_features(vcpu);
-		break;
-	case PAL_DEBUG_INFO:
-		result.status = ia64_pal_debug_info(&result.v0,
-							&result.v1);
-		break;
-	case PAL_VERSION:
-		result.status = ia64_pal_version(
-				(pal_version_u_t *)&result.v0,
-				(pal_version_u_t *)&result.v1);
-		break;
-	case PAL_FIXED_ADDR:
-		result.status = PAL_STATUS_SUCCESS;
-		result.v0 = vcpu->vcpu_id;
-		break;
-	case PAL_BRAND_INFO:
-		result = pal_get_brand_info(vcpu);
-		break;
-	case PAL_GET_PSTATE:
-	case PAL_CACHE_SHARED_INFO:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		break;
-	default:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		printk(KERN_WARNING"kvm: Unsupported pal call,"
-					" index:0x%lx\n", gr28);
-	}
-	set_pal_result(vcpu, result);
-	return ret;
-}
-
-static struct sal_ret_values sal_emulator(struct kvm *kvm,
-				long index, unsigned long in1,
-				unsigned long in2, unsigned long in3,
-				unsigned long in4, unsigned long in5,
-				unsigned long in6, unsigned long in7)
-{
-	unsigned long r9  = 0;
-	unsigned long r10 = 0;
-	long r11 = 0;
-	long status;
-
-	status = 0;
-	switch (index) {
-	case SAL_FREQ_BASE:
-		status = ia64_sal_freq_base(in1, &r9, &r10);
-		break;
-	case SAL_PCI_CONFIG_READ:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_READ\n");
-		break;
-	case SAL_PCI_CONFIG_WRITE:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_WRITE\n");
-		break;
-	case SAL_SET_VECTORS:
-		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
-			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
-				status = -2;
-			} else {
-				kvm->arch.rdv_sal_data.boot_ip = in2;
-				kvm->arch.rdv_sal_data.boot_gp = in3;
-			}
-			printk("Rendvous called! iip:%lx\n\n", in2);
-		} else
-			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
-							"ignored...\n", in1);
-		break;
-	case SAL_GET_STATE_INFO:
-		/* No more info.  */
-		status = -5;
-		r9 = 0;
-		break;
-	case SAL_GET_STATE_INFO_SIZE:
-		/* Return a dummy size.  */
-		status = 0;
-		r9 = 128;
-		break;
-	case SAL_CLEAR_STATE_INFO:
-		/* Noop.  */
-		break;
-	case SAL_MC_RENDEZ:
-		printk(KERN_WARNING
-			"kvm: called SAL_MC_RENDEZ. ignored...\n");
-		break;
-	case SAL_MC_SET_PARAMS:
-		printk(KERN_WARNING
-			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
-		break;
-	case SAL_CACHE_FLUSH:
-		if (1) {
-			/*Flush using SAL.
-			This method is faster but has a side
-			effect on other vcpu running on
-			this cpu.  */
-			status = ia64_sal_cache_flush(in1);
-		} else {
-			/*Maybe need to implement the method
-			without side effect!*/
-			status = 0;
-		}
-		break;
-	case SAL_CACHE_INIT:
-		printk(KERN_WARNING
-			"kvm: called SAL_CACHE_INIT.  ignored...\n");
-		break;
-	case SAL_UPDATE_PAL:
-		printk(KERN_WARNING
-			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
-		break;
-	default:
-		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
-						" index:%ld\n", index);
-		status = -1;
-		break;
-	}
-	return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
-		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		*in0 = p->u.sal_data.in0;
-		*in1 = p->u.sal_data.in1;
-		*in2 = p->u.sal_data.in2;
-		*in3 = p->u.sal_data.in3;
-		*in4 = p->u.sal_data.in4;
-		*in5 = p->u.sal_data.in5;
-		*in6 = p->u.sal_data.in6;
-		*in7 = p->u.sal_data.in7;
-		return ;
-	}
-	*in0 = 0;
-}
-
-void kvm_sal_emul(struct kvm_vcpu *vcpu)
-{
-
-	struct sal_ret_values result;
-	u64 index, in1, in2, in3, in4, in5, in6, in7;
-
-	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
-			&in3, &in4, &in5, &in6, &in7);
-	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
-					in4, in5, in6, in7);
-	set_sal_result(vcpu, result);
-}
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index f1268b8..0000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- *	Just include kernel's library, and disable symbols export.
- * 	Copyright (C) 2008, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include <linux/module.h>
-#undef CONFIG_KALLSYMS
-#undef EXPORT_SYMBOL
-#undef EXPORT_SYMBOL_GPL
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
deleted file mode 100644
index b2bcaa2..0000000
--- a/arch/ia64/kvm/kvm_minstate.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- *  kvm_minstate.h: min save macros
- *  Copyright (c) 2007, Intel Corporation.
- *
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/types.h>
-#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
-#include "asm-offsets.h"
-
-#define KVM_MINSTATE_START_SAVE_MIN	     					\
-	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
-	;;									\
-	mov.m r28 = ar.rnat;                                  			\
-	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
-	;;									\
-	lfetch.fault.excl.nt1 [r22];						\
-	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
-	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
-	;;									\
-	mov ar.bspstore = r22;				/* switch to kernel RBS */\
-	;;									\
-	mov r18 = ar.bsp;							\
-	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
-
-
-
-#define KVM_MINSTATE_END_SAVE_MIN						\
-	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
-	;;
-
-
-#define PAL_VSA_SYNC_READ						\
-	/* begin to call pal vps sync_read */				\
-{.mii;									\
-	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
-	nop 0x0;							\
-	mov r24=ip;							\
-	;;								\
-}									\
-{.mmb									\
-	add r24=0x20, r24;						\
-	ld8 r25 = [r25];      /* read vpd base */			\
-	br.cond.sptk kvm_vps_sync_read;		/*call the service*/	\
-	;;								\
-};									\
-
-
-#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
-
-/*
- * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *  psr.ic: off
- *  r31:	contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *  psr.ic: off
- *   r2 = points to &pt_regs.r16
- *   r8 = contents of ar.ccv
- *   r9 = contents of ar.csd
- *  r10 = contents of ar.ssd
- *  r11 = FPSR_DEFAULT
- *  r12 = kernel sp (kernel virtual address)
- *  r13 = points to current task_struct (kernel virtual address)
- *  p15 = TRUE if psr.i is set in cr.ipsr
- *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *	  preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-
-
-#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
-
-#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
-	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
-	mov r27 = ar.rsc;         /* M */			\
-	mov r20 = r1;         /* A */				\
-	mov r25 = ar.unat;        /* M */			\
-	mov r29 = cr.ipsr;        /* M */			\
-	mov r26 = ar.pfs;         /* I */			\
-	mov r18 = cr.isr;         				\
-	COVER;              /* B;; (or nothing) */		\
-	;;							\
-	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
-	mov r1 = r16;						\
-/*	mov r21=r16;	*/					\
-	/* switch from user to kernel RBS: */			\
-	;;							\
-	invala;             /* M */				\
-	SAVE_IFS;						\
-	;;							\
-	KVM_MINSTATE_START_SAVE_MIN				\
-	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
-	adds r16 = PT(CR_IPSR),r1;				\
-	;;							\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
-	st8 [r16] = r29;      /* save cr.ipsr */		\
-	;;							\
-	lfetch.fault.excl.nt1 [r17];				\
-	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
-	mov r29 = b0						\
-	;;							\
-	adds r16 = PT(R8),r1; /* initialize first base pointer */\
-	adds r17 = PT(R9),r1; /* initialize second base pointer */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r8,16;			\
-.mem.offset 8,0; st8.spill [r17] = r9,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r10,24;			\
-.mem.offset 8,0; st8.spill [r17] = r11,24;			\
-	;;							\
-	mov r9 = cr.iip;         /* M */			\
-	mov r10 = ar.fpsr;        /* M */			\
-	;;							\
-	st8 [r16] = r9,16;    /* save cr.iip */			\
-	st8 [r17] = r30,16;   /* save cr.ifs */			\
-	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
-	;;							\
-	st8 [r16] = r25,16;   /* save ar.unat */		\
-	st8 [r17] = r26,16;    /* save ar.pfs */		\
-	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
-	;;							\
-	st8 [r16] = r27,16;   /* save ar.rsc */			\
-	st8 [r17] = r28,16;   /* save ar.rnat */		\
-	;;          /* avoid RAW on r16 & r17 */		\
-	st8 [r16] = r23,16;   /* save ar.bspstore */		\
-	st8 [r17] = r31,16;   /* save predicates */		\
-	;;							\
-	st8 [r16] = r29,16;   /* save b0 */			\
-	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
-.mem.offset 8,0; st8.spill [r17] = r12,16;			\
-	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r13,16;			\
-.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
-	mov r13 = r21;   /* establish `current' */		\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r15,16;			\
-.mem.offset 8,0; st8.spill [r17] = r14,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r2,16;			\
-.mem.offset 8,0; st8.spill [r17] = r3,16;			\
-	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
-	 ;;							\
-	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
-	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
-	mov r26 = cr.iipa;					\
-	mov r27 = cr.isr;					\
-	;;							\
-	st8 [r16] = r26;					\
-	st8 [r17] = r27;					\
-	;;							\
-	EXTRA;							\
-	mov r8 = ar.ccv;					\
-	mov r9 = ar.csd;					\
-	mov r10 = ar.ssd;					\
-	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
-	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
-	;;							\
-	ld8 r1 = [r17];/* establish kernel global pointer */	\
-	;;							\
-	PAL_VSA_SYNC_READ					\
-	KVM_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *  psr.ic: on
- *  r2: points to &pt_regs.f6
- *  r3: points to &pt_regs.f7
- *  r8: contents of ar.ccv
- *  r9: contents of ar.csd
- *  r10:	contents of ar.ssd
- *  r11:	FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define KVM_SAVE_REST				\
-.mem.offset 0,0; st8.spill [r2] = r16,16;	\
-.mem.offset 8,0; st8.spill [r3] = r17,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r18,16;	\
-.mem.offset 8,0; st8.spill [r3] = r19,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r20,16;	\
-.mem.offset 8,0; st8.spill [r3] = r21,16;	\
-	mov r18=b6;			\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r22,16;	\
-.mem.offset 8,0; st8.spill [r3] = r23,16;	\
-	mov r19 = b7;				\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r24,16;	\
-.mem.offset 8,0; st8.spill [r3] = r25,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r26,16;	\
-.mem.offset 8,0; st8.spill [r3] = r27,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r28,16;	\
-.mem.offset 8,0; st8.spill [r3] = r29,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r30,16;	\
-.mem.offset 8,0; st8.spill [r3] = r31,32;	\
-	;;					\
-	mov ar.fpsr = r11;			\
-	st8 [r2] = r8,8;			\
-	adds r24 = PT(B6)-PT(F7),r3;		\
-	adds r25 = PT(B7)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r18,16;       /* b6 */	\
-	st8 [r25] = r19,16;       /* b7 */	\
-	adds r2 = PT(R4)-PT(F6),r2;		\
-	adds r3 = PT(R5)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r9;	/* ar.csd */		\
-	st8 [r25] = r10;	/* ar.ssd */	\
-	;;					\
-	mov r18 = ar.unat;			\
-	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
-	;;					\
-	st8 [r19] = r18; /* eml_unat */ 	\
-
-
-#define KVM_SAVE_EXTRA				\
-.mem.offset 0,0; st8.spill [r2] = r4,16;	\
-.mem.offset 8,0; st8.spill [r3] = r5,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r6,16;	\
-.mem.offset 8,0; st8.spill [r3] = r7;		\
-	;;					\
-	mov r26 = ar.unat;			\
-	;;					\
-	st8 [r2] = r26;/* eml_unat */ 		\
-
-#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
-#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
-#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
deleted file mode 100644
index c5f92a9..0000000
--- a/arch/ia64/kvm/lapic.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __KVM_IA64_LAPIC_H
-#define __KVM_IA64_LAPIC_H
-
-#include <linux/kvm_host.h>
-
-/*
- * vlsapic
- */
-struct kvm_lapic{
-	struct kvm_vcpu *vcpu;
-	uint64_t insvc[4];
-	uint64_t vhpi;
-	uint8_t xtp;
-	uint8_t pal_init_pending;
-	uint8_t pad[2];
-};
-
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_free_lapic(struct kvm_vcpu *vcpu);
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
-#define kvm_apic_present(x) (true)
-#define kvm_lapic_enabled(x) (true)
-
-#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
deleted file mode 100644
index c04cdbe..0000000
--- a/arch/ia64/kvm/memcpy.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
deleted file mode 100644
index 83c3066..0000000
--- a/arch/ia64/kvm/memset.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
deleted file mode 100644
index dd979e0..0000000
--- a/arch/ia64/kvm/misc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef __KVM_IA64_MISC_H
-#define __KVM_IA64_MISC_H
-
-#include <linux/kvm_host.h>
-/*
- * misc.h
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- *Return p2m base address at host side!
- */
-static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
-{
-	return (uint64_t *)(kvm->arch.vm_base +
-				offsetof(struct kvm_vm_data, kvm_p2m));
-}
-
-static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
-		u64 paddr, u64 mem_flags)
-{
-	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
-	unsigned long pte;
-
-	pte = PAGE_ALIGN(paddr) | mem_flags;
-	pmt_base[gfn] = pte;
-}
-
-/*Function for translating host address to guest address*/
-
-static inline void *to_guest(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
-			KVM_VM_DATA_BASE);
-}
-
-/*Function for translating guest address to host address*/
-
-static inline void *to_host(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
-			+ kvm->arch.vm_base);
-}
-
-/* Get host context of the vcpu */
-static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.host;
-	return to_guest(vcpu->kvm, ctx);
-}
-
-/* Get guest context of the vcpu */
-static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.guest;
-	return  to_guest(vcpu->kvm, ctx);
-}
-
-/* kvm get exit data from gvmm! */
-static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
-{
-	return &vcpu->arch.exit_data;
-}
-
-/*kvm get vcpu ioreq for kvm module!*/
-static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_ctl_data;
-
-	if (vcpu) {
-		p_ctl_data = kvm_get_exit_data(vcpu);
-		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
-			return &p_ctl_data->u.ioreq;
-	}
-
-	return NULL;
-}
-
-#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
deleted file mode 100644
index f1e17d3..0000000
--- a/arch/ia64/kvm/mmio.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
- *
- * Copyright (c) 2007 Intel Corporation  KVM support.
- * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- * Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-
-#include "vcpu.h"
-
-static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
-{
-	VLSAPIC_XTP(v) = val;
-}
-
-/*
- * LSAPIC OFFSET
- */
-#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
-#define PIB_OFST_INTA          0x1E0000
-#define PIB_OFST_XTP           0x1E0008
-
-/*
- * execute write IPI op.
- */
-static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
-					uint64_t addr, uint64_t data)
-{
-	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	p->exit_reason = EXIT_REASON_IPI;
-	p->u.ipi_data.addr.val = addr;
-	p->u.ipi_data.data.val = data;
-	vmm_transition(current_vcpu);
-
-	local_irq_restore(psr);
-
-}
-
-void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
-			unsigned long length, unsigned long val)
-{
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		panic_vm(v, "Undefined write on PIB INTA\n");
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			vlsapic_write_xtp(v, val);
-		} else {
-			panic_vm(v, "Undefined write on PIB XTP\n");
-		}
-		break;
-	default:
-		if (PIB_LOW_HALF(addr)) {
-			/*Lower half */
-			if (length != 8)
-				panic_vm(v, "Can't LHF write with size %ld!\n",
-						length);
-			else
-				vlsapic_write_ipi(v, addr, val);
-		} else {   /*Upper half */
-			panic_vm(v, "IPI-UHF write %lx\n", addr);
-		}
-		break;
-	}
-}
-
-unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
-		unsigned long length)
-{
-	uint64_t result = 0;
-
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		if (length == 1) /* 1 byte load */
-			; /* There is no i8259, there is no INTA access*/
-		else
-			panic_vm(v, "Undefined read on PIB INTA\n");
-
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			result = VLSAPIC_XTP(v);
-		} else {
-			panic_vm(v, "Undefined read on PIB XTP\n");
-		}
-		break;
-	default:
-		panic_vm(v, "Undefined addr access for lsapic!\n");
-		break;
-	}
-	return result;
-}
-
-static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
-					u16 s, int ma, int dir)
-{
-	unsigned long iot;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
-
-	local_irq_save(psr);
-
-	/*Intercept the access for PIB range*/
-	if (iot == GPFN_PIB) {
-		if (!dir)
-			lsapic_write(vcpu, src_pa, s, *dest);
-		else
-			*dest = lsapic_read(vcpu, src_pa, s);
-		goto out;
-	}
-	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
-	p->u.ioreq.addr = src_pa;
-	p->u.ioreq.size = s;
-	p->u.ioreq.dir = dir;
-	if (dir == IOREQ_WRITE)
-		p->u.ioreq.data = *dest;
-	p->u.ioreq.state = STATE_IOREQ_READY;
-	vmm_transition(vcpu);
-
-	if (p->u.ioreq.state == STATE_IORESP_READY) {
-		if (dir == IOREQ_READ)
-			/* it's necessary to ensure zero extending */
-			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
-	} else
-		panic_vm(vcpu, "Unhandled mmio access returned!\n");
-out:
-	local_irq_restore(psr);
-	return ;
-}
-
-/*
-   dir 1: read 0:write
-   inst_type 0:integer 1:floating point
- */
-#define SL_INTEGER	0	/* store/load interger*/
-#define SL_FLOATING	1     	/* store/load floating*/
-
-void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
-{
-	struct kvm_pt_regs *regs;
-	IA64_BUNDLE bundle;
-	int slot, dir = 0;
-	int inst_type = -1;
-	u16 size = 0;
-	u64 data, slot1a, slot1b, temp, update_reg;
-	s32 imm;
-	INST64 inst;
-
-	regs = vcpu_regs(vcpu);
-
-	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
-		/* if fetch code fail, return and try again */
-		return;
-	}
-	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
-	if (!slot)
-		inst.inst = bundle.slot0;
-	else if (slot == 1) {
-		slot1a = bundle.slot1a;
-		slot1b = bundle.slot1b;
-		inst.inst = slot1a + (slot1b << 18);
-	} else if (slot == 2)
-		inst.inst = bundle.slot2;
-
-	/* Integer Load/Store */
-	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
-		inst_type = SL_INTEGER;
-		size = (inst.M1.x6 & 0x3);
-		if ((inst.M1.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M4.r2);
-		} else if ((inst.M1.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-		}
-	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
-		/* Integer Load + Reg update */
-		inst_type = SL_INTEGER;
-		dir = IOREQ_READ;
-		size = (inst.M2.x6 & 0x3);
-		temp = vcpu_get_gr(vcpu, inst.M2.r3);
-		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
-		temp += update_reg;
-		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
-	} else if (inst.M3.major == 5) {
-		/*Integer Load/Store + Imm update*/
-		inst_type = SL_INTEGER;
-		size = (inst.M3.x6&0x3);
-		if ((inst.M5.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M5.r2);
-			temp = vcpu_get_gr(vcpu, inst.M5.r3);
-			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
-				(inst.M5.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
-
-		} else if ((inst.M3.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-			temp = vcpu_get_gr(vcpu, inst.M3.r3);
-			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
-				(inst.M3.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
-
-		}
-	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
-				&& inst.M9.m == 0 && inst.M9.x == 0) {
-		/* Floating-point spill*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
-		/* Write high word. FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
-			    ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
-		/* Floating-point spill + Imm update */
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-
-		/* Write high word.FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
-			    8, ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
-		/* Floating-point stf8 + Imm update */
-		struct ia64_fpreg v;
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		size = 3;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		data = v.u.bits[0]; /* Significand.  */
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
-			&& inst.M15.x6 <= 0x2f) {
-		temp = vcpu_get_gr(vcpu, inst.M15.r3);
-		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
-			(inst.M15.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
-
-		vcpu_increment_iip(vcpu);
-		return;
-	} else if (inst.M12.major == 6 && inst.M12.m == 1
-			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
-		/* Floating-point Load Pair + Imm ldfp8 M12*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_READ;
-		size = 8;     /*ldfd*/
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
-		padr += 8;
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
-		padr += 8;
-		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
-		vcpu_increment_iip(vcpu);
-		return;
-	} else {
-		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! "
-				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-				bundle.i64[0], bundle.i64[1]);
-	}
-
-	size = 1 << size;
-	if (dir == IOREQ_WRITE) {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-	} else {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		if (inst_type == SL_INTEGER)
-			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
-		else
-			panic_vm(vcpu, "Unsupported instruction type!\n");
-
-	}
-	vcpu_increment_iip(vcpu);
-}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
deleted file mode 100644
index f793be3..0000000
--- a/arch/ia64/kvm/optvfault.S
+++ /dev/null
@@ -1,1090 +0,0 @@
-/*
- * arch/ia64/kvm/optvfault.S
- * optimize virtualization fault handler
- *
- * Copyright (C) 2006 Intel Co
- *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- * Copyright (C) 2008 Intel Co
- *      Add the support for Tukwila processors.
- *	Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/kvm_host.h>
-
-#include "vti.h"
-#include "asm-offsets.h"
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
-
-#define VMX_VPS_SYNC_READ			\
-	add r16=VMM_VPD_BASE_OFFSET,r21;	\
-	mov r17 = b0;				\
-	mov r18 = r24;				\
-	mov r19 = r25;				\
-	mov r20 = r31;				\
-	;;					\
-{.mii;						\
-	ld8 r16 = [r16];			\
-	nop 0x0;				\
-	mov r24 = ip;				\
-	;;					\
-};						\
-{.mmb;						\
-	add r24=0x20, r24;			\
-	mov r25 =r16;				\
-	br.sptk.many kvm_vps_sync_read;		\
-};						\
-	mov b0 = r17;				\
-	mov r24 = r18;				\
-	mov r25 = r19;				\
-	mov r31 = r20
-
-ENTRY(kvm_vps_entry)
-	adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	ld8 r29 = [r29]
-	;;
-	add r29 = r29, r30
-	;;
-	mov b0 = r29
-	br.sptk.many b0
-END(kvm_vps_entry)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_read)
-	movl r30 = PAL_VPS_SYNC_READ
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_read)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_write)
-	movl r30 = PAL_VPS_SYNC_WRITE
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_write)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *
- */
-GLOBAL_ENTRY(kvm_vps_resume_normal)
-	movl r30 = PAL_VPS_RESUME_NORMAL
-	;;
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_normal)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *	r17 : isr
- */
-GLOBAL_ENTRY(kvm_vps_resume_handler)
-	movl r30 = PAL_VPS_RESUME_HANDLER
-	;;
-	ld8 r26=[r25]
-	shr r17=r17,IA64_ISR_IR_BIT
-	;;
-	dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_handler)
-
-//mov r1=ar3
-GLOBAL_ENTRY(kvm_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-	br.many kvm_virtualization_fault_back
-#endif
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	;;
-	ld8 r18=[r18]
-	mov r19=ar.itc
-	mov r24=b0
-	;;
-	add r19=r19,r18
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	st8 [r16] = r19
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	shladd r17=r17,4,r20
-	;;
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar)
-
-/*
- * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
- * clock as it's source for emulating the ITC. This version will be
- * copied on top of the original version if the host is determined to
- * be an SN2.
- */
-GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
-
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	mov r24=b0
-	;;
-	ld8 r18=[r18]
-	ld8 r19=[r19]
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	add r19=r19,r18
-	shladd r17=r17,4,r20
-	;;
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	st8 [r16] = r19
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar_sn2)
-
-
-
-// mov r1=rr[r3]
-GLOBAL_ENTRY(kvm_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,6,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r24=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_from_rr_back_1:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
-	shr.u r26=r19,61
-	;;
-	shladd r17=r17,4,r22
-	shladd r27=r26,3,r27
-	;;
-	ld8 r19=[r27]
-	mov b0=r17
-	br.many b0
-END(kvm_asm_mov_from_rr)
-
-
-// mov rr[r3]=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,13,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r22=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_to_rr_back_1:
-	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-	shr.u r23=r19,61
-	shladd r17=r17,4,r20
-	;;
-	//if rr6, go back
-	cmp.eq p6,p0=6,r23
-	mov b0=r22
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	;;
-	mov r28=r19
-	mov b0=r17
-	br.many b0
-kvm_asm_mov_to_rr_back_2:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	shladd r27=r23,3,r27
-	;; // vrr.rid<<4 |0xe
-	st8 [r27]=r19
-	mov b0=r30
-	;;
-	extr.u r16=r19,8,26
-	extr.u r18 =r19,2,6
-	mov r17 =0xe
-	;;
-	shladd r16 = r16, 4, r17
-	extr.u r19 =r19,0,8
-	;;
-	shl r16 = r16,8
-	;;
-	add r19 = r19, r16
-	;; //set ve 1
-	dep r19=-1,r19,0,1
-	cmp.lt p6,p0=14,r18
-	;;
-	(p6) mov r18=14
-	;;
-	(p6) dep r19=r18,r19,2,6
-	;;
-	cmp.eq p6,p0=0,r23
-	;;
-	cmp.eq.or p6,p0=4,r23
-	;;
-	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	;;
-	ld4 r16=[r16]
-	cmp.eq p7,p0=r0,r0
-	(p6) shladd r17=r23,1,r17
-	;;
-	(p6) st8 [r17]=r19
-	(p6) tbit.nz p6,p7=r16,0
-	;;
-	(p7) mov rr[r28]=r19
-	mov r24=r22
-	br.many b0
-END(kvm_asm_mov_to_rr)
-
-
-//rsm
-GLOBAL_ENTRY(kvm_asm_rsm)
-#ifndef ACCE_RSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;
-	add r17=VPD_VPSR_START_OFFSET,r16
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	//r26 is imm24
-	dep r26=r28,r26,23,1
-	;;
-	ld8 r18=[r17]
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
-	ld4 r23=[r22]
-	sub r27=-1,r26
-	mov r24=b0
-	;;
-	mov r20=cr.ipsr
-	or r28=r27,r28
-	and r19=r18,r27
-	;;
-	st8 [r17]=r19
-	and r20=r20,r28
-	/* Comment it out due to short of fp lazy alorgithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	*/
-	;;
-	mov cr.ipsr=r20
-	tbit.nz p6,p0=r23,0
-	;;
-	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	dep r23=-1,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_rsm)
-
-
-//ssm
-GLOBAL_ENTRY(kvm_asm_ssm)
-#ifndef ACCE_SSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;  //r26 is imm24
-	add r27=VPD_VPSR_START_OFFSET,r16
-	dep r26=r28,r26,23,1
-	;;  //r19 vpsr
-	ld8 r29=[r27]
-	mov r24=b0
-	;;
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	mov r20=cr.ipsr
-	or r19=r29,r26
-	;;
-	ld4 r23=[r22]
-	st8 [r27]=r19
-	or r20=r20,r26
-	;;
-	mov cr.ipsr=r20
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	and r19=r28,r19
-	tbit.z p6,p0=r23,0
-	;;
-	cmp.ne.or p6,p0=r28,r19
-	(p6) br.dptk kvm_asm_ssm_1
-	;;
-	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_ssm_1:
-	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
-	;;
-	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_ssm)
-
-
-//mov psr.l=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,13,7 //r2
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
-	shladd r26=r26,4,r20
-	mov r24=b0
-	;;
-	add r27=VPD_VPSR_START_OFFSET,r16
-	mov b0=r26
-	br.many b0
-	;;
-kvm_asm_mov_to_psr_back:
-	ld8 r17=[r27]
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	dep r19=0,r19,32,32
-	;;
-	ld4 r23=[r22]
-	dep r18=0,r17,0,32
-	;;
-	add r30=r18,r19
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	st8 [r27]=r30
-	and r27=r28,r30
-	and r29=r28,r17
-	;;
-	cmp.eq p5,p0=r29,r27
-	cmp.eq p6,p7=r28,r27
-	(p5) br.many kvm_asm_mov_to_psr_1
-	;;
-	//virtual to physical
-	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	(p7) dep r23=-1,r23,0,1
-	;;
-	//physical to virtual
-	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	(p6) dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_mov_to_psr_1:
-	mov r20=cr.ipsr
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-	;;
-	or r19=r19,r28
-	dep r20=0,r20,0,32
-	;;
-	add r20=r19,r20
-	mov b0=r24
-	;;
-	/* Comment it out due to short of fp lazy algorithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	;;
-	*/
-	mov cr.ipsr=r20
-	cmp.ne p6,p0=r0,r0
-	;;
-	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
-	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
-	(p6) br.dpnt.few kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_mov_to_psr)
-
-
-ENTRY(kvm_asm_dispatch_vexirq)
-//increment iip
-	mov r17 = b0
-	mov r18 = r31
-{.mii
-	add r25=VMM_VPD_BASE_OFFSET,r21
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 = 0x20, r24
-	ld8 r25 = [r25]
-	br.sptk.many kvm_vps_sync_write
-}
-	mov b0 =r17
-	mov r16=cr.ipsr
-	mov r31 = r18
-	mov r19 = 37
-	;;
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	(p7) add r17=1,r17
-	;;
-	(p6) add r18=0x10,r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	(p6) mov cr.iip=r18
-	mov cr.ipsr=r16
-	mov r30 =1
-	br.many kvm_dispatch_vexirq
-END(kvm_asm_dispatch_vexirq)
-
-// thash
-// TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(kvm_asm_thash)
-#ifndef ACCE_THASH
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r17=r25,20,7		// get r3 from opcode in r25
-	extr.u r18=r25,6,7		// get r1 from opcode in r25
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
-	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
-	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
-	;;
-	mov r24=b0
-	;;
-	ld8 r16=[r16]		// get VPD addr
-	mov b0=r17
-	br.many b0			// r19 return value
-	;;
-kvm_asm_thash_back1:
-	shr.u r23=r19,61		// get RR number
-	adds r28=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
-	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
-	;;
-	shladd r27=r23,3,r28	// get vcpu->arch.vrr[r23]'s addr
-	ld8 r17=[r16]		// get PTA
-	mov r26=1
-	;;
-	extr.u r29=r17,2,6	// get pta.size
-	ld8 r28=[r27]		// get vcpu->arch.vrr[r23]'s value
-	;;
-	mov b0=r24
-	//Fallback to C if pta.vf is set
-	tbit.nz p6,p0=r17, 8
-	;;
-	(p6) mov r24=EVENT_THASH
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	extr.u r28=r28,2,6	// get rr.ps
-	shl r22=r26,r29		// 1UL << pta.size
-	;;
-	shr.u r23=r19,r28	// vaddr >> rr.ps
-	adds r26=3,r29		// pta.size + 3
-	shl r27=r17,3		// pta << 3
-	;;
-	shl r23=r23,3		// (vaddr >> rr.ps) << 3
-	shr.u r27=r27,r26	// (pta << 3) >> (pta.size+3)
-	movl r16=7<<61
-	;;
-	adds r22=-1,r22		// (1UL << pta.size) - 1
-	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
-	and r19=r19,r16		// vaddr & VRN_MASK
-	;;
-	and r22=r22,r23		// vhpt_offset
-	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
-	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
-	;;
-	or r19=r19,r22		// calc pval
-	shladd r17=r18,4,r26
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	;;
-	mov b0=r17
-	br.many b0
-END(kvm_asm_thash)
-
-#define MOV_TO_REG0	\
-{;			\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	;;			\
-};
-
-
-#define MOV_TO_REG(n)	\
-{;			\
-	mov r##n##=r19;	\
-	mov b0=r30;	\
-	br.sptk.many b0;	\
-	;;			\
-};
-
-
-#define MOV_FROM_REG(n)	\
-{;				\
-	mov r19=r##n##;		\
-	mov b0=r30;		\
-	br.sptk.many b0;		\
-	;;				\
-};
-
-
-#define MOV_TO_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	mov r2=r19;				\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r##n##=r2;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r26;				\
-	mov b0=r30;				\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_to_bank0_reg##n##)
-
-
-#define MOV_FROM_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	nop.b 0x0;					\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r##n##;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r19=r2;				\
-	mov r2=r26;				\
-	mov b0=r30;				\
-};						\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_from_bank0_reg##n##)
-
-
-#define JMP_TO_MOV_TO_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_to_bank0_reg##n##;	\
-	;;						\
-}
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_from_bank0_reg##n##;	\
-	;;						\
-}
-
-
-MOV_FROM_BANK0_REG(16)
-MOV_FROM_BANK0_REG(17)
-MOV_FROM_BANK0_REG(18)
-MOV_FROM_BANK0_REG(19)
-MOV_FROM_BANK0_REG(20)
-MOV_FROM_BANK0_REG(21)
-MOV_FROM_BANK0_REG(22)
-MOV_FROM_BANK0_REG(23)
-MOV_FROM_BANK0_REG(24)
-MOV_FROM_BANK0_REG(25)
-MOV_FROM_BANK0_REG(26)
-MOV_FROM_BANK0_REG(27)
-MOV_FROM_BANK0_REG(28)
-MOV_FROM_BANK0_REG(29)
-MOV_FROM_BANK0_REG(30)
-MOV_FROM_BANK0_REG(31)
-
-
-// mov from reg table
-ENTRY(asm_mov_from_reg)
-	MOV_FROM_REG(0)
-	MOV_FROM_REG(1)
-	MOV_FROM_REG(2)
-	MOV_FROM_REG(3)
-	MOV_FROM_REG(4)
-	MOV_FROM_REG(5)
-	MOV_FROM_REG(6)
-	MOV_FROM_REG(7)
-	MOV_FROM_REG(8)
-	MOV_FROM_REG(9)
-	MOV_FROM_REG(10)
-	MOV_FROM_REG(11)
-	MOV_FROM_REG(12)
-	MOV_FROM_REG(13)
-	MOV_FROM_REG(14)
-	MOV_FROM_REG(15)
-	JMP_TO_MOV_FROM_BANK0_REG(16)
-	JMP_TO_MOV_FROM_BANK0_REG(17)
-	JMP_TO_MOV_FROM_BANK0_REG(18)
-	JMP_TO_MOV_FROM_BANK0_REG(19)
-	JMP_TO_MOV_FROM_BANK0_REG(20)
-	JMP_TO_MOV_FROM_BANK0_REG(21)
-	JMP_TO_MOV_FROM_BANK0_REG(22)
-	JMP_TO_MOV_FROM_BANK0_REG(23)
-	JMP_TO_MOV_FROM_BANK0_REG(24)
-	JMP_TO_MOV_FROM_BANK0_REG(25)
-	JMP_TO_MOV_FROM_BANK0_REG(26)
-	JMP_TO_MOV_FROM_BANK0_REG(27)
-	JMP_TO_MOV_FROM_BANK0_REG(28)
-	JMP_TO_MOV_FROM_BANK0_REG(29)
-	JMP_TO_MOV_FROM_BANK0_REG(30)
-	JMP_TO_MOV_FROM_BANK0_REG(31)
-	MOV_FROM_REG(32)
-	MOV_FROM_REG(33)
-	MOV_FROM_REG(34)
-	MOV_FROM_REG(35)
-	MOV_FROM_REG(36)
-	MOV_FROM_REG(37)
-	MOV_FROM_REG(38)
-	MOV_FROM_REG(39)
-	MOV_FROM_REG(40)
-	MOV_FROM_REG(41)
-	MOV_FROM_REG(42)
-	MOV_FROM_REG(43)
-	MOV_FROM_REG(44)
-	MOV_FROM_REG(45)
-	MOV_FROM_REG(46)
-	MOV_FROM_REG(47)
-	MOV_FROM_REG(48)
-	MOV_FROM_REG(49)
-	MOV_FROM_REG(50)
-	MOV_FROM_REG(51)
-	MOV_FROM_REG(52)
-	MOV_FROM_REG(53)
-	MOV_FROM_REG(54)
-	MOV_FROM_REG(55)
-	MOV_FROM_REG(56)
-	MOV_FROM_REG(57)
-	MOV_FROM_REG(58)
-	MOV_FROM_REG(59)
-	MOV_FROM_REG(60)
-	MOV_FROM_REG(61)
-	MOV_FROM_REG(62)
-	MOV_FROM_REG(63)
-	MOV_FROM_REG(64)
-	MOV_FROM_REG(65)
-	MOV_FROM_REG(66)
-	MOV_FROM_REG(67)
-	MOV_FROM_REG(68)
-	MOV_FROM_REG(69)
-	MOV_FROM_REG(70)
-	MOV_FROM_REG(71)
-	MOV_FROM_REG(72)
-	MOV_FROM_REG(73)
-	MOV_FROM_REG(74)
-	MOV_FROM_REG(75)
-	MOV_FROM_REG(76)
-	MOV_FROM_REG(77)
-	MOV_FROM_REG(78)
-	MOV_FROM_REG(79)
-	MOV_FROM_REG(80)
-	MOV_FROM_REG(81)
-	MOV_FROM_REG(82)
-	MOV_FROM_REG(83)
-	MOV_FROM_REG(84)
-	MOV_FROM_REG(85)
-	MOV_FROM_REG(86)
-	MOV_FROM_REG(87)
-	MOV_FROM_REG(88)
-	MOV_FROM_REG(89)
-	MOV_FROM_REG(90)
-	MOV_FROM_REG(91)
-	MOV_FROM_REG(92)
-	MOV_FROM_REG(93)
-	MOV_FROM_REG(94)
-	MOV_FROM_REG(95)
-	MOV_FROM_REG(96)
-	MOV_FROM_REG(97)
-	MOV_FROM_REG(98)
-	MOV_FROM_REG(99)
-	MOV_FROM_REG(100)
-	MOV_FROM_REG(101)
-	MOV_FROM_REG(102)
-	MOV_FROM_REG(103)
-	MOV_FROM_REG(104)
-	MOV_FROM_REG(105)
-	MOV_FROM_REG(106)
-	MOV_FROM_REG(107)
-	MOV_FROM_REG(108)
-	MOV_FROM_REG(109)
-	MOV_FROM_REG(110)
-	MOV_FROM_REG(111)
-	MOV_FROM_REG(112)
-	MOV_FROM_REG(113)
-	MOV_FROM_REG(114)
-	MOV_FROM_REG(115)
-	MOV_FROM_REG(116)
-	MOV_FROM_REG(117)
-	MOV_FROM_REG(118)
-	MOV_FROM_REG(119)
-	MOV_FROM_REG(120)
-	MOV_FROM_REG(121)
-	MOV_FROM_REG(122)
-	MOV_FROM_REG(123)
-	MOV_FROM_REG(124)
-	MOV_FROM_REG(125)
-	MOV_FROM_REG(126)
-	MOV_FROM_REG(127)
-END(asm_mov_from_reg)
-
-
-/* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
- */
-ENTRY(kvm_resume_to_guest_with_sync)
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	mov r16 = r31
-	mov r17 = r24
-	;;
-{.mii
-	ld8 r25 =[r19]
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 =0x20, r24
-	nop 0x0
-	br.sptk.many kvm_vps_sync_write
-}
-
-	mov r31 = r16
-	mov r24 =r17
-	;;
-	br.sptk.many kvm_resume_to_guest
-END(kvm_resume_to_guest_with_sync)
-
-ENTRY(kvm_resume_to_guest)
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 =[r16]
-	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	mov r16=cr.ipsr
-	;;
-	ld8 r20 = [r20]
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r25=[r19]
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	;;
-	(p6) add r18=0x10,r18
-	(p7) add r17=1,r17
-	;;
-	(p6) mov cr.iip=r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	mov cr.ipsr=r16
-	adds r19= VPD_VPSR_START_OFFSET,r25
-	add r28=PAL_VPS_RESUME_NORMAL,r20
-	add r29=PAL_VPS_RESUME_HANDLER,r20
-	;;
-	ld8 r19=[r19]
-	mov b0=r29
-	mov r27=cr.isr
-	;;
-	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p7=vpsr.ic
-	shr r27=r27,IA64_ISR_IR_BIT
-	;;
-	(p6) ld8 r26=[r25]
-	(p7) mov b0=r28
-	;;
-	(p6) dep r26=r27,r26,63,1
-	mov pr=r31,-2
-	br.sptk.many b0             // call pal service
-	;;
-END(kvm_resume_to_guest)
-
-
-MOV_TO_BANK0_REG(16)
-MOV_TO_BANK0_REG(17)
-MOV_TO_BANK0_REG(18)
-MOV_TO_BANK0_REG(19)
-MOV_TO_BANK0_REG(20)
-MOV_TO_BANK0_REG(21)
-MOV_TO_BANK0_REG(22)
-MOV_TO_BANK0_REG(23)
-MOV_TO_BANK0_REG(24)
-MOV_TO_BANK0_REG(25)
-MOV_TO_BANK0_REG(26)
-MOV_TO_BANK0_REG(27)
-MOV_TO_BANK0_REG(28)
-MOV_TO_BANK0_REG(29)
-MOV_TO_BANK0_REG(30)
-MOV_TO_BANK0_REG(31)
-
-
-// mov to reg table
-ENTRY(asm_mov_to_reg)
-	MOV_TO_REG0
-	MOV_TO_REG(1)
-	MOV_TO_REG(2)
-	MOV_TO_REG(3)
-	MOV_TO_REG(4)
-	MOV_TO_REG(5)
-	MOV_TO_REG(6)
-	MOV_TO_REG(7)
-	MOV_TO_REG(8)
-	MOV_TO_REG(9)
-	MOV_TO_REG(10)
-	MOV_TO_REG(11)
-	MOV_TO_REG(12)
-	MOV_TO_REG(13)
-	MOV_TO_REG(14)
-	MOV_TO_REG(15)
-	JMP_TO_MOV_TO_BANK0_REG(16)
-	JMP_TO_MOV_TO_BANK0_REG(17)
-	JMP_TO_MOV_TO_BANK0_REG(18)
-	JMP_TO_MOV_TO_BANK0_REG(19)
-	JMP_TO_MOV_TO_BANK0_REG(20)
-	JMP_TO_MOV_TO_BANK0_REG(21)
-	JMP_TO_MOV_TO_BANK0_REG(22)
-	JMP_TO_MOV_TO_BANK0_REG(23)
-	JMP_TO_MOV_TO_BANK0_REG(24)
-	JMP_TO_MOV_TO_BANK0_REG(25)
-	JMP_TO_MOV_TO_BANK0_REG(26)
-	JMP_TO_MOV_TO_BANK0_REG(27)
-	JMP_TO_MOV_TO_BANK0_REG(28)
-	JMP_TO_MOV_TO_BANK0_REG(29)
-	JMP_TO_MOV_TO_BANK0_REG(30)
-	JMP_TO_MOV_TO_BANK0_REG(31)
-	MOV_TO_REG(32)
-	MOV_TO_REG(33)
-	MOV_TO_REG(34)
-	MOV_TO_REG(35)
-	MOV_TO_REG(36)
-	MOV_TO_REG(37)
-	MOV_TO_REG(38)
-	MOV_TO_REG(39)
-	MOV_TO_REG(40)
-	MOV_TO_REG(41)
-	MOV_TO_REG(42)
-	MOV_TO_REG(43)
-	MOV_TO_REG(44)
-	MOV_TO_REG(45)
-	MOV_TO_REG(46)
-	MOV_TO_REG(47)
-	MOV_TO_REG(48)
-	MOV_TO_REG(49)
-	MOV_TO_REG(50)
-	MOV_TO_REG(51)
-	MOV_TO_REG(52)
-	MOV_TO_REG(53)
-	MOV_TO_REG(54)
-	MOV_TO_REG(55)
-	MOV_TO_REG(56)
-	MOV_TO_REG(57)
-	MOV_TO_REG(58)
-	MOV_TO_REG(59)
-	MOV_TO_REG(60)
-	MOV_TO_REG(61)
-	MOV_TO_REG(62)
-	MOV_TO_REG(63)
-	MOV_TO_REG(64)
-	MOV_TO_REG(65)
-	MOV_TO_REG(66)
-	MOV_TO_REG(67)
-	MOV_TO_REG(68)
-	MOV_TO_REG(69)
-	MOV_TO_REG(70)
-	MOV_TO_REG(71)
-	MOV_TO_REG(72)
-	MOV_TO_REG(73)
-	MOV_TO_REG(74)
-	MOV_TO_REG(75)
-	MOV_TO_REG(76)
-	MOV_TO_REG(77)
-	MOV_TO_REG(78)
-	MOV_TO_REG(79)
-	MOV_TO_REG(80)
-	MOV_TO_REG(81)
-	MOV_TO_REG(82)
-	MOV_TO_REG(83)
-	MOV_TO_REG(84)
-	MOV_TO_REG(85)
-	MOV_TO_REG(86)
-	MOV_TO_REG(87)
-	MOV_TO_REG(88)
-	MOV_TO_REG(89)
-	MOV_TO_REG(90)
-	MOV_TO_REG(91)
-	MOV_TO_REG(92)
-	MOV_TO_REG(93)
-	MOV_TO_REG(94)
-	MOV_TO_REG(95)
-	MOV_TO_REG(96)
-	MOV_TO_REG(97)
-	MOV_TO_REG(98)
-	MOV_TO_REG(99)
-	MOV_TO_REG(100)
-	MOV_TO_REG(101)
-	MOV_TO_REG(102)
-	MOV_TO_REG(103)
-	MOV_TO_REG(104)
-	MOV_TO_REG(105)
-	MOV_TO_REG(106)
-	MOV_TO_REG(107)
-	MOV_TO_REG(108)
-	MOV_TO_REG(109)
-	MOV_TO_REG(110)
-	MOV_TO_REG(111)
-	MOV_TO_REG(112)
-	MOV_TO_REG(113)
-	MOV_TO_REG(114)
-	MOV_TO_REG(115)
-	MOV_TO_REG(116)
-	MOV_TO_REG(117)
-	MOV_TO_REG(118)
-	MOV_TO_REG(119)
-	MOV_TO_REG(120)
-	MOV_TO_REG(121)
-	MOV_TO_REG(122)
-	MOV_TO_REG(123)
-	MOV_TO_REG(124)
-	MOV_TO_REG(125)
-	MOV_TO_REG(126)
-	MOV_TO_REG(127)
-END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
deleted file mode 100644
index b039874..0000000
--- a/arch/ia64/kvm/process.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * process.c: handle interruption inject for guests.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  	Shaofan Li (Susue Li) <susie.li@intel.com>
- *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Xiantao Zhang (xiantao.zhang@intel.com)
- */
-#include "vcpu.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/fpswa.h>
-#include <asm/kregs.h>
-#include <asm/tlb.h>
-
-fpswa_interface_t *vmm_fpswa_interface;
-
-#define IA64_VHPT_TRANS_VECTOR			0x0000
-#define IA64_INST_TLB_VECTOR			0x0400
-#define IA64_DATA_TLB_VECTOR			0x0800
-#define IA64_ALT_INST_TLB_VECTOR		0x0c00
-#define IA64_ALT_DATA_TLB_VECTOR		0x1000
-#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
-#define IA64_INST_KEY_MISS_VECTOR		0x1800
-#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
-#define IA64_DIRTY_BIT_VECTOR			0x2000
-#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
-#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
-#define IA64_BREAK_VECTOR			0x2c00
-#define IA64_EXTINT_VECTOR			0x3000
-#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
-#define IA64_KEY_PERMISSION_VECTOR		0x5100
-#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
-#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
-#define IA64_GENEX_VECTOR			0x5400
-#define IA64_DISABLED_FPREG_VECTOR		0x5500
-#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
-#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
-#define IA64_DEBUG_VECTOR			0x5900
-#define IA64_UNALIGNED_REF_VECTOR		0x5a00
-#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
-#define IA64_FP_FAULT_VECTOR			0x5c00
-#define IA64_FP_TRAP_VECTOR			0x5d00
-#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
-#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
-#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
-
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
-			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
-			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
-
-#define DOMN_PAL_REQUEST    0x110000
-#define DOMN_SAL_REQUEST    0x110001
-
-static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
-	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
-	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
-	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
-	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
-	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
-};
-
-static void collect_interruption(struct kvm_vcpu *vcpu)
-{
-	u64 ipsr;
-	u64 vdcr;
-	u64 vifs;
-	unsigned long vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = vcpu_get_psr(vcpu);
-	vcpu_bsw0(vcpu);
-	if (vpsr & IA64_PSR_IC) {
-
-		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
-		 * since after guest do rfi, we still want these bits on in
-		 * mpsr
-		 */
-
-		ipsr = regs->cr_ipsr;
-		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
-					| IA64_PSR_DD | IA64_PSR_SS
-					| IA64_PSR_ED));
-		vcpu_set_ipsr(vcpu, vpsr);
-
-		/* Currently, for trap, we do not advance IIP to next
-		 * instruction. That's because we assume caller already
-		 * set up IIP correctly
-		 */
-
-		vcpu_set_iip(vcpu , regs->cr_iip);
-
-		/* set vifs.v to zero */
-		vifs = VCPU(vcpu, ifs);
-		vifs &= ~IA64_IFS_V;
-		vcpu_set_ifs(vcpu, vifs);
-
-		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
-	}
-
-	vdcr = VCPU(vcpu, dcr);
-
-	/* Set guest psr
-	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
-	 * be: set to the value of dcr.be
-	 * pp: set to the value of dcr.pp
-	 */
-	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
-	vpsr |= (vdcr & IA64_DCR_BE);
-
-	/* VDCR pp bit position is different from VPSR pp bit */
-	if (vdcr & IA64_DCR_PP) {
-		vpsr |= IA64_PSR_PP;
-	} else {
-		vpsr &= ~IA64_PSR_PP;
-	}
-
-	vcpu_set_psr(vcpu, vpsr);
-
-}
-
-void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
-{
-	u64 viva;
-	struct kvm_pt_regs *regs;
-	union ia64_isr pt_isr;
-
-	regs = vcpu_regs(vcpu);
-
-	/* clear cr.isr.ir (incomplete register frame)*/
-	pt_isr.val = VMX(vcpu, cr_isr);
-	pt_isr.ir = 0;
-	VMX(vcpu, cr_isr) = pt_isr.val;
-
-	collect_interruption(vcpu);
-
-	viva = vcpu_get_iva(vcpu);
-	regs->cr_iip = viva + vec;
-}
-
-static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
-{
-	union ia64_rr rr, rr1;
-
-	rr.val = vcpu_get_rr(vcpu, ifa);
-	rr1.val = 0;
-	rr1.ps = rr.ps;
-	rr1.rid = rr.rid;
-	return (rr1.val);
-}
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- *  set_ifa: if true, set vIFA
- *  set_itir: if true, set vITIR
- *  set_iha: if true, set vIHA
- */
-void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
-		int set_ifa, int set_itir, int set_iha)
-{
-	long vpsr;
-	u64 value;
-
-	vpsr = VCPU(vcpu, vpsr);
-	/* Vol2, Table 8-1 */
-	if (vpsr & IA64_PSR_IC) {
-		if (set_ifa)
-			vcpu_set_ifa(vcpu, vadr);
-		if (set_itir) {
-			value = vcpu_get_itir_on_fault(vcpu, vadr);
-			vcpu_set_itir(vcpu, value);
-		}
-
-		if (set_iha) {
-			value = vcpu_thash(vcpu, vadr);
-			vcpu_set_iha(vcpu, value);
-		}
-	}
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- *  @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
-}
-
-/*
- * Data Nested TLB Fault
- *  @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void nested_dtlb(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- *  @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- *  VHPT Translation Vector
- */
-static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA*/
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-}
-
-/*
- * VHPT Instruction Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * VHPT Data Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * Deal with:
- *  General Exception vector
- */
-void _general_exception(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
-}
-
-/*
- * Illegal Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_dep(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rsv_reg_field(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void privilege_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Unimplement Data Address Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void unimpl_daddr(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void privilege_reg(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/* Deal with
- *  Nat consumption vector
- * Parameter:
- *  vaddr: Optional, if t == REGISTER
- */
-static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
-						enum tlb_miss_type t)
-{
-	/* If vPSR.ic && t == DATA/INST, IFA */
-	if (t == DATA || t == INSTRUCTION) {
-		/* IFA */
-		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
-	}
-
-	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rnat_consumption(struct kvm_vcpu *vcpu)
-{
-	_nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- *  Page not present vector
- */
-static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
-void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-/* Deal with
- *  Data access rights vector
- */
-void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
-}
-
-fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
-		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
-		unsigned long *ifs, struct kvm_pt_regs *regs)
-{
-	fp_state_t fp_state;
-	fpswa_ret_t ret;
-	struct kvm_vcpu *vcpu = current_vcpu;
-
-	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
-
-	if (!vmm_fpswa_interface)
-		return (fpswa_ret_t) {-1, 0, 0, 0};
-
-	memset(&fp_state, 0, sizeof(fp_state_t));
-
-	/*
-	 * compute fp_state.  only FP registers f6 - f11 are used by the
-	 * vmm, so set those bits in the mask and set the low volatile
-	 * pointer to point to these registers.
-	 */
-	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
-
-	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
-
-   /*
-	 * unsigned long (*EFI_FPSWA) (
-	 *      unsigned long    trap_type,
-	 *      void             *Bundle,
-	 *      unsigned long    *pipsr,
-	 *      unsigned long    *pfsr,
-	 *      unsigned long    *pisr,
-	 *      unsigned long    *ppreds,
-	 *      unsigned long    *pifs,
-	 *      void             *fp_state);
-	 */
-	/*Call host fpswa interface directly to virtualize
-	 *guest fpswa request!
-	 */
-	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
-	ia64_srlz_d();
-
-	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
-			ipsr, fpsr, isr, pr, ifs, &fp_state);
-	ia64_set_rr(7UL << 61, old_rr7);
-	ia64_srlz_d();
-	return ret;
-}
-
-/*
- * Handle floating-point assist faults and traps for domain.
- */
-unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
-					unsigned long isr)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	IA64_BUNDLE bundle;
-	unsigned long fault_ip;
-	fpswa_ret_t ret;
-
-	fault_ip = regs->cr_iip;
-	/*
-	 * When the FP trap occurs, the trapping instruction is completed.
-	 * If ipsr.ri == 0, there is the trapping instruction in previous
-	 * bundle.
-	 */
-	if (!fp_fault && (ia64_psr(regs)->ri == 0))
-		fault_ip -= 16;
-
-	if (fetch_code(v, fault_ip, &bundle))
-		return -EAGAIN;
-
-	if (!bundle.i64[0] && !bundle.i64[1])
-		return -EACCES;
-
-	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
-			&isr, &regs->pr, &regs->cr_ifs, regs);
-	return ret.status;
-}
-
-void reflect_interruption(u64 ifa, u64 isr, u64 iim,
-		u64 vec, struct kvm_pt_regs *regs)
-{
-	u64 vector;
-	int status ;
-	struct kvm_vcpu *vcpu = current_vcpu;
-	u64 vpsr = VCPU(vcpu, vpsr);
-
-	vector = vec2off[vec];
-
-	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-						"with psr.ic = 0\n", vector);
-		return;
-	}
-
-	switch (vec) {
-	case 32: 	/*IA64_FP_FAULT_VECTOR*/
-		status = vmm_handle_fpu_swa(1, regs, isr);
-		if (!status) {
-			vcpu_increment_iip(vcpu);
-			return;
-		} else if (-EAGAIN == status)
-			return;
-		break;
-	case 33:	/*IA64_FP_TRAP_VECTOR*/
-		status = vmm_handle_fpu_swa(0, regs, isr);
-		if (!status)
-			return ;
-		break;
-	}
-
-	VCPU(vcpu, isr) = isr;
-	VCPU(vcpu, iipa) = regs->cr_iip;
-	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
-		VCPU(vcpu, iim) = iim;
-	else
-		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
-
-	inject_guest_interruption(vcpu, vector);
-}
-
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
-						unsigned long arg)
-{
-	struct thash_data *data;
-	unsigned long gpa, poff;
-
-	if (!is_physical_mode(vcpu)) {
-		/* Depends on caller to provide the DTR or DTC mapping.*/
-		data = vtlb_lookup(vcpu, arg, D_TLB);
-		if (data)
-			gpa = data->page_flags & _PAGE_PPN_MASK;
-		else {
-			data = vhpt_lookup(arg);
-			if (!data)
-				return 0;
-			gpa = data->gpaddr & _PAGE_PPN_MASK;
-		}
-
-		poff = arg & (PSIZE(data->ps) - 1);
-		arg = PAGEALIGN(gpa, data->ps) | poff;
-	}
-	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
-
-	return (unsigned long)__va(arg);
-}
-
-static void set_pal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
-	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
-	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
-
-	/*FIXME:For static and stacked convention, firmware
-	 * has put the parameters in gr28-gr31 before
-	 * break to vmm  !!*/
-
-	switch (gr28) {
-	case PAL_PERF_MON_INFO:
-	case PAL_HALT_INFO:
-		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-		break;
-	case PAL_BRAND_INFO:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
-		break;
-	default:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-	}
-	p->u.pal_data.gr28 = gr28;
-	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
-
-	p->exit_reason = EXIT_REASON_PAL_CALL;
-}
-
-static void get_pal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
-		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
-		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
-		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-static void set_sal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
-	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
-	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
-	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
-	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
-	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
-	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
-	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
-	p->exit_reason = EXIT_REASON_SAL_CALL;
-}
-
-static void get_sal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
-		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
-		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
-		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
-		unsigned long isr, unsigned long iim)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	long psr;
-
-	if (ia64_psr(regs)->cpl == 0) {
-		/* Allow hypercalls only when cpl = 0.  */
-		if (iim == DOMN_PAL_REQUEST) {
-			local_irq_save(psr);
-			set_pal_call_data(v);
-			vmm_transition(v);
-			get_pal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		} else if (iim == DOMN_SAL_REQUEST) {
-			local_irq_save(psr);
-			set_sal_call_data(v);
-			vmm_transition(v);
-			get_sal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		}
-	}
-	reflect_interruption(ifa, isr, iim, 11, regs);
-}
-
-void check_pending_irq(struct kvm_vcpu *vcpu)
-{
-	int  mask, h_pending, h_inservice;
-	u64 isr;
-	unsigned long  vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	h_pending = highest_pending_irq(vcpu);
-	if (h_pending == NULL_VECTOR) {
-		update_vhpi(vcpu, NULL_VECTOR);
-		return;
-	}
-	h_inservice = highest_inservice_irq(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	mask = irq_masked(vcpu, h_pending, h_inservice);
-	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
-		isr = vpsr & IA64_PSR_RI;
-		update_vhpi(vcpu, h_pending);
-		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-	} else if (mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-	} else {
-		/* masked by vpsr.i or vtpr.*/
-		update_vhpi(vcpu, h_pending);
-	}
-}
-
-static void generate_exirq(struct kvm_vcpu *vcpu)
-{
-	unsigned  vpsr;
-	uint64_t isr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
-	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-}
-
-void vhpi_detection(struct kvm_vcpu *vcpu)
-{
-	uint64_t    threshold, vhpi;
-	union ia64_tpr       vtpr;
-	struct ia64_psr vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vtpr.val = VCPU(vcpu, tpr);
-
-	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
-	vhpi = VCPU(vcpu, vhpi);
-	if (vhpi > threshold) {
-		/* interrupt actived*/
-		generate_exirq(vcpu);
-	}
-}
-
-void leave_hypervisor_tail(void)
-{
-	struct kvm_vcpu *v = current_vcpu;
-
-	if (VMX(v, timer_check)) {
-		VMX(v, timer_check) = 0;
-		if (VMX(v, itc_check)) {
-			if (vcpu_get_itc(v) > VCPU(v, itm)) {
-				if (!(VCPU(v, itv) & (1 << 16))) {
-					vcpu_pend_interrupt(v, VCPU(v, itv)
-							& 0xff);
-					VMX(v, itc_check) = 0;
-				} else {
-					v->arch.timer_pending = 1;
-				}
-				VMX(v, last_itc) = VCPU(v, itm) + 1;
-			}
-		}
-	}
-
-	rmb();
-	if (v->arch.irq_new_pending) {
-		v->arch.irq_new_pending = 0;
-		VMX(v, irq_check) = 0;
-		check_pending_irq(v);
-		return;
-	}
-	if (VMX(v, irq_check)) {
-		VMX(v, irq_check) = 0;
-		vhpi_detection(v);
-	}
-}
-
-static inline void handle_lds(struct kvm_pt_regs *regs)
-{
-	regs->cr_ipsr |= IA64_PSR_ED;
-}
-
-void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
-{
-	unsigned long pte;
-	union ia64_rr rr;
-
-	rr.val = ia64_get_rr(vadr);
-	pte =  vadr & _PAGE_PPN_MASK;
-	pte = pte | PHY_PAGE_WB;
-	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
-	return;
-}
-
-void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
-{
-	unsigned long vpsr;
-	int type;
-
-	u64 vhpt_adr, gppa, pteval, rr, itir;
-	union ia64_isr misr;
-	union ia64_pta vpta;
-	struct thash_data *data;
-	struct kvm_vcpu *v = current_vcpu;
-
-	vpsr = VCPU(v, vpsr);
-	misr.val = VMX(v, cr_isr);
-
-	type = vec;
-
-	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
-		if (vec == 2) {
-			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
-				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
-				return;
-			}
-		}
-		physical_tlb_miss(v, vadr, type);
-		return;
-	}
-	data = vtlb_lookup(v, vadr, type);
-	if (data != 0) {
-		if (type == D_TLB) {
-			gppa = (vadr & ((1UL << data->ps) - 1))
-				+ (data->ppn >> (data->ps - 12) << data->ps);
-			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
-				if (data->pl >= ((regs->cr_ipsr >>
-						IA64_PSR_CPL0_BIT) & 3))
-					emulate_io_inst(v, gppa, data->ma);
-				else {
-					vcpu_set_isr(v, misr.val);
-					data_access_rights(v, vadr);
-				}
-				return ;
-			}
-		}
-		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
-
-	} else if (type == D_TLB) {
-		if (misr.sp) {
-			handle_lds(regs);
-			return;
-		}
-
-		rr = vcpu_get_rr(v, vadr);
-		itir = rr & (RR_RID_MASK | RR_PS_MASK);
-
-		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				alt_dtlb(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-			return ;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-		/* avoid recursively walking (short format) VHPT */
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (!(pteval & _PAGE_P)) {
-				if (vpsr & IA64_PSR_IC) {
-					vcpu_set_isr(v, misr.val);
-					dtlb_fault(v, vadr);
-				} else {
-					nested_dtlb(v);
-				}
-			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
-				thash_purge_and_insert(v, pteval, itir,
-								vadr, D_TLB);
-			} else if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dtlb_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		} else {
-			/* Can't read VHPT.  */
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dvhpt_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		}
-	} else if (type == I_TLB) {
-		if (!(vpsr & IA64_PSR_IC))
-			misr.ni = 1;
-		if (!vhpt_enabled(v, vadr, INST_REF)) {
-			vcpu_set_isr(v, misr.val);
-			alt_itlb(v, vadr);
-			return;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (pteval & _PAGE_P) {
-				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
-					vcpu_set_isr(v, misr.val);
-					itlb_fault(v, vadr);
-					return ;
-				}
-				rr = vcpu_get_rr(v, vadr);
-				itir = rr & (RR_RID_MASK | RR_PS_MASK);
-				thash_purge_and_insert(v, pteval, itir,
-							vadr, I_TLB);
-			} else {
-				vcpu_set_isr(v, misr.val);
-				inst_page_not_present(v, vadr);
-			}
-		} else {
-			vcpu_set_isr(v, misr.val);
-			ivhpt_fault(v, vadr);
-		}
-	}
-}
-
-void kvm_vexirq(struct kvm_vcpu *vcpu)
-{
-	u64 vpsr, isr;
-	struct kvm_pt_regs *regs;
-
-	regs = vcpu_regs(vcpu);
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
-}
-
-void kvm_ia64_handle_irq(struct kvm_vcpu *v)
-{
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-	vmm_transition(v);
-	local_irq_restore(psr);
-
-	VMX(v, timer_check) = 1;
-
-}
-
-static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
-{
-	u64 oldrid, moldrid, oldpsbits, vaddr;
-	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
-	vaddr = p->vaddr;
-
-	oldrid = VMX(v, vrr[0]);
-	VMX(v, vrr[0]) = p->rr;
-	oldpsbits = VMX(v, psbits[0]);
-	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
-	moldrid = ia64_get_rr(0x0);
-	ia64_set_rr(0x0, vrrtomrr(p->rr));
-	ia64_srlz_d();
-
-	vaddr = PAGEALIGN(vaddr, p->ps);
-	thash_purge_entries_remote(v, vaddr, p->ps);
-
-	VMX(v, vrr[0]) = oldrid;
-	VMX(v, psbits[0]) = oldpsbits;
-	ia64_set_rr(0x0, moldrid);
-	ia64_dv_serialize_data();
-}
-
-static void vcpu_do_resume(struct kvm_vcpu *vcpu)
-{
-	/*Re-init VHPT and VTLB once from resume*/
-	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
-	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
-
-	ia64_set_pta(vcpu->arch.vhpt.pta.val);
-}
-
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-		panic_vm(vcpu, "Failed to do vmm sanity check,"
-			"it maybe caused by crashed vmm!!\n\n");
-	}
-}
-
-static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
-{
-	vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
-
-	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
-		vcpu_do_resume(vcpu);
-		return;
-	}
-
-	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
-		thash_purge_all(vcpu);
-		return;
-	}
-
-	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
-		while (vcpu->arch.ptc_g_count > 0)
-			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
-	}
-}
-
-void vmm_transition(struct kvm_vcpu *vcpu)
-{
-	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
-			1, 0, 0, 0, 0, 0);
-	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
-						1, 0, 0, 0, 0, 0);
-	kvm_do_resume_op(vcpu);
-}
-
-void vmm_panic_handler(u64 vec)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	vmm_sanity = 0;
-	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-			vec2off[vec]);
-}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
deleted file mode 100644
index 30897d4..0000000
--- a/arch/ia64/kvm/trampoline.S
+++ /dev/null
@@ -1,1038 +0,0 @@
-/* Save all processor states
- *
- * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
- * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include "asm-offsets.h"
-
-
-#define CTX(name)    VMM_CTX_##name##_OFFSET
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r32;		\
-	add	r3 = CTX(B1),r32;		\
-	mov	r16 = b0;			\
-	mov	r17 = b1;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b2;			\
-	mov	r17 = b3;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b4;			\
-	mov	r17 = b5;			\
-	;;					\
-	st8	[r2]=r16;   			\
-	st8	[r3]=r17;   			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r33;		\
-	add	r3 = CTX(B1),r33;		\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b0 = r16;			\
-	mov	b1 = r17;			\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b2 = r16;			\
-	mov	b3 = r17;			\
-	;;					\
-	ld8	r16=[r2];   			\
-	ld8	r17=[r3];   			\
-	;;					\
-	mov	b4=r16;				\
-	mov	b5=r17;				\
-	;;
-
-
-	/*
-	 *	r32: context_t base address
-	 *	bsw == 1
-	 *	Save all bank1 general registers, r4 ~ r7
-	 */
-#define	SAVE_GENERAL_REGS			\
-	add	r2=CTX(R4),r32;			\
-	add	r3=CTX(R5),r32;			\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r4,16;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r5,16;		\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r6,48;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r7,48;		\
-	;;                          		\
-.mem.offset 0,0;        			\
-    st8.spill    [r2]=r12;			\
-.mem.offset 8,0;				\
-    st8.spill    [r3]=r13;			\
-    ;;
-
-	/*
-	 *	r33: context_t base address
-	 *	bsw == 1
-	 */
-#define	RESTORE_GENERAL_REGS			\
-	add	r2=CTX(R4),r33;			\
-	add	r3=CTX(R5),r33;			\
-	;;					\
-	ld8.fill	r4=[r2],16;		\
-	ld8.fill	r5=[r3],16;		\
-	;;					\
-	ld8.fill	r6=[r2],48;		\
-	ld8.fill	r7=[r3],48;		\
-	;;					\
-	ld8.fill    r12=[r2];			\
-	ld8.fill    r13 =[r3];			\
-	;;
-
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r32;		\
-	add	r3 = CTX(KR1),r32;		\
-	mov	r16 = ar.k0;			\
-	mov	r17 = ar.k1;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;		        		\
-	mov	r16 = ar.k2;			\
-	mov	r17 = ar.k3;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k4;			\
-	mov	r17 = ar.k5;			\
-	;;				    	\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k6;			\
-	mov	r17 = ar.k7;			\
-	;;		    			\
-	st8	[r2] = r16;     		\
-	st8	[r3] = r17;			\
-	;;
-
-
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r33;		\
-	add	r3 = CTX(KR1),r33;		\
-	;;		    			\
-	ld8	r16=[r2],16;     		\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k0=r16;  			\
-	mov	ar.k1=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;		        		\
-	mov	ar.k2=r16;   			\
-	mov	ar.k3=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k4=r16;			\
-	mov	ar.k5=r17;	    		\
-	;;				    	\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k6=r16;  			\
-	mov	ar.k7=r17;	    		\
-	;;
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_APP_REGS				\
-	add  r2 = CTX(BSPSTORE),r32;		\
-	mov  r16 = ar.bspstore;			\
-	;;					\
-	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
-	mov  r16 = ar.rnat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
-	mov  r16 = ar.fcr;			\
-	;;					\
-	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
-	mov  r16 = ar.eflag;			\
-	;;					\
-	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
-	mov  r16 = ar.cflg;			\
-	;;					\
-	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
-	mov  r16 = ar.fsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
-	mov  r16 = ar.fir;			\
-	;;					\
-	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
-	mov  r16 = ar.fdr;			\
-	;;					\
-	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
-	mov  r16 = ar.unat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
-	mov  r16 = ar.fpsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
-	mov  r16 = ar.pfs;			\
-	;;					\
-	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
-	mov  r16 = ar.lc;			\
-	;;					\
-	st8  [r2] = r16;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_APP_REGS			\
-	add  r2=CTX(BSPSTORE),r33;		\
-	;;					\
-	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
-	;;					\
-	mov  ar.bspstore=r16;			\
-	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
-	;;					\
-	mov  ar.rnat=r16;			\
-	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
-	;;					\
-	mov  ar.fcr=r16;			\
-	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
-	;;					\
-	mov  ar.eflag=r16;			\
-	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
-	;;					\
-	mov  ar.cflg=r16;			\
-	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
-	;;					\
-	mov  ar.fsr=r16;			\
-	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
-	;;					\
-	mov  ar.fir=r16;			\
-	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
-	;;					\
-	mov  ar.fdr=r16;			\
-	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
-	;;					\
-	mov  ar.unat=r16;			\
-	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
-	;;					\
-	mov  ar.fpsr=r16;			\
-	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
-	;;					\
-	mov  ar.pfs=r16;			\
-	ld8  r16=[r2];				\
-	;;					\
-	mov  ar.lc=r16;				\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_CTL_REGS				\
-	add	r2 = CTX(DCR),r32;		\
-	mov	r16 = cr.dcr;			\
-	;;					\
-	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
-	;;                          		\
-	mov	r16 = cr.iva;			\
-	;;					\
-	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
-	;;					\
-	mov r16 = cr.pta;			\
-	;;					\
-	st8 [r2] = r16 ;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_CTL_REGS				\
-	add	r2 = CTX(DCR),r33;	        	\
-	;;						\
-	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
-	;;                      			\
-	mov	cr.dcr = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
-	;;						\
-	mov	cr.iva = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8 r16 = [r2];					\
-	;;						\
-	mov cr.pta = r16;				\
-	dv_serialize_data;				\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_REGION_REGS			\
-	add	r2=CTX(RR0),r32;		\
-	mov	r16=rr[r0];			\
-	dep.z	r18=1,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=2,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=3,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=4,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=5,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=7,61,3;			\
-	;;					\
-	st8	[r2]=r17,16;			\
-	mov	r16=rr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	;;
-
-	/*
-	 *	r33:context_t base address
-	 */
-#define	RESTORE_REGION_REGS	\
-	add	r2=CTX(RR0),r33;\
-	mov r18=r0;		\
-	;;			\
-	ld8	r20=[r2],8;	\
-	;;	/* rr0 */	\
-	ld8	r21=[r2],8;	\
-	;;	/* rr1 */	\
-	ld8	r22=[r2],8;	\
-	;;	/* rr2 */	\
-	ld8	r23=[r2],8;	\
-	;;	/* rr3 */	\
-	ld8	r24=[r2],8;	\
-	;;	/* rr4 */	\
-	ld8	r25=[r2],16;	\
-	;;	/* rr5 */	\
-	ld8	r27=[r2];	\
-	;;	/* rr7 */	\
-	mov rr[r18]=r20;	\
-	dep.z	r18=1,61,3;	\
-	;;  /* rr1 */		\
-	mov rr[r18]=r21;	\
-	dep.z	r18=2,61,3;	\
-	;;  /* rr2 */		\
-	mov rr[r18]=r22;	\
-	dep.z	r18=3,61,3;	\
-	;;  /* rr3 */		\
-	mov rr[r18]=r23;	\
-	dep.z	r18=4,61,3;	\
-	;;  /* rr4 */		\
-	mov rr[r18]=r24;	\
-	dep.z	r18=5,61,3;	\
-	;;  /* rr5 */		\
-	mov rr[r18]=r25;	\
-	dep.z	r18=7,61,3;	\
-	;;  /* rr7 */		\
-	mov rr[r18]=r27;	\
-	;;			\
-	srlz.i;			\
-	;;
-
-
-
-	/*
-	 *	r32:	context_t base address
-	 *	r36~r39:scratch registers
-	 */
-#define	SAVE_DEBUG_REGS				\
-	add	r2=CTX(IBR0),r32;		\
-	add	r3=CTX(DBR0),r32;		\
-	mov	r16=ibr[r0];			\
-	mov	r17=dbr[r0];			\
-	;;					\
-	st8	[r2]=r16,8; 			\
-	st8	[r3]=r17,8;	    		\
-	add	r18=1,r0;		    	\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=3,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=4,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=5,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=6,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=7,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	;;
-
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_DEBUG_REGS			\
-	add	r2=CTX(IBR0),r33;		\
-	add	r3=CTX(DBR0),r33;		\
-	mov r16=7;    				\
-	mov r17=r0;				\
-	;;                    			\
-	mov ar.lc = r16;			\
-	;; 					\
-1:						\
-	ld8 r18=[r2],8;		    		\
-	ld8 r19=[r3],8;				\
-	;;					\
-	mov ibr[r17]=r18;			\
-	mov dbr[r17]=r19;			\
-	;;   					\
-	srlz.i;					\
-	;; 					\
-	add r17=1,r17;				\
-	br.cloop.sptk 1b;			\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_LOW				\
-	add	r2=CTX(F2),r32;			\
-	add	r3=CTX(F3),r32;			\
-	;;					\
-	stf.spill.nta	[r2]=f2,32;		\
-	stf.spill.nta	[r3]=f3,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f4,32;		\
-	stf.spill.nta	[r3]=f5,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f6,32;		\
-	stf.spill.nta	[r3]=f7,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f8,32;		\
-	stf.spill.nta	[r3]=f9,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f10,32;		\
-	stf.spill.nta	[r3]=f11,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f12,32;		\
-	stf.spill.nta	[r3]=f13,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f14,32;		\
-	stf.spill.nta	[r3]=f15,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f16,32;		\
-	stf.spill.nta	[r3]=f17,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f18,32;		\
-	stf.spill.nta	[r3]=f19,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f20,32;		\
-	stf.spill.nta	[r3]=f21,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f22,32;		\
-	stf.spill.nta	[r3]=f23,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f24,32;		\
-	stf.spill.nta	[r3]=f25,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f26,32;		\
-	stf.spill.nta	[r3]=f27,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f28,32;		\
-	stf.spill.nta	[r3]=f29,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f30;		\
-	stf.spill.nta	[r3]=f31;		\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_HIGH				\
-	add	r2=CTX(F32),r32;		\
-	add	r3=CTX(F33),r32;		\
-	;;					\
-	stf.spill.nta	[r2]=f32,32;		\
-	stf.spill.nta	[r3]=f33,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f34,32;		\
-	stf.spill.nta	[r3]=f35,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f36,32;		\
-	stf.spill.nta	[r3]=f37,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f38,32;		\
-	stf.spill.nta	[r3]=f39,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f40,32;		\
-	stf.spill.nta	[r3]=f41,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f42,32;		\
-	stf.spill.nta	[r3]=f43,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f44,32;		\
-	stf.spill.nta	[r3]=f45,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f46,32;		\
-	stf.spill.nta	[r3]=f47,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f48,32;		\
-	stf.spill.nta	[r3]=f49,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f50,32;		\
-	stf.spill.nta	[r3]=f51,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f52,32;		\
-	stf.spill.nta	[r3]=f53,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f54,32;		\
-	stf.spill.nta	[r3]=f55,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f56,32;		\
-	stf.spill.nta	[r3]=f57,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f58,32;		\
-	stf.spill.nta	[r3]=f59,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f60,32;		\
-	stf.spill.nta	[r3]=f61,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f62,32;		\
-	stf.spill.nta	[r3]=f63,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f64,32;		\
-	stf.spill.nta	[r3]=f65,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f66,32;		\
-	stf.spill.nta	[r3]=f67,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f68,32;		\
-	stf.spill.nta	[r3]=f69,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f70,32;		\
-	stf.spill.nta	[r3]=f71,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f72,32;		\
-	stf.spill.nta	[r3]=f73,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f74,32;		\
-	stf.spill.nta	[r3]=f75,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f76,32;		\
-	stf.spill.nta	[r3]=f77,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f78,32;		\
-	stf.spill.nta	[r3]=f79,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f80,32;		\
-	stf.spill.nta	[r3]=f81,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f82,32;		\
-	stf.spill.nta	[r3]=f83,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f84,32;		\
-	stf.spill.nta	[r3]=f85,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f86,32;		\
-	stf.spill.nta	[r3]=f87,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f88,32;		\
-	stf.spill.nta	[r3]=f89,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f90,32;		\
-	stf.spill.nta	[r3]=f91,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f92,32;		\
-	stf.spill.nta	[r3]=f93,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f94,32;		\
-	stf.spill.nta	[r3]=f95,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f96,32;		\
-	stf.spill.nta	[r3]=f97,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f98,32;		\
-	stf.spill.nta	[r3]=f99,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f100,32;		\
-	stf.spill.nta	[r3]=f101,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f102,32;		\
-	stf.spill.nta	[r3]=f103,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f104,32;		\
-	stf.spill.nta	[r3]=f105,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f106,32;		\
-	stf.spill.nta	[r3]=f107,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f108,32;		\
-	stf.spill.nta	[r3]=f109,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f110,32;		\
-	stf.spill.nta	[r3]=f111,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f112,32;		\
-	stf.spill.nta	[r3]=f113,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f114,32;		\
-	stf.spill.nta	[r3]=f115,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f116,32;		\
-	stf.spill.nta	[r3]=f117,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f118,32;		\
-	stf.spill.nta	[r3]=f119,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f120,32;		\
-	stf.spill.nta	[r3]=f121,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f122,32;		\
-	stf.spill.nta	[r3]=f123,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f124,32;		\
-	stf.spill.nta	[r3]=f125,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f126;		\
-	stf.spill.nta	[r3]=f127;		\
-	;;
-
-     /*
-      *      r33:    point to context_t structure
-      */
-#define	RESTORE_FPU_LOW				\
-    add     r2 = CTX(F2), r33;			\
-    add     r3 = CTX(F3), r33;			\
-    ;;						\
-    ldf.fill.nta f2 = [r2], 32;			\
-    ldf.fill.nta f3 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f4 = [r2], 32;			\
-    ldf.fill.nta f5 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f6 = [r2], 32;			\
-    ldf.fill.nta f7 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f8 = [r2], 32;			\
-    ldf.fill.nta f9 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f10 = [r2], 32;		\
-    ldf.fill.nta f11 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f12 = [r2], 32;		\
-    ldf.fill.nta f13 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f14 = [r2], 32;		\
-    ldf.fill.nta f15 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f16 = [r2], 32;		\
-    ldf.fill.nta f17 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f18 = [r2], 32;		\
-    ldf.fill.nta f19 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f20 = [r2], 32;		\
-    ldf.fill.nta f21 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f22 = [r2], 32;		\
-    ldf.fill.nta f23 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f24 = [r2], 32;		\
-    ldf.fill.nta f25 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f26 = [r2], 32;		\
-    ldf.fill.nta f27 = [r3], 32;		\
-	;;					\
-    ldf.fill.nta f28 = [r2], 32;		\
-    ldf.fill.nta f29 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f30 = [r2], 32;		\
-    ldf.fill.nta f31 = [r3], 32;		\
-    ;;
-
-
-
-    /*
-     *      r33:    point to context_t structure
-     */
-#define	RESTORE_FPU_HIGH			\
-    add     r2 = CTX(F32), r33;			\
-    add     r3 = CTX(F33), r33;			\
-    ;;						\
-    ldf.fill.nta f32 = [r2], 32;		\
-    ldf.fill.nta f33 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f34 = [r2], 32;		\
-    ldf.fill.nta f35 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f36 = [r2], 32;		\
-    ldf.fill.nta f37 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f38 = [r2], 32;		\
-    ldf.fill.nta f39 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f40 = [r2], 32;		\
-    ldf.fill.nta f41 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f42 = [r2], 32;		\
-    ldf.fill.nta f43 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f44 = [r2], 32;		\
-    ldf.fill.nta f45 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f46 = [r2], 32;		\
-    ldf.fill.nta f47 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f48 = [r2], 32;		\
-    ldf.fill.nta f49 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f50 = [r2], 32;		\
-    ldf.fill.nta f51 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f52 = [r2], 32;		\
-    ldf.fill.nta f53 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f54 = [r2], 32;		\
-    ldf.fill.nta f55 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f56 = [r2], 32;		\
-    ldf.fill.nta f57 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f58 = [r2], 32;		\
-    ldf.fill.nta f59 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f60 = [r2], 32;		\
-    ldf.fill.nta f61 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f62 = [r2], 32;		\
-    ldf.fill.nta f63 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f64 = [r2], 32;		\
-    ldf.fill.nta f65 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f66 = [r2], 32;		\
-    ldf.fill.nta f67 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f68 = [r2], 32;		\
-    ldf.fill.nta f69 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f70 = [r2], 32;		\
-    ldf.fill.nta f71 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f72 = [r2], 32;		\
-    ldf.fill.nta f73 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f74 = [r2], 32;		\
-    ldf.fill.nta f75 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f76 = [r2], 32;		\
-    ldf.fill.nta f77 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f78 = [r2], 32;		\
-    ldf.fill.nta f79 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f80 = [r2], 32;		\
-    ldf.fill.nta f81 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f82 = [r2], 32;		\
-    ldf.fill.nta f83 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f84 = [r2], 32;		\
-    ldf.fill.nta f85 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f86 = [r2], 32;		\
-    ldf.fill.nta f87 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f88 = [r2], 32;		\
-    ldf.fill.nta f89 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f90 = [r2], 32;		\
-    ldf.fill.nta f91 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f92 = [r2], 32;		\
-    ldf.fill.nta f93 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f94 = [r2], 32;		\
-    ldf.fill.nta f95 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f96 = [r2], 32;		\
-    ldf.fill.nta f97 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f98 = [r2], 32;		\
-    ldf.fill.nta f99 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f100 = [r2], 32;		\
-    ldf.fill.nta f101 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f102 = [r2], 32;		\
-    ldf.fill.nta f103 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f104 = [r2], 32;		\
-    ldf.fill.nta f105 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f106 = [r2], 32;		\
-    ldf.fill.nta f107 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f108 = [r2], 32;		\
-    ldf.fill.nta f109 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f110 = [r2], 32;		\
-    ldf.fill.nta f111 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f112 = [r2], 32;		\
-    ldf.fill.nta f113 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f114 = [r2], 32;		\
-    ldf.fill.nta f115 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f116 = [r2], 32;		\
-    ldf.fill.nta f117 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f118 = [r2], 32;		\
-    ldf.fill.nta f119 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f120 = [r2], 32;		\
-    ldf.fill.nta f121 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f122 = [r2], 32;		\
-    ldf.fill.nta f123 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f124 = [r2], 32;		\
-    ldf.fill.nta f125 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f126 = [r2], 32;		\
-    ldf.fill.nta f127 = [r3], 32;		\
-    ;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_PTK_REGS				\
-    add r2=CTX(PKR0), r32;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1:						\
-    mov r18=pkr[r17];				\
-    ;;                     			\
-    srlz.i;					\
-    ;; 						\
-    st8 [r2]=r18, 8;				\
-    ;;    					\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_PTK_REGS	    		\
-    add r2=CTX(PKR0), r33;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1: 						\
-    ld8 r18=[r2], 8;				\
-    ;;						\
-    mov pkr[r17]=r18;				\
-    ;;    					\
-    srlz.i;					\
-    ;; 						\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-
-/*
- * void vmm_trampoline( context_t * from,
- *			context_t * to)
- *
- * 	from:	r32
- *	to:	r33
- *  note: interrupt disabled before call this function.
- */
-GLOBAL_ENTRY(vmm_trampoline)
-    mov r16 = psr
-    adds r2 = CTX(PSR), r32
-    ;;
-    st8 [r2] = r16, 8       // psr
-    mov r17 = pr
-    ;;
-    st8 [r2] = r17, 8       // pr
-    mov r18 = ar.unat
-    ;;
-    st8 [r2] = r18
-    mov r17 = ar.rsc
-    ;;
-    adds r2 = CTX(RSC),r32
-    ;;
-    st8 [r2]= r17
-    mov ar.rsc =0
-    flushrs
-    ;;
-    SAVE_GENERAL_REGS
-    ;;
-    SAVE_KERNEL_REGS
-    ;;
-    SAVE_APP_REGS
-    ;;
-    SAVE_BRANCH_REGS
-    ;;
-    SAVE_CTL_REGS
-    ;;
-    SAVE_REGION_REGS
-    ;;
-    //SAVE_DEBUG_REGS
-    ;;
-    rsm  psr.dfl
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_LOW
-    ;;
-    rsm  psr.dfh
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_HIGH
-    ;;
-    SAVE_PTK_REGS
-    ;;
-    RESTORE_PTK_REGS
-    ;;
-    RESTORE_FPU_HIGH
-    ;;
-    RESTORE_FPU_LOW
-    ;;
-    //RESTORE_DEBUG_REGS
-    ;;
-    RESTORE_REGION_REGS
-    ;;
-    RESTORE_CTL_REGS
-    ;;
-    RESTORE_BRANCH_REGS
-    ;;
-    RESTORE_APP_REGS
-    ;;
-    RESTORE_KERNEL_REGS
-    ;;
-    RESTORE_GENERAL_REGS
-    ;;
-    adds r2=CTX(PSR), r33
-    ;;
-    ld8 r16=[r2], 8       // psr
-    ;;
-    mov psr.l=r16
-    ;;
-    srlz.d
-    ;;
-    ld8 r16=[r2], 8       // pr
-    ;;
-    mov pr =r16,-1
-    ld8 r16=[r2]       // unat
-    ;;
-    mov ar.unat=r16
-    ;;
-    adds r2=CTX(RSC),r33
-    ;;
-    ld8 r16 =[r2]
-    ;;
-    mov ar.rsc = r16
-    ;;
-    br.ret.sptk.few b0
-END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
deleted file mode 100644
index 958815c..0000000
--- a/arch/ia64/kvm/vcpu.c
+++ /dev/null
@@ -1,2209 +0,0 @@
-/*
- * kvm_vcpu.c: handling all virtual cpu related thing.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  Shaofan Li (Susue Li) <susie.li@intel.com>
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/ia64regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- *   mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
-	/*  2004/09/12(Kevin): Allow switch to self */
-	/*
-	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
-	 *  This kind of transition usually occurs in the very early
-	 *  stage of Linux boot up procedure. Another case is in efi
-	 *  and pal calls. (see "arch/ia64/kernel/head.S")
-	 *
-	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
-	 *  This kind of transition is found when OSYa exits efi boot
-	 *  service. Due to gva = gpa in this case (Same region),
-	 *  data access can be satisfied though itlb entry for physical
-	 *  emulation is hit.
-	 */
-	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
-	 *  This kind of transition is found in OSYa.
-	 *
-	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
-	 *  This kind of transition is found in OSYa
-	 */
-	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
-	/* (1,0,0)->(1,1,1) */
-	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
-	/*
-	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
-	 *  This kind of transition usually occurs when Linux returns
-	 *  from the low level TLB miss handlers.
-	 *  (see "arch/ia64/kernel/ivt.S")
-	 */
-	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
-	 *  This kind of transition usually occurs in Linux low level
-	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
-	 *
-	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
-	 *  This kind of transition usually occurs in pal and efi calls,
-	 *  which requires running in physical mode.
-	 *  (see "arch/ia64/kernel/head.S")
-	 *  (1,1,1)->(1,0,0)
-	 */
-
-	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
-};
-
-void physical_mode_init(struct kvm_vcpu  *vcpu)
-{
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-void switch_to_physical_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	/* Save original virtual mode rr[0] and rr[4] */
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
-	ia64_srlz_d();
-
-	ia64_set_psr(psr);
-	return;
-}
-
-void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-	return;
-}
-
-static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
-{
-	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
-
-void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-	int act;
-	act = mm_switch_action(old_psr, new_psr);
-	switch (act) {
-	case SW_V2P:
-		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
-		old_psr.val, new_psr.val);*/
-		switch_to_physical_rid(vcpu);
-		/*
-		 * Set rse to enforced lazy, to prevent active rse
-		 *save/restor when guest physical mode.
-		 */
-		vcpu->arch.mode_flags |= GUEST_IN_PHY;
-		break;
-	case SW_P2V:
-		switch_to_virtual_rid(vcpu);
-		/*
-		 * recover old mode which is saved when entering
-		 * guest physical mode
-		 */
-		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
-		break;
-	case SW_SELF:
-		break;
-	case SW_NOP:
-		break;
-	default:
-		/* Sanity check */
-		break;
-	}
-	return;
-}
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-
-	if ((old_psr.dt != new_psr.dt)
-			|| (old_psr.it != new_psr.it)
-			|| (old_psr.rt != new_psr.rt))
-		switch_mm_mode(vcpu, old_psr, new_psr);
-
-	return;
-}
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu)) {
-		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
-		switch_to_virtual_rid(vcpu);
-	}
-	return;
-}
-
-/* Recover always follows prepare */
-void recover_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu))
-		switch_to_physical_rid(vcpu);
-	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
-	return;
-}
-
-#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
-
-static u16 gr_info[32] = {
-	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
-	RPT(r1), RPT(r2), RPT(r3),
-	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
-	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
-	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
-	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
-	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
-};
-
-#define IA64_FIRST_STACKED_GR   32
-#define IA64_FIRST_ROTATING_FR  32
-
-static inline unsigned long
-rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
-{
-	reg += rrb;
-	if (reg >= sor)
-		reg -= sor;
-	return reg;
-}
-
-/*
- * Return the (rotated) index for floating point register
- * be in the REGNUM (REGNUM must range from 32-127,
- * result is in the range from 0-95.
- */
-static inline unsigned long fph_index(struct kvm_pt_regs *regs,
-						long regnum)
-{
-	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
-	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
-}
-
-/*
- * The inverse of the above: given bspstore and the number of
- * registers, calculate ar.bsp.
- */
-static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
-							long num_regs)
-{
-	long delta = ia64_rse_slot_num(addr) + num_regs;
-	int i = 0;
-
-	if (num_regs < 0)
-		delta -= 0x3e;
-	if (delta < 0) {
-		while (delta <= -0x3f) {
-			i--;
-			delta += 0x3f;
-		}
-	} else {
-		while (delta >= 0x3f) {
-			i++;
-			delta -= 0x3f;
-		}
-	}
-
-	return addr + num_regs + i;
-}
-
-static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-					unsigned long *val, int *nat)
-{
-	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	new_rsc = old_rsc&(~(0x3));
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	bsp = kbs + (regs->loadrs >> 19);
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	if (addr >= bspstore) {
-		ia64_flushrs();
-		ia64_mf();
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	}
-	*val = *addr;
-	if (nat) {
-		if (bspstore < rnat_addr)
-			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
-							& nat_mask);
-		else
-			*nat = (int)!!((*rnat_addr) & nat_mask);
-		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-	}
-}
-
-void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-				unsigned long val, unsigned long nat)
-{
-	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc, psr;
-	unsigned long rnat;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	/* put RSC to lazy mode, and set loadrs 0 */
-	new_rsc = old_rsc & (~0x3fff0003);
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	local_irq_save(psr);
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	if (addr >= bspstore) {
-
-		ia64_flushrs();
-		ia64_mf();
-		*addr = val;
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		if (bspstore < rnat_addr)
-			rnat = rnat & (~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr)&(~nat_mask);
-
-		ia64_mf();
-		ia64_loadrs();
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	} else {
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		*addr = val;
-		if (bspstore < rnat_addr)
-			rnat = rnat&(~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr) & (~nat_mask);
-
-		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	}
-	local_irq_restore(psr);
-	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void getreg(unsigned long regnum, unsigned long *val,
-				int *nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr, *unat;
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		get_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-
-	addr += gr_info[regnum];
-
-	*val  = *(unsigned long *)addr;
-	/*
-	 * do it only when requested
-	 */
-	if (nat)
-		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
-}
-
-void setreg(unsigned long regnum, unsigned long val,
-			int nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr;
-	unsigned long bitmask;
-	unsigned long *unat;
-
-	/*
-	 * First takes care of stacked registers
-	 */
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		set_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-	/*
-	 * add offset from base of struct
-	 * and do it !
-	 */
-	addr += gr_info[regnum];
-
-	*(unsigned long *)addr = val;
-
-	/*
-	 * We need to clear the corresponding UNAT bit to fully emulate the load
-	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
-	 */
-	bitmask   = 1UL << ((addr >> 3) & 0x3f);
-	if (nat)
-		*unat |= bitmask;
-	 else
-		*unat &= ~bitmask;
-
-}
-
-u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long val;
-
-	if (!reg)
-		return 0;
-	getreg(reg, &val, 0, regs);
-	return val;
-}
-
-void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	long sof = (regs->cr_ifs) & 0x7f;
-
-	if (!reg)
-		return;
-	if (reg >= sof + 32)
-		return;
-	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
-}
-
-void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-				struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-#define CASE_FIXED_FP(reg)			\
-	case  (reg) :				\
-		ia64_stf_spill(fpval, reg);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(0);
-		CASE_FIXED_FP(1);
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-#undef CASE_FIXED_FP
-}
-
-void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-					struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-
-#define CASE_FIXED_FP(reg)			\
-	case (reg) :				\
-		ia64_ldf_fill(reg, fpval);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-}
-
-void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	if (reg > 1)
-		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-/*
- * The Altix RTC is mapped specially here for the vmm module
- */
-#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
-static long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm->arch.is_sn2)
-		return (*SN_RTC_BASE);
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-/************************************************************************
- * lsapic timer
- ***********************************************************************/
-u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
-{
-	unsigned long guest_itc;
-	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
-
-	if (guest_itc >= VMX(vcpu, last_itc)) {
-		VMX(vcpu, last_itc) = guest_itc;
-		return  guest_itc;
-	} else
-		return VMX(vcpu, last_itc);
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
-static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
-{
-	struct kvm_vcpu *v;
-	struct kvm *kvm;
-	int i;
-	long itc_offset = val - kvm_get_itc(vcpu);
-	unsigned long vitv = VCPU(vcpu, itv);
-
-	kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			VMX(v, itc_offset) = itc_offset;
-			VMX(v, last_itc) = 0;
-		}
-	}
-	VMX(vcpu, last_itc) = 0;
-	if (VCPU(vcpu, itm) <= val) {
-		VMX(vcpu, itc_check) = 0;
-		vcpu_unpend_interrupt(vcpu, vitv);
-	} else {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
-	}
-
-}
-
-static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itm));
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
-{
-	unsigned long vitv = VCPU(vcpu, itv);
-	VCPU(vcpu, itm) = val;
-
-	if (val > vcpu_get_itc(vcpu)) {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_unpend_interrupt(vcpu, vitv);
-		VMX(vcpu, timer_pending) = 0;
-	} else
-		VMX(vcpu, itc_check) = 0;
-}
-
-#define  ITV_VECTOR(itv)    (itv&0xff)
-#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
-
-static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itv) = val;
-	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
-		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
-		vcpu->arch.timer_pending = 0;
-	}
-}
-
-static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
-{
-	int vec;
-
-	vec = highest_inservice_irq(vcpu);
-	if (vec == NULL_VECTOR)
-		return;
-	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
-	VCPU(vcpu, eoi) = 0;
-	vcpu->arch.irq_new_pending = 1;
-
-}
-
-/* See Table 5-8 in SDM vol2 for the definition */
-int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
-{
-	union ia64_tpr vtpr;
-
-	vtpr.val = VCPU(vcpu, tpr);
-
-	if (h_inservice == NMI_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == NMI_VECTOR) {
-		/* Non Maskable Interrupt */
-		return IRQ_NO_MASKED;
-	}
-
-	if (h_inservice == ExtINT_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == ExtINT_VECTOR) {
-		if (vtpr.mmi) {
-			/* mask all external IRQ */
-			return IRQ_MASKED_BY_VTPR;
-		} else
-			return IRQ_NO_MASKED;
-	}
-
-	if (is_higher_irq(h_pending, h_inservice)) {
-		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
-			return IRQ_NO_MASKED;
-		else
-			return IRQ_MASKED_BY_VTPR;
-	} else {
-		return IRQ_MASKED_BY_INSVC;
-	}
-}
-
-void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-
-	vcpu->arch.irq_new_pending = 1;
-}
-
-void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-	if (ret) {
-		vcpu->arch.irq_new_pending = 1;
-		wmb();
-	}
-}
-
-void update_vhpi(struct kvm_vcpu *vcpu, int vec)
-{
-	u64 vhpi;
-
-	if (vec == NULL_VECTOR)
-		vhpi = 0;
-	else if (vec == NMI_VECTOR)
-		vhpi = 32;
-	else if (vec == ExtINT_VECTOR)
-		vhpi = 16;
-	else
-		vhpi = vec >> 4;
-
-	VCPU(vcpu, vhpi) = vhpi;
-	if (VCPU(vcpu, vac).a_int)
-		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
-				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
-}
-
-u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
-{
-	int vec, h_inservice, mask;
-
-	vec = highest_pending_irq(vcpu);
-	h_inservice = highest_inservice_irq(vcpu);
-	mask = irq_masked(vcpu, vec, h_inservice);
-	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	if (mask == IRQ_MASKED_BY_VTPR) {
-		update_vhpi(vcpu, vec);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
-	vcpu_unpend_interrupt(vcpu, vec);
-	return  (u64)vec;
-}
-
-/**************************************************************************
-  Privileged operation emulation routines
- **************************************************************************/
-u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_pta vpta;
-	union ia64_rr vrr;
-	u64 pval;
-	u64 vhpt_offset;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
-				vpta.val, 0, 0, 0, 0);
-	} else {
-		pval = (vadr & VRN_MASK) | vhpt_offset |
-			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
-	}
-	return  pval;
-}
-
-u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_rr vrr;
-	union ia64_pta vpta;
-	u64 pval;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
-						0, 0, 0, 0, 0);
-	} else
-		pval = 1;
-
-	return  pval;
-}
-
-u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	struct thash_data *data;
-	union ia64_pta vpta;
-	u64 key;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	if (vpta.vf == 0) {
-		key = 1;
-		return key;
-	}
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (!data || !data->p)
-		key = 1;
-	else
-		key = data->key;
-
-	return key;
-}
-
-void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long thash, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	thash = vcpu_thash(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
-}
-
-void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tag, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	tag = vcpu_ttag(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
-}
-
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
-{
-	struct thash_data *data;
-	union ia64_isr visr, pt_isr;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr vpsr;
-
-	regs = vcpu_regs(vcpu);
-	pt_isr.val = VMX(vcpu, cr_isr);
-	visr.val = 0;
-	visr.ei = pt_isr.ei;
-	visr.ir = pt_isr.ir;
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	visr.na = 1;
-
-	data = vhpt_lookup(vadr);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			*padr = (data->gpaddr >> data->ps << data->ps) |
-				(vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
-				| (vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			alt_dtlb(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	} else {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			dvhpt_fault(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	}
-
-	return IA64_NO_FAULT;
-}
-
-int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-
-	if (vcpu_tpa(vcpu, r3, &r1))
-		return IA64_FAULT;
-
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-	return(IA64_NO_FAULT);
-}
-
-void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-	r1 = vcpu_tak(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-}
-
-/************************************
- * Insert/Purge translation register/cache
- ************************************/
-void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
-}
-
-void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
-}
-
-void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 ps, va, rid;
-	struct thash_data *p_itr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-	rid = vcpu_get_rr(vcpu, ifa);
-	rid = rid & RR_RID_MASK;
-	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
-	vcpu_set_tr(p_itr, pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
-}
-
-
-void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 gpfn;
-	u64 ps, va, rid;
-	struct thash_data *p_dtr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-
-	if (ps != _PAGE_SIZE_16M)
-		thash_purge_entries(vcpu, va, ps);
-	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	if (__gpfn_is_io(gpfn))
-		pte |= VTLB_PTE_IO;
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
-	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
-							pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
-}
-
-void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
-		vcpu->arch.dtrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
-		vcpu->arch.itrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	va = PAGEALIGN(va, ps);
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
-{
-	thash_purge_all(vcpu);
-}
-
-void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_PTC_G;
-
-	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
-	p->u.ptc_g_data.vaddr = va;
-	p->u.ptc_g_data.ps = ps;
-	vmm_transition(vcpu);
-	/* Do Local Purge Here*/
-	vcpu_ptc_l(vcpu, va, ps);
-	local_irq_restore(psr);
-}
-
-
-void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	vcpu_ptc_ga(vcpu, va, ps);
-}
-
-void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	vcpu_ptc_e(vcpu, ifa);
-}
-
-void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
-}
-
-
-
-void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
-}
-
-void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_d(vcpu, pte, itir, ifa);
-}
-
-void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_i(vcpu, pte, itir, ifa);
-}
-
-/*************************************
- * Moves to semi-privileged registers
- *************************************/
-
-void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long imm;
-
-	if (inst.M30.s)
-		imm = -inst.M30.imm;
-	else
-		imm = inst.M30.imm;
-
-	vcpu_set_itc(vcpu, imm);
-}
-
-void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
-	vcpu_set_itc(vcpu, r2);
-}
-
-void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1;
-
-	r1 = vcpu_get_itc(vcpu);
-	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
-}
-
-/**************************************************************************
-  struct kvm_vcpu protection key register access routines
- **************************************************************************/
-
-unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	return ((unsigned long)ia64_get_pkr(reg));
-}
-
-void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
-{
-	ia64_set_pkr(reg, val);
-}
-
-/********************************
- * Moves to privileged registers
- ********************************/
-unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
-					unsigned long val)
-{
-	union ia64_rr oldrr, newrr;
-	unsigned long rrval;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	oldrr.val = vcpu_get_rr(vcpu, reg);
-	newrr.val = val;
-	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
-
-	switch ((unsigned long)(reg >> VRN_SHIFT)) {
-	case VRN6:
-		vcpu->arch.vmm_rr = vrrtomrr(val);
-		local_irq_save(psr);
-		p->exit_reason = EXIT_REASON_SWITCH_RR6;
-		vmm_transition(vcpu);
-		local_irq_restore(psr);
-		break;
-	case VRN4:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr4 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	case VRN0:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr0 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	default:
-		ia64_set_rr(reg, vrrtomrr(val));
-		break;
-	}
-
-	return (IA64_NO_FAULT);
-}
-
-void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_rr(vcpu, r3, r2);
-}
-
-void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmc(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmd(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	u64 r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pkr(vcpu, r3, r2);
-}
-
-void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_rr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pkr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_dbr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_ibr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pmc(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	/* FIXME: This could get called as a result of a rsvd-reg fault */
-	if (reg > (ia64_get_cpuid(3) & 0xff))
-		return 0;
-	else
-		return ia64_get_cpuid(reg);
-}
-
-void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_cpuid(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	VCPU(vcpu, tpr) = val;
-	vcpu->arch.irq_check = 1;
-}
-
-unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
-	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
-
-	switch (inst.M32.cr3) {
-	case 0:
-		vcpu_set_dcr(vcpu, r2);
-		break;
-	case 1:
-		vcpu_set_itm(vcpu, r2);
-		break;
-	case 66:
-		vcpu_set_tpr(vcpu, r2);
-		break;
-	case 67:
-		vcpu_set_eoi(vcpu, r2);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tgt = inst.M33.r1;
-	unsigned long val;
-
-	switch (inst.M33.cr3) {
-	case 65:
-		val = vcpu_get_ivr(vcpu);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-
-	case 67:
-		vcpu_set_gr(vcpu, tgt, 0L, 0);
-		break;
-	default:
-		val = VCPU(vcpu, vcr[inst.M33.cr3]);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-	}
-
-	return 0;
-}
-
-void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-
-	unsigned long mask;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr old_psr, new_psr;
-
-	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	regs = vcpu_regs(vcpu);
-	/* We only support guest as:
-	 *  vpsr.pk = 0
-	 *  vpsr.is = 0
-	 * Otherwise panic
-	 */
-	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
-				"& vpsr.is=0\n");
-
-	/*
-	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
-	 * Since these bits will become 0, after success execution of each
-	 * instruction, we will change set them to mIA64_PSR
-	 */
-	VCPU(vcpu, vpsr) = val
-		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
-			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
-
-	if (!old_psr.i && (val & IA64_PSR_I)) {
-		/* vpsr.i 0->1 */
-		vcpu->arch.irq_check = 1;
-	}
-	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	/*
-	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
-	 * , except for the following bits:
-	 *  ic/i/dt/si/rt/mc/it/bn/vm
-	 */
-	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
-		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
-		IA64_PSR_VM;
-
-	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
-
-	check_mm_mode_switch(vcpu, old_psr, new_psr);
-
-	return ;
-}
-
-unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
-{
-	struct ia64_psr vpsr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	if (!vpsr.ic)
-		VCPU(vcpu, ifs) = regs->cr_ifs;
-	regs->cr_ifs = IA64_IFS_V;
-	return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
-  VCPU banked general register access routines
- **************************************************************************/
-#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {     							\
-		__asm__ __volatile__ (					\
-				";;extr.u %0 = %3,%6,16;;\n"		\
-				"dep %1 = %0, %1, 0, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 16, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
-				"r"(*runat), "r"(b1unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw0(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-
-	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
-		for (i = 0; i < 16; i++) {
-			*b1++ = *r;
-			*r++ = *b0++;
-		}
-		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
-	}
-}
-
-#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {             						\
-		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
-				"dep %1 = %0, %1, 16, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 0, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
-				"r"(*runat), "r"(b0unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw1(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
-		for (i = 0; i < 16; i++) {
-			*b0++ = *r;
-			*r++ = *b1++;
-		}
-		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
-	}
-}
-
-void vcpu_rfi(struct kvm_vcpu *vcpu)
-{
-	unsigned long ifs, psr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	psr = VCPU(vcpu, ipsr);
-	if (psr & IA64_PSR_BN)
-		vcpu_bsw1(vcpu);
-	else
-		vcpu_bsw0(vcpu);
-	vcpu_set_psr(vcpu, psr);
-	ifs = VCPU(vcpu, ifs);
-	if (ifs >> 63)
-		regs->cr_ifs = ifs;
-	regs->cr_iip = VCPU(vcpu, iip);
-}
-
-/*
-   VPSR can't keep track of below bits of guest PSR
-   This function gets guest PSR
- */
-
-unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
-{
-	unsigned long mask;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
-		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
-	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
-}
-
-void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
-					| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr &= (~imm24);
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
-				| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr |= imm24;
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-/* Generate Mask
- * Parameter:
- *  bit -- starting bit
- *  len -- how many bits
- */
-#define MASK(bit,len)				   	\
-({							\
-		__u64	ret;				\
-							\
-		__asm __volatile("dep %0=-1, r0, %1, %2"\
-				: "=r" (ret):		\
-		  "M" (bit),				\
-		  "M" (len));				\
-		ret;					\
-})
-
-void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
-	vcpu_set_psr(vcpu, val);
-}
-
-void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_gr(vcpu, inst.M35.r2);
-	vcpu_set_psr_l(vcpu, val);
-}
-
-void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_psr(vcpu);
-	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
-	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
-}
-
-void vcpu_increment_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-	if (ipsr->ri == 2) {
-		ipsr->ri = 0;
-		regs->cr_iip += 16;
-	} else
-		ipsr->ri++;
-}
-
-void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-
-	if (ipsr->ri == 0) {
-		ipsr->ri = 2;
-		regs->cr_iip -= 16;
-	} else
-		ipsr->ri--;
-}
-
-/** Emulate a privileged operation.
- *
- *
- * @param vcpu virtual cpu
- * @cause the reason cause virtualization fault
- * @opcode the instruction code which cause virtualization fault
- */
-
-void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
-{
-	unsigned long status, cause, opcode ;
-	INST64 inst;
-
-	status = IA64_NO_FAULT;
-	cause = VMX(vcpu, cause);
-	opcode = VMX(vcpu, opcode);
-	inst.inst = opcode;
-	/*
-	 * Switch to actual virtual rid in rr0 and rr4,
-	 * which is required by some tlb related instructions.
-	 */
-	prepare_if_physical_mode(vcpu);
-
-	switch (cause) {
-	case EVENT_RSM:
-		kvm_rsm(vcpu, inst);
-		break;
-	case EVENT_SSM:
-		kvm_ssm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PSR:
-		kvm_mov_to_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PSR:
-		kvm_mov_from_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CR:
-		kvm_mov_from_cr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_CR:
-		kvm_mov_to_cr(vcpu, inst);
-		break;
-	case EVENT_BSW_0:
-		vcpu_bsw0(vcpu);
-		break;
-	case EVENT_BSW_1:
-		vcpu_bsw1(vcpu);
-		break;
-	case EVENT_COVER:
-		vcpu_cover(vcpu);
-		break;
-	case EVENT_RFI:
-		vcpu_rfi(vcpu);
-		break;
-	case EVENT_ITR_D:
-		kvm_itr_d(vcpu, inst);
-		break;
-	case EVENT_ITR_I:
-		kvm_itr_i(vcpu, inst);
-		break;
-	case EVENT_PTR_D:
-		kvm_ptr_d(vcpu, inst);
-		break;
-	case EVENT_PTR_I:
-		kvm_ptr_i(vcpu, inst);
-		break;
-	case EVENT_ITC_D:
-		kvm_itc_d(vcpu, inst);
-		break;
-	case EVENT_ITC_I:
-		kvm_itc_i(vcpu, inst);
-		break;
-	case EVENT_PTC_L:
-		kvm_ptc_l(vcpu, inst);
-		break;
-	case EVENT_PTC_G:
-		kvm_ptc_g(vcpu, inst);
-		break;
-	case EVENT_PTC_GA:
-		kvm_ptc_ga(vcpu, inst);
-		break;
-	case EVENT_PTC_E:
-		kvm_ptc_e(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_RR:
-		kvm_mov_to_rr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_RR:
-		kvm_mov_from_rr(vcpu, inst);
-		break;
-	case EVENT_THASH:
-		kvm_thash(vcpu, inst);
-		break;
-	case EVENT_TTAG:
-		kvm_ttag(vcpu, inst);
-		break;
-	case EVENT_TPA:
-		status = kvm_tpa(vcpu, inst);
-		break;
-	case EVENT_TAK:
-		kvm_tak(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR_IMM:
-		kvm_mov_to_ar_imm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR:
-		kvm_mov_to_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_AR:
-		kvm_mov_from_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_DBR:
-		kvm_mov_to_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_IBR:
-		kvm_mov_to_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMC:
-		kvm_mov_to_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMD:
-		kvm_mov_to_pmd(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PKR:
-		kvm_mov_to_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_DBR:
-		kvm_mov_from_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_IBR:
-		kvm_mov_from_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PMC:
-		kvm_mov_from_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PKR:
-		kvm_mov_from_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CPUID:
-		kvm_mov_from_cpuid(vcpu, inst);
-		break;
-	case EVENT_VMSW:
-		status = IA64_FAULT;
-		break;
-	default:
-		break;
-	};
-	/*Assume all status is NO_FAULT ?*/
-	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
-		vcpu_increment_iip(vcpu);
-
-	recover_if_physical_mode(vcpu);
-}
-
-void init_vcpu(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-	VMX(vcpu, vrr[0]) = 0x38;
-	VMX(vcpu, vrr[1]) = 0x38;
-	VMX(vcpu, vrr[2]) = 0x38;
-	VMX(vcpu, vrr[3]) = 0x38;
-	VMX(vcpu, vrr[4]) = 0x38;
-	VMX(vcpu, vrr[5]) = 0x38;
-	VMX(vcpu, vrr[6]) = 0x38;
-	VMX(vcpu, vrr[7]) = 0x38;
-	VCPU(vcpu, vpsr) = IA64_PSR_BN;
-	VCPU(vcpu, dcr) = 0;
-	/* pta.size must not be 0.  The minimum is 15 (32k) */
-	VCPU(vcpu, pta) = 15 << 2;
-	VCPU(vcpu, itv) = 0x10000;
-	VCPU(vcpu, itm) = 0;
-	VMX(vcpu, last_itc) = 0;
-
-	VCPU(vcpu, lid) = VCPU_LID(vcpu);
-	VCPU(vcpu, ivr) = 0;
-	VCPU(vcpu, tpr) = 0x10000;
-	VCPU(vcpu, eoi) = 0;
-	VCPU(vcpu, irr[0]) = 0;
-	VCPU(vcpu, irr[1]) = 0;
-	VCPU(vcpu, irr[2]) = 0;
-	VCPU(vcpu, irr[3]) = 0;
-	VCPU(vcpu, pmv) = 0x10000;
-	VCPU(vcpu, cmcv) = 0x10000;
-	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
-	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
-	update_vhpi(vcpu, NULL_VECTOR);
-	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
-
-	for (i = 0; i < 4; i++)
-		VLSAPIC_INSVC(vcpu, i) = 0;
-}
-
-void kvm_init_all_rr(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	/* WARNING: not allow co-exist of both virtual mode and physical
-	 * mode in same region
-	 */
-
-	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
-	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
-
-	if (is_physical_mode(vcpu)) {
-		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu, "Machine Status conflicts!\n");
-
-		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
-		ia64_dv_serialize_data();
-	} else {
-		ia64_set_rr((VRN0 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr4);
-		ia64_dv_serialize_data();
-	}
-	ia64_set_rr((VRN1 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN1])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN2 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN2])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN3 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN3])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN5 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN5])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN7 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN7])));
-	ia64_dv_serialize_data();
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-}
-
-int vmm_entry(void)
-{
-	struct kvm_vcpu *v;
-	v = current_vcpu;
-
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
-						0, 0, 0, 0, 0, 0);
-	kvm_init_vtlb(v);
-	kvm_init_vhpt(v);
-	init_vcpu(v);
-	kvm_init_all_rr(v);
-	vmm_reset_entry();
-
-	return 0;
-}
-
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-	struct kvm_vcpu *vcpu = current_vcpu;
-	if (vcpu != NULL)
-		printk("vcpu 0x%p vcpu %d\n",
-		       vcpu, vcpu->vcpu_id);
-
-	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip);
-
-	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
-	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
-	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
-							regs->b6, regs->b7);
-	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
-	       regs->f6.u.bits[1], regs->f6.u.bits[0],
-	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
-	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
-	       regs->f8.u.bits[1], regs->f8.u.bits[0],
-	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
-	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-	       regs->f10.u.bits[1], regs->f10.u.bits[0],
-	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
-							regs->r2, regs->r3);
-	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
-							regs->r9, regs->r10);
-	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-							regs->r12, regs->r13);
-	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-							regs->r15, regs->r16);
-	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-							regs->r18, regs->r19);
-	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-							regs->r21, regs->r22);
-	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-							regs->r24, regs->r25);
-	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-							regs->r27, regs->r28);
-	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-							regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-	va_list args;
-	char buf[256];
-
-	struct kvm_pt_regs *regs = vcpu_regs(v);
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	va_start(args, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	printk(buf);
-	kvm_show_registers(regs);
-	p->exit_reason = EXIT_REASON_VM_PANIC;
-	vmm_transition(v);
-	/*Never to return*/
-	while (1);
-}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
deleted file mode 100644
index 988911b..0000000
--- a/arch/ia64/kvm/vcpu.h
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- *  vcpu.h: vcpu routines
- *  	Copyright (c) 2005, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *
- * 	Copyright (c) 2007, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *	Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#ifndef __KVM_VCPU_H__
-#define __KVM_VCPU_H__
-
-#include <asm/types.h>
-#include <asm/fpu.h>
-#include <asm/processor.h>
-
-#ifndef __ASSEMBLY__
-#include "vti.h"
-
-#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
-
-typedef unsigned long IA64_INST;
-
-typedef union U_IA64_BUNDLE {
-	unsigned long i64[2];
-	struct { unsigned long template:5, slot0:41, slot1a:18,
-		slot1b:23, slot2:41; };
-	/* NOTE: following doesn't work because bitfields can't cross natural
-	   size boundaries
-	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
-} IA64_BUNDLE;
-
-typedef union U_INST64_A5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
-		imm9d:9, s:1, major:4; };
-} INST64_A5;
-
-typedef union U_INST64_B4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
-		wh:2, d:1, un1:1, major:4; };
-} INST64_B4;
-
-typedef union U_INST64_B8 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
-} INST64_B8;
-
-typedef union U_INST64_B9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
-} INST64_B9;
-
-typedef union U_INST64_I19 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
-} INST64_I19;
-
-typedef union U_INST64_I26 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I26;
-
-typedef union U_INST64_I27 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
-} INST64_I27;
-
-typedef union U_INST64_I28 { /* not privileged (mov from AR) */
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I28;
-
-typedef union U_INST64_M28 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M28;
-
-typedef union U_INST64_M29 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M29;
-
-typedef union U_INST64_M30 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
-		x3:3, s:1, major:4; };
-} INST64_M30;
-
-typedef union U_INST64_M31 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M31;
-
-typedef union U_INST64_M32 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M32;
-
-typedef union U_INST64_M33 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M33;
-
-typedef union U_INST64_M35 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-
-} INST64_M35;
-
-typedef union U_INST64_M36 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
-} INST64_M36;
-
-typedef union U_INST64_M37 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
-		i:1, major:4; };
-} INST64_M37;
-
-typedef union U_INST64_M41 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-} INST64_M41;
-
-typedef union U_INST64_M42 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M42;
-
-typedef union U_INST64_M43 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M43;
-
-typedef union U_INST64_M44 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
-} INST64_M44;
-
-typedef union U_INST64_M45 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M45;
-
-typedef union U_INST64_M46 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
-		x3:3, un1:1, major:4; };
-} INST64_M46;
-
-typedef union U_INST64_M47 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
-} INST64_M47;
-
-typedef union U_INST64_M1{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M1;
-
-typedef union U_INST64_M2{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M2;
-
-typedef union U_INST64_M3{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M3;
-
-typedef union U_INST64_M4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M4;
-
-typedef union U_INST64_M5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M5;
-
-typedef union U_INST64_M6 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M6;
-
-typedef union U_INST64_M9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M9;
-
-typedef union U_INST64_M10 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M10;
-
-typedef union U_INST64_M12 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M12;
-
-typedef union U_INST64_M15 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M15;
-
-typedef union U_INST64 {
-	IA64_INST inst;
-	struct { unsigned long :37, major:4; } generic;
-	INST64_A5 A5;	/* used in build_hypercall_bundle only */
-	INST64_B4 B4;	/* used in build_hypercall_bundle only */
-	INST64_B8 B8;	/* rfi, bsw.[01] */
-	INST64_B9 B9;	/* break.b */
-	INST64_I19 I19;	/* used in build_hypercall_bundle only */
-	INST64_I26 I26;	/* mov register to ar (I unit) */
-	INST64_I27 I27;	/* mov immediate to ar (I unit) */
-	INST64_I28 I28;	/* mov from ar (I unit) */
-	INST64_M1  M1;	/* ld integer */
-	INST64_M2  M2;
-	INST64_M3  M3;
-	INST64_M4  M4;	/* st integer */
-	INST64_M5  M5;
-	INST64_M6  M6;	/* ldfd floating pointer 		*/
-	INST64_M9  M9;	/* stfd floating pointer		*/
-	INST64_M10 M10;	/* stfd floating pointer		*/
-	INST64_M12 M12;     /* ldfd pair floating pointer		*/
-	INST64_M15 M15;	/* lfetch + imm update			*/
-	INST64_M28 M28;	/* purge translation cache entry	*/
-	INST64_M29 M29;	/* mov register to ar (M unit)		*/
-	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
-	INST64_M31 M31;	/* mov from ar (M unit)			*/
-	INST64_M32 M32;	/* mov reg to cr			*/
-	INST64_M33 M33;	/* mov from cr				*/
-	INST64_M35 M35;	/* mov to psr				*/
-	INST64_M36 M36;	/* mov from psr				*/
-	INST64_M37 M37;	/* break.m				*/
-	INST64_M41 M41;	/* translation cache insert		*/
-	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
-	INST64_M43 M43;	/* mov from indirect reg		*/
-	INST64_M44 M44;	/* set/reset system mask		*/
-	INST64_M45 M45;	/* translation purge			*/
-	INST64_M46 M46;	/* translation access (tpa,tak)		*/
-	INST64_M47 M47;	/* purge translation entry		*/
-} INST64;
-
-#define MASK_41 ((unsigned long)0x1ffffffffff)
-
-/* Virtual address memory attributes encoding */
-#define VA_MATTR_WB         0x0
-#define VA_MATTR_UC         0x4
-#define VA_MATTR_UCE        0x5
-#define VA_MATTR_WC         0x6
-#define VA_MATTR_NATPAGE    0x7
-
-#define PMASK(size)         (~((size) - 1))
-#define PSIZE(size)         (1UL<<(size))
-#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
-#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
-#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
-#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
-
-#define ARCH_PAGE_SHIFT   12
-
-#define INVALID_TI_TAG (1UL << 63)
-
-#define VTLB_PTE_P_BIT      0
-#define VTLB_PTE_IO_BIT     60
-#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
-#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
-
-#define vcpu_quick_region_check(_tr_regions,_ifa)		\
-	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
-
-#define vcpu_quick_region_set(_tr_regions,_ifa)             \
-	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
-
-static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
-		u64 va, u64 rid)
-{
-	trp->page_flags = pte;
-	trp->itir = itir;
-	trp->vadr = va;
-	trp->rid = rid;
-}
-
-extern u64 kvm_get_mpt_entry(u64 gpfn);
-
-/* Return I/ */
-static inline u64 __gpfn_is_io(u64 gpfn)
-{
-	u64  pte;
-	pte = kvm_get_mpt_entry(gpfn);
-	if (!(pte & GPFN_INV_MASK)) {
-		pte = pte & GPFN_IO_MASK;
-		if (pte != GPFN_PHYS_MMIO)
-			return pte;
-	}
-	return 0;
-}
-#endif
-#define IA64_NO_FAULT	0
-#define IA64_FAULT	1
-
-#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
-
-#define SW_BAD  0   /* Bad mode transitition */
-#define SW_V2P  1   /* Physical emulatino is activated */
-#define SW_P2V  2   /* Exit physical mode emulation */
-#define SW_SELF 3   /* No mode transition */
-#define SW_NOP  4   /* Mode transition, but without action required */
-
-#define GUEST_IN_PHY    0x1
-#define GUEST_PHY_EMUL  0x2
-
-#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
-
-#define VRN_SHIFT	61
-#define VRN_MASK	0xe000000000000000
-#define VRN0		0x0UL
-#define VRN1		0x1UL
-#define VRN2		0x2UL
-#define VRN3		0x3UL
-#define VRN4		0x4UL
-#define VRN5		0x5UL
-#define VRN6		0x6UL
-#define VRN7		0x7UL
-
-#define IRQ_NO_MASKED         0
-#define IRQ_MASKED_BY_VTPR    1
-#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
-
-#define PTA_BASE_SHIFT      15
-
-#define IA64_PSR_VM_BIT     46
-#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-
-/* Interruption Function State */
-#define IA64_IFS_V_BIT      63
-#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
-
-#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
-#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/gcc_intrin.h>
-
-#define is_physical_mode(v)		\
-	((v->arch.mode_flags) & GUEST_IN_PHY)
-
-#define is_virtual_mode(v)	\
-	(!is_physical_mode(v))
-
-#define MODE_IND(psr)	\
-	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
-
-#ifndef CONFIG_SMP
-#define _vmm_raw_spin_lock(x)	 do {}while(0)
-#define _vmm_raw_spin_unlock(x) do {}while(0)
-#else
-typedef struct {
-	volatile unsigned int lock;
-} vmm_spinlock_t;
-#define _vmm_raw_spin_lock(x)						\
-	do {								\
-		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
-		__u64 ia64_spinlock_val;				\
-		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-		if (unlikely(ia64_spinlock_val)) {			\
-			do {						\
-				while (*ia64_spinlock_ptr)		\
-				ia64_barrier();				\
-				ia64_spinlock_val =			\
-				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-			} while (ia64_spinlock_val);			\
-		}							\
-	} while (0)
-
-#define _vmm_raw_spin_unlock(x)				\
-	do { barrier();				\
-		((vmm_spinlock_t *)x)->lock = 0; } \
-while (0)
-#endif
-
-void vmm_spin_lock(vmm_spinlock_t *lock);
-void vmm_spin_unlock(vmm_spinlock_t *lock);
-enum {
-	I_TLB = 1,
-	D_TLB = 2
-};
-
-union kvm_va {
-	struct {
-		unsigned long off : 60;		/* intra-region offset */
-		unsigned long reg :  4;		/* region number */
-	} f;
-	unsigned long l;
-	void *p;
-};
-
-#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-						_v.f.reg = 0; _v.l; })
-#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-				_v.f.reg = -1; _v.p; })
-
-#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.rid; })
-#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.ps; })
-#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
-						_v.ve; })
-
-enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
-enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
-
-#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
-#define VMX(_v, _x)  ((_v)->arch._x)
-
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
-#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
-
-static inline unsigned long itir_ps(unsigned long itir)
-{
-	return ((itir >> 2) & 0x3f);
-}
-
-
-/**************************************************************************
-  VCPU control register access routines
- **************************************************************************/
-
-static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itir));
-}
-
-static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itir) = val;
-}
-
-static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, ifa));
-}
-
-static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifa) = val;
-}
-
-static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, iva));
-}
-
-static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, pta));
-}
-
-static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, lid));
-}
-
-static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, tpr));
-}
-
-static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
-{
-	return (0UL);		/*reads of eoi always return 0 */
-}
-
-static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[0]));
-}
-
-static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[1]));
-}
-
-static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[2]));
-}
-
-static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[3]));
-}
-
-static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
-{
-	ia64_setreg(_IA64_REG_CR_DCR, val);
-}
-
-static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, isr) = val;
-}
-
-static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, lid) = val;
-}
-
-static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ipsr) = val;
-}
-
-static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iip) = val;
-}
-
-static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifs) = val;
-}
-
-static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iipa) = val;
-}
-
-static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iha) = val;
-}
-
-
-static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return vcpu->arch.vrr[reg>>61];
-}
-
-/**************************************************************************
-  VCPU debug breakpoint register access routines
- **************************************************************************/
-
-static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	__ia64_set_dbr(reg, val);
-}
-
-static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	ia64_set_ibr(reg, val);
-}
-
-static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)__ia64_get_dbr(reg));
-}
-
-static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)ia64_get_ibr(reg));
-}
-
-/**************************************************************************
-  VCPU performance monitor register access routines
- **************************************************************************/
-static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMC registers are discarded */
-	ia64_set_pmc(reg, val);
-}
-
-static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMD registers are discarded */
-	ia64_set_pmd(reg, val);
-}
-
-static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMC registers return zero */
-	return ((u64)ia64_get_pmc(reg));
-}
-
-static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMD registers return zero */
-	return ((u64)ia64_get_pmd(reg));
-}
-
-static inline unsigned long vrrtomrr(unsigned long val)
-{
-	union ia64_rr rr;
-	rr.val = val;
-	rr.rid = (rr.rid << 4) | 0xe;
-	if (rr.ps > PAGE_SHIFT)
-		rr.ps = PAGE_SHIFT;
-	rr.ve = 1;
-	return rr.val;
-}
-
-
-static inline int highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-	return NULL_VECTOR;
-}
-
-/*
- * The pending irq is higher than the inservice one.
- *
- */
-static inline int is_higher_irq(int pending, int inservice)
-{
-	return ((pending > inservice)
-			|| ((pending != NULL_VECTOR)
-				&& (inservice == NULL_VECTOR)));
-}
-
-static inline int is_higher_class(int pending, int mic)
-{
-	return ((pending >> 4) > mic);
-}
-
-/*
- * Return 0-255 for pending irq.
- *        NULL_VECTOR: when no pending.
- */
-static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&VCPU(vcpu, irr[0]));
-}
-
-static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
-{
-	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
-}
-
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
-			u64 val, int nat);
-extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
-extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
-extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
-extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
-					u64 itir, u64 va, int type);
-extern struct thash_data *vhpt_lookup(u64 va);
-extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
-extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
-extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
-extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
-		u64 itir, u64 ifa, int type);
-extern void thash_purge_all(struct kvm_vcpu *v);
-extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
-						u64 va, int is_data);
-extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
-						u64 ps, int is_data);
-
-extern void vcpu_increment_iip(struct kvm_vcpu *v);
-extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
-extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
-extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
-extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
-extern void nested_dtlb(struct kvm_vcpu *vcpu);
-extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
-extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
-
-extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
-extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
-
-extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
-extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
-extern void vmm_transition(struct kvm_vcpu *vcpu);
-extern void vmm_trampoline(union context *from, union context *to);
-extern int vmm_entry(void);
-extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
-
-extern void vmm_reset_entry(void);
-void kvm_init_vtlb(struct kvm_vcpu *v);
-void kvm_init_vhpt(struct kvm_vcpu *v);
-void thash_init(struct thash_cb *hcb, u64 sz);
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-u64 kvm_gpa_to_mpa(u64 gpa);
-extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
-		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
-#endif
-#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
deleted file mode 100644
index 176a12c..0000000
--- a/arch/ia64/kvm/vmm.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * vmm.c: vmm module interface with kvm module
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpswa.h>
-
-#include "vcpu.h"
-
-MODULE_AUTHOR("Intel");
-MODULE_LICENSE("GPL");
-
-extern char kvm_ia64_ivt;
-extern char kvm_asm_mov_from_ar;
-extern char kvm_asm_mov_from_ar_sn2;
-extern fpswa_interface_t *vmm_fpswa_interface;
-
-long vmm_sanity = 1;
-
-struct kvm_vmm_info vmm_info = {
-	.module			= THIS_MODULE,
-	.vmm_entry		= vmm_entry,
-	.tramp_entry		= vmm_trampoline,
-	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
-	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
-	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
-};
-
-static int __init  kvm_vmm_init(void)
-{
-
-	vmm_fpswa_interface = fpswa_interface;
-
-	/*Register vmm data to kvm side*/
-	return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
-}
-
-static void __exit kvm_vmm_exit(void)
-{
-	kvm_exit();
-	return ;
-}
-
-void vmm_spin_lock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_lock(lock);
-}
-
-void vmm_spin_unlock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_unlock(lock);
-}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_DEBUG;
-	vmm_transition(vcpu);
-	local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	va_list args;
-	int r;
-
-	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-	va_start(args, fmt);
-	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-	va_end(args);
-	vcpu_debug_exit(vcpu);
-	return r;
-}
-
-module_init(kvm_vmm_init)
-module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
deleted file mode 100644
index 397e34a..0000000
--- a/arch/ia64/kvm/vmm_ivt.S
+++ /dev/null
@@ -1,1392 +0,0 @@
-/*
- * arch/ia64/kvm/vmm_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- *      Stephane Eranian <eranian@hpl.hp.com>
- *      David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 2000, 2002-2003 Intel Co
- *      Asit Mallick <asit.k.mallick@intel.com>
- *      Suresh Siddha <suresh.b.siddha@intel.com>
- *      Kenneth Chen <kenneth.w.chen@intel.com>
- *      Fenghua Yu <fenghua.yu@intel.com>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
- * for SMP
- * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
- * handler now uses virtual PT.
- *
- * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
- *              Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for
- * critical
- * interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
- *              (12,51)
- *  entry offset ----/     /         /                  /
- *  /
- *  entry number ---------/         /                  /
- *  /
- *  size of the entry -------------/                  /
- *  /
- *  vector name -------------------------------------/
- *  /
- *  interruptions triggering this vector
- *  ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB
- * boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/pgtable.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-#include "kvm_minstate.h"
-#include "vti.h"
-
-#if 0
-# define PSR_DEFAULT_BITS   psr.ac
-#else
-# define PSR_DEFAULT_BITS   0
-#endif
-
-#define KVM_FAULT(n)    \
-	kvm_fault_##n:;          \
-	mov r19=n;;          \
-	br.sptk.many kvm_vmm_panic;         \
-	;;                  \
-
-#define KVM_REFLECT(n)    \
-	mov r31=pr;           \
-	mov r19=n;       /* prepare to save predicates */ \
-	mov r29=cr.ipsr;      \
-	;;      \
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)	br.sptk.many kvm_dispatch_reflection;        \
-	br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
-
-    .section .text..ivt,"ax"
-
-    .align 32768    // align on 32KB boundary
-    .global kvm_ia64_ivt
-kvm_ia64_ivt:
-///////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(kvm_vhpt_miss)
-	KVM_FAULT(0)
-END(kvm_vhpt_miss)
-
-    .org kvm_ia64_ivt+0x400
-////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(kvm_itlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_itlb_miss
-	mov r19 = 1
-	br.sptk kvm_itlb_miss_dispatch
-	KVM_FAULT(1);
-END(kvm_itlb_miss)
-
-    .org kvm_ia64_ivt+0x0800
-//////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(kvm_dtlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_dtlb_miss
-	br.sptk kvm_dtlb_miss_dispatch
-END(kvm_dtlb_miss)
-
-     .org kvm_ia64_ivt+0x0c00
-////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(kvm_alt_itlb_miss)
-	mov r16=cr.ifa    // get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	mov r24=cr.ipsr
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r17,r19      // insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.i r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_itlb_miss)
-
-    .org kvm_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(kvm_alt_dtlb_miss)
-	mov r16=cr.ifa		// get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r24=cr.ipsr
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r19,r17	// insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.d r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1400
-//////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(kvm_nested_dtlb_miss)
-	KVM_FAULT(5)
-END(kvm_nested_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(kvm_ikey_miss)
-	KVM_REFLECT(6)
-END(kvm_ikey_miss)
-
-    .org kvm_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(kvm_dkey_miss)
-	KVM_REFLECT(7)
-END(kvm_dkey_miss)
-
-    .org kvm_ia64_ivt+0x2000
-////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(kvm_dirty_bit)
-	KVM_REFLECT(8)
-END(kvm_dirty_bit)
-
-    .org kvm_ia64_ivt+0x2400
-////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(kvm_iaccess_bit)
-	KVM_REFLECT(9)
-END(kvm_iaccess_bit)
-
-    .org kvm_ia64_ivt+0x2800
-///////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(kvm_daccess_bit)
-	KVM_REFLECT(10)
-END(kvm_daccess_bit)
-
-    .org kvm_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(kvm_break_fault)
-	mov r31=pr
-	mov r19=11
-	mov r29=cr.ipsr
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
-	mov out0=cr.ifa
-	mov out2=cr.isr     // FIXME: pity to make this slow access twice
-	mov out3=cr.iim     // FIXME: pity to make this slow access twice
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i         // guarantee that interruption collection is on
-	;;
-	(p15)ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out1=16,sp
-	br.call.sptk.many b6=kvm_ia64_handle_break
-	;;
-END(kvm_break_fault)
-
-    .org kvm_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(kvm_interrupt)
-	mov r31=pr		// prepare to save predicates
-	mov r19=12
-	mov r29=cr.ipsr
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-	tbit.z p0,p15=r29,IA64_PSR_I_BIT
-	;;
-(p7)	br.sptk kvm_dispatch_interrupt
-	;;
-	mov r27=ar.rsc		/* M */
-	mov r20=r1			/* A */
-	mov r25=ar.unat		/* M */
-	mov r26=ar.pfs		/* I */
-	mov r28=cr.iip		/* M */
-	cover			/* B (or nothing) */
-	;;
-	mov r1=sp
-	;;
-	invala			/* M */
-	mov r30=cr.ifs
-	;;
-	addl r1=-VMM_PT_REGS_SIZE,r1
-	;;
-	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
-	adds r16=PT(CR_IPSR),r1
-	;;
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-	st8 [r16]=r29			/* save cr.ipsr */
-	;;
-	lfetch.fault.excl.nt1 [r17]
-	mov r29=b0
-	;;
-	adds r16=PT(R8),r1  	/* initialize first base pointer */
-	adds r17=PT(R9),r1  	/* initialize second base pointer */
-	mov r18=r0      		/* make sure r18 isn't NaT */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
-        ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
-        ;;
-	st8 [r16]=r28,16		/* save cr.iip */
-	st8 [r17]=r30,16		/* save cr.ifs */
-	mov r8=ar.fpsr		/* M */
-	mov r9=ar.csd
-	mov r10=ar.ssd
-	movl r11=FPSR_DEFAULT	/* L-unit */
-	;;
-	st8 [r16]=r25,16		/* save ar.unat */
-	st8 [r17]=r26,16		/* save ar.pfs */
-	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
-	;;
-	st8 [r16]=r27,16		/* save ar.rsc */
-	adds r17=16,r17		/* skip over ar_rnat field */
-	;;
-	st8 [r17]=r31,16		/* save predicates */
-	adds r16=16,r16		/* skip over ar_bspstore field */
-	;;
-	st8 [r16]=r29,16		/* save b0 */
-	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
-	adds r12=-16,r1
-	/* switch to kernel memory stack (with 16 bytes of scratch) */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
-	dep r14=-1,r0,60,4
-	;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
-	adds r2=VMM_PT_REGS_R16_OFFSET,r1
-	adds r14 = VMM_VCPU_GP_OFFSET,r13
-	;;
-	mov r8=ar.ccv
-	ld8 r14 = [r14]
-	;;
-	mov r1=r14       /* establish kernel global pointer */
-	;;                                          \
-	bsw.1
-	;;
-	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
-	mov out0=r13
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	//(p15) ssm psr.i
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-	mov r18=b6
-	;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-	mov r19=b7
-	;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
-	;;
-	mov ar.fpsr=r11       /* M-unit */
-	st8 [r2]=r8,8         /* ar.ccv */
-	adds r24=PT(B6)-PT(F7),r3
-	;;
-	stf.spill [r2]=f6,32
-	stf.spill [r3]=f7,32
-	;;
-	stf.spill [r2]=f8,32
-	stf.spill [r3]=f9,32
-	;;
-	stf.spill [r2]=f10
-	stf.spill [r3]=f11
-	adds r25=PT(B7)-PT(F11),r3
-	;;
-	st8 [r24]=r18,16       /* b6 */
-	st8 [r25]=r19,16       /* b7 */
-	;;
-	st8 [r24]=r9           /* ar.csd */
-	st8 [r25]=r10          /* ar.ssd */
-	;;
-	srlz.d		// make sure we see the effect of cr.ivr
-	addl r14=@gprel(ia64_leave_nested),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-	;;
-END(kvm_interrupt)
-
-    .global kvm_dispatch_vexirq
-    .org kvm_ia64_ivt+0x3400
-//////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(kvm_virtual_exirq)
-	mov r31=pr
-	mov r19=13
-	mov r30 =r0
-	;;
-kvm_dispatch_vexirq:
-	cmp.eq p6,p0 = 1,r30
-	;;
-(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-(p6)	ld8 r1 = [r29]
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r13
-
-	ssm psr.ic
-	;;
-	srlz.i // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	adds r3=8,r2                // set up second base pointer
-	;;
-	KVM_SAVE_REST
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_vexirq
-END(kvm_virtual_exirq)
-
-    .org kvm_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-	KVM_FAULT(14)
-	// this code segment is from 2.6.16.13
-
-    .org kvm_ia64_ivt+0x3c00
-///////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-	KVM_FAULT(15)
-
-    .org kvm_ia64_ivt+0x4000
-///////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-	KVM_FAULT(16)
-
-    .org kvm_ia64_ivt+0x4400
-//////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-	KVM_FAULT(17)
-
-    .org kvm_ia64_ivt+0x4800
-//////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-	KVM_FAULT(18)
-
-    .org kvm_ia64_ivt+0x4c00
-//////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-	KVM_FAULT(19)
-
-    .org kvm_ia64_ivt+0x5000
-//////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(kvm_page_not_present)
-	KVM_REFLECT(20)
-END(kvm_page_not_present)
-
-    .org kvm_ia64_ivt+0x5100
-///////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(kvm_key_permission)
-	KVM_REFLECT(21)
-END(kvm_key_permission)
-
-    .org kvm_ia64_ivt+0x5200
-//////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(kvm_iaccess_rights)
-	KVM_REFLECT(22)
-END(kvm_iaccess_rights)
-
-    .org kvm_ia64_ivt+0x5300
-//////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(kvm_daccess_rights)
-	KVM_REFLECT(23)
-END(kvm_daccess_rights)
-
-    .org kvm_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(kvm_general_exception)
-	KVM_REFLECT(24)
-	KVM_FAULT(24)
-END(kvm_general_exception)
-
-    .org kvm_ia64_ivt+0x5500
-//////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(kvm_disabled_fp_reg)
-	KVM_REFLECT(25)
-END(kvm_disabled_fp_reg)
-
-    .org kvm_ia64_ivt+0x5600
-////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(kvm_nat_consumption)
-	KVM_REFLECT(26)
-END(kvm_nat_consumption)
-
-    .org kvm_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(kvm_speculation_vector)
-	KVM_REFLECT(27)
-END(kvm_speculation_vector)
-
-    .org kvm_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-	KVM_FAULT(28)
-
-    .org kvm_ia64_ivt+0x5900
-///////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(kvm_debug_vector)
-	KVM_FAULT(29)
-END(kvm_debug_vector)
-
-    .org kvm_ia64_ivt+0x5a00
-///////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(kvm_unaligned_access)
-	KVM_REFLECT(30)
-END(kvm_unaligned_access)
-
-    .org kvm_ia64_ivt+0x5b00
-//////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(kvm_unsupported_data_reference)
-	KVM_REFLECT(31)
-END(kvm_unsupported_data_reference)
-
-    .org kvm_ia64_ivt+0x5c00
-////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
-ENTRY(kvm_floating_point_fault)
-	KVM_REFLECT(32)
-END(kvm_floating_point_fault)
-
-    .org kvm_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(kvm_floating_point_trap)
-	KVM_REFLECT(33)
-END(kvm_floating_point_trap)
-
-    .org kvm_ia64_ivt+0x5e00
-//////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(kvm_lower_privilege_trap)
-	KVM_REFLECT(34)
-END(kvm_lower_privilege_trap)
-
-    .org kvm_ia64_ivt+0x5f00
-//////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(kvm_taken_branch_trap)
-	KVM_REFLECT(35)
-END(kvm_taken_branch_trap)
-
-    .org kvm_ia64_ivt+0x6000
-////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(kvm_single_step_trap)
-	KVM_REFLECT(36)
-END(kvm_single_step_trap)
-    .global kvm_virtualization_fault_back
-    .org kvm_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(kvm_virtualization_fault)
-	mov r31=pr
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	st8 [r16] = r1
-	adds r17 = VMM_VCPU_GP_OFFSET, r21
-	;;
-	ld8 r1 = [r17]
-	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-	cmp.eq p9,p0=EVENT_RSM,r24
-	cmp.eq p10,p0=EVENT_SSM,r24
-	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-	cmp.eq p12,p0=EVENT_THASH,r24
-(p6)	br.dptk.many kvm_asm_mov_from_ar
-(p7)	br.dptk.many kvm_asm_mov_from_rr
-(p8)	br.dptk.many kvm_asm_mov_to_rr
-(p9)	br.dptk.many kvm_asm_rsm
-(p10)	br.dptk.many kvm_asm_ssm
-(p11)	br.dptk.many kvm_asm_mov_to_psr
-(p12)	br.dptk.many kvm_asm_thash
-	;;
-kvm_virtualization_fault_back:
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 = [r16]
-	;;
-	mov r19=37
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	cmp.ne p6,p0=EVENT_RFI, r24
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]
-	;;
-	adds r18=VMM_VPD_VIFS_OFFSET,r18
-	;;
-	ld8 r18=[r18]
-	;;
-	tbit.z p6,p0=r18,63
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-//if vifs.v=1 desert current register frame
-	alloc r18=ar.pfs,0,0,0,0
-	br.sptk kvm_dispatch_virtualization_fault
-END(kvm_virtualization_fault)
-
-    .org kvm_ia64_ivt+0x6200
-//////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-	KVM_FAULT(38)
-
-    .org kvm_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-	KVM_FAULT(39)
-
-    .org kvm_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-	KVM_FAULT(40)
-
-    .org kvm_ia64_ivt+0x6500
-//////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-	KVM_FAULT(41)
-
-    .org kvm_ia64_ivt+0x6600
-//////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-	KVM_FAULT(42)
-
-    .org kvm_ia64_ivt+0x6700
-//////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-	KVM_FAULT(43)
-
-    .org kvm_ia64_ivt+0x6800
-//////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-	KVM_FAULT(44)
-
-    .org kvm_ia64_ivt+0x6900
-///////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
-//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(kvm_ia32_exception)
-	KVM_FAULT(45)
-END(kvm_ia32_exception)
-
-    .org kvm_ia64_ivt+0x6a00
-////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(kvm_ia32_intercept)
-	KVM_FAULT(47)
-END(kvm_ia32_intercept)
-
-    .org kvm_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-	KVM_FAULT(48)
-
-    .org kvm_ia64_ivt+0x6d00
-//////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-	KVM_FAULT(49)
-
-    .org kvm_ia64_ivt+0x6e00
-//////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-	KVM_FAULT(50)
-
-    .org kvm_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-	KVM_FAULT(52)
-
-    .org kvm_ia64_ivt+0x7100
-////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-	KVM_FAULT(53)
-
-    .org kvm_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-	KVM_FAULT(54)
-
-    .org kvm_ia64_ivt+0x7300
-////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-	KVM_FAULT(55)
-
-    .org kvm_ia64_ivt+0x7400
-////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-	KVM_FAULT(56)
-
-    .org kvm_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-	KVM_FAULT(57)
-
-    .org kvm_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-	KVM_FAULT(58)
-
-    .org kvm_ia64_ivt+0x7700
-////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-	KVM_FAULT(59)
-
-    .org kvm_ia64_ivt+0x7800
-////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-	KVM_FAULT(60)
-
-    .org kvm_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-	KVM_FAULT(61)
-
-    .org kvm_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-	KVM_FAULT(62)
-
-    .org kvm_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-	KVM_FAULT(63)
-
-    .org kvm_ia64_ivt+0x7c00
-////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-	KVM_FAULT(64)
-
-    .org kvm_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-	KVM_FAULT(65)
-
-    .org kvm_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-	KVM_FAULT(66)
-
-    .org kvm_ia64_ivt+0x7f00
-////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-	KVM_FAULT(67)
-
-    .org kvm_ia64_ivt+0x8000
-// There is no particular reason for this code to be here, other than that
-// there happens to be space here that would go unused otherwise.  If this
-// fault ever gets "unreserved", simply moved the following code to a more
-// suitable spot...
-
-
-ENTRY(kvm_dtlb_miss_dispatch)
-	mov r19 = 2
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i     // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_dtlb_miss_dispatch)
-
-ENTRY(kvm_itlb_miss_dispatch)
-
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_itlb_miss_dispatch)
-
-ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- *  psr.ic: off
- *  r19:    intr type (offset into ivt, see ia64_int.h)
- *  r31:    contains saved predicates (pr)
- */
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0
-	mov out0=cr.ifa
-	mov out1=cr.isr
-	mov out2=cr.iim
-	mov out3=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out4=16,r12
-	br.call.sptk.many b6=reflect_interruption
-END(kvm_dispatch_reflection)
-
-ENTRY(kvm_dispatch_virtualization_fault)
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
-	mov out0=r13        //vcpu
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out1=16,sp         //regs
-	br.call.sptk.many b6=kvm_emulate
-END(kvm_dispatch_virtualization_fault)
-
-
-ENTRY(kvm_dispatch_interrupt)
-	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
-	;;
-	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	(p15) ssm psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	mov out0=r13		// pass pointer to pt_regs as second arg
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-END(kvm_dispatch_interrupt)
-
-GLOBAL_ENTRY(ia64_leave_nested)
-	rsm psr.i
-	;;
-	adds r21=PT(PR)+16,r12
-	;;
-	lfetch [r21],PT(CR_IPSR)-PT(PR)
-	adds r2=PT(B6)+16,r12
-	adds r3=PT(R16)+16,r12
-	;;
-	lfetch [r21]
-	ld8 r28=[r2],8		// load b6
-	adds r29=PT(R24)+16,r12
-
-	ld8.fill r16=[r3]
-	adds r3=PT(AR_CSD)-PT(R16),r3
-	adds r30=PT(AR_CCV)+16,r12
-	;;
-	ld8.fill r24=[r29]
-	ld8 r15=[r30]		// load ar.ccv
-	;;
-	ld8 r29=[r2],16		// load b7
-	ld8 r30=[r3],16		// load ar.csd
-	;;
-	ld8 r31=[r2],16		// load ar.ssd
-	ld8.fill r8=[r3],16
-	;;
-	ld8.fill r9=[r2],16
-	ld8.fill r10=[r3],PT(R17)-PT(R10)
-	;;
-	ld8.fill r11=[r2],PT(R18)-PT(R11)
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	mov ar.csd=r30
-	mov ar.ssd=r31
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala			// invalidate ALAT
-	;;
-	srlz.i
-	;;
-	ld8.fill r22=[r2],24
-	ld8.fill r23=[r3],24
-	mov b6=r28
-	;;
-	ld8.fill r25=[r2],16
-	ld8.fill r26=[r3],16
-	mov b7=r29
-	;;
-	ld8.fill r27=[r2],16
-	ld8.fill r28=[r3],16
-	;;
-	ld8.fill r29=[r2],16
-	ld8.fill r30=[r3],24
-	;;
-	ld8.fill r31=[r2],PT(F9)-PT(R31)
-	adds r3=PT(F10)-PT(F6),r3
-	;;
-	ldf.fill f9=[r2],PT(F6)-PT(F9)
-	ldf.fill f10=[r3],PT(F8)-PT(F10)
-	;;
-	ldf.fill f6=[r2],PT(F7)-PT(F6)
-	;;
-	ldf.fill f7=[r2],PT(F11)-PT(F7)
-	ldf.fill f8=[r3],32
-	;;
-	srlz.i			// ensure interruption collection is off
-	mov ar.ccv=r15
-	;;
-	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
-	;;
-	ldf.fill f11=[r2]
-//	mov r18=r13
-//	mov r21=r13
-	adds r16=PT(CR_IPSR)+16,r12
-	adds r17=PT(CR_IIP)+16,r12
-	;;
-	ld8 r29=[r16],16	// load cr.ipsr
-	ld8 r28=[r17],16	// load cr.iip
-	;;
-	ld8 r30=[r16],16	// load cr.ifs
-	ld8 r25=[r17],16	// load ar.unat
-	;;
-	ld8 r26=[r16],16	// load ar.pfs
-	ld8 r27=[r17],16	// load ar.rsc
-	cmp.eq p9,p0=r0,r0
-	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
-	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
-	;;
-	ld8 r31=[r16],16	// load predicates
-	ld8 r22=[r17],16	// load b0
-	;;
-	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16	// load r1
-	;;
-	ld8.fill r12=[r16],16
-	ld8.fill r13=[r17],16
-	;;
-	ld8 r20=[r16],16	// ar.fpsr
-	ld8.fill r15=[r17],16
-	;;
-	ld8.fill r14=[r16],16
-	ld8.fill r2=[r17]
-	;;
-	ld8.fill r3=[r16]
-	;;
-	mov r16=ar.bsp		// get existing backing store pointer
-	;;
-	mov b0=r22
-	mov ar.pfs=r26
-	mov cr.ifs=r30
-	mov cr.ipsr=r29
-	mov ar.fpsr=r20
-	mov cr.iip=r28
-	;;
-	mov ar.rsc=r27
-	mov ar.unat=r25
-	mov pr=r31,-1
-	rfi
-END(ia64_leave_nested)
-
-GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on:
- */
-	adds r2 = PT(R4)+16,r12
-	adds r3 = PT(R5)+16,r12
-	adds r8 = PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8 = [r8]
-	;;
-	mov ar.unat=r8
-	;;
-	ld8.fill r4=[r2],16    //load r4
-	ld8.fill r5=[r3],16    //load r5
-	;;
-	ld8.fill r6=[r2]    //load r6
-	ld8.fill r7=[r3]    //load r7
-	;;
-END(ia64_leave_hypervisor_prepare)
-//fall through
-GLOBAL_ENTRY(ia64_leave_hypervisor)
-	rsm psr.i
-	;;
-	br.call.sptk.many b0=leave_hypervisor_tail
-	;;
-	adds r20=PT(PR)+16,r12
-	adds r8=PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8=[r8]
-	;;
-	mov ar.unat=r8
-	;;
-	lfetch [r20],PT(CR_IPSR)-PT(PR)
-	adds r2 = PT(B6)+16,r12
-	adds r3 = PT(B7)+16,r12
-	;;
-	lfetch [r20]
-	;;
-	ld8 r24=[r2],16        /* B6 */
-	ld8 r25=[r3],16        /* B7 */
-	;;
-	ld8 r26=[r2],16        /* ar_csd */
-	ld8 r27=[r3],16        /* ar_ssd */
-	mov b6 = r24
-	;;
-	ld8.fill r8=[r2],16
-	ld8.fill r9=[r3],16
-	mov b7 = r25
-	;;
-	mov ar.csd = r26
-	mov ar.ssd = r27
-	;;
-	ld8.fill r10=[r2],PT(R15)-PT(R10)
-	ld8.fill r11=[r3],PT(R14)-PT(R11)
-	;;
-	ld8.fill r15=[r2],PT(R16)-PT(R15)
-	ld8.fill r14=[r3],PT(R17)-PT(R14)
-	;;
-	ld8.fill r16=[r2],16
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	;;
-	ld8.fill r22=[r2],16
-	ld8.fill r23=[r3],16
-	;;
-	ld8.fill r24=[r2],16
-	ld8.fill r25=[r3],16
-	;;
-	ld8.fill r26=[r2],16
-	ld8.fill r27=[r3],16
-	;;
-	ld8.fill r28=[r2],16
-	ld8.fill r29=[r3],16
-	;;
-	ld8.fill r30=[r2],PT(F6)-PT(R30)
-	ld8.fill r31=[r3],PT(F7)-PT(R31)
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala          // invalidate ALAT
-	;;
-	srlz.i          // ensure interruption collection is off
-	;;
-	bsw.0
-	;;
-	adds r16 = PT(CR_IPSR)+16,r12
-	adds r17 = PT(CR_IIP)+16,r12
-	mov r21=r13		// get current
-	;;
-	ld8 r31=[r16],16    // load cr.ipsr
-	ld8 r30=[r17],16    // load cr.iip
-	;;
-	ld8 r29=[r16],16    // load cr.ifs
-	ld8 r28=[r17],16    // load ar.unat
-	;;
-	ld8 r27=[r16],16    // load ar.pfs
-	ld8 r26=[r17],16    // load ar.rsc
-	;;
-	ld8 r25=[r16],16    // load ar.rnat
-	ld8 r24=[r17],16    // load ar.bspstore
-	;;
-	ld8 r23=[r16],16    // load predicates
-	ld8 r22=[r17],16    // load b0
-	;;
-	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16    //load r1
-	;;
-	ld8.fill r12=[r16],16    //load r12
-	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-	;;
-	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-	;;
-	ld8.fill r3=[r16]	//load r3
-	ld8 r18=[r17]	//load ar_ccv
-	;;
-	mov ar.fpsr=r19
-	mov ar.ccv=r18
-	shr.u r18=r20,16
-	;;
-kvm_rbs_switch:
-	mov r19=96
-
-kvm_dont_preserve_current_frame:
-/*
-    * To prevent leaking bits between the hypervisor and guest domain,
-    * we must clear the stacked registers in the "invalid" partition here.
-    * 5 registers/cycle on McKinley).
-    */
-#   define pRecurse	p6
-#   define pReturn	p7
-#   define Nregs	14
-
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
-	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
-	;;
-	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
-	shladd in0=loc1,3,r19
-	mov in1=0
-	;;
-	TEXT_ALIGN(32)
-kvm_rse_clear_invalid:
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0
-	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-	add out1=1,in1		// increment recursion count
-	mov loc1=0
-	mov loc2=0
-	;;
-	mov loc3=0
-	mov loc4=0
-	mov loc5=0
-	mov loc6=0
-	mov loc7=0
-(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-	;;
-	mov loc8=0
-	mov loc9=0
-	cmp.ne pReturn,p0=r0,in1
-	// if recursion count != 0, we need to do a br.ret
-	mov loc10=0
-	mov loc11=0
-(pReturn) br.ret.dptk.many b0
-
-#	undef pRecurse
-#	undef pReturn
-
-// loadrs has already been shifted
-	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-	;;
-	loadrs
-	;;
-	mov ar.bspstore=r24
-	;;
-	mov ar.unat=r28
-	mov ar.rnat=r25
-	mov ar.rsc=r26
-	;;
-	mov cr.ipsr=r31
-	mov cr.iip=r30
-	mov cr.ifs=r29
-	mov ar.pfs=r27
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]   //vpd
-	adds r17=VMM_VCPU_ISR_OFFSET,r21
-	;;
-	ld8 r17=[r17]
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]        //vpsr
-	mov r25=r18
-	adds r16= VMM_VCPU_GP_OFFSET,r21
-	;;
-	ld8 r16= [r16] // Put gp in r24
-	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-	;;
-	add  r24=r24,r16
-	;;
-	br.sptk.many  kvm_vps_sync_write       // call the service
-	;;
-END(ia64_leave_hypervisor)
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- *  must be at bank 0
- *  parameter:
- *  r17:cr.isr
- *  r18:vpd
- *  r19:vpsr
- *  r22:b0
- *  r23:predicate
- */
-	mov r24=r22
-	mov r25=r18
-	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-(p1) 	br.cond.sptk.few kvm_vps_resume_normal
-(p2)	br.cond.sptk.many kvm_vps_resume_handler
-	;;
-END(ia64_vmm_entry)
-
-/*
- * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
- *                  u64 arg3, u64 arg4, u64 arg5,
- *                  u64 arg6, u64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- *  rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
-    .regstk 4,4,0,0
-
-rpsave  =   loc0
-pfssave =   loc1
-psrsave =   loc2
-entry   =   loc3
-hostret =   r24
-
-	alloc   pfssave=ar.pfs,4,4,0,0
-	mov rpsave=rp
-	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-	;;
-	ld8 entry=[entry]
-1:	mov hostret=ip
-	mov r25=in1         // copy arguments
-	mov r26=in2
-	mov r27=in3
-	mov psrsave=psr
-	;;
-	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-	;;
-	add hostret=2f-1b,hostret   // calculate return address
-	add entry=entry,in0
-	;;
-	rsm psr.i | psr.ic
-	;;
-	srlz.i
-	mov b6=entry
-	br.cond.sptk b6         // call the service
-2:
-// Architectural sequence for enabling interrupts if necessary
-(p7)    ssm psr.ic
-	;;
-(p7)    srlz.i
-	;;
-(p6)    ssm psr.i
-	;;
-	mov rp=rpsave
-	mov ar.pfs=pfssave
-	mov r8=r31
-	;;
-	srlz.d
-	br.ret.sptk rp
-
-END(ia64_call_vsa)
-
-#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
-
-GLOBAL_ENTRY(vmm_reset_entry)
-	//set up ipsr, iip, vpd.vpsr, dcr
-	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-	// For DCR: all bits 0
-	bsw.0
-	;;
-	mov r21 =r13
-	adds r14=-VMM_PT_REGS_SIZE, r12
-	;;
-	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-	movl r10=0x8000000000000000
-	adds r16=PT(CR_IIP), r14
-	adds r20=PT(R1), r14
-	;;
-	rsm psr.ic | psr.i
-	;;
-	srlz.i
-	;;
-	mov ar.rsc = 0
-	;;
-	flushrs
-	;;
-	mov ar.bspstore = 0
-	// clear BSPSTORE
-	;;
-	mov cr.ipsr=r6
-	mov cr.ifs=r10
-	ld8 r4 = [r16] // Set init iip for first run.
-	ld8 r1 = [r20]
-	;;
-	mov cr.iip=r4
-	adds r16=VMM_VPD_BASE_OFFSET,r13
-	;;
-	ld8 r18=[r16]
-	;;
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]
-	mov r17=r0
-	mov r22=r0
-	mov r23=r0
-	br.cond.sptk ia64_vmm_entry
-	br.ret.sptk  b0
-END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
deleted file mode 100644
index b214b5b..0000000
--- a/arch/ia64/kvm/vti.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * vti.h: prototype for generial vt related interface
- *   	Copyright (c) 2004, Intel Corporation.
- *
- *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- *	Fred Yang (fred.yang@intel.com)
- * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
- *
- *  	Copyright (c) 2007, Intel Corporation.
- *  	Zhang xiantao <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#ifndef _KVM_VT_I_H
-#define _KVM_VT_I_H
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-
-#include <linux/kvm_host.h>
-
-/* define itr.i and itr.d  in ia64_itr function */
-#define	ITR	0x01
-#define	DTR	0x02
-#define	IaDTR	0x03
-
-#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
-#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
-
-#define RR6 (6UL<<61)
-#define RR7 (7UL<<61)
-
-
-/* config_options in pal_vp_init_env */
-#define	VP_INITIALIZE	1UL
-#define	VP_FR_PMC	1UL<<1
-#define	VP_OPCODE	1UL<<8
-#define	VP_CAUSE	1UL<<9
-#define VP_FW_ACC   	1UL<<63
-
-/* init vp env with initializing vm_buffer */
-#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
-	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-/* init vp env without initializing vm_buffer */
-#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
-
-#define		PAL_VP_CREATE   265
-/* Stacked Virt. Initializes a new VPD for the operation of
- * a new virtual processor in the virtual environment.
- */
-#define		PAL_VP_ENV_INFO 266
-/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
-#define		PAL_VP_EXIT_ENV 267
-/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
-#define		PAL_VP_INIT_ENV 268
-/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
-#define		PAL_VP_REGISTER 269
-/*Stacked Virt. Register a different host IVT for the virtual processor.*/
-#define		PAL_VP_RESUME   270
-/* Renamed from PAL_VP_RESUME */
-#define		PAL_VP_RESTORE  270
-/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
-#define		PAL_VP_SUSPEND  271
-/* Renamed from PAL_VP_SUSPEND */
-#define		PAL_VP_SAVE	271
-/* Stacked Virt. Suspends operation for the specified virtual processor on
- * the logical processor.
- */
-#define		PAL_VP_TERMINATE 272
-/* Stacked Virt. Terminates operation for the specified virtual processor.*/
-
-union vac {
-	unsigned long value;
-	struct {
-		unsigned int a_int:1;
-		unsigned int a_from_int_cr:1;
-		unsigned int a_to_int_cr:1;
-		unsigned int a_from_psr:1;
-		unsigned int a_from_cpuid:1;
-		unsigned int a_cover:1;
-		unsigned int a_bsw:1;
-		long reserved:57;
-	};
-};
-
-union vdc {
-	unsigned long value;
-	struct {
-		unsigned int d_vmsw:1;
-		unsigned int d_extint:1;
-		unsigned int d_ibr_dbr:1;
-		unsigned int d_pmc:1;
-		unsigned int d_to_pmd:1;
-		unsigned int d_itm:1;
-		long reserved:58;
-	};
-};
-
-struct vpd {
-	union vac   vac;
-	union vdc   vdc;
-	unsigned long  virt_env_vaddr;
-	unsigned long  reserved1[29];
-	unsigned long  vhpi;
-	unsigned long  reserved2[95];
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  reserved3[11];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	unsigned long  reserved4[76];
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-	unsigned long  reserved5[128];
-	unsigned long  reserved6[3456];
-	unsigned long  vmm_avail[128];
-	unsigned long  reserved7[4096];
-};
-
-#define PAL_PROC_VM_BIT		(1UL << 40)
-#define PAL_PROC_VMSW_BIT	(1UL << 54)
-
-static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
-		u64 *vp_env_info)
-{
-	struct ia64_pal_retval iprv;
-	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
-	*buffer_size = iprv.v0;
-	*vp_env_info = iprv.v1;
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_exit_env(u64 iva)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
-			u64 vbase_addr, u64 *vsa_base)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
-			vbase_addr);
-	*vsa_base = iprv.v0;
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-#endif
-
-/*VPD field offset*/
-#define VPD_VAC_START_OFFSET		0
-#define VPD_VDC_START_OFFSET		8
-#define VPD_VHPI_START_OFFSET		256
-#define VPD_VGR_START_OFFSET		1024
-#define VPD_VBGR_START_OFFSET		1152
-#define VPD_VNAT_START_OFFSET		1280
-#define VPD_VBNAT_START_OFFSET		1288
-#define VPD_VCPUID_START_OFFSET		1296
-#define VPD_VPSR_START_OFFSET		1424
-#define VPD_VPR_START_OFFSET		1432
-#define VPD_VRSE_CFLE_START_OFFSET	1440
-#define VPD_VCR_START_OFFSET		2048
-#define VPD_VTPR_START_OFFSET		2576
-#define VPD_VRR_START_OFFSET		3072
-#define VPD_VMM_VAIL_START_OFFSET	31744
-
-/*Virtualization faults*/
-
-#define EVENT_MOV_TO_AR			 1
-#define EVENT_MOV_TO_AR_IMM		 2
-#define EVENT_MOV_FROM_AR		 3
-#define EVENT_MOV_TO_CR			 4
-#define EVENT_MOV_FROM_CR		 5
-#define EVENT_MOV_TO_PSR		 6
-#define EVENT_MOV_FROM_PSR		 7
-#define EVENT_ITC_D			 8
-#define EVENT_ITC_I			 9
-#define EVENT_MOV_TO_RR			 10
-#define EVENT_MOV_TO_DBR		 11
-#define EVENT_MOV_TO_IBR		 12
-#define EVENT_MOV_TO_PKR		 13
-#define EVENT_MOV_TO_PMC		 14
-#define EVENT_MOV_TO_PMD		 15
-#define EVENT_ITR_D			 16
-#define EVENT_ITR_I			 17
-#define EVENT_MOV_FROM_RR		 18
-#define EVENT_MOV_FROM_DBR		 19
-#define EVENT_MOV_FROM_IBR		 20
-#define EVENT_MOV_FROM_PKR		 21
-#define EVENT_MOV_FROM_PMC		 22
-#define EVENT_MOV_FROM_CPUID		 23
-#define EVENT_SSM			 24
-#define EVENT_RSM			 25
-#define EVENT_PTC_L			 26
-#define EVENT_PTC_G			 27
-#define EVENT_PTC_GA			 28
-#define EVENT_PTR_D			 29
-#define EVENT_PTR_I			 30
-#define EVENT_THASH			 31
-#define EVENT_TTAG			 32
-#define EVENT_TPA			 33
-#define EVENT_TAK			 34
-#define EVENT_PTC_E			 35
-#define EVENT_COVER			 36
-#define EVENT_RFI			 37
-#define EVENT_BSW_0			 38
-#define EVENT_BSW_1			 39
-#define EVENT_VMSW			 40
-
-/**PAL virtual services offsets */
-#define PAL_VPS_RESUME_NORMAL           0x0000
-#define PAL_VPS_RESUME_HANDLER          0x0400
-#define PAL_VPS_SYNC_READ               0x0800
-#define PAL_VPS_SYNC_WRITE              0x0c00
-#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
-#define PAL_VPS_THASH                   0x1400
-#define PAL_VPS_TTAG                    0x1800
-#define PAL_VPS_RESTORE                 0x1c00
-#define PAL_VPS_SAVE                    0x2000
-
-#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
deleted file mode 100644
index a7869f8..0000000
--- a/arch/ia64/kvm/vtlb.c
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *
- * Copyright (c) 2007, Intel Corporation.
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include "vcpu.h"
-
-#include <linux/rwsem.h>
-
-#include <asm/tlb.h>
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-
-static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
-{
-	return ((trp->p) && (trp->rid == rid)
-				&& ((va-trp->vadr) < PSIZE(trp->ps)));
-}
-
-/*
- * Only for GUEST TR format.
- */
-static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
-{
-	u64 sa1, ea1;
-
-	if (!trp->p || trp->rid != rid)
-		return 0;
-
-	sa1 = trp->vadr;
-	ea1 = sa1 + PSIZE(trp->ps) - 1;
-	eva -= 1;
-	if ((sva > ea1) || (sa1 > eva))
-		return 0;
-	else
-		return 1;
-
-}
-
-void machine_tlb_purge(u64 va, u64 ps)
-{
-	ia64_ptcl(va, ps << 2);
-}
-
-void local_flush_tlb_all(void)
-{
-	int i, j;
-	unsigned long flags, count0, count1;
-	unsigned long stride0, stride1, addr;
-
-	addr    = current_vcpu->arch.ptce_base;
-	count0  = current_vcpu->arch.ptce_count[0];
-	count1  = current_vcpu->arch.ptce_count[1];
-	stride0 = current_vcpu->arch.ptce_stride[0];
-	stride1 = current_vcpu->arch.ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();          /* srlz.i implies srlz.d */
-}
-
-int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
-{
-	union ia64_rr    vrr;
-	union ia64_pta   vpta;
-	struct  ia64_psr   vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vpta.val = vcpu_get_pta(vcpu);
-
-	if (vrr.ve & vpta.ve) {
-		switch (ref) {
-		case DATA_REF:
-		case NA_REF:
-			return vpsr.dt;
-		case INST_REF:
-			return vpsr.dt && vpsr.it && vpsr.ic;
-		case RSE_REF:
-			return vpsr.dt && vpsr.rt;
-
-		}
-	}
-	return 0;
-}
-
-struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
-{
-	u64 index, pfn, rid, pfn_bits;
-
-	pfn_bits = vpta.size - 5 - 8;
-	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
-	rid = _REGION_ID(vrr);
-	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
-	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
-
-	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
-				(index << 5));
-}
-
-struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
-{
-
-	struct thash_data *trp;
-	int  i;
-	u64 rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-						i < NDTRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
-{
-	union ia64_rr rr;
-	struct thash_data *head;
-	unsigned long ps, gpaddr;
-
-	ps = itir_ps(itir);
-	rr.val = ia64_get_rr(ifa);
-
-	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-					(ifa & ((1UL << ps) - 1));
-
-	head = (struct thash_data *)ia64_thash(ifa);
-	head->etag = INVALID_TI_TAG;
-	ia64_mf();
-	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
-	head->itir = rr.ps << 2;
-	head->etag = ia64_ttag(ifa);
-	head->gpaddr = gpaddr;
-}
-
-void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
-{
-	u64 i, dirty_pages = 1;
-	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
-	vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
-	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
-
-	vmm_spin_lock(lock);
-	for (i = 0; i < dirty_pages; i++) {
-		/* avoid RMW */
-		if (!test_bit(base_gfn + i, dirty_bitmap))
-			set_bit(base_gfn + i , dirty_bitmap);
-	}
-	vmm_spin_unlock(lock);
-}
-
-void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
-{
-	u64 phy_pte, psr;
-	union ia64_rr mrr;
-
-	mrr.val = ia64_get_rr(va);
-	phy_pte = translate_phy_pte(&pte, itir, va);
-
-	if (itir_ps(itir) >= mrr.ps) {
-		vhpt_insert(phy_pte, itir, va, pte);
-	} else {
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, va, phy_pte, itir_ps(itir));
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, itir_ps(itir));
-}
-
-/*
- *   vhpt lookup
- */
-struct thash_data *vhpt_lookup(u64 va)
-{
-	struct thash_data *head;
-	u64 tag;
-
-	head = (struct thash_data *)ia64_thash(va);
-	tag = ia64_ttag(va);
-	if (head->etag == tag)
-		return head;
-	return NULL;
-}
-
-u64 guest_vhpt_lookup(u64 iha, u64 *pte)
-{
-	u64 ret;
-	struct thash_data *data;
-
-	data = __vtr_lookup(current_vcpu, iha, D_TLB);
-	if (data != NULL)
-		thash_vhpt_insert(current_vcpu, data->page_flags,
-			data->itir, iha, D_TLB);
-
-	asm volatile ("rsm psr.ic|psr.i;;"
-			"srlz.d;;"
-			"ld8.s r9=[%1];;"
-			"tnat.nz p6,p7=r9;;"
-			"(p6) mov %0=1;"
-			"(p6) mov r9=r0;"
-			"(p7) extr.u r9=r9,0,53;;"
-			"(p7) mov %0=r0;"
-			"(p7) st8 [%2]=r9;;"
-			"ssm psr.ic;;"
-			"srlz.d;;"
-			"ssm psr.i;;"
-			"srlz.d;;"
-			: "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
-
-	return ret;
-}
-
-/*
- *  purge software guest tlb
- */
-
-static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, curadr, size, psbits, tag, rr_ps, num;
-	union ia64_rr vrr;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	psbits = VMX(v, psbits[(va >> 61)]);
-	start = va & ~((1UL << ps) - 1);
-	while (psbits) {
-		curadr = start;
-		rr_ps = __ffs(psbits);
-		psbits &= ~(1UL << rr_ps);
-		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
-		size = PSIZE(rr_ps);
-		vrr.ps = rr_ps;
-		while (num) {
-			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
-			if (cur->etag == tag && cur->ps == rr_ps)
-				cur->etag = INVALID_TI_TAG;
-			curadr += size;
-			num--;
-		}
-	}
-}
-
-
-/*
- *  purge VHPT and machine TLB
- */
-static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, size, tag, num;
-	union ia64_rr rr;
-
-	start = va & ~((1UL << ps) - 1);
-	rr.val = ia64_get_rr(va);
-	size = PSIZE(rr.ps);
-	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
-	while (num) {
-		cur = (struct thash_data *)ia64_thash(start);
-		tag = ia64_ttag(start);
-		if (cur->etag == tag)
-			cur->etag = INVALID_TI_TAG;
-		start += size;
-		num--;
-	}
-	machine_tlb_purge(va, ps);
-}
-
-/*
- * Insert an entry into hash TLB or VHPT.
- * NOTES:
- *  1: When inserting VHPT to thash, "va" is a must covered
- *  address by the inserted machine VHPT entry.
- *  2: The format of entry is always in TLB.
- *  3: The caller need to make sure the new entry will not overlap
- *     with any existed entry.
- */
-void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
-{
-	struct thash_data *head;
-	union ia64_rr vrr;
-	u64 tag;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	vrr.ps = itir_ps(itir);
-	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
-	head->page_flags = pte;
-	head->itir = itir;
-	head->etag = tag;
-}
-
-int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
-{
-	struct thash_data  *trp;
-	int  i;
-	u64 end, rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	end = va + PSIZE(ps);
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-					i < NDTRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	}
-	return -1;
-}
-
-/*
- * Purge entries in VTLB and VHPT
- */
-void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	if (vcpu_quick_region_check(v->arch.tc_regions, va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	u64 old_va = va;
-	va = REGION_OFFSET(va);
-	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
-{
-	u64 ps, ps_mask, paddr, maddr, io_mask;
-	union pte_flags phy_pte;
-
-	ps = itir_ps(itir);
-	ps_mask = ~((1UL << ps) - 1);
-	phy_pte.val = *pte;
-	paddr = *pte;
-	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-	maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
-	io_mask = maddr & GPFN_IO_MASK;
-	if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
-		*pte |= VTLB_PTE_IO;
-		return -1;
-	}
-	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
-					(paddr & ~PAGE_MASK);
-	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
-	return phy_pte.val;
-}
-
-/*
- * Purge overlap TCs and then insert the new entry to emulate itc ops.
- * Notes: Only TC entry can purge and insert.
- */
-void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
-						u64 ifa, int type)
-{
-	u64 ps;
-	u64 phy_pte, io_mask, index;
-	union ia64_rr vrr, mrr;
-
-	ps = itir_ps(itir);
-	vrr.val = vcpu_get_rr(v, ifa);
-	mrr.val = ia64_get_rr(ifa);
-
-	index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
-	phy_pte = translate_phy_pte(&pte, itir, ifa);
-
-	/* Ensure WB attribute if pte is related to a normal mem page,
-	 * which is required by vga acceleration since qemu maps shared
-	 * vram buffer with WB.
-	 */
-	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
-			io_mask != GPFN_PHYS_MMIO) {
-		pte &= ~_PAGE_MA_MASK;
-		phy_pte &= ~_PAGE_MA_MASK;
-	}
-
-	vtlb_purge(v, ifa, ps);
-	vhpt_purge(v, ifa, ps);
-
-	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
-		vtlb_insert(v, pte, itir, ifa);
-		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-	}
-	if (pte & VTLB_PTE_IO)
-		return;
-
-	if (ps >= mrr.ps)
-		vhpt_insert(phy_pte, itir, ifa, pte);
-	else {
-		u64 psr;
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, ifa, phy_pte, ps);
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, ps);
-
-}
-
-/*
- * Purge all TCs or VHPT entries including those in Hash table.
- *
- */
-
-void thash_purge_all(struct kvm_vcpu *v)
-{
-	int i;
-	struct thash_data *head;
-	struct thash_cb  *vtlb, *vhpt;
-	vtlb = &v->arch.vtlb;
-	vhpt = &v->arch.vhpt;
-
-	for (i = 0; i < 8; i++)
-		VMX(v, psbits[i]) = 0;
-
-	head = vtlb->hash;
-	for (i = 0; i < vtlb->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	head = vhpt->hash;
-	for (i = 0; i < vhpt->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	local_flush_tlb_all();
-}
-
-/*
- * Lookup the hash table and its collision chain to find an entry
- * covering this address rid:va or the entry.
- *
- * INPUT:
- *  in: TLB format for both VHPT & TLB.
- */
-struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
-{
-	struct thash_data  *cch;
-	u64    psbits, ps, tag;
-	union ia64_rr vrr;
-
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	cch = __vtr_lookup(v, va, is_data);
-	if (cch)
-		return cch;
-
-	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
-		return NULL;
-
-	psbits = VMX(v, psbits[(va >> 61)]);
-	vrr.val = vcpu_get_rr(v, va);
-	while (psbits) {
-		ps = __ffs(psbits);
-		psbits &= ~(1UL << ps);
-		vrr.ps = ps;
-		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
-		if (cch->etag == tag && cch->ps == ps)
-			return cch;
-	}
-
-	return NULL;
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(struct thash_cb *hcb, u64 sz)
-{
-	int i;
-	struct thash_data *head;
-
-	hcb->pta.val = (unsigned long)hcb->hash;
-	hcb->pta.vf = 1;
-	hcb->pta.ve = 1;
-	hcb->pta.size = sz;
-	head = hcb->hash;
-	for (i = 0; i < hcb->num; i++) {
-		head->page_flags = 0;
-		head->itir = 0;
-		head->etag = INVALID_TI_TAG;
-		head->next = 0;
-		head++;
-	}
-}
-
-u64 kvm_get_mpt_entry(u64 gpfn)
-{
-	u64 *base = (u64 *) KVM_P2M_BASE;
-
-	if (gpfn >= (KVM_P2M_SIZE >> 3))
-		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
-
-	return *(base + gpfn);
-}
-
-u64 kvm_lookup_mpa(u64 gpfn)
-{
-	u64 maddr;
-	maddr = kvm_get_mpt_entry(gpfn);
-	return maddr&_PAGE_PPN_MASK;
-}
-
-u64 kvm_gpa_to_mpa(u64 gpa)
-{
-	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
-	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
-}
-
-/*
- * Fetch guest bundle code.
- * INPUT:
- *  gip: guest ip
- *  pbundle: used to return fetched bundle.
- */
-int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
-{
-	u64     gpip = 0;   /* guest physical IP*/
-	u64     *vpa;
-	struct thash_data    *tlb;
-	u64     maddr;
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
-		/* I-side physical mode */
-		gpip = gip;
-	} else {
-		tlb = vtlb_lookup(vcpu, gip, I_TLB);
-		if (tlb)
-			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
-				(gip & (PSIZE(tlb->ps) - 1));
-	}
-	if (gpip) {
-		maddr = kvm_gpa_to_mpa(gpip);
-	} else {
-		tlb = vhpt_lookup(gip);
-		if (tlb == NULL) {
-			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
-			return IA64_FAULT;
-		}
-		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
-					| (gip & (PSIZE(tlb->ps) - 1));
-	}
-	vpa = (u64 *)__kvm_va(maddr);
-
-	pbundle->i64[0] = *vpa++;
-	pbundle->i64[1] = *vpa;
-
-	return IA64_NO_FAULT;
-}
-
-void kvm_init_vhpt(struct kvm_vcpu *v)
-{
-	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&v->arch.vhpt, VHPT_SHIFT);
-	ia64_set_pta(v->arch.vhpt.pta.val);
-	/*Enable VHPT here?*/
-}
-
-void kvm_init_vtlb(struct kvm_vcpu *v)
-{
-	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&v->arch.vtlb, VTLB_SHIFT);
-}
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 95cef0b..df19d0c 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -565,6 +565,7 @@ void consistent_free(size_t size, void *vaddr);
 void consistent_sync(void *vaddr, size_t size, int direction);
 void consistent_sync_page(struct page *page, unsigned long offset,
 	size_t size, int direction);
+unsigned long consistent_virt_to_pfn(void *vaddr);
 
 void setup_memory(void);
 #endif /* __ASSEMBLY__ */
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 4633c36..ed7ba8a 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -154,9 +154,36 @@ dma_direct_sync_sg_for_device(struct device *dev,
 			__dma_sync(sg->dma_address, sg->length, direction);
 }
 
+int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+			     void *cpu_addr, dma_addr_t handle, size_t size,
+			     struct dma_attrs *attrs)
+{
+#ifdef CONFIG_MMU
+	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+	unsigned long pfn;
+
+	if (off >= count || user_count > (count - off))
+		return -ENXIO;
+
+#ifdef NOT_COHERENT_CACHE
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	pfn = consistent_virt_to_pfn(cpu_addr);
+#else
+	pfn = virt_to_pfn(cpu_addr);
+#endif
+	return remap_pfn_range(vma, vma->vm_start, pfn + off,
+			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+#else
+	return -ENXIO;
+#endif
+}
+
 struct dma_map_ops dma_direct_ops = {
 	.alloc		= dma_direct_alloc_coherent,
 	.free		= dma_direct_free_coherent,
+	.mmap		= dma_direct_mmap_coherent,
 	.map_sg		= dma_direct_map_sg,
 	.dma_supported	= dma_direct_dma_supported,
 	.map_page	= dma_direct_map_page,
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index e10ad93..b06c3a7 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -156,6 +156,25 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 }
 EXPORT_SYMBOL(consistent_alloc);
 
+#ifdef CONFIG_MMU
+static pte_t *consistent_virt_to_pte(void *vaddr)
+{
+	unsigned long addr = (unsigned long)vaddr;
+
+	return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
+}
+
+unsigned long consistent_virt_to_pfn(void *vaddr)
+{
+	pte_t *ptep = consistent_virt_to_pte(vaddr);
+
+	if (pte_none(*ptep) || !pte_present(*ptep))
+		return 0;
+
+	return pte_pfn(*ptep);
+}
+#endif
+
 /*
  * free page(s) as defined by the above mapping.
  */
@@ -181,13 +200,9 @@ void consistent_free(size_t size, void *vaddr)
 	} while (size -= PAGE_SIZE);
 #else
 	do {
-		pte_t *ptep;
+		pte_t *ptep = consistent_virt_to_pte(vaddr);
 		unsigned long pfn;
 
-		ptep = pte_offset_kernel(pmd_offset(pgd_offset_k(
-						(unsigned int)vaddr),
-					(unsigned int)vaddr),
-				(unsigned int)vaddr);
 		if (!pte_none(*ptep) && pte_present(*ptep)) {
 			pfn = pte_pfn(*ptep);
 			pte_clear(&init_mm, (unsigned int)vaddr, ptep);
diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index 46e8f76..3bdb72a 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig
@@ -36,7 +36,7 @@ CONFIG_PCI=y
 CONFIG_PCI_REALLOC_ENABLE_AUTO=y
 CONFIG_PCCARD=y
 CONFIG_PCMCIA_ALCHEMY_DEVBOARD=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 227a9de..e51aad9 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -37,7 +37,6 @@ CONFIG_MIPS32_N32=y
 CONFIG_PM=y
 CONFIG_HIBERNATION=y
 CONFIG_PM_STD_PARTITION="/dev/hda3"
-CONFIG_PM_RUNTIME=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_DEBUG=y
 CONFIG_CPU_FREQ_STAT=m
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 1c6191e..7eabcd2 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -58,7 +58,7 @@ CONFIG_BINFMT_MISC=m
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index 70509a4..b3d1d37 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig
@@ -61,7 +61,7 @@ CONFIG_BINFMT_MISC=y
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index 82207e8..3d8016d 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -41,7 +41,7 @@ CONFIG_PCI=y
 CONFIG_PCI_MSI=y
 CONFIG_PCI_DEBUG=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index e142c9ee..2328f82 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -14,6 +14,8 @@
 # Nios2 port by Wind River Systems Inc trough:
 #   fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
 
+KBUILD_DEFCONFIG := 3c120_defconfig
+
 UTS_SYSNAME = Linux
 
 export MMU
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 9102bfd..6e24d7c 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -45,6 +45,8 @@ static inline void iounmap(void __iomem *addr)
 	__iounmap(addr);
 }
 
+#define ioremap_wc ioremap_nocache
+
 /* Pages to physical address... */
 #define page_to_phys(page)	virt_to_phys(page_to_virt(page))
 #define page_to_bus(page)	page_to_virt(page)
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index acedc0a..caa51ff 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -168,7 +168,7 @@ do {									\
 	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);		\
 	unsigned long __gu_val;						\
 	__get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 	})
 
@@ -180,7 +180,7 @@ do {									\
 	if (access_ok(VERIFY_READ,  __gu_ptr, sizeof(*__gu_ptr)))	\
 		__get_user_common(__gu_val, sizeof(*__gu_ptr),		\
 			__gu_ptr, __gu_err);				\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 })
 
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2e637c8..879de5e 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -36,7 +36,7 @@ CONFIG_KEXEC=y
 CONFIG_SCHED_SMT=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE=""
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 # CONFIG_SECCOMP is not set
 # CONFIG_PCI is not set
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 0000000..d2f99ca
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING              0
+#define PNV_THREAD_NAP                  1
+#define PNV_THREAD_SLEEP                2
+#define PNV_THREAD_WINKLE               3
+#define PNV_CORE_IDLE_LOCK_BIT          0x100
+#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2..942c7b1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
 			unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel,
 			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa8179..2d81e20 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	/* This covers 14..54 bits of va*/
 	rb = (v & ~0x7fUL) << 16;		/* AVA field */
 
-	rb |= v >> (62 - 8);			/*  B field */
+	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/*  B field */
 	/*
 	 * AVA in v had cleared lower 23 bits. We need to derive
 	 * that from pteg index
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0478556..7efd666a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvm_rma_info {
-	atomic_t use_count;
-	unsigned long base_pfn;
-};
-
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK	0x1f
-#define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU	HPTE_R_W	/* 0x40 */
-#define KVMPPC_GOT_PAGE		0x80
-
 struct kvm_arch_memory_slot {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long *rmap;
-	unsigned long *slot_phys;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
@@ -242,14 +230,12 @@ struct kvm_arch {
 	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
-	int using_mmu_notifiers;
 	u32 hpt_order;
 	atomic_t vcpus_running;
 	u32 online_vcores;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
-	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
 	struct list_head runnable_threads;
 	spinlock_t lock;
 	wait_queue_head_t wq;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
 	ulong dpdes;		/* doorbell state (POWER8) */
 	void *mpp_buffer; /* Micro Partition Prefetch buffer */
 	bool mpp_buffer_is_valid;
+	ulong conferring_threads;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
 	spinlock_t tbacct_lock;
 	u64 busy_stolen;
 	u64 busy_preempt;
+
+	u32 emul_inst;
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a6dcdb6..46bf652 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2f..eb95b67 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -56,6 +56,14 @@ struct opal_sg_list {
 #define OPAL_HARDWARE_FROZEN	-13
 #define OPAL_WRONG_STATE	-14
 #define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
 
 /* API Tokens (in r0) */
 #define OPAL_INVALID_CALL			-1
@@ -152,12 +160,25 @@ struct opal_sg_list {
 #define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
 #define OPAL_REGISTER_DUMP_REGION		101
 #define OPAL_UNREGISTER_DUMP_REGION		102
 #define OPAL_WRITE_TPO				103
 #define OPAL_READ_TPO				104
 #define OPAL_IPMI_SEND				107
 #define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED	0x00010000
+#define OPAL_PM_SLEEP_ENABLED	0x00020000
+#define OPAL_PM_WINKLE_ENABLED	0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000
 
 #ifndef __ASSEMBLY__
 
@@ -712,6 +733,24 @@ typedef struct oppanel_line {
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
 
@@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+			 struct opal_i2c_request *oreq);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386c..e5f22c6 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,16 @@ struct paca_struct {
 	u64 tm_scratch;                 /* TM scratch area for reclaim */
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+	u32 *core_idle_state_ptr;
+	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP	*/
+	/* Mask to indicate thread id in core */
+	u8 thread_mask;
+	/* Mask to denote subcore sibling threads */
+	u8 subcore_sibling_mask;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Exclusive emergency stack pointer for machine check exception. */
 	void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a52877..03cd858 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -194,6 +194,7 @@
 
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
+#define PPC_INST_WINKLE			0x4c0003e4
 
 /* A2 specific instructions */
 #define PPC_INST_ERATWE			0x7c0001a6
@@ -375,6 +376,7 @@
 
 #define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
 
 /* BHRB instructions */
 #define PPC_CLRBHRB		stringify_in_c(.long PPC_INST_CLRBHRB)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798..bf117d8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279..1c874fb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,8 +118,10 @@
 #define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 #ifdef __BIG_ENDIAN__
 #define MSR_		__MSR
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV)
 #else
 #define MSR_		(__MSR | MSR_LE)
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV | MSR_LE)
 #endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
 #define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
 #define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
 #define SPRN_PPR	0x380	/* SMT Thread status Register */
+#define SPRN_TSCR	0x399	/* Thread Switch Control Register */
 
 #define SPRN_DEC	0x016		/* Decrement Register */
 #define SPRN_DER	0x095		/* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
 #define SPRN_BESCR	806	/* Branch event status and control register */
 #define   BESCR_GE	0x8000000000000000ULL /* Global Enable */
 #define SPRN_WORT	895	/* Workload optimization register - thread */
+#define SPRN_WORC	863	/* Workload optimization register - core */
 
 #define SPRN_PMC1	787
 #define SPRN_PMC2	788
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698..ff21b7a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+	arch |= __AUDIT_ARCH_LE;
+#endif
+	return arch;
 }
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43..a0c071d 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -284,7 +284,7 @@ do {								\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 #endif /* __powerpc64__ */
@@ -297,7 +297,7 @@ do {								\
 	might_fault();							\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
@@ -308,7 +308,7 @@ do {								\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3..e624f96 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -489,7 +489,6 @@ int main(void)
 	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
-	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -726,5 +726,16 @@ int main(void)
 					arch.timing_last_enter.tv32.tbl));
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+	DEFINE(PACA_SUBCORE_SIBLING_MASK,
+			offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382..c2df815 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>
 
 /*
  * We layout physical memory as follows:
@@ -101,23 +102,34 @@ system_reset_pSeries:
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
 	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap. We only handle no state loss and
-	 * supervisor state loss. We do -not- handle hypervisor
-	 * state loss at this time.
+	 * waking up from nap/sleep/winkle.
 	 */
 	mfspr	r13,SPRN_SRR1
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f
 
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
+	cmpwi	cr3,r13,2
+
+	/*
+	 * Check if last bit of HSPRG0 is set. This indicates whether we are
+	 * waking up from winkle.
 	 */
-	bgt	cr1,8f
 	GET_PACA(r13)
+	clrldi	r5,r13,63
+	clrrdi	r13,r13,1
+	cmpwi	cr4,r5,1
+	mtspr	SPRN_HSPRG0,r13
+
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi   cr2,r0,PNV_THREAD_NAP
+	bgt     cr2,8f				/* Either sleep or Winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@ BEGIN_FTR_SECTION
 
 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss
 
@@ -1382,6 +1394,7 @@ machine_check_handle_early:
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687..05adc8b 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,9 +18,25 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
 
 #undef DEBUG
 
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1	GPR3
+#define _RPR	GPR4
+#define _SPURR	GPR5
+#define _PURR	GPR6
+#define _TSCR	GPR7
+#define _DSCR	GPR8
+#define _AMOR	GPR9
+#define _WORT	GPR10
+#define _WORC	GPR11
+
 /* Idle state entry routines */
 
 #define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
@@ -37,8 +53,7 @@
 
 /*
  * Pass requested state in r3:
- * 	0 - nap
- * 	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  * 	0 - don't check
@@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common)
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-_GLOBAL(power7_enter_nap_mode)
+	/*
+	 * Go to real mode to do the nap, as required by the architecture.
+	 * Also, we need to be in real mode before setting hwthread_state,
+	 * because as soon as we do that, another thread can switch
+	 * the MMU context to the guest.
+	 */
+	LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+	li	r6, MSR_RI
+	andc	r6, r9, r6
+	LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+	mtmsrd	r6, 1		/* clear RI before setting SRR0/1 */
+	mtspr	SPRN_SRR0, r7
+	mtspr	SPRN_SRR1, r5
+	rfid
+
+	.globl	power7_enter_nap_mode
+power7_enter_nap_mode:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're napping */
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr3,r3,PNV_THREAD_SLEEP
+	bge	cr3,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+	bgt	cr3,enter_winkle
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+
+enter_winkle:
+	/*
+	 * Note that all registers, i.e. per-core, per-subcore and per-thread,
+	 * are saved here since any thread in the core might wake up first.
+	 */
+	mfspr	r3,SPRN_SDR1
+	std	r3,_SDR1(r1)
+	mfspr	r3,SPRN_RPR
+	std	r3,_RPR(r1)
+	mfspr	r3,SPRN_SPURR
+	std	r3,_SPURR(r1)
+	mfspr	r3,SPRN_PURR
+	std	r3,_PURR(r1)
+	mfspr	r3,SPRN_TSCR
+	std	r3,_TSCR(r1)
+	mfspr	r3,SPRN_DSCR
+	std	r3,_DSCR(r1)
+	mfspr	r3,SPRN_AMOR
+	std	r3,_AMOR(r1)
+	mfspr	r3,SPRN_WORT
+	std	r3,_WORT(r1)
+	mfspr	r3,SPRN_WORC
+	std	r3,_WORC(r1)
+	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 
 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@ _GLOBAL(power7_idle)
 
 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */
 
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1  - stack pointer
- * r3  - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
+_GLOBAL(power7_winkle)
+	li	r3,PNV_THREAD_WINKLE	/* requested idle state, as nap/sleep do */
+	li	r4,1
+	b	power7_powersave_common
+	/* No return */
 
 #define CHECK_HMI_INTERRUPT						\
 	mfspr	r0,SPRN_SRR1;						\
@@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
 	bl	opal_call_realmode;					\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */
 
+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * Lock bit is set in one of the 2 cases-
+	 * a. In the sleep/winkle enter path, the last thread is executing
+	 * fastsleep workaround code.
+	 * b. In the wake up path, another thread is executing fastsleep
+	 * workaround undo code or resyncing timebase or restoring context
+	 * In either case loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
+	and	r4,r4,r15
+	cmpwi	cr1,r4,0	/* Check if first in subcore */
+
+	/*
+	 * At this stage
+	 * cr1 - 0b0100 if first thread to wakeup in subcore
+	 * cr2 - 0b0100 if first thread to wakeup in core
+	 * cr3-  0b0010 if waking up from sleep or winkle
+	 * cr4 - 0b0100 if waking up from winkle
+	 */
+
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr1,first_thread_in_subcore
+
+	/* Not first thread in subcore to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(r14)		/* re-read the core idle state word */
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop
+	HMT_MEDIUM
+	b	lwarx_loop2
+
+first_thread_in_subcore:
+	/* First thread in subcore to wakeup */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * If waking up from sleep, subcore state is not lost. Hence
+	 * skip subcore state restore
+	 */
+	bne	cr4,subcore_state_restored
+
+	/* Restore per-subcore state */
+	ld      r4,_SDR1(r1)
+	mtspr   SPRN_SDR1,r4
+	ld      r4,_RPR(r1)
+	mtspr   SPRN_RPR,r4
+	ld	r4,_AMOR(r1)
+	mtspr	SPRN_AMOR,r4
+
+subcore_state_restored:
+	/*
+	 * Check if the thread is also the first thread in the core. If not,
+	 * skip to clear_lock.
+	 */
+	bne	cr2,clear_lock
+
+first_thread_in_core:
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
 	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
+	li	r0,OPAL_RESYNC_TIMEBASE
 	bl	opal_call_realmode;
-
 	/* TODO: Check r3 for failure */
 
+	/*
+	 * If waking up from sleep, per core state is not lost, skip to
+	 * clear_lock.
+	 */
+	bne	cr4,clear_lock
+
+	/* Restore per core state */
+	ld	r4,_TSCR(r1)
+	mtspr	SPRN_TSCR,r4
+	ld	r4,_WORC(r1)
+	mtspr	SPRN_WORC,r4
+
+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	/*
+	 * Common to all threads.
+	 *
+	 * If waking up from sleep, hypervisor state is not lost. Hence
+	 * skip hypervisor state restore.
+	 */
+	bne	cr4,hypervisor_state_restored
+
+	/* Waking up from winkle */
+
+	/* Restore per thread state */
+	bl	__restore_cpu_power8
+
+	/* Restore SLB  from PACA */
+	ld	r8,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r8,r8,16
+	.endr
+
+	ld	r4,_SPURR(r1)
+	mtspr	SPRN_SPURR,r4
+	ld	r4,_PURR(r1)
+	mtspr	SPRN_PURR,r4
+	ld	r4,_DSCR(r1)
+	mtspr	SPRN_DSCR,r4
+	ld	r4,_WORT(r1)
+	mtspr	SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+	li	r5,PNV_THREAD_RUNNING
+	stb     r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li      r0,KVM_HWTHREAD_IN_KERNEL
+	stb     r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz     r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi   r0,0
+	beq     6f
+	b       kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	mtspr	SPRN_SRR0,r5
 	rfid
 
+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc..8ec017c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@ void start_secondary(void *unused)
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
-	cpu_callin_map[cpu] = 1;
 
 	if (smp_ops->setup_cpu)
 		smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@ void start_secondary(void *unused)
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 
+	/*
+	 * CPU must be marked active and online before we signal back to the
+	 * master, because the scheduler needs to see the cpu_online and
+	 * cpu_active bits set.
+	 */
+	smp_wmb();
+	cpu_callin_map[cpu] = 1;
+
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_ONLINE);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51..f5769f1 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -172,6 +172,7 @@ config KVM_XICS
 	depends on KVM_BOOK3S_64 && !KVM_MPIC
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
+	default y
 	---help---
 	  Include support for the XICS (eXternal Interrupt Controller
 	  Specification) interrupt controller architecture used on
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b32db4b..888bf46 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b073..a2eb6d3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
 	return (sr_raw & 0x20000000) ? true: false;
 }
 
-static inline bool sr_nx(u32 sr_raw)
-{
-	return (sr_raw & 0x10000000) ? true: false;
-}
-
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 					  struct kvmppc_pte *pte, bool data,
 					  bool iswrite);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d407702..534acb3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970	63
+#include "trace_hv.h"
 
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER	18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
 	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return -EINVAL;
 
-	/* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
-	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
-		host_lpid = mfspr(SPRN_LPID);	/* POWER7 */
-		rsvd_lpid = LPID_RSVD;
-	} else {
-		host_lpid = 0;			/* PPC970 */
-		rsvd_lpid = MAX_LPID_970;
-	}
+	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+	host_lpid = mfspr(SPRN_LPID);
+	rsvd_lpid = LPID_RSVD;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
 
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, msr);
 }
 
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
-				  struct kvm_memory_slot *memslot,
-				  unsigned long psize)
-{
-	unsigned long start;
-	long np, err;
-	struct page *page, *hpage, *pages[1];
-	unsigned long s, pgsize;
-	unsigned long *physp;
-	unsigned int is_io, got, pgorder;
-	struct vm_area_struct *vma;
-	unsigned long pfn, i, npages;
-
-	physp = memslot->arch.slot_phys;
-	if (!physp)
-		return -EINVAL;
-	if (physp[gfn - memslot->base_gfn])
-		return 0;
-
-	is_io = 0;
-	got = 0;
-	page = NULL;
-	pgsize = psize;
-	err = -EINVAL;
-	start = gfn_to_hva_memslot(memslot, gfn);
-
-	/* Instantiate and get the page we want access to */
-	np = get_user_pages_fast(start, 1, 1, pages);
-	if (np != 1) {
-		/* Look up the vma for the page */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, start);
-		if (!vma || vma->vm_start > start ||
-		    start + psize > vma->vm_end ||
-		    !(vma->vm_flags & VM_PFNMAP))
-			goto up_err;
-		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
-		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-		/* check alignment of pfn vs. requested page size */
-		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
-			goto up_err;
-		up_read(&current->mm->mmap_sem);
-
-	} else {
-		page = pages[0];
-		got = KVMPPC_GOT_PAGE;
-
-		/* See if this is a large page */
-		s = PAGE_SIZE;
-		if (PageHuge(page)) {
-			hpage = compound_head(page);
-			s <<= compound_order(hpage);
-			/* Get the whole large page if slot alignment is ok */
-			if (s > psize && slot_is_aligned(memslot, s) &&
-			    !(memslot->userspace_addr & (s - 1))) {
-				start &= ~(s - 1);
-				pgsize = s;
-				get_page(hpage);
-				put_page(page);
-				page = hpage;
-			}
-		}
-		if (s < psize)
-			goto out;
-		pfn = page_to_pfn(page);
-	}
-
-	npages = pgsize >> PAGE_SHIFT;
-	pgorder = __ilog2(npages);
-	physp += (gfn - memslot->base_gfn) & ~(npages - 1);
-	spin_lock(&kvm->arch.slot_phys_lock);
-	for (i = 0; i < npages; ++i) {
-		if (!physp[i]) {
-			physp[i] = ((pfn + i) << PAGE_SHIFT) +
-				got + is_io + pgorder;
-			got = 0;
-		}
-	}
-	spin_unlock(&kvm->arch.slot_phys_lock);
-	err = 0;
-
- out:
-	if (got)
-		put_page(page);
-	return err;
-
- up_err:
-	up_read(&current->mm->mmap_sem);
-	return err;
-}
-
 long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret)
 {
-	unsigned long psize, gpa, gfn;
-	struct kvm_memory_slot *memslot;
 	long ret;
 
-	if (kvm->arch.using_mmu_notifiers)
-		goto do_insert;
-
-	psize = hpte_page_size(pteh, ptel);
-	if (!psize)
-		return H_PARAMETER;
-
-	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
-	/* Find the memslot (if any) for this address */
-	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
-	gfn = gpa >> PAGE_SHIFT;
-	memslot = gfn_to_memslot(kvm, gfn);
-	if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
-		if (!slot_is_aligned(memslot, psize))
-			return H_PARAMETER;
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
-			return H_PARAMETER;
-	}
-
- do_insert:
 	/* Protect linux PTE lookup from page table destruction */
 	rcu_read_lock_sched();	/* this disables preemption too */
 	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			     long pte_index, unsigned long pteh,
-			     unsigned long ptel)
-{
-	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
-					  pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
 							 gva_t eaddr)
 {
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
 
 	/* Storage key permission check for POWER7 */
-	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+	if (data && virtmode) {
 		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
 		if (amrfield & 1)
 			gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 
+	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
 	/* No memslot means it's an emulated MMIO region */
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
 
-	if (!kvm->arch.using_mmu_notifiers)
-		return -EFAULT;		/* should never get here */
-
 	/*
 	 * This should never happen, because of the slot_is_aligned()
 	 * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	mmu_seq = kvm->mmu_notifier_seq;
 	smp_rmb();
 
+	ret = -EFAULT;
 	is_io = 0;
 	pfn = 0;
 	page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		up_read(&current->mm->mmap_sem);
 		if (!pfn)
-			return -EFAULT;
+			goto out_put;
 	} else {
 		page = pages[0];
 		pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	ret = -EFAULT;
 	if (psize > pte_size)
 		goto out_put;
 
 	/* Check WIMG vs. the actual page we're accessing */
 	if (!hpte_cache_flags_ok(r, is_io)) {
 		if (is_io)
-			return -EFAULT;
+			goto out_put;
+
 		/*
 		 * Allow guest to map emulated device memory as
 		 * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		SetPageDirty(page);
 
  out_put:
+	trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
 	if (page) {
 		/*
 		 * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
 		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			if (kvm->arch.using_mmu_notifiers)
-				hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+			hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 			kvmppc_invalidate_hpte(kvm, hptep, i);
 			/* Harvest R and C */
 			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 	return 0;
 }
 
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
 	return 0;
 }
 
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return;
 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 		}
 
 		/* Now check and modify the HPTE */
-		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+			/* unlock and continue */
+			hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 			continue;
+		}
 
 		/* need to make it temporarily absent so C is stable */
 		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	struct page *page, *pages[1];
 	int npages;
 	unsigned long hva, offset;
-	unsigned long pa;
-	unsigned long *physp;
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			goto err;
-		physp += gfn - memslot->base_gfn;
-		pa = *physp;
-		if (!pa) {
-			if (kvmppc_get_guest_page(kvm, gfn, memslot,
-						  PAGE_SIZE) < 0)
-				goto err;
-			pa = *physp;
-		}
-		page = pfn_to_page(pa >> PAGE_SHIFT);
-		get_page(page);
-	} else {
-		hva = gfn_to_hva_memslot(memslot, gfn);
-		npages = get_user_pages_fast(hva, 1, 1, pages);
-		if (npages < 1)
-			goto err;
-		page = pages[0];
-	}
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1)
+		goto err;
+	page = pages[0];
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
 
 	put_page(page);
 
-	if (!dirty || !kvm->arch.using_mmu_notifiers)
+	if (!dirty)
 		return;
 
 	/* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 		hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
 		lbuf = (unsigned long __user *)buf;
 		for (j = 0; j < hdr.n_valid; ++j) {
+			__be64 hpte_v;
+			__be64 hpte_r;
+
 			err = -EFAULT;
-			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+			if (__get_user(hpte_v, lbuf) ||
+			    __get_user(hpte_r, lbuf + 1))
 				goto out;
+			v = be64_to_cpu(hpte_v);
+			r = be64_to_cpu(hpte_r);
 			err = -EINVAL;
 			if (!(v & HPTE_V_VALID))
 				goto out;
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		vcpu->arch.slb_nr = 32;		/* POWER7 */
-	else
-		vcpu->arch.slb_nr = 64;
+	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
 	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e63587d..de4018a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by vcore->stoltb_lock.
+ * The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-	    vc->preempt_tb != TB_NIL) {
-		vc->stolen_tb += mftb() - vc->preempt_tb;
-		vc->preempt_tb = TB_NIL;
+	/*
+	 * We can test vc->runner without taking the vcore lock,
+	 * because only this task ever sets vc->runner to this
+	 * vcpu, and once it is set to this vcpu, only this task
+	 * ever sets it to NULL.
+	 */
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
+		if (vc->preempt_tb != TB_NIL) {
+			vc->stolen_tb += mftb() - vc->preempt_tb;
+			vc->preempt_tb = TB_NIL;
+		}
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
 		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
 		vc->preempt_tb = mftb();
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	if (arch_compat) {
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return -EINVAL;	/* 970 has no compat mode support */
-
 		switch (arch_compat) {
 		case PVR_ARCH_205:
 			/*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
 	u64 p;
+	unsigned long flags;
 
-	/*
-	 * If we are the task running the vcore, then since we hold
-	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-	 * can't be updated, so we don't need the tbacct_lock.
-	 * If the vcore is inactive, it can't become active (since we
-	 * hold the vcore lock), so the vcpu load/put functions won't
-	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-	 */
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	p = vc->stolen_tb;
 	if (vc->vcore_state != VCORE_INACTIVE &&
-	    vc->runner->arch.run_task != current) {
-		spin_lock_irq(&vc->runner->arch.tbacct_lock);
-		p = vc->stolen_tb;
-		if (vc->preempt_tb != TB_NIL)
-			p += now - vc->preempt_tb;
-		spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-	} else {
-		p = vc->stolen_tb;
-	}
+	    vc->preempt_tb != TB_NIL)
+		p += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	return p;
 }
 
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 	}
 }
 
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct kvmppc_vcore *vcore = target->arch.vcore;
+
+	/*
+	 * We expect to have been called by the real mode handler
+	 * (kvmppc_rm_h_confer()) which would have directly returned
+	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+	 * have useful work to do and should not confer) so we don't
+	 * recheck that here.
+	 */
+
+	spin_lock(&vcore->lock);
+	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	    vcore->vcore_state != VCORE_INACTIVE)
+		target = vcore->runner;
+	spin_unlock(&vcore->lock);
+
+	return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+	int yield_count = 0;
+	struct lppaca *lppaca;
+
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	if (lppaca)
+		yield_count = lppaca->yield_count;
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return yield_count;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
+	int yield_count;
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		return RESUME_HOST;
 
 	switch (req) {
-	case H_ENTER:
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
-					      kvmppc_get_gpr(vcpu, 5),
-					      kvmppc_get_gpr(vcpu, 6),
-					      kvmppc_get_gpr(vcpu, 7));
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
-		break;
 	case H_CEDE:
 		break;
 	case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 			ret = H_PARAMETER;
 			break;
 		}
-		kvm_vcpu_yield_to(tvcpu);
+		yield_count = kvmppc_get_gpr(vcpu, 5);
+		if (kvmppc_get_yield_count(tvcpu) != yield_count)
+			break;
+		kvm_arch_vcpu_yield_to(tvcpu);
 		break;
 	case H_REGISTER_VPA:
 		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
+	/* HMI (hypervisor interrupt): host has handled it, so resume guest. */
+	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
 		r = RESUME_GUEST;
 		break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * Accordingly return to Guest or Host.
 	 */
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+				swab32(vcpu->arch.emul_inst) :
+				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
 			r = kvmppc_emulate_debug_inst(run, vcpu);
 		} else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
 	init_waitqueue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
+	vc->preempt_tb = TB_NIL;
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
 
 	/*
 	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
+		trace_kvm_guest_enter(vcpu);
 	}
 
 	/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
+
+	trace_kvmppc_run_core(vc, 0);
+
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		    kvmppc_core_pending_dec(vcpu))
 			kvmppc_core_dequeue_dec(vcpu);
 
+		trace_kvm_guest_exit(vcpu);
+
 		ret = RESUME_GUEST;
 		if (vcpu->arch.trap)
 			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+
+	trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
+	struct kvm_vcpu *vcpu;
+	int do_sleep = 1;
+
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * Check one last time for pending exceptions and ceded state after
+	 * we put ourselves on the wait queue
+	 */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+			do_sleep = 0;
+			break;
+		}
+	}
+
+	if (!do_sleep) {
+		finish_wait(&vc->wq, &wait);
+		return;
+	}
+
 	vc->vcore_state = VCORE_SLEEPING;
+	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
 	finish_wait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_vcore_blocked(vc, 1);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc;
 	struct kvm_vcpu *v, *vn;
 
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		    VCORE_EXIT_COUNT(vc) == 0) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
+			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
 		}
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		wake_up(&v->arch.cpu_run);
 	}
 
+	trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
 	spin_unlock(&vc->lock);
 	return vcpu->arch.ret;
 }
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
-	/* On the first time here, set up HTAB and VRMA or RMA */
+	/* On the first time here, set up HTAB and VRMA */
 	if (!vcpu->kvm->arch.rma_setup_done) {
 		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
+			trace_kvm_hcall_enter(vcpu);
 			r = kvmppc_pseries_do_hcall(vcpu);
+			trace_kvm_hcall_exit(vcpu, r);
 			kvmppc_core_prepare_to_enter(vcpu);
 		} else if (r == RESUME_PAGE_FAULT) {
 			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page;
-	struct kvm_rma_info *ri = vma->vm_file->private_data;
-
-	if (vmf->pgoff >= kvm_rma_pages)
-		return VM_FAULT_SIGBUS;
-
-	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
-	get_page(page);
-	vmf->page = page;
-	return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
-	.fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_ops = &kvm_rma_vm_ops;
-	return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
-	struct kvm_rma_info *ri = filp->private_data;
-
-	kvm_release_rma(ri);
-	return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
-	.mmap           = kvm_rma_mmap,
-	.release	= kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-				      struct kvm_allocate_rma *ret)
-{
-	long fd;
-	struct kvm_rma_info *ri;
-	/*
-	 * Only do this on PPC970 in HV mode
-	 */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EINVAL;
-
-	if (!kvm_rma_pages)
-		return -EINVAL;
-
-	ri = kvm_alloc_rma();
-	if (!ri)
-		return -ENOMEM;
-
-	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
-	if (fd < 0)
-		kvm_release_rma(ri);
-
-	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
-	return fd;
-}
-
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 				     int linux_psize)
 {
@@ -2167,26 +2150,6 @@ out:
 	return r;
 }
 
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
-	unsigned long *physp;
-	unsigned long j, npages, pfn;
-	struct page *page;
-
-	physp = memslot->arch.slot_phys;
-	npages = memslot->npages;
-	if (!physp)
-		return;
-	for (j = 0; j < npages; j++) {
-		if (!(physp[j] & KVMPPC_GOT_PAGE))
-			continue;
-		pfn = physp[j] >> PAGE_SHIFT;
-		page = pfn_to_page(pfn);
-		SetPageDirty(page);
-		put_page(page);
-	}
-}
-
 static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 					struct kvm_memory_slot *dont)
 {
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 		vfree(free->arch.rmap);
 		free->arch.rmap = NULL;
 	}
-	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
-		unpin_slot(free);
-		vfree(free->arch.slot_phys);
-		free->arch.slot_phys = NULL;
-	}
 }
 
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
 	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
 	if (!slot->arch.rmap)
 		return -ENOMEM;
-	slot->arch.slot_phys = NULL;
 
 	return 0;
 }
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
 					struct kvm_memory_slot *memslot,
 					struct kvm_userspace_memory_region *mem)
 {
-	unsigned long *phys;
-
-	/* Allocate a slot_phys array if needed */
-	phys = memslot->arch.slot_phys;
-	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
-		phys = vzalloc(memslot->npages * sizeof(unsigned long));
-		if (!phys)
-			return -ENOMEM;
-		memslot->arch.slot_phys = phys;
-	}
-
 	return 0;
 }
 
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
 	unsigned long lpcr = 0, senc;
-	unsigned long lpcr_mask = 0;
 	unsigned long psize, porder;
-	unsigned long rma_size;
-	unsigned long rmls;
-	unsigned long *physp;
-	unsigned long i, npages;
 	int srcu_idx;
 
 	mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	psize = vma_kernel_pagesize(vma);
 	porder = __ilog2(psize);
 
-	/* Is this one of our preallocated RMAs? */
-	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
-	    hva == vma->vm_start)
-		ri = vma->vm_file->private_data;
-
 	up_read(&current->mm->mmap_sem);
 
-	if (!ri) {
-		/* On POWER7, use VRMA; on PPC970, give up */
-		err = -EPERM;
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			pr_err("KVM: CPU requires an RMO\n");
-			goto out_srcu;
-		}
+	/* We can handle 4k, 64k or 16M pages in the VRMA */
+	err = -EINVAL;
+	if (!(psize == 0x1000 || psize == 0x10000 ||
+	      psize == 0x1000000))
+		goto out_srcu;
 
-		/* We can handle 4k, 64k or 16M pages in the VRMA */
-		err = -EINVAL;
-		if (!(psize == 0x1000 || psize == 0x10000 ||
-		      psize == 0x1000000))
-			goto out_srcu;
+	/* Update VRMASD field in the LPCR */
+	senc = slb_pgsize_encoding(psize);
+	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* the -4 is to account for senc values starting at 0x10 */
+	lpcr = senc << (LPCR_VRMASD_SH - 4);
 
-		/* Update VRMASD field in the LPCR */
-		senc = slb_pgsize_encoding(psize);
-		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		lpcr_mask = LPCR_VRMASD;
-		/* the -4 is to account for senc values starting at 0x10 */
-		lpcr = senc << (LPCR_VRMASD_SH - 4);
+	/* Create HPTEs in the hash page table for the VRMA */
+	kvmppc_map_vrma(vcpu, memslot, porder);
 
-		/* Create HPTEs in the hash page table for the VRMA */
-		kvmppc_map_vrma(vcpu, memslot, porder);
-
-	} else {
-		/* Set up to use an RMO region */
-		rma_size = kvm_rma_pages;
-		if (rma_size > memslot->npages)
-			rma_size = memslot->npages;
-		rma_size <<= PAGE_SHIFT;
-		rmls = lpcr_rmls(rma_size);
-		err = -EINVAL;
-		if ((long)rmls < 0) {
-			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-			goto out_srcu;
-		}
-		atomic_inc(&ri->use_count);
-		kvm->arch.rma = ri;
-
-		/* Update LPCR and RMOR */
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			/* PPC970; insert RMLS value (split field) in HID4 */
-			lpcr_mask = (1ul << HID4_RMLS0_SH) |
-				(3ul << HID4_RMLS2_SH) | HID4_RMOR;
-			lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
-				((rmls & 3) << HID4_RMLS2_SH);
-			/* RMOR is also in HID4 */
-			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
-				<< HID4_RMOR_SH;
-		} else {
-			/* POWER7 */
-			lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
-			lpcr = rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
-		}
-		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
-			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
-		/* Initialize phys addrs of pages in RMO */
-		npages = kvm_rma_pages;
-		porder = __ilog2(npages);
-		physp = memslot->arch.slot_phys;
-		if (physp) {
-			if (npages > memslot->npages)
-				npages = memslot->npages;
-			spin_lock(&kvm->arch.slot_phys_lock);
-			for (i = 0; i < npages; ++i)
-				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
-					porder;
-			spin_unlock(&kvm->arch.slot_phys_lock);
-		}
-	}
-
-	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+	kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
 	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
 	smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
 	       sizeof(kvm->arch.enabled_hcalls));
 
-	kvm->arch.rma = NULL;
-
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970; HID4 is effectively the LPCR */
-		kvm->arch.host_lpid = 0;
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
-		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
-		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
-			((lpid & 0xf) << HID4_LPID5_SH);
-	} else {
-		/* POWER7; init LPCR for virtual RMA mode */
-		kvm->arch.host_lpid = mfspr(SPRN_LPID);
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-		lpcr &= LPCR_PECE | LPCR_LPES;
-		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
-			LPCR_VPM0 | LPCR_VPM1;
-		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		/* On POWER8 turn on online bit to enable PURR/SPURR */
-		if (cpu_has_feature(CPU_FTR_ARCH_207S))
-			lpcr |= LPCR_ONL;
-	}
+	/* Init LPCR for virtual RMA mode */
+	kvm->arch.host_lpid = mfspr(SPRN_LPID);
+	kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+	lpcr &= LPCR_PECE | LPCR_LPES;
+	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+		LPCR_VPM0 | LPCR_VPM1;
+	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* On POWER8 turn on online bit to enable PURR/SPURR */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lpcr |= LPCR_ONL;
 	kvm->arch.lpcr = lpcr;
 
-	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
-	spin_lock_init(&kvm->arch.slot_phys_lock);
-
 	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 	kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
-	if (kvm->arch.rma) {
-		kvm_release_rma(kvm->arch.rma);
-		kvm->arch.rma = NULL;
-	}
 
 	kvmppc_free_hpt(kvm);
 }
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
 
 static int kvmppc_core_check_processor_compat_hv(void)
 {
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_206))
 		return -EIO;
 	return 0;
 }
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 
 	switch (ioctl) {
 
-	case KVM_ALLOCATE_RMA: {
-		struct kvm_allocate_rma rma;
-		struct kvm *kvm = filp->private_data;
-
-		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-			r = -EFAULT;
-		break;
-	}
-
 	case KVM_PPC_ALLOCATE_HTAB: {
 		u32 htab_order;
 
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3f1bb5a..1f083ff 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/sizes.h>
 #include <linux/cma.h>
+#include <linux/bitops.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes.  Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 static struct cma *kvm_cma;
 
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int __init early_parse_rma_size(char *p)
-{
-	unsigned long kvm_rma_size;
-
-	pr_debug("%s(%s)\n", __func__, p);
-	if (!p)
-		return -EINVAL;
-	kvm_rma_size = memparse(p, &p);
-	/*
-	 * Check that the requested size is one supported in hardware
-	 */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return -EINVAL;
-	}
-	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
-	return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
-	struct page *page;
-	struct kvm_rma_info *ri;
-
-	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
-	if (!ri)
-		return NULL;
-	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
-	if (!page)
-		goto err_out;
-	atomic_set(&ri->use_count, 1);
-	ri->base_pfn = page_to_pfn(page);
-	return ri;
-err_out:
-	kfree(ri);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
-		kfree(ri);
-	}
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
 static int __init early_parse_kvm_cma_resv(char *p)
 {
 	pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
 struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	unsigned long align_pages = HPT_ALIGN_PAGES;
-
 	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
-	/* Old CPUs require HPT aligned on a multiple of its size */
-	if (!cpu_has_feature(CPU_FTR_ARCH_206))
-		align_pages = nr_pages;
-	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void)
 	if (selected_size) {
 		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
-		/*
-		 * Old CPUs require HPT aligned on a multiple of its size. So for them
-		 * make the alignment as max size we could request.
-		 */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			align_size = __rounddown_pow_of_two(selected_size);
-		else
-			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
-		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
 			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
 	}
 }
 
 /*
+ * Real-mode H_CONFER implementation.
+ * Mark this thread as conferring, then poll for up to 10us of timebase
+ * while VCORE_EXIT_COUNT(vc) is zero.  Once napping (ceded) + conferring
+ * threads >= VCORE_ENTRY_COUNT(vc), return H_TOO_HARD to pop up to the
+ * virtual-mode implementation; otherwise H_SUCCESS returns to the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+			    unsigned int yield_count)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	int threads_running;
+	int threads_ceded;
+	int threads_conferring;
+	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+	int rv = H_SUCCESS; /* => don't yield */
+
+	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+		threads_running = VCORE_ENTRY_COUNT(vc);
+		threads_ceded = hweight32(vc->napping_threads);
+		threads_conferring = hweight32(vc->conferring_threads);
+		if (threads_ceded + threads_conferring >= threads_running) {
+			rv = H_TOO_HARD; /* => do yield */
+			break;
+		}
+	}
+	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	return rv;
+}
+
+/*
  * When running HV mode KVM we need to block certain operations while KVM VMs
  * exist in the system. We use a counter of VMs to track this.
  *
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be74..36540a9 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
 	std	r3, _CCR(r1)
 
 	/* Save host DSCR */
-BEGIN_FTR_SECTION
 	mfspr	r3, SPRN_DSCR
 	std	r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r5, 0
 	mtspr	SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
 	lbz	r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r7, SPRN_PMC4
 	mfspr	r8, SPRN_PMC5
 	mfspr	r9, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, HSTATE_PMC(r13)
 	stw	r5, HSTATE_PMC + 4(r13)
 	stw	r6, HSTATE_PMC + 8(r13)
 	stw	r7, HSTATE_PMC + 12(r13)
 	stw	r8, HSTATE_PMC + 16(r13)
 	stw	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	stw	r10, HSTATE_PMC + 24(r13)
-	stw	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 31:
 
 	/*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
-#ifdef CONFIG_SMP
-	/*
-	 * On PPC970, if the guest vcpu has an external interrupt pending,
-	 * send ourselves an IPI so as to interrupt the guest once it
-	 * enables interrupts.  (It must have interrupts disabled,
-	 * otherwise we would already have delivered the interrupt.)
-	 *
-	 * XXX If this is a UP build, smp_send_reschedule is not available,
-	 * so the interrupt will be delayed until the next time the vcpu
-	 * enters the guest with interrupts enabled.
-	 */
-BEGIN_FTR_SECTION
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r0, VCPU_PENDING_EXC(r4)
-	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and.	r0, r0, r7
-	beq	32f
-	lhz	r3, PACAPACAINDEX(r13)
-	bl	smp_send_reschedule
-	nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
 	/* Jump to partition switch code */
 	bl	kvmppc_hv_entry_trampoline
 	nop
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e..60081bd 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -138,8 +138,5 @@ out:
 
 long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		return kvmppc_realmode_mc_power7(vcpu);
-
-	return 0;
+	return kvmppc_realmode_mc_power7(vcpu);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54..510bdfb 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
 	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
 	 * we can use tlbiel as long as we mark all other physical
 	 * cores as potentially having stale TLB entries for this lpid.
-	 * If we're not using MMU notifiers, we never take pages away
-	 * from the guest, so we can use tlbiel if requested.
 	 * Otherwise, don't use tlbiel.
 	 */
 	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
 		global = 0;
-	else if (kvm->arch.using_mmu_notifiers)
-		global = 1;
 	else
-		global = !(flags & H_LOCAL);
+		global = 1;
 
 	if (!global) {
 		/* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long *physp, pte_size;
+	unsigned long pte_size;
 	unsigned long is_io;
 	unsigned long *rmap;
 	pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	is_io = ~0ul;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
-		/* PPC970 can't do emulated MMIO */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return H_PARAMETER;
 		/* Emulated MMIO - mark this with key=31 */
 		pteh |= HPTE_V_ABSENT;
 		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	slot_fn = gfn - memslot->base_gfn;
 	rmap = &memslot->arch.rmap[slot_fn];
 
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			return H_PARAMETER;
-		physp += slot_fn;
-		if (realmode)
-			physp = real_vmalloc_addr(physp);
-		pa = *physp;
-		if (!pa)
-			return H_TOO_HARD;
-		is_io = pa & (HPTE_R_I | HPTE_R_W);
-		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-		pa &= PAGE_MASK;
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
+
+	/* Look up the Linux PTE for the backing page */
+	pte_size = psize;
+	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+	if (pte_present(pte) && !pte_numa(pte)) {
+		if (writing && !pte_write(pte))
+			/* make the actual HPTE be read-only */
+			ptel = hpte_make_readonly(ptel);
+		is_io = hpte_cache_bits(pte_val(pte));
+		pa = pte_pfn(pte) << PAGE_SHIFT;
+		pa |= hva & (pte_size - 1);
 		pa |= gpa & ~PAGE_MASK;
-	} else {
-		/* Translate to host virtual address */
-		hva = __gfn_to_hva_memslot(memslot, gfn);
-
-		/* Look up the Linux PTE for the backing page */
-		pte_size = psize;
-		pte = lookup_linux_pte_and_update(pgdir, hva, writing,
-						  &pte_size);
-		if (pte_present(pte) && !pte_numa(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
-			pa |= hva & (pte_size - 1);
-			pa |= gpa & ~PAGE_MASK;
-		}
 	}
 
 	if (pte_size < psize)
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			rmap = real_vmalloc_addr(rmap);
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
-		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(kvm, mmu_seq)) {
+		if (mmu_notifier_retry(kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
 	return old == 0;
 }
 
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
-			  long npages, int global, bool need_sync)
-{
-	long i;
-
-	if (global) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbie %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbie %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
-	} else {
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbiel %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbiel %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("ptesync" : : : "memory");
-	}
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
 	long i;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970 tlbie instruction is a bit different */
-		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
-		return;
-	}
 	if (global) {
 		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 			cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		rev->guest_rpte = r;
 		note_hpte_modification(kvm, rev);
 	}
-	r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
 
 	/* Update HPTE */
 	if (v & HPTE_V_VALID) {
-		rb = compute_tlbie_rb(v, r, pte_index);
-		hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
-		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
-		 * If the host has this page as readonly but the guest
-		 * wants to make it read/write, reduce the permissions.
-		 * Checking the host permissions involves finding the
-		 * memslot and then the Linux PTE for the page.
+		 * If the page is valid, don't let it transition from
+		 * readonly to writable.  If it should be writable, we'll
+		 * take a trap and let the page fault code sort it out.
 		 */
-		if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
-			unsigned long psize, gfn, hva;
-			struct kvm_memory_slot *memslot;
-			pgd_t *pgdir = vcpu->arch.pgdir;
-			pte_t pte;
-
-			psize = hpte_page_size(v, r);
-			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
-			memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-			if (memslot) {
-				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte_and_update(pgdir, hva,
-								  1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
-			}
+		pte = be64_to_cpu(hpte[1]);
+		r = (pte & ~mask) | bits;
+		if (hpte_is_writable(r) && !hpte_is_writable(pte))
+			r = hpte_make_readonly(r);
+		/* If the PTE is changing, invalidate it first */
+		if (r != pte) {
+			rb = compute_tlbie_rb(v, r, pte_index);
+			hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+					      HPTE_V_ABSENT);
+			do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+				  true);
+			hpte[1] = cpu_to_be64(r);
 		}
 	}
-	hpte[1] = cpu_to_be64(r);
-	eieio();
-	hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	asm volatile("ptesync" : : : "memory");
 	return H_SUCCESS;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6..7b066f6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	 * state update in HW (ie bus transactions) so we can handle them
 	 * separately here as well.
 	 */
-	if (resend)
+	if (resend) {
 		icp->rm_action |= XICS_RM_CHECK_RESEND;
+		icp->rm_resend_icp = icp;
+	}
 }
 
 
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	 * nothing needs to be done as there can be no XISR to
 	 * reject.
 	 *
+	 * ICP state: Check_IPI
+	 *
 	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
+	 * an interrupt, and thus need to possibly reject it.
 	 *
-	 * ICP state: Check_IPI
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	}
 
 	/* Pass resends to virtual mode */
-	if (resend)
+	if (resend) {
 		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+		this_icp->rm_resend_icp = icp;
+	}
 
 	return check_too_hard(xics, this_icp);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 65c105b..10554df 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r6, HSTATE_PMC + 12(r13)
 	lwz	r8, HSTATE_PMC + 16(r13)
 	lwz	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	lwz	r10, HSTATE_PMC + 24(r13)
-	lwz	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r4
 	mtspr	SPRN_PMC3, r5
 	mtspr	SPRN_PMC4, r6
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, HSTATE_MMCR(r13)
 	ld	r4, HSTATE_MMCR + 8(r13)
 	ld	r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
 	beq	11f
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* RFI into the highmem handler, or branch to interrupt handler */
 	mfmsr	r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	beqa	0x500			/* external interrupt (PPC970) */
 	beq	cr1, 13f		/* machine check */
 	RFI
 
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
 	slbia
 	ptesync
 
-BEGIN_FTR_SECTION
-	b	30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 host -> guest partition switch code.
+	 * POWER7/POWER8 host -> guest partition switch code.
 	 * We don't have to lock against concurrent tlbies,
 	 * but we do have to coordinate across hardware threads.
 	 */
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	r3,512		/* 1 microsecond */
 	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	blt	hdec_soon
-	b	31f
-
-	/*
-	 * PPC970 host -> guest partition switch code.
-	 * We have to lock against concurrent tlbies,
-	 * using native_tlbie_lock to lock against host tlbies
-	 * and kvm->arch.tlbie_lock to lock against guest tlbies.
-	 * We also have to invalidate the TLB since its
-	 * entries aren't tagged with the LPID.
-	 */
-30:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* first take native_tlbie_lock */
-	.section ".toc","aw"
-toc_tlbie_lock:
-	.tc	native_tlbie_lock[TC],native_tlbie_lock
-	.previous
-	ld	r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
 
-	/* Take the guest's tlbie_lock */
-	addi	r3,r9,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-	ld	r6,KVM_SDR1(r9)
-	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
-
-	/* Set up HID4 with the guest's LPID etc. */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-
-	/* drop the guest's tlbie_lock */
-	li	r0,0
-	stw	r0,0(r3)
-
-	/* Check if HDEC expires soon */
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,10
-	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	blt	hdec_soon
-
-	/* Enable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,1
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-31:
 	/* Do we have a guest vcpu to run? */
 	cmpdi	r4, 0
 	beq	kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
 	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 
-BEGIN_FTR_SECTION
 	/* Save purr/spurr */
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
 	ld	r8,VCPU_SPURR(r4)
 	mtspr	SPRN_PURR,r7
 	mtspr	SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
 	ld	r6,VCPU_DABR(r4)
 	mtspr	SPRN_DABRX,r5
 	mtspr	SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
 	isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
 	lwz	r7, VCPU_PMC + 12(r4)
 	lwz	r8, VCPU_PMC + 16(r4)
 	lwz	r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-	lwz	r10, VCPU_PMC + 24(r4)
-	lwz	r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r5
 	mtspr	SPRN_PMC3, r6
 	mtspr	SPRN_PMC4, r7
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, VCPU_MMCR(r4)
 	ld	r5, VCPU_MMCR + 8(r4)
 	ld	r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ld	r30, VCPU_GPR(R30)(r4)
 	ld	r31, VCPU_GPR(R31)(r4)
 
-BEGIN_FTR_SECTION
 	/* Switch DSCR to guest value */
 	ld	r5, VCPU_DSCR(r4)
 	mtspr	SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
-	/* Skip next section on POWER7 or PPC970 */
+	/* Skip next section on POWER7 */
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_DAR, r5
 	mtspr	SPRN_DSISR, r6
 
-BEGIN_FTR_SECTION
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
 	ld	r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_AMR,r5
 	mtspr	SPRN_UAMOR,r6
 	mtspr	SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Restore state of CTRL run bit; assume 1 on entry */
 	lwz	r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
 	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
 	cmpdi	cr1, r0, 0
 	andi.	r8, r11, MSR_EE
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_LPCR
 	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
 	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
 	mtspr	SPRN_LPCR, r8
 	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
-	/* Save HEIR (HV emulation assist reg) in last_inst
+	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
 	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	bne	11f
 	mfspr	r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11:	stw	r3,VCPU_LAST_INST(r9)
+11:	stw	r3,VCPU_HEIR(r9)
 
 	/* these are volatile across C function calls */
 	mfctr	r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r3, VCPU_CTR(r9)
 	stw	r4, VCPU_XER(r9)
 
-BEGIN_FTR_SECTION
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
-	b	ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	bne+	ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	mfdsisr	r7
 	std	r6, VCPU_DAR(r9)
 	stw	r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
 	/* don't overwrite fault_dar/fault_dsisr if HDSI */
 	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r6, VCPU_FAULT_DAR(r9)
 	stw	r7, VCPU_FAULT_DSISR(r9)
 
@@ -1236,7 +1103,6 @@ mc_cont:
 	/*
 	 * Save the guest PURR/SPURR
 	 */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
 	ld	r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
 	add	r4,r4,r6
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 8:
 
 	/* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_AMR
 	mfspr	r6,SPRN_UAMOR
 	std	r5,VCPU_AMR(r9)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_DSCR
 	ld	r7, HSTATE_DSCR(r13)
 	std	r8, VCPU_DSCR(r9)
 	mtspr	SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Save non-volatile GPRs */
 	std	r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r7, 0
 	mtspr	SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	beq	21f			/* if no VPA, save PMU stuff anyway */
 	lbz	r7, LPPACA_PMCINUSE(r8)
@@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mfspr	r6, SPRN_PMC4
 	mfspr	r7, SPRN_PMC5
 	mfspr	r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, VCPU_PMC(r9)
 	stw	r4, VCPU_PMC + 4(r9)
 	stw	r5, VCPU_PMC + 8(r9)
@@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r7, VCPU_PMC + 16(r9)
 	stw	r8, VCPU_PMC + 20(r9)
 BEGIN_FTR_SECTION
-	stw	r10, VCPU_PMC + 24(r9)
-	stw	r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	ptesync
 
 hdec_soon:			/* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
-	b	32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 guest -> host partition switch code.
+	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
 	 * have to coordinate the hardware threads.
 	 */
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
-	b	33f
-
-	/*
-	 * PPC970 guest -> host partition switch code.
-	 * We have to lock against concurrent tlbies, and
-	 * we have to flush the whole TLB.
-	 */
-32:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-	addi	r3,r4,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r7,KVM_HOST_LPCR(r4)	/* use kvm->arch.host_lpcr for HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop guest tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
-
-	/* take native_tlbie_lock */
-	ld	r3,toc_tlbie_lock@toc(2)
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r6,KVM_HOST_SDR1(r4)
-	mtspr	SPRN_SDR1,r6		/* switch to host page table */
-
-	/* Set up host HID4 value */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	lis	r8,0x7fff		/* MAX_INT@h */
-	mtspr	SPRN_HDEC,r8
-
-	/* Disable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,0
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
 
 	/* load host SLB entries */
-33:	ld	r8,PACA_SLBSHADOWPTR(r13)
+	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
 	li	r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
 	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
-	.long	0		/* 0xe4 */
+	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
 	.long	0		/* 0xf0 */
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
 	stw	r0,VCPU_TRAP(r3)
 	li	r0,H_SUCCESS
 	std	r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
-	b	kvm_cede_exit	/* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 
 	/*
 	 * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r3,VCPU_FPRS
 	bl	store_fp_state
 #ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r4,VCPU_FPRS
 	bl	load_fp_state
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035..bd6ab16 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
-	return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
 bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cf2eb16..f573839 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return r;
 }
 
-static inline int get_fpr_index(int i)
-{
-	return i * TS_FPRWIDTH;
-}
-
 /* Give up external provider (FPU, Altivec, VSX) */
 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb780..807351f 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 	 * there might be a previously-rejected interrupt needing
 	 * to be resent.
 	 *
+	 * ICP state: Check_IPI
+	 *
 	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
+	 * an interrupt, and thus need to possibly reject it.
 	 *
-	 * ICP state: Check_IPI
+	 * ICP State: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 	if (icp->rm_action & XICS_RM_KICK_VCPU)
 		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
 	if (icp->rm_action & XICS_RM_CHECK_RESEND)
-		icp_check_resend(xics, icp);
+		icp_check_resend(xics, icp->rm_resend_icp);
 	if (icp->rm_action & XICS_RM_REJECT)
 		icp_deliver_irq(xics, icp, icp->rm_reject);
 	if (icp->rm_action & XICS_RM_NOTIFY_EOI)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a..73f0f27 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -74,6 +74,7 @@ struct kvmppc_icp {
 #define XICS_RM_NOTIFY_EOI	0x8
 	u32 rm_action;
 	struct kvm_vcpu *rm_kick_target;
+	struct kvmppc_icp *rm_resend_icp;
 	u32  rm_reject;
 	u32  rm_eoied_irq;
 
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 1609584..b29ce75 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry)
 
 	sid = __this_cpu_inc_return(pcpu_last_used_sid);
 	if (sid < NUM_TIDS) {
-		__this_cpu_write(pcpu_sids)entry[sid], entry);
+		__this_cpu_write(pcpu_sids.entry[sid], entry);
 		entry->val = sid;
 		entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
 		ret = sid;
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
 	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_booke_vcpu_load(vcpu, cpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c1f8f53..c45eaab 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = hv_enabled;
-		/* PPC970 requires an RMA */
-		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
-			r = 2;
+		r = 0;
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		if (hv_enabled)
-			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-		else
-			r = 0;
+		r = hv_enabled;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 		r = 1;
 #else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 0000000..f647ce0
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf..7ec534d 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
 		__entry->pfn, __entry->flags)
 );
 
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+	{BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+	{BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+/* BOOKE_IRQPRIO_* to name rows for __print_symbolic(); SPE/e500mc rows are config-dependent */
+#define kvm_trace_symbol_irqprio \
+	kvm_trace_symbol_irqprio_spe \
+	kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+	{BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+	{BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+	{BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+	{BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+	{BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+	{BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+	{BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+	{BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+	{BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+	{BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+	{BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+	{BOOKE_IRQPRIO_FIT, "FIT"}, \
+	{BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+	{BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+	{BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+	{BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+	{BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"}
 TRACE_EVENT(kvm_booke_queue_irqprio,
 	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
 	TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
 		__entry->pending	= vcpu->arch.pending_exceptions;
 	),
 
-	TP_printk("vcpu=%x prio=%x pending=%lx",
-		__entry->cpu_nr, __entry->priority, __entry->pending)
+	TP_printk("vcpu=%x prio=%s pending=%lx",
+		__entry->cpu_nr,
+		__print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+		__entry->pending)
 );
 
 #endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 0000000..33d9daf
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall \
+	{H_REMOVE,			"H_REMOVE"}, \
+	{H_ENTER,			"H_ENTER"}, \
+	{H_READ,			"H_READ"}, \
+	{H_CLEAR_MOD,			"H_CLEAR_MOD"}, \
+	{H_CLEAR_REF,			"H_CLEAR_REF"}, \
+	{H_PROTECT,			"H_PROTECT"}, \
+	{H_GET_TCE,			"H_GET_TCE"}, \
+	{H_PUT_TCE,			"H_PUT_TCE"}, \
+	{H_SET_SPRG0,			"H_SET_SPRG0"}, \
+	{H_SET_DABR,			"H_SET_DABR"}, \
+	{H_PAGE_INIT,			"H_PAGE_INIT"}, \
+	{H_SET_ASR,			"H_SET_ASR"}, \
+	{H_ASR_ON,			"H_ASR_ON"}, \
+	{H_ASR_OFF,			"H_ASR_OFF"}, \
+	{H_LOGICAL_CI_LOAD,		"H_LOGICAL_CI_LOAD"}, \
+	{H_LOGICAL_CI_STORE,		"H_LOGICAL_CI_STORE"}, \
+	{H_LOGICAL_CACHE_LOAD,		"H_LOGICAL_CACHE_LOAD"}, \
+	{H_LOGICAL_CACHE_STORE,		"H_LOGICAL_CACHE_STORE"}, \
+	{H_LOGICAL_ICBI,		"H_LOGICAL_ICBI"}, \
+	{H_LOGICAL_DCBF,		"H_LOGICAL_DCBF"}, \
+	{H_GET_TERM_CHAR,		"H_GET_TERM_CHAR"}, \
+	{H_PUT_TERM_CHAR,		"H_PUT_TERM_CHAR"}, \
+	{H_REAL_TO_LOGICAL,		"H_REAL_TO_LOGICAL"}, \
+	{H_HYPERVISOR_DATA,		"H_HYPERVISOR_DATA"}, \
+	{H_EOI,				"H_EOI"}, \
+	{H_CPPR,			"H_CPPR"}, \
+	{H_IPI,				"H_IPI"}, \
+	{H_IPOLL,			"H_IPOLL"}, \
+	{H_XIRR,			"H_XIRR"}, \
+	{H_PERFMON,			"H_PERFMON"}, \
+	{H_MIGRATE_DMA,			"H_MIGRATE_DMA"}, \
+	{H_REGISTER_VPA,		"H_REGISTER_VPA"}, \
+	{H_CEDE,			"H_CEDE"}, \
+	{H_CONFER,			"H_CONFER"}, \
+	{H_PROD,			"H_PROD"}, \
+	{H_GET_PPP,			"H_GET_PPP"}, \
+	{H_SET_PPP,			"H_SET_PPP"}, \
+	{H_PURR,			"H_PURR"}, \
+	{H_PIC,				"H_PIC"}, \
+	{H_REG_CRQ,			"H_REG_CRQ"}, \
+	{H_FREE_CRQ,			"H_FREE_CRQ"}, \
+	{H_VIO_SIGNAL,			"H_VIO_SIGNAL"}, \
+	{H_SEND_CRQ,			"H_SEND_CRQ"}, \
+	{H_COPY_RDMA,			"H_COPY_RDMA"}, \
+	{H_REGISTER_LOGICAL_LAN,	"H_REGISTER_LOGICAL_LAN"}, \
+	{H_FREE_LOGICAL_LAN,		"H_FREE_LOGICAL_LAN"}, \
+	{H_ADD_LOGICAL_LAN_BUFFER,	"H_ADD_LOGICAL_LAN_BUFFER"}, \
+	{H_SEND_LOGICAL_LAN,		"H_SEND_LOGICAL_LAN"}, \
+	{H_BULK_REMOVE,			"H_BULK_REMOVE"}, \
+	{H_MULTICAST_CTRL,		"H_MULTICAST_CTRL"}, \
+	{H_SET_XDABR,			"H_SET_XDABR"}, \
+	{H_STUFF_TCE,			"H_STUFF_TCE"}, \
+	{H_PUT_TCE_INDIRECT,		"H_PUT_TCE_INDIRECT"}, \
+	{H_CHANGE_LOGICAL_LAN_MAC,	"H_CHANGE_LOGICAL_LAN_MAC"}, \
+	{H_VTERM_PARTNER_INFO,		"H_VTERM_PARTNER_INFO"}, \
+	{H_REGISTER_VTERM,		"H_REGISTER_VTERM"}, \
+	{H_FREE_VTERM,			"H_FREE_VTERM"}, \
+	{H_RESET_EVENTS,		"H_RESET_EVENTS"}, \
+	{H_ALLOC_RESOURCE,		"H_ALLOC_RESOURCE"}, \
+	{H_FREE_RESOURCE,		"H_FREE_RESOURCE"}, \
+	{H_MODIFY_QP,			"H_MODIFY_QP"}, \
+	{H_QUERY_QP,			"H_QUERY_QP"}, \
+	{H_REREGISTER_PMR,		"H_REREGISTER_PMR"}, \
+	{H_REGISTER_SMR,		"H_REGISTER_SMR"}, \
+	{H_QUERY_MR,			"H_QUERY_MR"}, \
+	{H_QUERY_MW,			"H_QUERY_MW"}, \
+	{H_QUERY_HCA,			"H_QUERY_HCA"}, \
+	{H_QUERY_PORT,			"H_QUERY_PORT"}, \
+	{H_MODIFY_PORT,			"H_MODIFY_PORT"}, \
+	{H_DEFINE_AQP1,			"H_DEFINE_AQP1"}, \
+	{H_GET_TRACE_BUFFER,		"H_GET_TRACE_BUFFER"}, \
+	{H_DEFINE_AQP0,			"H_DEFINE_AQP0"}, \
+	{H_RESIZE_MR,			"H_RESIZE_MR"}, \
+	{H_ATTACH_MCQP,			"H_ATTACH_MCQP"}, \
+	{H_DETACH_MCQP,			"H_DETACH_MCQP"}, \
+	{H_CREATE_RPT,			"H_CREATE_RPT"}, \
+	{H_REMOVE_RPT,			"H_REMOVE_RPT"}, \
+	{H_REGISTER_RPAGES,		"H_REGISTER_RPAGES"}, \
+	{H_DISABLE_AND_GETC,		"H_DISABLE_AND_GETC"}, \
+	{H_ERROR_DATA,			"H_ERROR_DATA"}, \
+	{H_GET_HCA_INFO,		"H_GET_HCA_INFO"}, \
+	{H_GET_PERF_COUNT,		"H_GET_PERF_COUNT"}, \
+	{H_MANAGE_TRACE,		"H_MANAGE_TRACE"}, \
+	{H_FREE_LOGICAL_LAN_BUFFER,	"H_FREE_LOGICAL_LAN_BUFFER"}, \
+	{H_QUERY_INT_STATE,		"H_QUERY_INT_STATE"}, \
+	{H_POLL_PENDING,		"H_POLL_PENDING"}, \
+	{H_ILLAN_ATTRIBUTES,		"H_ILLAN_ATTRIBUTES"}, \
+	{H_MODIFY_HEA_QP,		"H_MODIFY_HEA_QP"}, \
+	{H_QUERY_HEA_QP,		"H_QUERY_HEA_QP"}, \
+	{H_QUERY_HEA,			"H_QUERY_HEA"}, \
+	{H_QUERY_HEA_PORT,		"H_QUERY_HEA_PORT"}, \
+	{H_MODIFY_HEA_PORT,		"H_MODIFY_HEA_PORT"}, \
+	{H_REG_BCMC,			"H_REG_BCMC"}, \
+	{H_DEREG_BCMC,			"H_DEREG_BCMC"}, \
+	{H_REGISTER_HEA_RPAGES,		"H_REGISTER_HEA_RPAGES"}, \
+	{H_DISABLE_AND_GET_HEA,		"H_DISABLE_AND_GET_HEA"}, \
+	{H_GET_HEA_INFO,		"H_GET_HEA_INFO"}, \
+	{H_ALLOC_HEA_RESOURCE,		"H_ALLOC_HEA_RESOURCE"}, \
+	{H_ADD_CONN,			"H_ADD_CONN"}, \
+	{H_DEL_CONN,			"H_DEL_CONN"}, \
+	{H_JOIN,			"H_JOIN"}, \
+	{H_VASI_STATE,			"H_VASI_STATE"}, \
+	{H_ENABLE_CRQ,			"H_ENABLE_CRQ"}, \
+	{H_GET_EM_PARMS,		"H_GET_EM_PARMS"}, \
+	{H_SET_MPP,			"H_SET_MPP"}, \
+	{H_GET_MPP,			"H_GET_MPP"}, \
+	{H_HOME_NODE_ASSOCIATIVITY,	"H_HOME_NODE_ASSOCIATIVITY"}, \
+	{H_BEST_ENERGY,			"H_BEST_ENERGY"}, \
+	{H_XIRR_X,			"H_XIRR_X"}, \
+	{H_RANDOM,			"H_RANDOM"}, \
+	{H_COP,				"H_COP"}, \
+	{H_GET_MPP_X,			"H_GET_MPP_X"}, \
+	{H_SET_MODE,			"H_SET_MODE"}, \
+	{H_RTAS,			"H_RTAS"}
+
+#define kvm_trace_symbol_kvmret \
+	{RESUME_GUEST,			"RESUME_GUEST"}, \
+	{RESUME_GUEST_NV,		"RESUME_GUEST_NV"}, \
+	{RESUME_HOST,			"RESUME_HOST"}, \
+	{RESUME_HOST_NV,		"RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+	{H_SUCCESS,			"H_SUCCESS"}, \
+	{H_BUSY,			"H_BUSY"}, \
+	{H_CLOSED,			"H_CLOSED"}, \
+	{H_NOT_AVAILABLE,		"H_NOT_AVAILABLE"}, \
+	{H_CONSTRAINED,			"H_CONSTRAINED"}, \
+	{H_PARTIAL,			"H_PARTIAL"}, \
+	{H_IN_PROGRESS,			"H_IN_PROGRESS"}, \
+	{H_PAGE_REGISTERED,		"H_PAGE_REGISTERED"}, \
+	{H_PARTIAL_STORE,		"H_PARTIAL_STORE"}, \
+	{H_PENDING,			"H_PENDING"}, \
+	{H_CONTINUE,			"H_CONTINUE"}, \
+	{H_LONG_BUSY_START_RANGE,	"H_LONG_BUSY_START_RANGE"}, \
+	{H_LONG_BUSY_ORDER_1_MSEC,	"H_LONG_BUSY_ORDER_1_MSEC"}, \
+	{H_LONG_BUSY_ORDER_10_MSEC,	"H_LONG_BUSY_ORDER_10_MSEC"}, \
+	{H_LONG_BUSY_ORDER_100_MSEC,	"H_LONG_BUSY_ORDER_100_MSEC"}, \
+	{H_LONG_BUSY_ORDER_1_SEC,	"H_LONG_BUSY_ORDER_1_SEC"}, \
+	{H_LONG_BUSY_ORDER_10_SEC,	"H_LONG_BUSY_ORDER_10_SEC"}, \
+	{H_LONG_BUSY_ORDER_100_SEC,	"H_LONG_BUSY_ORDER_100_SEC"}, \
+	{H_LONG_BUSY_END_RANGE,		"H_LONG_BUSY_END_RANGE"}, \
+	{H_TOO_HARD,			"H_TOO_HARD"}, \
+	{H_HARDWARE,			"H_HARDWARE"}, \
+	{H_FUNCTION,			"H_FUNCTION"}, \
+	{H_PRIVILEGE,			"H_PRIVILEGE"}, \
+	{H_PARAMETER,			"H_PARAMETER"}, \
+	{H_BAD_MODE,			"H_BAD_MODE"}, \
+	{H_PTEG_FULL,			"H_PTEG_FULL"}, \
+	{H_NOT_FOUND,			"H_NOT_FOUND"}, \
+	{H_RESERVED_DABR,		"H_RESERVED_DABR"}, \
+	{H_NO_MEM,			"H_NO_MEM"}, \
+	{H_AUTHORITY,			"H_AUTHORITY"}, \
+	{H_PERMISSION,			"H_PERMISSION"}, \
+	{H_DROPPED,			"H_DROPPED"}, \
+	{H_SOURCE_PARM,			"H_SOURCE_PARM"}, \
+	{H_DEST_PARM,			"H_DEST_PARM"}, \
+	{H_REMOTE_PARM,			"H_REMOTE_PARM"}, \
+	{H_RESOURCE,			"H_RESOURCE"}, \
+	{H_ADAPTER_PARM,		"H_ADAPTER_PARM"}, \
+	{H_RH_PARM,			"H_RH_PARM"}, \
+	{H_RCQ_PARM,			"H_RCQ_PARM"}, \
+	{H_SCQ_PARM,			"H_SCQ_PARM"}, \
+	{H_EQ_PARM,			"H_EQ_PARM"}, \
+	{H_RT_PARM,			"H_RT_PARM"}, \
+	{H_ST_PARM,			"H_ST_PARM"}, \
+	{H_SIGT_PARM,			"H_SIGT_PARM"}, \
+	{H_TOKEN_PARM,			"H_TOKEN_PARM"}, \
+	{H_MLENGTH_PARM,		"H_MLENGTH_PARM"}, \
+	{H_MEM_PARM,			"H_MEM_PARM"}, \
+	{H_MEM_ACCESS_PARM,		"H_MEM_ACCESS_PARM"}, \
+	{H_ATTR_PARM,			"H_ATTR_PARM"}, \
+	{H_PORT_PARM,			"H_PORT_PARM"}, \
+	{H_MCG_PARM,			"H_MCG_PARM"}, \
+	{H_VL_PARM,			"H_VL_PARM"}, \
+	{H_TSIZE_PARM,			"H_TSIZE_PARM"}, \
+	{H_TRACE_PARM,			"H_TRACE_PARM"}, \
+	{H_MASK_PARM,			"H_MASK_PARM"}, \
+	{H_MCG_FULL,			"H_MCG_FULL"}, \
+	{H_ALIAS_EXIST,			"H_ALIAS_EXIST"}, \
+	{H_P_COUNTER,			"H_P_COUNTER"}, \
+	{H_TABLE_FULL,			"H_TABLE_FULL"}, \
+	{H_ALT_TABLE,			"H_ALT_TABLE"}, \
+	{H_MR_CONDITION,		"H_MR_CONDITION"}, \
+	{H_NOT_ENOUGH_RESOURCES,	"H_NOT_ENOUGH_RESOURCES"}, \
+	{H_R_STATE,			"H_R_STATE"}, \
+	{H_RESCINDED,			"H_RESCINDED"}, \
+	{H_P2,				"H_P2"}, \
+	{H_P3,				"H_P3"}, \
+	{H_P4,				"H_P4"}, \
+	{H_P5,				"H_P5"}, \
+	{H_P6,				"H_P6"}, \
+	{H_P7,				"H_P7"}, \
+	{H_P8,				"H_P8"}, \
+	{H_P9,				"H_P9"}, \
+	{H_TOO_BIG,			"H_TOO_BIG"}, \
+	{H_OVERLAP,			"H_OVERLAP"}, \
+	{H_INTERRUPT,			"H_INTERRUPT"}, \
+	{H_BAD_DATA,			"H_BAD_DATA"}, \
+	{H_NOT_ACTIVE,			"H_NOT_ACTIVE"}, \
+	{H_SG_LIST,			"H_SG_LIST"}, \
+	{H_OP_MODE,			"H_OP_MODE"}, \
+	{H_COP_HW,			"H_COP_HW"}, \
+	{H_UNSUPPORTED_FLAG_START,	"H_UNSUPPORTED_FLAG_START"}, \
+	{H_UNSUPPORTED_FLAG_END,	"H_UNSUPPORTED_FLAG_END"}, \
+	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
+	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	pc)
+		__field(unsigned long,  pending_exceptions)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu->vcpu_id;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->ceded		= vcpu->arch.ceded;
+		__entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+			__entry->vcpu_id,
+			__entry->pc,
+			__entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		trap)
+		__field(unsigned long,	pc)
+		__field(unsigned long,	msr)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu->vcpu_id;
+		__entry->trap	 = vcpu->arch.trap;
+		__entry->ceded	 = vcpu->arch.ceded;
+		__entry->pc	 = kvmppc_get_pc(vcpu);
+		__entry->msr	 = vcpu->arch.shregs.msr;
+	),
+
+	TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+		__entry->vcpu_id,
+		__print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+		__entry->pc, __entry->msr, __entry->ceded
+	)
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+		 struct kvm_memory_slot *memslot, unsigned long ea,
+		 unsigned long dsisr),
+	/* hptep[0], hptep[1] and hptep[2] are all read (as hpte_v, hpte_r, gpte_r) */
+	TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(unsigned long,	gpte_r)
+		__field(unsigned long,	ea)
+		__field(u64,		base_gfn)
+		__field(u32,		slot_flags)
+		__field(u32,		dsisr)
+	),
+	/* memslot may be NULL: it is then recorded as base_gfn -1UL, slot_flags 0 */
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	  = hptep[0];
+		__entry->hpte_r	  = hptep[1];
+		__entry->gpte_r	  = hptep[2];
+		__entry->ea	  = ea;
+		__entry->dsisr	  = dsisr;
+		__entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+		__entry->slot_flags = memslot ? memslot->flags : 0;
+	),
+	/* Output order: hpte=v:r guest=gpte_r ea=ea,dsisr slot=base_gfn,slot_flags */
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+		   __entry->ea, __entry->dsisr,
+		   __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+	TP_ARGS(vcpu, hptep, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(long,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	= hptep[0];
+		__entry->hpte_r	= hptep[1];
+		__entry->ret = ret;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	req)
+		__field(unsigned long,	gpr4)
+		__field(unsigned long,	gpr5)
+		__field(unsigned long,	gpr6)
+		__field(unsigned long,	gpr7)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->req   = kvmppc_get_gpr(vcpu, 3);
+		__entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+		__entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+		__entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+		__entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+	),
+
+	TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+		   __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+	/* Logs ret (printed via the RESUME_* table) and the value of GPR3 at exit */
+	TP_ARGS(vcpu, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	ret)
+		__field(unsigned long,	hcall_rc)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->ret	  = ret;
+		__entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+	),
+	/* If ret has RESUME_FLAG_HOST set, H_TOO_HARD is printed in place of hcall_rc */
+	TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+		   __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+					H_TOO_HARD : __entry->hcall_rc,
+					kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+	/* Logs vc->runner's vcpu id and vc->n_runnable; where: 0 = "Enter", else "Exit" */
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+	/* NOTE(review): vc->runner is dereferenced unchecked; presumably always set here */
+	TP_fast_assign(
+		__entry->runner_vcpu	= vc->runner->vcpu_id;
+		__entry->n_runnable	= vc->n_runnable;
+		__entry->where		= where;
+		__entry->tgid		= current->tgid;
+	),
+	/* key=value output, same layout as the kvmppc_vcore_blocked event */
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		    __entry->where ? "Exit" : "Enter",
+		    __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu = vc->runner->vcpu_id;
+		__entry->n_runnable  = vc->n_runnable;
+		__entry->where       = where;
+		__entry->tgid	     = current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		   __entry->where ? "Exit" : "Enter",
+		   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(pid_t,		tgid)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->tgid	  = current->tgid;
+	),
+
+	TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+	TP_ARGS(vcpu, run),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		exit)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->exit     = run->exit_reason;
+		__entry->ret      = vcpu->arch.ret;
+	),
+
+	TP_printk("VCPU %d: exit=%d, ret=%d",
+			__entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd..810507c 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,36 +3,13 @@
 #define _TRACE_KVM_PR_H
 
 #include <linux/tracepoint.h>
+#include "trace_book3s.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace_pr
 
-#define kvm_trace_symbol_exit \
-	{0x100, "SYSTEM_RESET"}, \
-	{0x200, "MACHINE_CHECK"}, \
-	{0x300, "DATA_STORAGE"}, \
-	{0x380, "DATA_SEGMENT"}, \
-	{0x400, "INST_STORAGE"}, \
-	{0x480, "INST_SEGMENT"}, \
-	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
-	{0x502, "EXTERNAL_HV"}, \
-	{0x600, "ALIGNMENT"}, \
-	{0x700, "PROGRAM"}, \
-	{0x800, "FP_UNAVAIL"}, \
-	{0x900, "DECREMENTER"}, \
-	{0x980, "HV_DECREMENTER"}, \
-	{0xc00, "SYSCALL"}, \
-	{0xd00, "TRACE"}, \
-	{0xe00, "H_DATA_STORAGE"}, \
-	{0xe20, "H_INST_STORAGE"}, \
-	{0xe40, "H_EMUL_ASSIST"}, \
-	{0xf00, "PERFMON"}, \
-	{0xf20, "ALTIVEC"}, \
-	{0xf40, "VSX"}
-
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
 	TP_ARGS(r, vcpu),
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba3408..f162d0b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev,			\
 	}							\
 	ret = sprintf(buf, _fmt, _expr);			\
 e_free:								\
-	kfree(page);						\
+	kmem_cache_free(hv_page_cache, page);			\
 	return ret;						\
 }								\
 static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@ static bool is_physical_domain(int domain)
 		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
 }
 
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
 static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 					 u16 lpar, u64 *res,
 					 bool success_expected)
 {
-	unsigned long ret = -ENOMEM;
+	unsigned long ret;
 
 	/*
 	 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
 
-	request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!request_buffer)
-		goto out;
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
 
-	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!result_buffer)
-		goto out_free_request_buffer;
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
 
 	*request_buffer = (struct reqb) {
 		.buf = {
@@ -278,15 +279,14 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 				domain, offset, ix, lpar, ret, ret,
 				result_buffer->buf.detailed_rc,
 				result_buffer->buf.failing_request_ix);
-		goto out_free_result_buffer;
+		goto out;
 	}
 
 	*res = be64_to_cpu(result_buffer->result);
 
-out_free_result_buffer:
-	kfree(result_buffer);
-out_free_request_buffer:
-	kfree(request_buffer);
 out:
+	/* Balance both get_cpu_var() calls so preemption is re-enabled on return */
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be..54eca8b 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@ opal_tracepoint_return:
 	blr
 #endif
 
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness. Writes r2, r11, r12 and LR.
+ *
+ * r13 - paca pointer
+ * r1  - stack pointer (caller's LR is stored at PPC_LR_STKOFF(r1))
+ * r0  - opal token
+ */
+_GLOBAL(opal_call_realmode)
+	mflr	r12				/* r12 = caller's return address */
+	std	r12,PPC_LR_STKOFF(r1)		/* keep it on the stack across the call */
+	ld	r2,PACATOC(r13)			/* r2 = PACATOC field of the paca */
+	/* Set opal return address */
+	LOAD_REG_ADDR(r12,return_from_opal_call)
+	mtlr	r12				/* LR = return_from_opal_call */
+
+	mfmsr	r12				/* start from the current MSR */
+#ifdef __LITTLE_ENDIAN__
+	/* Handle endian-ness */
+	li	r11,MSR_LE
+	andc	r12,r12,r11			/* MSR image with MSR_LE cleared */
+#endif
+	mtspr	SPRN_HSRR1,r12			/* MSR that hrfid will install */
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)			/* presumably the OPAL entry point - confirm */
+	ld	r2,0(r11)			/* presumably the OPAL base - confirm */
+	mtspr	SPRN_HSRR0,r12			/* address that hrfid will jump to */
+	hrfid
+
+return_from_opal_call:				/* NOTE(review): r2 is not reloaded here */
+#ifdef __LITTLE_ENDIAN__
+	FIXUP_ENDIAN
+#endif
+	ld	r12,PPC_LR_STKOFF(r1)		/* recover the caller's return address */
+	mtlr	r12
+	blr
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read,			OPAL_SENSOR_READ);
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
 OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write,			OPAL_WRITE_TPO);
 OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
 OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
 OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de..f10b9ec 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define pr_fmt(fmt)	"opal: " fmt
 
+#include <linux/printk.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -625,6 +626,39 @@ static int opal_sysfs_init(void)
 	return 0;
 }
 
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+				       bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+	const __be64 *syms;
+	unsigned int size;
+	struct device_node *fw;
+	int rc;
+
+	fw = of_find_node_by_path("/ibm,opal/firmware");
+	if (!fw)
+		return;
+	syms = of_get_property(fw, "symbol-map", &size);
+	if (!syms || size != 2 * sizeof(__be64))
+		return;
+
+	/* Setup attributes */
+	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
 static void __init opal_dump_region_init(void)
 {
 	void *addr;
@@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node)
 			of_platform_device_create(np, NULL, NULL);
 }
 
+static void opal_i2c_create_devs(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+		of_platform_device_create(np, NULL, NULL);
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -679,6 +721,9 @@ static int __init opal_init(void)
 		of_node_put(consoles);
 	}
 
+	/* Create i2c platform devices */
+	opal_i2c_create_devs();
+
 	/* Find all OPAL interrupts and request them */
 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
 	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@ static int __init opal_init(void)
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
+		/* Export symbol map to userspace */
+		opal_export_symmap();
 		/* Setup dump region interface */
 		opal_dump_region_init();
 		/* Setup error log interface */
@@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read);
 EXPORT_SYMBOL_GPL(opal_rtc_write);
 EXPORT_SYMBOL_GPL(opal_tpo_read);
 EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d1..604c48e 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
 }
 #endif
 
+extern u32 pnv_get_supported_cpuidle_states(void);
+
 extern void pnv_lpc_init(void);
 
 bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e..b700a32 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
 
 #include "powernv.h"
+#include "subcore.h"
 
 static void __init pnv_setup_arch(void)
 {
@@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void)
 }
 #endif /* CONFIG_PPC_POWERNV_RTAS */
 
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+	int cpu;
+	int rc;
+
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot. Get these reg values of current cpu and use the
+	 * same across all cpus.
+	 */
+	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t hid0_val = mfspr(SPRN_HID0);
+	uint64_t hid1_val = mfspr(SPRN_HID1);
+	uint64_t hid4_val = mfspr(SPRN_HID4);
+	uint64_t hid5_val = mfspr(SPRN_HID5);
+	uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+	for_each_possible_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+		/*
+		 * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+		 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+		 * with 63rd bit set, so that when a thread wakes up at 0x100 we
+		 * can use this bit to distinguish between fastsleep and
+		 * deep winkle.
+		 */
+		hsprg0_val |= 1;
+
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+		if (rc != 0)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+		if (rc != 0)
+			return rc;
+
+		/* HIDs are per core registers */
+		if (cpu_thread_in_core(cpu) == 0) {
+
+			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	u32 *core_idle_state;
+
+	/*
+	 * core_idle_state - First 8 bits track the idle state of each thread
+	 * of the core. The 8th bit is the lock bit. Initially all thread bits
+	 * are set. They are cleared when the thread enters deep idle state
+	 * like sleep and winkle. Initially the lock bit is cleared.
+	 * The lock bit has 2 purposes
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].core_idle_state_ptr = core_idle_state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << j;
+		}
+	}
+
+	update_subcore_sibling_mask();
+
+	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+		pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+	struct device_node *power_mgt;
+	int dt_idle_states;
+	const __be32 *idle_state_flags;
+	u32 len_flags, flags;
+	int i;
+
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return 0;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return 0;
+	}
+
+	idle_state_flags = of_get_property(power_mgt,
+			"ibm,cpu-idle-state-flags", &len_flags);
+	if (!idle_state_flags) {
+		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+		return 0;
+	}
+
+	dt_idle_states = len_flags / sizeof(u32);
+
+	for (i = 0; i < dt_idle_states; i++) {
+		flags = be32_to_cpu(idle_state_flags[i]);
+		supported_cpuidle_states |= flags;
+	}
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	}
+	pnv_alloc_idle_core_states();
+	return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
 static int __init pnv_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f66..fc34025 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void)
 {
 	unsigned int cpu;
 	unsigned long srr1;
+	u32 idle_states;
 
 	/* Standard hot unplug procedure */
 	local_irq_disable();
@@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void)
 	generic_set_cpu_dead(cpu);
 	smp_wmb();
 
+	idle_states = pnv_get_supported_cpuidle_states();
 	/* We don't want to take decrementer interrupts while we are offline,
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
+
 		ppc64_runlatch_off();
-		srr1 = power7_nap(1);
+
+		if (idle_states & OPAL_PM_WINKLE_ENABLED)
+			srr1 = power7_winkle();
+		else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+				(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+			srr1 = power7_sleep();
+		else
+			srr1 = power7_nap(1);
+
 		ppc64_runlatch_on();
 
 		/*
@@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	/*
+	 * Starting with POWER8, the subcore logic relies on all threads of a
+	 * core being booted so that they can participate in split mode
+	 * switches. So on those machines we ignore the smt_enabled_at_boot
+	 * setting (smt-enabled on the kernel command line).
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 1;
+
+	return smp_generic_cpu_bootable(nr);
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass	= smp_muxed_ipi_message_pass,
 	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
 	.probe		= xics_smp_probe,
 	.kick_cpu	= pnv_smp_kick_cpu,
 	.setup_cpu	= pnv_smp_setup_cpu,
-	.cpu_bootable	= smp_generic_cpu_bootable,
+	.cpu_bootable	= pnv_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= pnv_smp_cpu_disable,
 	.cpu_die	= generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b..f60f80a 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
 	mb();
 }
 
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 idle_states = pnv_get_supported_cpuidle_states();
+
+	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+		/* OPAL call to patch slw with the new HID0 value */
+		u64 cpu_pir = hard_smp_processor_id();
+
+		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+	}
+}
+
 static void unsplit_core(void)
 {
 	u64 hid0, mask;
@@ -179,6 +191,7 @@ static void unsplit_core(void)
 	hid0 = mfspr(SPRN_HID0);
 	hid0 &= ~HID0_POWER8_DYNLPARDIS;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	while (mfspr(SPRN_HID0) & mask)
 		cpu_relax();
@@ -215,6 +228,7 @@ static void split_core(int new_mode)
 	hid0  = mfspr(SPRN_HID0);
 	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	/* Wait for it to happen */
 	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
 	return true;
 }
 
+void update_subcore_sibling_mask(void)
+{
+	int cpu;
+	/*
+	 * sibling mask for the first cpu. Left shift this by required bits
+	 * to get sibling mask for the rest of the cpus.
+	 */
+	int sibling_mask_first_cpu =  (1 << threads_per_subcore) - 1;
+
+	for_each_possible_cpu(cpu) {
+		int tid = cpu_thread_in_core(cpu);
+		int offset = (tid / threads_per_subcore) * threads_per_subcore;
+		int mask = sibling_mask_first_cpu << offset;
+
+		paca[cpu].subcore_sibling_mask = mask;
+
+	}
+}
+
 static int cpu_update_split_mode(void *data)
 {
 	int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
 		/* Make the new mode public */
 		subcores_per_core = new_mode;
 		threads_per_subcore = threads_per_core / subcores_per_core;
+		update_subcore_sibling_mask();
 
 		/* Make sure the new mode is written before we exit */
 		mb();
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc9..84e02ae 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
 #define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
 
 #ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
 void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911..9cba74d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
 #define ICPT_PARTEXEC	0x38
 #define ICPT_IOINST	0x40
 	__u8	icptcode;		/* 0x0050 */
-	__u8	reserved51;		/* 0x0051 */
+	__u8	icptstatus;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
 	__u8	reserved54[2];		/* 0x0054 */
 	__u16	ipa;			/* 0x0056 */
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
 	u32 instruction_sigp_sense_running;
 	u32 instruction_sigp_external_call;
 	u32 instruction_sigp_emergency;
+	u32 instruction_sigp_cond_emergency;
+	u32 instruction_sigp_start;
 	u32 instruction_sigp_stop;
+	u32 instruction_sigp_stop_store_status;
+	u32 instruction_sigp_store_status;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
+	u32 instruction_sigp_init_cpu_reset;
+	u32 instruction_sigp_cpu_reset;
+	u32 instruction_sigp_unknown;
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
@@ -288,6 +295,79 @@ struct kvm_vcpu_stat {
 #define PGM_PER				0x80
 #define PGM_CRYPTO_OPERATION		0x119
 
+/* irq types in order of priority */
+enum irq_types {
+	IRQ_PEND_MCHK_EX = 0,
+	IRQ_PEND_SVC,
+	IRQ_PEND_PROG,
+	IRQ_PEND_MCHK_REP,
+	IRQ_PEND_EXT_IRQ_KEY,
+	IRQ_PEND_EXT_MALFUNC,
+	IRQ_PEND_EXT_EMERGENCY,
+	IRQ_PEND_EXT_EXTERNAL,
+	IRQ_PEND_EXT_CLOCK_COMP,
+	IRQ_PEND_EXT_CPU_TIMER,
+	IRQ_PEND_EXT_TIMING,
+	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_HOST,
+	IRQ_PEND_PFAULT_INIT,
+	IRQ_PEND_PFAULT_DONE,
+	IRQ_PEND_VIRTIO,
+	IRQ_PEND_IO_ISC_0,
+	IRQ_PEND_IO_ISC_1,
+	IRQ_PEND_IO_ISC_2,
+	IRQ_PEND_IO_ISC_3,
+	IRQ_PEND_IO_ISC_4,
+	IRQ_PEND_IO_ISC_5,
+	IRQ_PEND_IO_ISC_6,
+	IRQ_PEND_IO_ISC_7,
+	IRQ_PEND_SIGP_STOP,
+	IRQ_PEND_RESTART,
+	IRQ_PEND_SET_PREFIX,
+	IRQ_PEND_COUNT
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+		       (1UL << MCHK_EXTD_BIT) | \
+		       (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+			   (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			   (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			   (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+			   (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			   (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			   (1UL << IRQ_PEND_EXT_TIMING)     | \
+			   (1UL << IRQ_PEND_EXT_HOST)       | \
+			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_VIRTIO)         | \
+			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
+			   (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+			  (1UL << IRQ_PEND_IO_ISC_1) | \
+			  (1UL << IRQ_PEND_IO_ISC_2) | \
+			  (1UL << IRQ_PEND_IO_ISC_3) | \
+			  (1UL << IRQ_PEND_IO_ISC_4) | \
+			  (1UL << IRQ_PEND_IO_ISC_5) | \
+			  (1UL << IRQ_PEND_IO_ISC_6) | \
+			  (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+			    (1UL << IRQ_PEND_MCHK_EX))
+
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
@@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info {
 #define ACTION_STORE_ON_STOP		(1<<0)
 #define ACTION_STOP_ON_STOP		(1<<1)
 
+struct kvm_s390_irq_payload {
+	struct kvm_s390_io_info io;
+	struct kvm_s390_ext_info ext;
+	struct kvm_s390_pgm_info pgm;
+	struct kvm_s390_emerg_info emerg;
+	struct kvm_s390_extcall_info extcall;
+	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_mchk_info mchk;
+};
+
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
-	struct list_head list;
-	atomic_t active;
 	struct kvm_s390_float_interrupt *float_int;
 	wait_queue_head_t *wq;
 	atomic_t *cpuflags;
 	unsigned int action_bits;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_irq_payload irq;
+	unsigned long pending_irqs;
 };
 
 struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@ struct kvm_arch{
 	int user_cpu_state_ctrl;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
+	int ipte_lock_count;
+	struct mutex ipte_mutex;
 	spinlock_t start_stop_lock;
 	struct kvm_s390_crypto crypto;
 };
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index e510b94..3009c2ba 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 4957611..fad4ae2 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -10,6 +10,7 @@
 #define SIGP_RESTART		      6
 #define SIGP_STOP_AND_STORE_STATUS    9
 #define SIGP_INITIAL_CPU_RESET	     11
+#define SIGP_CPU_RESET		     12
 #define SIGP_SET_PREFIX		     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	     18
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index ca38139..437e611 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -249,7 +249,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
 	struct group_info *group_info;
 	int retval;
 
-	if (!capable(CAP_SETGID))
+	if (!may_setgroups())
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1..8b9ccf0 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -207,8 +207,6 @@ union raddress {
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.sie_block->eca & 1)
 		return ic->kh != 0;
-	return ipte_lock_count != 0;
+	return vcpu->kvm->arch.ipte_lock_count != 0;
 }
 
 static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count++;
-	if (ipte_lock_count > 1)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count++;
+	if (vcpu->kvm->arch.ipte_lock_count > 1)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
 		while (old.k) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = *ic;
+			barrier();
 		}
 		new = old;
 		new.k = 1;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count--;
-	if (ipte_lock_count)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count--;
+	if (vcpu->kvm->arch.ipte_lock_count)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
+		new = old;
 		new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
 		while (old.kg) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = *ic;
+			barrier();
 		}
 		new = old;
 		new.k = 1;
@@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = *ic;
+		barrier();
+		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eaf4629..81c77ab 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = {
 	[0xeb] = kvm_s390_handle_eb,
 };
 
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+	/* Use the length of the EXECUTE instruction if necessary */
+	if (sie_block->icptstatus & 1) {
+		ilc = (sie_block->icptstatus >> 4) & 0x6;
+		if (!ilc)
+			ilc = 4;
+	}
+	sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+}
+
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
 	switch (vcpu->arch.sie_block->icptcode) {
@@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
 	u16 eic = vcpu->arch.sie_block->eic;
-	struct kvm_s390_interrupt irq;
+	struct kvm_s390_irq irq;
 	psw_t newpsw;
 	int rc;
 
@@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 		if (kvm_s390_si_ext_call_pending(vcpu))
 			return 0;
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
-		irq.parm = vcpu->arch.sie_block->extcpuaddr;
+		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
  */
 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	unsigned long srcaddr, dstaddr;
 	int reg1, reg2, rc;
 
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 	if (rc != 0)
 		return rc;
 
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 
 	return 0;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a398384..f00f31e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
@@ -27,8 +28,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
-
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
 
 static int is_ioint(u64 type)
 {
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask = pending_local_irqs(vcpu);
+
+	if (psw_extint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (psw_mchk_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_MCHK_MASK;
+
+	return active_mask;
+}
+
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+		return;
+	if (psw_extint_disabled(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+		return;
+	if (psw_mchk_disabled(vcpu))
+		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+	set_intercept_indicators_ext(vcpu);
+	set_intercept_indicators_mchk(vcpu);
+}
+
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
-	case KVM_S390_INT_CLOCK_COMP:
-	case KVM_S390_INT_CPU_TIMER:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
 		else
 			vcpu->arch.sie_block->lctl |= LCTL_CR0;
 		break;
-	case KVM_S390_SIGP_STOP:
-		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-		break;
 	case KVM_S390_MCHK:
 		if (psw_mchk_disabled(vcpu))
 			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
 	}
 }
 
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
-			      struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+			   (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_ext_info ext;
+	int rc;
+
+	spin_lock(&li->lock);
+	ext = li->irq.ext;
+	clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	li->irq.ext.ext_params2 = 0;
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+		   0, ext.ext_params2);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_INIT,
+					 0, ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info mchk;
+	int rc;
+
+	spin_lock(&li->lock);
+	mchk = li->irq.mchk;
+	/*
+	 * If there was an exigent machine check pending, then any repressible
+	 * machine checks that might have been pending are indicated along
+	 * with it, so always clear both bits
+	 */
+	clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+	memset(&li->irq.mchk, 0, sizeof(mchk));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk.mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk.cr14, mchk.mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk.mcic,
+			   (u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk.fixed_logout, sizeof(mchk.fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	vcpu->stat.deliver_restart_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	rc  = write_guest_lc(vcpu,
+			     offsetof(struct _lowcore, restart_old_psw),
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+	vcpu->stat.deliver_stop_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
+					 0, 0);
+
+	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+	clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
+	return 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info prefix;
+
+	spin_lock(&li->lock);
+	prefix = li->irq.prefix;
+	li->irq.prefix.address = 0;
+	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
+	vcpu->stat.deliver_prefix_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_SIGP_SET_PREFIX,
+					 prefix.address, 0);
+
+	kvm_s390_set_prefix(vcpu, prefix.address);
+	return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+	int cpu_addr;
+
+	spin_lock(&li->lock);
+	cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	clear_bit(cpu_addr, li->sigp_emerg_pending);
+	if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	vcpu->stat.deliver_emergency_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+					 cpu_addr, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info extcall;
+	int rc;
+
+	spin_lock(&li->lock);
+	extcall = li->irq.extcall;
+	li->irq.extcall.code = 0;
+	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	vcpu->stat.deliver_external_call++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_EXTERNAL_CALL,
+					 extcall.code, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_pgm_info pgm_info;
 	int rc = 0;
 	u16 ilc = get_ilc(vcpu);
 
-	switch (pgm_info->code & ~PGM_PER) {
+	spin_lock(&li->lock);
+	pgm_info = li->irq.pgm;
+	clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	memset(&li->irq.pgm, 0, sizeof(pgm_info));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+		   pgm_info.code, ilc);
+	vcpu->stat.deliver_program_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+					 pgm_info.code, 0);
+
+	switch (pgm_info.code & ~PGM_PER) {
 	case PGM_AFX_TRANSLATION:
 	case PGM_ASX_TRANSLATION:
 	case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
 	case PGM_SPACE_SWITCH:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
 		break;
 	case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_ASTE_SEQUENCE:
 	case PGM_ASTE_VALIDITY:
 	case PGM_EXTENDED_AUTHORITY:
-		rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
 				  (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
 	case PGM_REGION_SECOND_TRANS:
 	case PGM_REGION_THIRD_TRANS:
 	case PGM_SEGMENT_TRANSLATION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
-		rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
 				   (u8 *)__LC_OP_ACCESS_ID);
 		break;
 	case PGM_MONITOR:
-		rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
-				  (u64 *)__LC_MON_CLASS_NR);
-		rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+				  (u16 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
 				   (u64 *)__LC_MON_CODE);
 		break;
 	case PGM_DATA:
-		rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
 				  (u32 *)__LC_DATA_EXC_CODE);
 		break;
 	case PGM_PROTECTION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	}
 
-	if (pgm_info->code & PGM_PER) {
-		rc |= put_guest_lc(vcpu, pgm_info->per_code,
+	if (pgm_info.code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info.per_code,
 				   (u8 *) __LC_PER_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+		rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
 				   (u8 *)__LC_PER_ATMID);
-		rc |= put_guest_lc(vcpu, pgm_info->per_address,
+		rc |= put_guest_lc(vcpu, pgm_info.per_address,
 				   (u64 *) __LC_PER_ADDRESS);
-		rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
 				   (u8 *) __LC_PER_ACCESS_ID);
 	}
 
 	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
-	rc |= put_guest_lc(vcpu, pgm_info->code,
+	rc |= put_guest_lc(vcpu, pgm_info.code,
 			   (u16 *)__LC_PGM_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
 			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
 
-	return rc;
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
+					  struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+		   inti->ext.ext_params);
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-				   struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
 {
-	const unsigned short table[] = { 2, 4, 4, 6 };
-	int rc = 0;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_DONE, 0,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
+					 struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
+		   inti->ext.ext_params, inti->ext.ext_params2);
+	vcpu->stat.deliver_virtio_interrupt++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+				     struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+	vcpu->stat.deliver_io_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 ((__u32)inti->io.subchannel_id << 16) |
+						inti->io.subchannel_nr,
+					 ((__u64)inti->io.io_int_parm << 32) |
+						inti->io.io_int_word);
+
+	rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+			   (u16 *)__LC_SUBCHANNEL_ID);
+	rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+			   (u16 *)__LC_SUBCHANNEL_NR);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+			   (u32 *)__LC_IO_INT_PARM);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+			   (u32 *)__LC_IO_INT_WORD);
+	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_mchk_info *mchk = &inti->mchk;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk->mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk->cr14, mchk->mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk->mcic,
+			(u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+			(u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk->fixed_logout, sizeof(mchk->fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);	/* delivers one local irq type */
+
+static const deliver_irq_t deliver_irq_funcs[] = {	/* indexed by IRQ_PEND_* bit */
+	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_PROG]           = __deliver_prog,
+	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+	[IRQ_PEND_RESTART]        = __deliver_restart,
+	[IRQ_PEND_SIGP_STOP]      = __deliver_stop,
+	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+};
+
+static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
 
 	switch (inti->type) {
-	case KVM_S390_INT_EMERGENCY:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
-		vcpu->stat.deliver_emergency_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->emerg.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->emerg.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		break;
-	case KVM_S390_INT_EXTERNAL_CALL:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
-		vcpu->stat.deliver_external_call++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->extcall.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->extcall.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
-		break;
-	case KVM_S390_INT_CLOCK_COMP:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc = deliver_ckc_interrupt(vcpu);
-		break;
-	case KVM_S390_INT_CPU_TIMER:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-				   (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
 	case KVM_S390_INT_SERVICE:
-		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-			   inti->ext.ext_params);
-		vcpu->stat.deliver_service_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
-	case KVM_S390_INT_PFAULT_INIT:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
-				   (u16 *) __LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *) __LC_EXT_PARAMS2);
+		rc = __deliver_service(vcpu, inti);
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
+		rc = __deliver_pfault_done(vcpu, inti);
 		break;
 	case KVM_S390_INT_VIRTIO:
-		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-			   inti->ext.ext_params, inti->ext.ext_params2);
-		vcpu->stat.deliver_virtio_interrupt++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
-		break;
-	case KVM_S390_SIGP_STOP:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-		vcpu->stat.deliver_stop_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		__set_intercept_indicator(vcpu, inti);
-		break;
-
-	case KVM_S390_SIGP_SET_PREFIX:
-		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
-			   inti->prefix.address);
-		vcpu->stat.deliver_prefix_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->prefix.address, 0);
-		kvm_s390_set_prefix(vcpu, inti->prefix.address);
-		break;
-
-	case KVM_S390_RESTART:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
-		vcpu->stat.deliver_restart_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		rc  = write_guest_lc(vcpu,
-				     offsetof(struct _lowcore, restart_old_psw),
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_virtio(vcpu, inti);
 		break;
-	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
-			   inti->pgm.code,
-			   table[vcpu->arch.sie_block->ipa >> 14]);
-		vcpu->stat.deliver_program_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->pgm.code, 0);
-		rc = __deliver_prog_irq(vcpu, &inti->pgm);
-		break;
-
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-			   inti->mchk.mcic);
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->mchk.cr14,
-						 inti->mchk.mcic);
-		rc  = kvm_s390_vcpu_store_status(vcpu,
-						 KVM_S390_STORE_STATUS_PREFIXED);
-		rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
-		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc = __deliver_mchk_floating(vcpu, inti);
 		break;
-
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-	{
-		__u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
-			inti->io.subchannel_nr;
-		__u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
-			inti->io.io_int_word;
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-		vcpu->stat.deliver_io_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 param0, param1);
-		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-				   (u16 *)__LC_SUBCHANNEL_ID);
-		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-				   (u16 *)__LC_SUBCHANNEL_NR);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-				   (u32 *)__LC_IO_INT_PARM);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-				   (u32 *)__LC_IO_INT_WORD);
-		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_io(vcpu, inti);
 		break;
-	}
 	default:
 		BUG();
 	}
@@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 	return rc;
 }
 
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
-{
-	int rc;
-
-	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw,
-			    sizeof(psw_t));
-	return rc;
-}
-
 /* Check whether SIGP interpretation facility has an external call pending */
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *inti;
-	int rc = 0;
+	int rc;
 
-	if (atomic_read(&li->active)) {
-		spin_lock(&li->lock);
-		list_for_each_entry(inti, &li->list, list)
-			if (__interrupt_is_deliverable(vcpu, inti)) {
-				rc = 1;
-				break;
-			}
-		spin_unlock(&li->lock);
-	}
+	rc = !!deliverable_local_irqs(vcpu);
 
 	if ((!rc) && atomic_read(&fi->active)) {
 		spin_lock(&fi->lock);
@@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 
 	spin_lock(&li->lock);
-	list_for_each_entry_safe(inti, n, &li->list, list) {
-		list_del(&inti->list);
-		kfree(inti);
-	}
-	atomic_set(&li->active, 0);
+	li->pending_irqs = 0;
+	bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	memset(&li->irq, 0, sizeof(li->irq));
 	spin_unlock(&li->lock);
 
 	/* clear pending external calls set by sigp interpretation facility */
-	atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
 	atomic_clear_mask(SIGP_CTRL_C,
 			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
@@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *n, *inti = NULL;
+	deliver_irq_t func;
 	int deliver;
 	int rc = 0;
+	unsigned long irq_type;
+	unsigned long deliverable_irqs;
 
 	__reset_intercept_indicators(vcpu);
-	if (atomic_read(&li->active)) {
-		do {
-			deliver = 0;
-			spin_lock(&li->lock);
-			list_for_each_entry_safe(inti, n, &li->list, list) {
-				if (__interrupt_is_deliverable(vcpu, inti)) {
-					list_del(&inti->list);
-					deliver = 1;
-					break;
-				}
-				__set_intercept_indicator(vcpu, inti);
-			}
-			if (list_empty(&li->list))
-				atomic_set(&li->active, 0);
-			spin_unlock(&li->lock);
-			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
-				kfree(inti);
-			}
-		} while (!rc && deliver);
-	}
 
-	if (!rc && kvm_cpu_has_pending_timer(vcpu))
-		rc = deliver_ckc_interrupt(vcpu);
+	/* pending ckc conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	if (kvm_cpu_has_pending_timer(vcpu))
+		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+	do {
+		deliverable_irqs = deliverable_local_irqs(vcpu);
+		/* bits are in the order of interrupt priority */
+		irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+		if (irq_type == IRQ_PEND_COUNT)
+			break;
+		func = deliver_irq_funcs[irq_type];
+		if (!func) {
+			WARN_ON_ONCE(func == NULL);
+			clear_bit(irq_type, &li->pending_irqs);
+			continue;
+		}
+		rc = func(vcpu);
+	} while (!rc && irq_type != IRQ_PEND_COUNT);
+
+	set_intercept_indicators_local(vcpu);
 
 	if (!rc && atomic_read(&fi->active)) {
 		do {
@@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 				atomic_set(&fi->active, 0);
 			spin_unlock(&fi->lock);
 			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
+				rc = __deliver_floating_interrupt(vcpu, inti);
 				kfree(inti);
 			}
 		} while (!rc && deliver);
@@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+	li->irq.pgm = irq->u.pgm;
+	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	return 0;
+}
 
-	inti->type = KVM_S390_PROGRAM_INT;
-	inti->pgm.code = code;
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	/* zero every pgm field but the code: __inject_prog() copies them all */
+	struct kvm_s390_irq irq = { .u.pgm.code = code };
 
 	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+				   0, 1);
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	__inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
 	return 0;
@@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
 			     struct kvm_s390_pgm_info *pgm_info)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
-
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+	struct kvm_s390_irq irq;
+	int rc;
 
 	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
 		   pgm_info->code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   pgm_info->code, 0, 1);
-
-	inti->type = KVM_S390_PROGRAM_INT;
-	memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	irq.u.pgm = *pgm_info;
+	rc = __inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
+	return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+				   irq->u.ext.ext_params,
+				   irq->u.ext.ext_params2, 2);
+
+	li->irq.ext = irq->u.ext;
+	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
+int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+
+	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+		   irq->u.extcall.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+				   irq->u.extcall.code, 0, 2);
+
+	*extcall = irq->u.extcall;
+	set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	/* log the prefix being injected, not the one stored previously */
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+		   irq->u.prefix.address);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+				   irq->u.prefix.address, 0, 2);
+
+	li->irq.prefix = irq->u.prefix;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	/* the code indexes sigp_emerg_pending, which holds KVM_MAX_VCPUS bits */
+	if (irq->u.emerg.code >= KVM_MAX_VCPUS)
+		return -EINVAL;
+	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", irq->u.emerg.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+				   irq->u.emerg.code, 0, 2);
+	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
+	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+	/* log the mcic being injected, not what is already accumulated */
+	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+		   irq->u.mchk.mcic);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+				   irq->u.mchk.mcic, 2);
+
+	/*
+	 * Because repressible machine checks can be indicated along with
+	 * exigent machine checks (PoP, Chapter 11, Interruption action)
+	 * we need to combine cr14, mcic and external damage code.
+	 * Failing storage address and the logout area should not be or'ed
+	 * together, we just indicate the last occurrence of those.
+	 */
+	mchk->cr14 |= irq->u.mchk.cr14;
+	mchk->mcic |= irq->u.mchk.mcic;
+	mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+	mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+	memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+	       sizeof(mchk->fixed_logout));
+	if (mchk->mcic & MCHK_EX_MASK)
+		set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	else if (mchk->mcic & MCHK_REP_MASK)
+		set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid)
 {
@@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
 	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	switch (inti->type) {
+	case KVM_S390_MCHK:
+		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		break;
+	default:
+		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		break;
+	}
 	spin_unlock(&li->lock);
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
@@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
 	__inject_vm(kvm, inti);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-			 struct kvm_s390_interrupt *s390int)
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_local_interrupt *li;
-	struct kvm_s390_interrupt_info *inti;
+	irq->type = s390int->type;
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000)
+			return -EINVAL;
+		irq->u.pgm.code = s390int->parm;
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		irq->u.prefix.address = s390int->parm;
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (s390int->parm & 0xffff0000)	/* validate the input value */
+			return -EINVAL;
+		irq->u.extcall.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		if (s390int->parm & 0xffff0000)	/* validate the input value */
+			return -EINVAL;
+		irq->u.emerg.code = s390int->parm;
+		break;
+	case KVM_S390_MCHK:
+		irq->u.mchk.mcic = s390int->parm64;
+		break;
+	}
+	return 0;
+}
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
 
-	switch (s390int->type) {
+	spin_lock(&li->lock);
+	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		inti->type = s390int->type;
-		inti->pgm.code = s390int->parm;
 		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   s390int->parm);
+			   irq->u.pgm.code);
+		rc = __inject_prog(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
-		inti->prefix.address = s390int->parm;
-		inti->type = s390int->type;
-		VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-			   s390int->parm);
+		rc = __inject_set_prefix(vcpu, irq);
 		break;
 	case KVM_S390_SIGP_STOP:
+		rc = __inject_sigp_stop(vcpu, irq);
+		break;
 	case KVM_S390_RESTART:
+		rc = __inject_sigp_restart(vcpu, irq);
+		break;
 	case KVM_S390_INT_CLOCK_COMP:
+		rc = __inject_ckc(vcpu);
+		break;
 	case KVM_S390_INT_CPU_TIMER:
-		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
-		inti->type = s390int->type;
+		rc = __inject_cpu_timer(vcpu);
 		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-			   s390int->parm);
-		inti->type = s390int->type;
-		inti->extcall.code = s390int->parm;
+		rc = __inject_extcall(vcpu, irq);
 		break;
 	case KVM_S390_INT_EMERGENCY:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
-			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
-		inti->type = s390int->type;
-		inti->emerg.code = s390int->parm;
+		rc = __inject_sigp_emergency(vcpu, irq);
 		break;
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-			   s390int->parm64);
-		inti->type = s390int->type;
-		inti->mchk.mcic = s390int->parm64;
+		rc = __inject_mchk(vcpu, irq);
 		break;
 	case KVM_S390_INT_PFAULT_INIT:
-		inti->type = s390int->type;
-		inti->ext.ext_params2 = s390int->parm64;
+		rc = __inject_pfault_init(vcpu, irq);
 		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 	default:
-		kfree(inti);
-		return -EINVAL;
+		rc = -EINVAL;
 	}
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
-				   s390int->parm64, 2);
-
-	li = &vcpu->arch.local_int;
-	spin_lock(&li->lock);
-	if (inti->type == KVM_S390_PROGRAM_INT)
-		list_add(&inti->list, &li->list);
-	else
-		list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
-	if (inti->type == KVM_S390_SIGP_STOP)
-		li->action_bits |= ACTION_STOP_ON_STOP;
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	spin_unlock(&li->lock);
-	kvm_s390_vcpu_wakeup(vcpu);
-	return 0;
+	if (!rc)
+		kvm_s390_vcpu_wakeup(vcpu);
+	return rc;
 }
 
 void kvm_s390_clear_float_irqs(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6b049ee..3e09801 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	spin_lock_init(&kvm->arch.float_int.lock);
 	INIT_LIST_HEAD(&kvm->arch.float_int.list);
 	init_waitqueue_head(&kvm->arch.ipte_wq);
+	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	}
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
-	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
 				      unsigned long token)
 {
 	struct kvm_s390_interrupt inti;
-	inti.parm64 = token;
+	struct kvm_s390_irq irq;
 
 	if (start_token) {
-		inti.type = KVM_S390_INT_PFAULT_INIT;
-		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+		irq.u.ext.ext_params2 = token;
+		irq.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
 	} else {
 		inti.type = KVM_S390_INT_PFAULT_DONE;
+		inti.parm64 = token;
 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
 	}
 }
@@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	switch (ioctl) {
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
+		struct kvm_s390_irq s390irq;
 
 		r = -EFAULT;
 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
 			break;
-		r = kvm_s390_inject_vcpu(vcpu, &s390int);
+		if (s390int_to_s390irq(&s390int, &s390irq))
+			return -EINVAL;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
 		break;
 	}
 	case KVM_S390_STORE_STATUS:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d023..a8f3d9b 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
 #define TDB_FORMAT1		1
@@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt *s390int);
+				      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
 			      struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
 	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 }
 
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+			struct kvm_s390_irq *s390irq);
+
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f47cb0c..1be578d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 	return 0;
 }
 
 static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-
 	vcpu->stat.instruction_ipte_interlock++;
-	if (psw_bits(*psw).p)
+	if (psw_bits(vcpu->arch.sie_block->gpsw).p)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
 	return 0;
 }
@@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_protection(vcpu, start))
-			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-	}
+	start = kvm_s390_logical_to_effective(vcpu, start);
 
 	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 	case 0x00000000:
@@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	default:
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	}
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_protection(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
 	while (start < end) {
 		unsigned long useraddr, abs_addr;
 
@@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Rewind PSW to repeat the ESSA instruction */
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
 	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 	down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 val = 0;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
 
 	vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= val;
-		ga += 4;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
-	u32 val;
-	int reg, rc;
 
 	vcpu->stat.instruction_stctl++;
 
@@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg] &  0x00000000fffffffful;
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 4;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_lctlg++;
 
@@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		vcpu->arch.sie_block->gcr[reg] = val;
-		ga += 8;
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_stctg++;
 
@@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg];
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 8;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static const intercept_handler_t eb_handlers[256] = {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba..6651f9f 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,20 +20,13 @@
 #include "kvm-s390.h"
 #include "trace.h"
 
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 			u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int cpuflags;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+		   rc);
 	return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EMERGENCY,
-		.parm = vcpu->vcpu_id,
+		.u.emerg.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc = 0;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu,
 					u16 asn, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
 	u16 p_asn, s_asn;
 	psw_t *psw;
 	u32 flags;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
 	psw = &dst_vcpu->arch.sie_block->gpsw;
 	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
 	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
 
-	/* Deliver the emergency signal? */
+	/* Inject the emergency signal? */
 	if (!(flags & CPUSTAT_STOPPED)
 	    || (psw->mask & psw_int_mask) != psw_int_mask
 	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
 	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
-		return __sigp_emergency(vcpu, cpu_addr);
+		return __inject_sigp_emergency(vcpu, dst_vcpu);
 	} else {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
 	}
 }
 
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EXTERNAL_CALL,
-		.parm = vcpu->vcpu_id,
+		.u.extcall.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
@@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
 {
 	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
 	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
-	if (!inti)
-		return -ENOMEM;
-	inti->type = KVM_S390_SIGP_STOP;
-
 	spin_lock(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP) {
 		/* another SIGP STOP is pending */
-		kfree(inti);
 		rc = SIGP_CC_BUSY;
 		goto out;
 	}
 	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-		kfree(inti);
 		if ((action & ACTION_STORE_ON_STOP) != 0)
 			rc = -ESHUTDOWN;
 		goto out;
 	}
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
 	li->action_bits |= action;
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@ out:
 	return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
 
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
+	return rc;
+}
 
-	rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	int rc;
 
-	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+					      ACTION_STORE_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+		   dst_vcpu->vcpu_id);
 
-	if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+	if (rc == -ESHUTDOWN) {
 		/* If the CPU has already been stopped, we still have
 		 * to save the status when doing stop-and-store. This
 		 * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 	return rc;
 }
 
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
-			     u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			     u32 address, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
-	struct kvm_s390_interrupt_info *inti;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	/*
@@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
 		return SIGP_CC_STATUS_STORED;
 	}
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return SIGP_CC_BUSY;
-
 	spin_lock(&li->lock);
 	/* cpu must be in stopped state */
 	if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		rc = SIGP_CC_STATUS_STORED;
-		kfree(inti);
 		goto out_li;
 	}
 
-	inti->type = KVM_S390_SIGP_SET_PREFIX;
-	inti->prefix.address = address;
-
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	li->irq.prefix.address = address;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+		   address);
 out_li:
 	spin_unlock(&li->lock);
 	return rc;
 }
 
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
-					u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+				       struct kvm_vcpu *dst_vcpu,
+				       u32 addr, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int flags;
 	int rc;
 
-	if (cpu_id < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
 	spin_lock(&dst_vcpu->arch.local_int.lock);
 	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
 	spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
 	return rc;
 }
 
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
-				u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
 		/* running */
@@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
-		   rc);
+	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+		   dst_vcpu->vcpu_id, rc);
 
 	return rc;
 }
 
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-	struct kvm_s390_local_interrupt *li;
-	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-	struct kvm_vcpu *dst_vcpu = NULL;
-
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
+	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+	/* handle (RE)START in user space */
+	int rc = -EOPNOTSUPP;
 
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP)
 		rc = SIGP_CC_BUSY;
@@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	return rc;
 }
 
-int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 parameter;
-	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
-	u8 order_code;
-	int rc;
+	/* handle (INITIAL) CPU RESET in user space */
+	return -EOPNOTSUPP;
+}
 
-	/* sigp in userspace can exit */
-	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+				  struct kvm_vcpu *dst_vcpu)
+{
+	/* handle unknown orders in user space */
+	return -EOPNOTSUPP;
+}
 
-	order_code = kvm_s390_get_base_disp_rs(vcpu);
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+			   u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+	int rc;
+	struct kvm_vcpu *dst_vcpu;
 
-	if (r1 % 2)
-		parameter = vcpu->run->s.regs.gprs[r1];
-	else
-		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
 
-	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
 	switch (order_code) {
 	case SIGP_SENSE:
 		vcpu->stat.instruction_sigp_sense++;
-		rc = __sigp_sense(vcpu, cpu_addr,
-				  &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_EXTERNAL_CALL:
 		vcpu->stat.instruction_sigp_external_call++;
-		rc = __sigp_external_call(vcpu, cpu_addr);
+		rc = __sigp_external_call(vcpu, dst_vcpu);
 		break;
 	case SIGP_EMERGENCY_SIGNAL:
 		vcpu->stat.instruction_sigp_emergency++;
-		rc = __sigp_emergency(vcpu, cpu_addr);
+		rc = __sigp_emergency(vcpu, dst_vcpu);
 		break;
 	case SIGP_STOP:
 		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
+		rc = __sigp_stop(vcpu, dst_vcpu);
 		break;
 	case SIGP_STOP_AND_STORE_STATUS:
-		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
-						 ACTION_STOP_ON_STOP);
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_STORE_STATUS_AT_ADDRESS:
-		rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-						 &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_SET_ARCHITECTURE:
-		vcpu->stat.instruction_sigp_arch++;
-		rc = __sigp_set_arch(vcpu, parameter);
+		vcpu->stat.instruction_sigp_store_status++;
+		rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+						 status_reg);
 		break;
 	case SIGP_SET_PREFIX:
 		vcpu->stat.instruction_sigp_prefix++;
-		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-				       &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
 		break;
 	case SIGP_COND_EMERGENCY_SIGNAL:
-		rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
-						  &vcpu->run->s.regs.gprs[r1]);
+		vcpu->stat.instruction_sigp_cond_emergency++;
+		rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+						  status_reg);
 		break;
 	case SIGP_SENSE_RUNNING:
 		vcpu->stat.instruction_sigp_sense_running++;
-		rc = __sigp_sense_running(vcpu, cpu_addr,
-					  &vcpu->run->s.regs.gprs[r1]);
+		rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
 		break;
 	case SIGP_START:
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-			rc = -EOPNOTSUPP;    /* Handle START in user space */
+		vcpu->stat.instruction_sigp_start++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
 		break;
 	case SIGP_RESTART:
 		vcpu->stat.instruction_sigp_restart++;
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-			VCPU_EVENT(vcpu, 4,
-				   "sigp restart %x to handle userspace",
-				   cpu_addr);
-			/* user space must know about restart */
-			rc = -EOPNOTSUPP;
-		}
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+		rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+	}
+
+	if (rc == -EOPNOTSUPP)
+		VCPU_EVENT(vcpu, 4,
+			   "sigp order %u -> cpu %x: handled in user space",
+			   order_code, dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u32 parameter;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	u8 order_code;
+	int rc;
+
+	/* sigp in userspace can exit */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+	if (r1 % 2)
+		parameter = vcpu->run->s.regs.gprs[r1];
+	else
+		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+	switch (order_code) {
+	case SIGP_SET_ARCHITECTURE:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
 		break;
 	default:
-		return -EOPNOTSUPP;
+		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+				     parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
 	}
 
 	if (rc < 0)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 71c7eff..be99357 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 
 	down_read(&mm->mmap_sem);
 retry:
-	ptep = get_locked_pte(current->mm, addr, &ptl);
+	ptep = get_locked_pte(mm, addr, &ptl);
 	if (unlikely(!ptep)) {
 		up_read(&mm->mmap_sem);
 		return -EFAULT;
@@ -888,6 +888,45 @@ retry:
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	uint64_t physaddr;
+	unsigned long key = 0;
+
+	down_read(&mm->mmap_sem);
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+	pgste = pgste_get_lock(ptep);
+
+	if (pte_val(*ptep) & _PAGE_INVALID) {
+		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+	} else {
+		physaddr = pte_val(*ptep) & PAGE_MASK;
+		key = page_get_storage_key(physaddr);
+
+		/* Reflect guest's logical view, not physical */
+		if (pgste_val(pgste) & PGSTE_GR_BIT)
+			key |= _PAGE_REFERENCED;
+		if (pgste_val(pgste) & PGSTE_GC_BIT)
+			key |= _PAGE_CHANGED;
+	}
+
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c6b6ee5..0f09f52 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -223,7 +223,7 @@ config CPU_SHX3
 config ARCH_SHMOBILE
 	bool
 	select ARCH_SUSPEND_POSSIBLE
-	select PM_RUNTIME
+	select PM
 
 config CPU_HAS_PMU
        depends on CPU_SH4 || CPU_SH4A
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index ec70475..a8d9757 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -47,7 +47,7 @@ CONFIG_PREEMPT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 76a76a2..e7e56a4 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -82,7 +82,7 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be65f03..5cbc96d 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -460,10 +460,12 @@ static void __init sparc_context_init(int numctx)
 void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
 	       struct task_struct *tsk)
 {
+	unsigned long flags;
+
 	if (mm->context == NO_CONTEXT) {
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		alloc_context(old_mm, mm);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
 	}
 
@@ -986,14 +988,15 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 void destroy_context(struct mm_struct *mm)
 {
+	unsigned long flags;
 
 	if (mm->context != NO_CONTEXT) {
 		flush_cache_mm(mm);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
 		flush_tlb_mm(mm);
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		free_context(mm->context);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		mm->context = NO_CONTEXT;
 	}
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d69f1cd..ba397bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -249,10 +249,6 @@ config HAVE_INTEL_TXT
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
 
-config X86_INTEL_MPX
-	def_bool y
-	depends on CPU_SUP_INTEL
-
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
@@ -887,11 +883,11 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
+	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
-	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_LOCAL_APIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1594,6 +1590,32 @@ config X86_SMAP
 
 	  If unsure, say Y.
 
+config X86_INTEL_MPX
+	prompt "Intel MPX (Memory Protection Extensions)"
+	def_bool n
+	depends on CPU_SUP_INTEL
+	---help---
+	  MPX provides hardware features that can be used in
+	  conjunction with compiler-instrumented code to check
+	  memory references.  It is designed to detect buffer
+	  overflow or underflow bugs.
+
+	  This option enables running applications which are
+	  instrumented or otherwise use MPX.  It does not use MPX
+	  itself inside the kernel or to protect the kernel
+	  against bad memory references.
+
+	  Enabling this option will make the kernel larger:
+	  ~8k of kernel text and 36 bytes of data on a 64-bit
+	  defconfig.  It adds a long to the 'mm_struct' which
+	  will increase the kernel memory overhead of each
+	  process and adds some branches to paths used during
+	  exec() and munmap().
+
+	  For details, see Documentation/x86/intel_mpx.txt
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..9662290 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -94,30 +94,7 @@ extern void trace_call_function_single_interrupt(void);
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
 #endif /* CONFIG_TRACING */
 
-/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
-extern unsigned long io_apic_irqs;
-
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-
-struct io_apic_irq_attr {
-	int ioapic;
-	int ioapic_pin;
-	int trigger;
-	int polarity;
-};
-
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
-					int ioapic, int ioapic_pin,
-					int trigger, int polarity)
-{
-	irq_attr->ioapic	= ioapic;
-	irq_attr->ioapic_pin	= ioapic_pin;
-	irq_attr->trigger	= trigger;
-	irq_attr->polarity	= polarity;
-}
-
+#ifdef CONFIG_IRQ_REMAP
 /* Intel specific interrupt remapping information */
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
@@ -131,14 +108,12 @@ struct irq_2_irte {
 	u16 devid; /* Device ID for IRTE table */
 	u16 index; /* Index into IRTE table*/
 };
+#endif	/* CONFIG_IRQ_REMAP */
+
+#ifdef	CONFIG_X86_LOCAL_APIC
+struct irq_data;
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
 struct irq_cfg {
-	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
 	u8			vector;
@@ -150,18 +125,39 @@ struct irq_cfg {
 		struct irq_2_irte  irq_2_irte;
 	};
 #endif
+	union {
+#ifdef CONFIG_X86_IO_APIC
+		struct {
+			struct list_head	irq_2_pin;
+		};
+#endif
+	};
 };
 
+extern struct irq_cfg *irq_cfg(unsigned int irq);
+extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
+extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
+extern void setup_vector_irq(int cpu);
+#ifdef CONFIG_SMP
 extern void send_cleanup_vector(struct irq_cfg *);
+extern void irq_complete_move(struct irq_cfg *cfg);
+#else
+static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void irq_complete_move(struct irq_cfg *c) { }
+#endif
 
-struct irq_data;
-int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
-			  unsigned int *dest_id);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
-extern void setup_ioapic_dest(void);
-
-extern void enable_IO_APIC(void);
+extern int apic_retrigger_irq(struct irq_data *data);
+extern void apic_ack_edge(struct irq_data *data);
+extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			     unsigned int *dest_id);
+#else	/*  CONFIG_X86_LOCAL_APIC */
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+#endif	/* CONFIG_X86_LOCAL_APIC */
 
 /* Statistics */
 extern atomic_t irq_err_count;
@@ -185,7 +181,8 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
+				    - FIRST_EXTERNAL_VECTOR])(void);
 #ifdef CONFIG_TRACING
 #define trace_interrupt interrupt
 #endif
@@ -195,17 +192,6 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void setup_vector_irq(int cpu);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
-extern void __setup_vector_irq(int cpu);
-#else
-static inline void lock_vector_lock(void) {}
-static inline void unlock_vector_lock(void) {}
-static inline void __setup_vector_irq(int cpu) {}
-#endif
 
 #endif /* !ASSEMBLY_ */
 
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 1733ab4..bf006cc 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -132,6 +132,10 @@ extern int noioapicquirk;
 /* -1 if "noapic" boot option passed */
 extern int noioapicreroute;
 
+extern unsigned long io_apic_irqs;
+
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -139,18 +143,15 @@ extern int noioapicreroute;
 #define io_apic_assign_pci_irqs \
 	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
 
-struct io_apic_irq_attr;
 struct irq_cfg;
 extern void ioapic_insert_resources(void);
+extern int arch_early_ioapic_init(void);
 
 extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
 				     unsigned int, int,
 				     struct io_apic_irq_attr *);
 extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
 
-extern void native_compose_msi_msg(struct pci_dev *pdev,
-				   unsigned int irq, unsigned int dest,
-				   struct msi_msg *msg, u8 hpet_id);
 extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
 
 extern int save_ioapic_entries(void);
@@ -160,6 +161,13 @@ extern int restore_ioapic_entries(void);
 extern void setup_ioapic_ids_from_mpc(void);
 extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
 enum ioapic_domain_type {
 	IOAPIC_DOMAIN_INVALID,
 	IOAPIC_DOMAIN_LEGACY,
@@ -188,8 +196,10 @@ extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
 extern u32 mp_pin_to_gsi(int ioapic, int pin);
 extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
 extern void mp_unmap_irq(int irq);
-extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-				      struct ioapic_domain_cfg *cfg);
+extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+			      struct ioapic_domain_cfg *cfg);
+extern int mp_unregister_ioapic(u32 gsi_base);
+extern int mp_ioapic_registered(u32 gsi_base);
 extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
 			    irq_hw_number_t hwirq);
 extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
@@ -227,19 +237,25 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 
 extern void io_apic_eoi(unsigned int apic, unsigned int vector);
 
-extern bool mp_should_keep_irq(struct device *dev);
-
+extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void setup_ioapic_dest(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
+extern void print_IO_APICs(void);
 #else  /* !CONFIG_X86_IO_APIC */
 
+#define IO_APIC_IRQ(x)		0
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static inline void ioapic_insert_resources(void) { }
+static inline int arch_early_ioapic_init(void) { return 0; }
+static inline void print_IO_APICs(void) {}
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
 static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
 static inline void mp_unmap_irq(int irq) { }
-static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
 
 static inline int save_ioapic_entries(void)
 {
@@ -262,7 +278,6 @@ static inline void disable_ioapic_support(void) { }
 #define native_io_apic_print_entries	NULL
 #define native_ioapic_set_affinity	NULL
 #define native_setup_ioapic_entry	NULL
-#define native_compose_msi_msg		NULL
 #define native_eoi_ioapic_pin		NULL
 #endif
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..666c89e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -126,6 +126,12 @@
 
 #define NR_VECTORS			 256
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
+#else
+#define FIRST_SYSTEM_VECTOR		NR_VECTORS
+#endif
+
 #define FPU_IRQ				  13
 
 #define	FIRST_VM86_IRQ			   3
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..d89c6b8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -33,7 +33,7 @@
 
 #define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
@@ -51,6 +51,7 @@
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD		 (1UL << 63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -361,6 +362,7 @@ struct kvm_vcpu_arch {
 	int mp_state;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
+	u64 ia32_xss;
 
 	/*
 	 * Paging state of the vcpu
@@ -542,7 +544,7 @@ struct kvm_apic_map {
 	struct rcu_head rcu;
 	u8 ldr_bits;
 	/* fields bellow are used to decode ldr values in different modes */
-	u32 cid_shift, cid_mask, lid_mask;
+	u32 cid_shift, cid_mask, lid_mask, broadcast;
 	struct kvm_lapic *phys_map[256];
 	/* first index is cluster id second is cpu id in a cluster */
 	struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@ struct kvm_arch {
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
+	/* reads protected by irq_srcu, writes by irq_lock */
+	struct hlist_head mask_notifier_list;
+
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
@@ -659,6 +664,16 @@ struct msr_data {
 	u64 data;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
@@ -767,6 +782,7 @@ struct kvm_x86_ops {
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
+	bool (*xsaves_supported)(void);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
@@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+struct kvm_irq_mask_notifier {
+	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+	int irq;
+	struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask);
+
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			    gfn_t gfn, void *data, int offset, int len,
 			    u32 access);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
 
 static inline int __kvm_irq_line_state(unsigned long *irq_state,
 				       int irq_source_id, int level)
@@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0..4e370a5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,12 +96,15 @@ extern void pci_iommu_alloc(void);
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
+void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
+			    unsigned int dest, struct msi_msg *msg, u8 hpet_id);
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 		  unsigned int irq_base, unsigned int irq_offset);
 #else
+#define native_compose_msi_msg		NULL
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195d..164e3f8 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..45afaee 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_XSAVES			0x00100000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -159,6 +160,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
+	XSS_EXIT_BITMAP                 = 0x0000202C,
+	XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79a..5fa9770 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,7 @@
 #define XSTATE_Hi16_ZMM		0x80
 
 #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512	(XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
 /* Bit 63 of XCR0 is reserved for future expansion */
 #define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))
 
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb..6e1aaf7 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -28,6 +28,13 @@ struct user_desc {
 	unsigned int  seg_not_present:1;
 	unsigned int  useable:1;
 #ifdef __x86_64__
+	/*
+	 * Because this bit is not present in 32-bit user code, user
+	 * programs can pass uninitialized values here.  Therefore, in
+	 * any context in which a user_desc comes from a 32-bit program,
+	 * the kernel must act as though lm == 0, regardless of the
+	 * actual value.
+	 */
 	unsigned int  lm:1;
 #endif
 };
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe..b813bf9 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -72,6 +72,8 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_XSAVES              63
+#define EXIT_REASON_XRSTORS             64
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -116,6 +118,8 @@
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
-	{ EXIT_REASON_INVPCID,               "INVPCID" }
+	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a142e77..4433a4b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -76,6 +76,19 @@ int acpi_fix_pin2_polarity __initdata;
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #endif
 
+/*
+ * Locks related to IOAPIC hotplug
+ * Hotplug side:
+ *	->device_hotplug_lock
+ *		->acpi_ioapic_lock
+ *			->ioapic_lock
+ * Interrupt mapping side:
+ *	->acpi_ioapic_lock
+ *		->ioapic_mutex
+ *			->ioapic_lock
+ */
+static DEFINE_MUTEX(acpi_ioapic_lock);
+
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -395,10 +408,6 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
 
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
-
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
 	node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
@@ -411,7 +420,8 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 	if (irq < 0)
 		return irq;
 
-	if (enable_update_mptable)
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
 		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
 	return irq;
@@ -424,9 +434,6 @@ static void mp_unregister_gsi(u32 gsi)
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return;
 
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return;
-
 	irq = mp_map_gsi_to_irq(gsi, 0);
 	if (irq > 0)
 		mp_unmap_irq(irq);
@@ -609,8 +616,10 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
 		*irqp = gsi;
 	} else {
+		mutex_lock(&acpi_ioapic_lock);
 		irq = mp_map_gsi_to_irq(gsi,
 					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		mutex_unlock(&acpi_ioapic_lock);
 		if (irq < 0)
 			return -1;
 		*irqp = irq;
@@ -650,7 +659,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 	int irq = gsi;
 
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	irq = mp_register_gsi(dev, gsi, trigger, polarity);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 
 	return irq;
@@ -659,7 +670,9 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 static void acpi_unregister_gsi_ioapic(u32 gsi)
 {
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	mp_unregister_gsi(gsi);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
 
@@ -690,6 +703,7 @@ void acpi_unregister_gsi(u32 gsi)
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __init acpi_set_irq_model_ioapic(void)
 {
 	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
@@ -697,6 +711,7 @@ static void __init acpi_set_irq_model_ioapic(void)
 	__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
 	acpi_ioapic = 1;
 }
+#endif
 
 /*
  *  ACPI based hotplug support for CPU
@@ -759,20 +774,74 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	int ioapic_id;
+	u64 addr;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
+
+	ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
+	if (ioapic_id < 0) {
+		unsigned long long uid;
+		acpi_status status;
 
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &uid);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
+			return -EINVAL;
+		}
+		ioapic_id = (int)uid;
+	}
+
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_register_ioapic);
 
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_unregister_ioapic(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
+/**
+ * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base
+ *			    has been registered
+ * @handle:	ACPI handle of the IOAPIC device
+ * @gsi_base:	GSI base associated with the IOAPIC
+ *
+ * Assume caller holds some type of lock to serialize acpi_ioapic_registered()
+ * with acpi_register_ioapic()/acpi_unregister_ioapic().
+ */
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
+{
+	int ret = 0;
+
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_ioapic_registered(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
+
 static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
 	struct acpi_table_boot *sb;
@@ -1185,7 +1254,9 @@ static void __init acpi_process_madt(void)
 			/*
 			 * Parse MADT IO-APIC entries
 			 */
+			mutex_lock(&acpi_ioapic_lock);
 			error = acpi_parse_madt_ioapic_entries();
+			mutex_unlock(&acpi_ioapic_lock);
 			if (!error) {
 				acpi_set_irq_model_ioapic();
 
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index dcb5b15..8bb12ddc 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,10 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..29b5b18 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -196,7 +196,7 @@ static int disable_apic_timer __initdata;
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-int first_system_vector = 0xfe;
+int first_system_vector = FIRST_SYSTEM_VECTOR;
 
 /*
  * Debug level, exported for io_apic.c
@@ -1930,7 +1930,7 @@ int __init APIC_init_uniprocessor(void)
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static inline void __smp_spurious_interrupt(void)
+static inline void __smp_spurious_interrupt(u8 vector)
 {
 	u32 v;
 
@@ -1939,30 +1939,32 @@ static inline void __smp_spurious_interrupt(void)
 	 * if it is a vectored one.  Just in case...
 	 * Spurious interrupts should not be ACKed.
 	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+	if (v & (1 << (vector & 0x1f)))
 		ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	pr_info("spurious APIC interrupt on CPU#%d, "
-		"should never happen.\n", smp_processor_id());
+	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+		"should never happen.\n", vector, smp_processor_id());
 }
 
 __visible void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
-	__smp_spurious_interrupt();
+	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
 __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
+	u8 vector = ~regs->orig_ax;
+
 	entering_irq();
-	trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
-	__smp_spurious_interrupt();
-	trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+	trace_spurious_apic_entry(vector);
+	__smp_spurious_interrupt(vector);
+	trace_spurious_apic_exit(vector);
 	exiting_irq();
 }
 
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
new file mode 100644
index 0000000..816f36e
--- /dev/null
+++ b/arch/x86/kernel/apic/htirq.c
@@ -0,0 +1,107 @@
+/*
+ * Support Hypertransport IRQ
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/htirq.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/hypertransport.h>
+
+/*
+ * Hypertransport interrupt support
+ */
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	target_ht_irq(data->irq, dest, cfg->vector);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ht_irq_chip = {
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= ht_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
+	int err;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
+
+	write_ht_irq_msg(irq, &msg);
+
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7ffe0a2..3f5f604 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -32,15 +32,11 @@
 #include <linux/module.h>
 #include <linux/syscore_ops.h>
 #include <linux/irqdomain.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
 #include <linux/slab.h>
 #include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -52,17 +48,12 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
-#include <asm/hpet.h>
 #include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
-#define __apicdebuginit(type) static type __init
-
 #define	for_each_ioapic(idx)		\
 	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
 #define	for_each_ioapic_reverse(idx)	\
@@ -74,7 +65,7 @@
 		for_each_pin((idx), (pin))
 
 #define for_each_irq_pin(entry, head) \
-	for (entry = head; entry; entry = entry->next)
+	list_for_each_entry(entry, &head, list)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -83,7 +74,6 @@
 int sis_apic_bug = -1;
 
 static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
 static DEFINE_MUTEX(ioapic_mutex);
 static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
@@ -112,6 +102,7 @@ static struct ioapic {
 	struct ioapic_domain_cfg irqdomain_cfg;
 	struct irq_domain *irqdomain;
 	struct mp_pin_info *pin_info;
+	struct resource *iomem_res;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -205,8 +196,6 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
-
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
@@ -228,8 +217,8 @@ void mp_save_irq(struct mpc_intsrc *m)
 }
 
 struct irq_pin_list {
+	struct list_head list;
 	int apic, pin;
-	struct irq_pin_list *next;
 };
 
 static struct irq_pin_list *alloc_irq_pin_list(int node)
@@ -237,7 +226,26 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
-int __init arch_early_irq_init(void)
+/* Allocate the suspend/resume save area of IOAPIC @idx, if not yet done */
+static void alloc_ioapic_saved_registers(int idx)
+{
+	if (ioapics[idx].saved_registers)
+		return;
+
+	/* kcalloc() zeroes the array and checks n * size for overflow */
+	ioapics[idx].saved_registers = kcalloc(ioapics[idx].nr_registers,
+			sizeof(struct IO_APIC_route_entry), GFP_KERNEL);
+	if (!ioapics[idx].saved_registers)
+		pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+static void free_ioapic_saved_registers(int idx)
+{
+	kfree(ioapics[idx].saved_registers);	/* release the save area */
+	ioapics[idx].saved_registers = NULL;	/* leave no stale pointer */
+}
+
+int __init arch_early_ioapic_init(void)
 {
 	struct irq_cfg *cfg;
 	int i, node = cpu_to_node(0);
@@ -245,13 +253,8 @@ int __init arch_early_irq_init(void)
 	if (!nr_legacy_irqs())
 		io_apic_irqs = ~0UL;
 
-	for_each_ioapic(i) {
-		ioapics[i].saved_registers =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				ioapics[i].nr_registers, GFP_KERNEL);
-		if (!ioapics[i].saved_registers)
-			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
-	}
+	for_each_ioapic(i)
+		alloc_ioapic_saved_registers(i);
 
 	/*
 	 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -266,61 +269,6 @@ int __init arch_early_irq_init(void)
 	return 0;
 }
 
-static inline struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq_get_chip_data(irq);
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-
-	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
-	if (!cfg)
-		return NULL;
-	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
-		goto out_cfg;
-	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
-		goto out_domain;
-	return cfg;
-out_domain:
-	free_cpumask_var(cfg->domain);
-out_cfg:
-	kfree(cfg);
-	return NULL;
-}
-
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
-{
-	if (!cfg)
-		return;
-	irq_set_chip_data(at, NULL);
-	free_cpumask_var(cfg->domain);
-	free_cpumask_var(cfg->old_domain);
-	kfree(cfg);
-}
-
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
-{
-	int res = irq_alloc_desc_at(at, node);
-	struct irq_cfg *cfg;
-
-	if (res < 0) {
-		if (res != -EEXIST)
-			return NULL;
-		cfg = irq_cfg(at);
-		if (cfg)
-			return cfg;
-	}
-
-	cfg = alloc_irq_cfg(at, node);
-	if (cfg)
-		irq_set_chip_data(at, cfg);
-	else
-		irq_free_desc(at);
-	return cfg;
-}
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -445,15 +393,12 @@ static void ioapic_mask_entry(int apic, int pin)
  */
 static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *entry;
 
 	/* don't allow duplicates */
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
+	for_each_irq_pin(entry, cfg->irq_2_pin)
 		if (entry->apic == apic && entry->pin == pin)
 			return 0;
-		last = &entry->next;
-	}
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
@@ -464,22 +409,19 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 	entry->apic = apic;
 	entry->pin = pin;
 
-	*last = entry;
+	list_add_tail(&entry->list, &cfg->irq_2_pin);
 	return 0;
 }
 
 static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *tmp, *entry;
 
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin)
+	list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
 		if (entry->apic == apic && entry->pin == pin) {
-			*last = entry->next;
+			list_del(&entry->list);
 			kfree(entry);
 			return;
-		} else {
-			last = &entry->next;
 		}
 }
 
@@ -559,7 +501,7 @@ static void mask_ioapic(struct irq_cfg *cfg)
 
 static void mask_ioapic_irq(struct irq_data *data)
 {
-	mask_ioapic(data->chip_data);
+	mask_ioapic(irqd_cfg(data));
 }
 
 static void __unmask_ioapic(struct irq_cfg *cfg)
@@ -578,7 +520,7 @@ static void unmask_ioapic(struct irq_cfg *cfg)
 
 static void unmask_ioapic_irq(struct irq_data *data)
 {
-	unmask_ioapic(data->chip_data);
+	unmask_ioapic(irqd_cfg(data));
 }
 
 /*
@@ -1164,8 +1106,7 @@ void mp_unmap_irq(int irq)
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
  */
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
-				struct io_apic_irq_attr *irq_attr)
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int irq, i, best_ioapic = -1, best_idx = -1;
 
@@ -1219,195 +1160,11 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 		return -1;
 
 out:
-	irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
-			IOAPIC_MAP_ALLOC);
-	if (irq > 0)
-		set_io_apic_irq_attr(irq_attr, best_ioapic,
-				     mp_irqs[best_idx].dstirq,
-				     irq_trigger(best_idx),
-				     irq_polarity(best_idx));
-	return irq;
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			 IOAPIC_MAP_ALLOC);
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	raw_spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	raw_spin_unlock(&vector_lock);
-}
-
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
-	cpumask_var_t tmp_mask;
-
-	if (cfg->move_in_progress)
-		return -EBUSY;
-
-	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
-		return -ENOMEM;
-
-	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
-	cpumask_clear(cfg->old_domain);
-	cpu = cpumask_first_and(mask, cpu_online_mask);
-	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
-
-		apic->vector_allocation_domain(cpu, tmp_mask, mask);
-
-		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			err = 0;
-			if (cpumask_equal(tmp_mask, cfg->domain))
-				break;
-			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
-			 */
-			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
-			break;
-		}
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 16;
-		if (vector >= first_system_vector) {
-			offset = (offset + 1) % 16;
-			vector = FIRST_EXTERNAL_VECTOR + offset;
-		}
-
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
-			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
-			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
-			continue;
-		}
-
-		if (test_bit(vector, used_vectors))
-			goto next;
-
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
-			if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
-				goto next;
-		}
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (cfg->vector) {
-			cpumask_copy(cfg->old_domain, cfg->domain);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cpumask_copy(cfg->domain, tmp_mask);
-		err = 0;
-		break;
-	}
-	free_cpumask_var(tmp_mask);
-	return err;
-}
-
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	int err;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, cfg, mask);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
-	int cpu, vector;
-
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-
-	cfg->vector = 0;
-	cpumask_clear(cfg->domain);
-
-	if (likely(!cfg->move_in_progress))
-		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-			if (per_cpu(vector_irq, cpu)[vector] != irq)
-				continue;
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-			break;
-		}
-	}
-	cfg->move_in_progress = 0;
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
-	/* Mark the inuse vectors */
-	for_each_active_irq(irq) {
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-	}
-	raw_spin_unlock(&vector_lock);
-}
-
 static struct irq_chip ioapic_chip;
 
 #ifdef CONFIG_X86_32
@@ -1496,7 +1253,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 					 &dest)) {
 		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1510,7 +1267,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
 		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1641,7 +1398,7 @@ void ioapic_zap_locks(void)
 	raw_spin_lock_init(&ioapic_lock);
 }
 
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+static void __init print_IO_APIC(int ioapic_idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -1698,7 +1455,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
 }
 
-__apicdebuginit(void) print_IO_APICs(void)
+void __init print_IO_APICs(void)
 {
 	int ioapic_idx;
 	struct irq_cfg *cfg;
@@ -1731,8 +1488,7 @@ __apicdebuginit(void) print_IO_APICs(void)
 		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
-		entry = cfg->irq_2_pin;
-		if (!entry)
+		if (list_empty(&cfg->irq_2_pin))
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
@@ -1743,205 +1499,6 @@ __apicdebuginit(void) print_IO_APICs(void)
 	printk(KERN_INFO ".................................... done.\n");
 }
 
-__apicdebuginit(void) print_APIC_field(int base)
-{
-	int i;
-
-	printk(KERN_DEBUG);
-
-	for (i = 0; i < 8; i++)
-		pr_cont("%08x", apic_read(base + i*0x10));
-
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int i, v, ver, maxlvt;
-	u64 icr;
-
-	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		if (!APIC_XAPIC(ver)) {
-			v = apic_read(APIC_ARBPRI);
-			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			       v & APIC_ARBPRI_MASK);
-		}
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	/*
-	 * Remote read supported only in the 82489DX and local APIC for
-	 * Pentium processors.
-	 */
-	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
-		v = apic_read(APIC_RRR);
-		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	if (!x2apic_enabled()) {
-		v = apic_read(APIC_DFR);
-		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	}
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_field(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_field(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_field(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-
-	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
-		v = apic_read(APIC_EFEAT);
-		maxlvt = (v >> 16) & 0xff;
-		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
-		v = apic_read(APIC_ECTRL);
-		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
-		for (i = 0; i < maxlvt; i++) {
-			v = apic_read(APIC_EILVTn(i));
-			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
-		}
-	}
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APICs(int maxcpu)
-{
-	int cpu;
-
-	if (!maxcpu)
-		return;
-
-	preempt_disable();
-	for_each_online_cpu(cpu) {
-		if (cpu >= maxcpu)
-			break;
-		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
-	}
-	preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (!nr_legacy_irqs())
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-static int __initdata show_lapic = 1;
-static __init int setup_show_lapic(char *arg)
-{
-	int num = -1;
-
-	if (strcmp(arg, "all") == 0) {
-		show_lapic = CONFIG_NR_CPUS;
-	} else {
-		get_option(&arg, &num);
-		if (num >= 0)
-			show_lapic = num;
-	}
-
-	return 1;
-}
-__setup("show_lapic=", setup_show_lapic);
-
-__apicdebuginit(int) print_ICs(void)
-{
-	if (apic_verbosity == APIC_QUIET)
-		return 0;
-
-	print_PIC();
-
-	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
-		return 0;
-
-	print_local_APICs(show_lapic);
-	print_IO_APICs();
-
-	return 0;
-}
-
-late_initcall(print_ICs);
-
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -2244,26 +1801,12 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
-	__unmask_ioapic(data->chip_data);
+	__unmask_ioapic(irqd_cfg(data));
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
 }
 
-static int ioapic_retrigger_irq(struct irq_data *data)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned long flags;
-	int cpu;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
-	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
@@ -2273,113 +1816,6 @@ static int ioapic_retrigger_irq(struct irq_data *data)
  * races.
  */
 
-#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-
-	ack_APIC_irq();
-	irq_enter();
-	exit_idle();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		int irq;
-		unsigned int irr;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __this_cpu_read(vector_irq[vector]);
-
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		raw_spin_lock(&desc->lock);
-
-		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
-		 */
-		if (cfg->move_in_progress)
-			goto unlock;
-
-		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-			goto unlock;
-
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		/*
-		 * Check if the vector that needs to be cleanedup is
-		 * registered at the cpu's IRR. If so, then this is not
-		 * the best time to clean it up. Lets clean it up in the
-		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-		 * to myself.
-		 */
-		if (irr  & (1 << (vector % 32))) {
-			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-			goto unlock;
-		}
-		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
-unlock:
-		raw_spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
-{
-	unsigned me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	me = smp_processor_id();
-
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-		send_cleanup_vector(cfg);
-}
-
-static void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
-void irq_force_complete_move(int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	if (!cfg)
-		return;
-
-	__irq_complete_move(cfg, cfg->vector);
-}
-#else
-static inline void irq_complete_move(struct irq_cfg *cfg) { }
-#endif
-
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -2400,41 +1836,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int irq = data->irq;
-	int err;
-
-	if (!config_enabled(CONFIG_SMP))
-		return -EPERM;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irq, cfg, mask);
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
-	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	cpumask_copy(data->affinity, mask);
-
-	return 0;
-}
-
-
 int native_ioapic_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask,
 			       bool force)
@@ -2447,24 +1848,17 @@ int native_ioapic_set_affinity(struct irq_data *data,
 		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
+	ret = apic_set_affinity(data, mask, &dest);
 	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		__target_IO_APIC_irq(irq, dest, irqd_cfg(data));
 		ret = IRQ_SET_MASK_OK_NOCOPY;
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return ret;
 }
 
-static void ack_apic_edge(struct irq_data *data)
-{
-	irq_complete_move(data->chip_data);
-	irq_move_irq(data);
-	ack_APIC_irq();
-}
-
 atomic_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -2547,9 +1941,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data,
 }
 #endif
 
-static void ack_apic_level(struct irq_data *data)
+static void ack_ioapic_level(struct irq_data *data)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	int i, irq = data->irq;
 	unsigned long v;
 	bool masked;
@@ -2619,10 +2013,10 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_startup		= startup_ioapic_irq,
 	.irq_mask		= mask_ioapic_irq,
 	.irq_unmask		= unmask_ioapic_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_eoi		= ack_apic_level,
+	.irq_ack		= apic_ack_edge,
+	.irq_eoi		= ack_ioapic_level,
 	.irq_set_affinity	= native_ioapic_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
+	.irq_retrigger		= apic_retrigger_irq,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -2965,6 +2359,16 @@ static int mp_irqdomain_create(int ioapic)
 	return 0;
 }
 
+/* Remove the irqdomain of IOAPIC @idx, if any, and free its pin_info */
+static void ioapic_destroy_irqdomain(int idx)
+{
+	if (ioapics[idx].irqdomain)
+		irq_domain_remove(ioapics[idx].irqdomain);
+	ioapics[idx].irqdomain = NULL;
+	kfree(ioapics[idx].pin_info);
+	ioapics[idx].pin_info = NULL;
+}
+
 void __init setup_IO_APIC(void)
 {
 	int ioapic;
@@ -3044,399 +2448,6 @@ static int __init ioapic_init_ops(void)
 
 device_initcall(ioapic_init_ops);
 
-/*
- * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
- */
-int arch_setup_hwirq(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int ret;
-
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg)
-		return -ENOMEM;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (!ret)
-		irq_set_chip_data(irq, cfg);
-	else
-		free_irq_cfg(irq, cfg);
-	return ret;
-}
-
-void arch_teardown_hwirq(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	free_remapped_irq(irq);
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, cfg);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	free_irq_cfg(irq, cfg);
-}
-
-/*
- * MSI message composition
- */
-void native_compose_msi_msg(struct pci_dev *pdev,
-			    unsigned int irq, unsigned int dest,
-			    struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	msg->address_hi = MSI_ADDR_BASE_HI;
-
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
-
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((apic->irq_dest_mode == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
-			   struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-
-	return 0;
-}
-
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	__get_cached_msi_msg(data->msi_desc, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	__pci_write_msi_msg(data->msi_desc, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name			= "PCI-MSI",
-	.irq_unmask		= pci_msi_unmask_irq,
-	.irq_mask		= pci_msi_mask_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset)
-{
-	struct irq_chip *chip = &msi_chip;
-	struct msi_msg msg;
-	unsigned int irq = irq_base + irq_offset;
-	int ret;
-
-	ret = msi_compose_msg(dev, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-
-	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
-	/*
-	 * MSI-X message is written per-IRQ, the offset is always 0.
-	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
-	 */
-	if (!irq_offset)
-		pci_write_msi_msg(irq, &msg);
-
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
-
-	return 0;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct msi_desc *msidesc;
-	unsigned int irq;
-	int node, ret;
-
-	/* Multiple MSI vectors only supported with interrupt remapping */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
-	node = dev_to_node(&dev->dev);
-
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = irq_alloc_hwirq(node);
-		if (!irq)
-			return -ENOSPC;
-
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0) {
-			irq_free_hwirq(irq);
-			return ret;
-		}
-
-	}
-	return 0;
-}
-
-void native_teardown_msi_irq(unsigned int irq)
-{
-	irq_free_hwirq(irq);
-}
-
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		      bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct msi_msg msg;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip dmar_msi_type = {
-	.name			= "DMAR_MSI",
-	.irq_unmask		= dmar_msi_unmask,
-	.irq_mask		= dmar_msi_mask,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= dmar_msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-				      "edge");
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-static int hpet_msi_set_affinity(struct irq_data *data,
-				 const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	hpet_msi_read(data->handler_data, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	hpet_msi_write(data->handler_data, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip hpet_msi_type = {
-	.name = "HPET_MSI",
-	.irq_unmask = hpet_msi_unmask,
-	.irq_mask = hpet_msi_mask,
-	.irq_ack = ack_apic_edge,
-	.irq_set_affinity = hpet_msi_set_affinity,
-	.irq_retrigger = ioapic_retrigger_irq,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
-{
-	struct irq_chip *chip = &hpet_msi_type;
-	struct msi_msg msg;
-	int ret;
-
-	ret = msi_compose_msg(NULL, irq, &msg, id);
-	if (ret < 0)
-		return ret;
-
-	hpet_msi_write(irq_get_handler_data(irq), &msg);
-	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	target_ht_irq(data->irq, dest, cfg->vector);
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	struct ht_irq_msg msg;
-	unsigned dest;
-	int err;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(dest) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-
-	write_ht_irq_msg(irq, &msg);
-
-	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-				      handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-
-	return 0;
-}
-#endif /* CONFIG_HT_IRQ */
-
 static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
@@ -3451,7 +2462,7 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int __init io_apic_get_redir_entries(int ioapic)
+static int io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3476,28 +2487,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
 	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
 }
 
-int __init arch_probe_nr_irqs(void)
-{
-	int nr;
-
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += gsi_top * 16;
-#endif
-	if (nr < nr_irqs)
-		nr_irqs = nr;
-
-	return 0;
-}
-
 #ifdef CONFIG_X86_32
-static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3572,30 +2563,63 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 	return apic_id;
 }
 
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
 	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return io_apic_get_unique_id(nr_ioapics, id);
+		return io_apic_get_unique_id(idx, id);
 	else
 		return id;
 }
 #else
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
-	int i;
+	union IO_APIC_reg_00 reg_00;
 	DECLARE_BITMAP(used, 256);
+	unsigned long flags;
+	u8 new_id;
+	int i;
 
 	bitmap_zero(used, 256);
 	for_each_ioapic(i)
 		__set_bit(mpc_ioapic_id(i), used);
+
+	/* Hand out the requested id if available */
 	if (!test_bit(id, used))
 		return id;
-	return find_first_zero_bit(used, 256);
+
+	/*
+	 * Read the current id from the ioapic and keep it if
+	 * available.
+	 */
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	new_id = reg_00.bits.ID;
+	if (!test_bit(new_id, used)) {
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+			 idx, new_id, id);
+		return new_id;
+	}
+
+	/*
+	 * Get the next free id and write it to the ioapic.
+	 */
+	new_id = find_first_zero_bit(used, 256);
+	reg_00.bits.ID = new_id;
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(idx, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	/* Sanity check */
+	BUG_ON(reg_00.bits.ID != new_id);
+
+	return new_id;
 }
 #endif
 
-static int __init io_apic_get_version(int ioapic)
+static int io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3702,6 +2726,7 @@ static struct resource * __init ioapic_setup_resources(void)
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 		num++;
+		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
@@ -3799,21 +2824,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
 	return gsi - gsi_cfg->gsi_base;
 }
 
-static __init int bad_ioapic(unsigned long address)
-{
-	if (nr_ioapics >= MAX_IO_APICS) {
-		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
-			MAX_IO_APICS, nr_ioapics);
-		return 1;
-	}
-	if (!address) {
-		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
-		return 1;
-	}
-	return 0;
-}
-
-static __init int bad_ioapic_register(int idx)
+static int bad_ioapic_register(int idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -3832,32 +2843,61 @@ static __init int bad_ioapic_register(int idx)
 	return 0;
 }
 
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-			       struct ioapic_domain_cfg *cfg)
+static int find_free_ioapic_entry(void)
 {
-	int idx = 0;
-	int entries;
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id:		hardware IOAPIC ID
+ * @address:	physical address of IOAPIC register area
+ * @gsi_base:	base of GSI associated with the IOAPIC
+ * @cfg:	configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+		       struct ioapic_domain_cfg *cfg)
+{
+	bool hotplug = !!ioapic_initialized;
 	struct mp_ioapic_gsi *gsi_cfg;
+	int idx, ioapic, entries;
+	u32 gsi_end;
 
-	if (bad_ioapic(address))
-		return;
+	if (!address) {
+		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+		return -EINVAL;
+	}
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].mp_config.apicaddr == address) {
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+				address, ioapic);
+			return -EEXIST;
+		}
 
-	idx = nr_ioapics;
+	idx = find_free_ioapic_entry();
+	if (idx >= MAX_IO_APICS) {
+		pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, idx);
+		return -ENOSPC;
+	}
 
 	ioapics[idx].mp_config.type = MP_IOAPIC;
 	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
 	ioapics[idx].mp_config.apicaddr = address;
-	ioapics[idx].irqdomain = NULL;
-	ioapics[idx].irqdomain_cfg = *cfg;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
 	if (bad_ioapic_register(idx)) {
 		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
-		return;
+		return -ENODEV;
 	}
 
-	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+	ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
 	/*
@@ -3865,24 +2905,112 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
 	entries = io_apic_get_redir_entries(idx);
+	gsi_end = gsi_base + entries - 1;
+	for_each_ioapic(ioapic) {
+		gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+		if ((gsi_base >= gsi_cfg->gsi_base &&
+		     gsi_base <= gsi_cfg->gsi_end) ||
+		    (gsi_end >= gsi_cfg->gsi_base &&
+		     gsi_end <= gsi_cfg->gsi_end)) {
+			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+				gsi_base, gsi_end,
+				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOSPC;
+		}
+	}
 	gsi_cfg = mp_ioapic_gsi_routing(idx);
 	gsi_cfg->gsi_base = gsi_base;
-	gsi_cfg->gsi_end = gsi_base + entries - 1;
+	gsi_cfg->gsi_end = gsi_end;
+
+	ioapics[idx].irqdomain = NULL;
+	ioapics[idx].irqdomain_cfg = *cfg;
 
 	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
+	 * If mp_register_ioapic() is called during early boot stage when
+	 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
+	 * we are still using bootmem allocator. So delay it to setup_IO_APIC().
 	 */
-	ioapics[idx].nr_registers = entries;
+	if (hotplug) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
 
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
+	if (nr_ioapics <= idx)
+		nr_ioapics = idx + 1;
+
+	/* Set nr_registers to mark entry present */
+	ioapics[idx].nr_registers = entries;
 
 	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
 		idx, mpc_ioapic_id(idx),
 		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
 		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
-	nr_ioapics++;
+	return 0;
+}
+
+int mp_unregister_ioapic(u32 gsi_base)
+{
+	int ioapic, pin;
+	int found = 0;
+	struct mp_pin_info *pin_info;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		pr_warn("can't find IOAPIC for GSI %u\n", gsi_base);
+		return -ENODEV;
+	}
+
+	for_each_pin(ioapic, pin) {
+		pin_info = mp_pin_info(ioapic, pin);
+		if (pin_info->count) {
+			pr_warn("pin%d on IOAPIC%d is still in use.\n",
+				pin, ioapic);
+			return -EBUSY;
+		}
+	}
+
+	/* Mark entry not present before releasing its resources */
+	ioapics[ioapic].nr_registers = 0;
+	ioapic_destroy_irqdomain(ioapic);
+	free_ioapic_saved_registers(ioapic);
+	if (ioapics[ioapic].iomem_res)
+		release_resource(ioapics[ioapic].iomem_res);
+	clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+	memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+	return 0;
+}
+
+int mp_ioapic_registered(u32 gsi_base)
+{
+	int ioapic;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+			return 1;
+
+	return 0;
+}
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
 int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
@@ -3931,7 +3059,7 @@ void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
 
 	ioapic_mask_entry(ioapic, pin);
 	__remove_pin_from_irq(cfg, ioapic, pin);
-	WARN_ON(cfg->irq_2_pin != NULL);
+	WARN_ON(!list_empty(&cfg->irq_2_pin));
 	arch_teardown_hwirq(virq);
 }
 
@@ -3964,18 +3092,6 @@ int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
 	return ret;
 }
 
-bool mp_should_keep_irq(struct device *dev)
-{
-	if (dev->power.is_prepared)
-		return true;
-#ifdef	CONFIG_PM_RUNTIME
-	if (dev->power.runtime_status == RPM_SUSPENDING)
-		return true;
-#endif
-
-	return false;
-}
-
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 0000000..d6ba2d6
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c
@@ -0,0 +1,286 @@
+/*
+ * Support of MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/msidef.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+
+void native_compose_msi_msg(struct pci_dev *pdev,
+			    unsigned int irq, unsigned int dest,
+			    struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+
+	if (x2apic_enabled())
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((apic->irq_dest_mode == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL :
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU :
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED :
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+}
+
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+	return 0;
+}
+
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	__pci_write_msi_msg(data->msi_desc, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
+{
+	struct irq_chip *chip = &msi_chip;
+	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
+	int ret;
+
+	ret = msi_compose_msg(dev, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		pci_write_msi_msg(irq, &msg);
+
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+	return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *msidesc;
+	unsigned int irq;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = irq_alloc_hwirq(node);
+		if (!irq)
+			return -ENOSPC;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0) {
+			irq_free_hwirq(irq);
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+	irq_free_hwirq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest, irq = data->irq;
+	struct msi_msg msg;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip dmar_msi_type = {
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= dmar_msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_HPET_TIMER
+
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	hpet_msi_read(data->handler_data, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(data->handler_data, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = apic_ack_edge,
+	.irq_set_affinity = hpet_msi_set_affinity,
+	.irq_retrigger = apic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+	struct irq_chip *chip = &hpet_msi_type;
+	struct msi_msg msg;
+	int ret;
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+	return 0;
+}
+#endif
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 0000000..6cedd79
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c
@@ -0,0 +1,719 @@
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+void lock_vector_lock(void)
+{
+	/* Used to ensure the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+	raw_spin_unlock(&vector_lock);
+}
+
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+{
+	return irq_data->chip_data;
+}
+
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+	if (!cfg)
+		return NULL;
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+		goto out_cfg;
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+		goto out_domain;
+#ifdef	CONFIG_X86_IO_APIC
+	INIT_LIST_HEAD(&cfg->irq_2_pin);
+#endif
+	return cfg;
+out_domain:
+	free_cpumask_var(cfg->domain);
+out_cfg:
+	kfree(cfg);
+	return NULL;
+}
+
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = irq_cfg(at);
+		if (cfg)
+			return cfg;
+	}
+
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		irq_set_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+	return cfg;
+}
+
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+	if (!cfg)
+		return;
+	irq_set_chip_data(at, NULL);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+	static int current_offset = VECTOR_OFFSET_START % 16;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
+
+	if (cfg->move_in_progress)
+		return -EBUSY;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Only try and allocate irqs on cpus that are online */
+	err = -ENOSPC;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
+
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 16;
+		if (vector >= first_system_vector) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
+
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
+		}
+
+		if (test_bit(vector, used_vectors))
+			goto next;
+
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] >
+			    VECTOR_UNDEFINED)
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (cfg->vector) {
+			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+		}
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
+	}
+	free_cpumask_var(tmp_mask);
+
+	return err;
+}
+
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	int err;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	err = __assign_irq_vector(irq, cfg, mask);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return err;
+}
+
+void clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+	int cpu, vector;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+
+	cfg->vector = 0;
+	cpumask_clear(cfg->domain);
+
+	if (likely(!cfg->move_in_progress)) {
+		raw_spin_unlock_irqrestore(&vector_lock, flags);
+		return;
+	}
+
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+		     vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] != irq)
+				continue;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+			break;
+		}
+	}
+	cfg->move_in_progress = 0;
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+		nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+	/*
+	 * for MSI and HT dyn irq
+	 */
+	if (gsi_top <= NR_IRQS_LEGACY)
+		nr +=  8 * nr_cpu_ids;
+	else
+		nr += gsi_top * 16;
+#endif
+	if (nr < nr_irqs)
+		nr_irqs = nr;
+
+	return nr_legacy_irqs();
+}
+
+int __init arch_early_irq_init(void)
+{
+	return arch_early_ioapic_init();
+}
+
+static void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/*
+	 * vector_lock will make sure that we don't run into irq vector
+	 * assignments that might be happening on another cpu in parallel,
+	 * while we setup our initial vector to irq mappings.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Mark the inuse vectors */
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Setup the vector to irq mappings.
+ */
+void setup_vector_irq(int cpu)
+{
+	int irq;
+
+	/*
+	 * On most of the platforms, legacy PIC delivers the interrupts on the
+	 * boot cpu. But there are certain platforms where PIC interrupts are
+	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
+	 * legacy PIC, for the new cpu that is coming online, setup the static
+	 * legacy vector to irq mapping:
+	 */
+	for (irq = 0; irq < nr_legacy_irqs(); irq++)
+		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
+
+	__setup_vector_irq(cpu);
+}
+
+int apic_retrigger_irq(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned long flags;
+	int cpu;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+void apic_ack_edge(struct irq_data *data)
+{
+	irq_complete_move(irqd_cfg(data));
+	irq_move_irq(data);
+	ack_APIC_irq();
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns an error code and
+ * leaves data->affinity untouched.
+ */
+int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i),
+					    IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+
+	ack_APIC_irq();
+	irq_enter();
+	exit_idle();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		int irq;
+		unsigned int irr;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+
+		irq = __this_cpu_read(vector_irq[vector]);
+
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
+			goto unlock;
+
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+			goto unlock;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleaned up is
+		 * registered at the cpu's IRR. If so, then this is not
+		 * the best time to clean it up. Let's clean it up in the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to myself.
+		 */
+		if (irr & (1U << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+	unsigned me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	me = smp_processor_id();
+
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+}
+#endif
+
+/*
+ * Dynamic irq allocation and deallocation. Should be replaced by irq domains!
+ */
+int arch_setup_hwirq(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	int ret;
+
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (!ret)
+		irq_set_chip_data(irq, cfg);
+	else
+		free_irq_cfg(irq, cfg);
+	return ret;
+}
+
+void arch_teardown_hwirq(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	free_remapped_irq(irq);
+	clear_irq_vector(irq, cfg);
+	free_irq_cfg(irq, cfg);
+}
+
+static void __init print_APIC_field(int base)
+{
+	int i;
+
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		pr_cont("%08x", apic_read(base + i*0x10));
+
+	pr_cont("\n");
+}
+
+static void __init print_local_APIC(void *dummy)
+{
+	unsigned int i, v, ver, maxlvt;
+	u64 icr;
+
+	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+		 smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	pr_info("... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+				 v, v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		pr_debug("... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		pr_debug("... APIC RRR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_LDR);
+	pr_debug("... APIC LDR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		pr_debug("... APIC DFR: %08x\n", v);
+	}
+	v = apic_read(APIC_SPIV);
+	pr_debug("... APIC SPIV: %08x\n", v);
+
+	pr_debug("... APIC ISR field:\n");
+	print_APIC_field(APIC_ISR);
+	pr_debug("... APIC TMR field:\n");
+	print_APIC_field(APIC_TMR);
+	pr_debug("... APIC IRR field:\n");
+	print_APIC_field(APIC_IRR);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		/* Due to the Pentium erratum 3AP. */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		pr_debug("... APIC ESR: %08x\n", v);
+	}
+
+	icr = apic_icr_read();
+	pr_debug("... APIC ICR: %08x\n", (u32)icr);
+	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+	v = apic_read(APIC_LVTT);
+	pr_debug("... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {
+		/* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		pr_debug("... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	pr_debug("... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	pr_debug("... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {
+		/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		pr_debug("... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	pr_debug("... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	pr_debug("... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	pr_debug("... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		pr_debug("... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		pr_debug("... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			pr_debug("... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
+	pr_cont("\n");
+}
+
+static void __init print_local_APICs(int maxcpu)
+{
+	int cpu;
+
+	if (!maxcpu)
+		return;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
+	preempt_enable();
+}
+
+static void __init print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (!nr_legacy_irqs())
+		return;
+
+	pr_debug("\nprinting PIC contents\n");
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	pr_debug("... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	pr_debug("... PIC  IRR: %04x\n", v);
+
+	outb(0x0b, 0xa0);
+	outb(0x0b, 0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a, 0xa0);
+	outb(0x0a, 0x20);
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	pr_debug("... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+static int show_lapic __initdata = 1;
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+static int __init print_ICs(void)
+{
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
+	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic && !apic_from_smp_config())
+		return 0;
+
+	print_local_APICs(show_lapic);
+	print_IO_APICs();
+
+	return 0;
+}
+
+late_initcall(print_ICs);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 08f3fed..10b8d3e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -276,6 +276,17 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 	return box;
 }
 
+/*
+ * An event counts as an uncore event when its pmu's event_init callback is
+ * uncore_pmu_event_init; it is declared here so that comparison can name it.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+	return event->pmu->event_init == uncore_pmu_event_init;	/* pointer identity test; event->pmu is dereferenced unchecked */
+}
+
 static int
 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
 {
@@ -290,13 +301,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b
 		return -EINVAL;
 
 	n = box->n_events;
-	box->event_list[n] = leader;
-	n++;
+
+	if (is_uncore_event(leader)) {
+		box->event_list[n] = leader;
+		n++;
+	}
+
 	if (!dogrp)
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_uncore_event(event) ||
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f5ab56d..aceb2f9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -28,6 +28,7 @@
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/cpu.h>
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 1cf7c97..000d419 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -732,10 +732,10 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 90878aa..9ebaf63 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -740,10 +740,10 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
 	INTR_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -8
       .endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..70e181e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -99,32 +99,9 @@ void __init init_IRQ(void)
 	x86_init.irqs.intr_init();
 }
 
-/*
- * Setup the vector to irq mappings.
- */
-void setup_vector_irq(int cpu)
-{
-#ifndef CONFIG_X86_IO_APIC
-	int irq;
-
-	/*
-	 * On most of the platforms, legacy PIC delivers the interrupts on the
-	 * boot cpu. But there are certain platforms where PIC interrupts are
-	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
-	 * legacy PIC, for the new cpu that is coming online, setup the static
-	 * legacy vector to irq mapping:
-	 */
-	for (irq = 0; irq < nr_legacy_irqs(); irq++)
-		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
-#endif
-
-	__setup_vector_irq(cpu);
-}
-
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -144,7 +121,6 @@ static void __init smp_intr_init(void)
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif
 #endif /* CONFIG_SMP */
 }
 
@@ -159,7 +135,7 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+#ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -197,10 +173,17 @@ void __init native_init_IRQ(void)
 	 * 'special' SMP interrupts)
 	 */
 	i = FIRST_EXTERNAL_VECTOR;
-	for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifndef CONFIG_X86_LOCAL_APIC
+#define first_system_vector NR_VECTORS
+#endif
+	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
 		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
+#ifdef CONFIG_X86_LOCAL_APIC
+	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
+		set_intr_gate(i, spurious_interrupt);
+#endif
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f6945bef..94f6434 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
-	pv_info.paravirt_enabled = 1;
+
+	/*
+	 * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+	 * guest kernel works like a bare metal kernel with additional
+	 * features, and paravirt_enabled is about features that are
+	 * missing.
+	 */
+	pv_info.paravirt_enabled = 0;
 
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d9156ce..42caaef 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now)
 
 	native_write_msr(msr_kvm_wall_clock, low, high);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	pvclock_read_wallclock(&wall_clock, vcpu_time, now);
 
-	preempt_enable();
+	put_cpu();
 }
 
 static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void)
 	int cpu;
 	unsigned long tsc_khz;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	src = &hv_clock[cpu].pvti;
 	tsc_khz = pvclock_tsc_khz(src);
-	preempt_enable();
+	put_cpu();
 	return tsc_khz;
 }
 
@@ -263,7 +261,6 @@ void __init kvmclock_init(void)
 #endif
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	flags = pvclock_read_flags(vcpu_time);
 
 	if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
-		preempt_enable();
+		put_cpu();
 		return 1;
 	}
 
 	if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-		preempt_enable();
+		put_cpu();
 		return ret;
 	}
 
-	preempt_enable();
+	put_cpu();
 
 	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 72e8e31..469b23d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -20,6 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/cacheflush.h>
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4859810..415480d 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/io_apic.h>
 #include <asm/debugreg.h>
 #include <asm/kexec-bzimage64.h>
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 17962e6..bae6c60 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a8f584..6d7022c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1084,7 +1084,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int i;
 
-	preempt_disable();
 	smp_cpu_index_default();
 
 	/*
@@ -1102,22 +1101,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
-
 	if (smp_sanity_check(max_cpus) < 0) {
 		pr_info("SMP disabled\n");
 		disable_smp();
-		goto out;
+		return;
 	}
 
 	default_setup_apic_routing();
 
-	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
-	preempt_enable();
 
 	connect_bsp_APIC();
 
@@ -1151,8 +1147,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		uv_system_init();
 
 	set_mtrr_aps_delayed_init();
-out:
-	preempt_enable();
 }
 
 void arch_enable_nonboot_cpus_begin(void)
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3e551ee..4e942f3 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -55,12 +55,6 @@ static bool tls_desc_okay(const struct user_desc *info)
 	if (info->seg_not_present)
 		return false;
 
-#ifdef CONFIG_X86_64
-	/* The L bit makes no sense for data. */
-	if (info->lm)
-		return false;
-#endif
-
 	return true;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9ae205..88900e2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -331,7 +331,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 		break; /* Success, it was handled */
 	case 1: /* Bound violation. */
 		info = mpx_generate_siginfo(regs, xsave_buf);
-		if (PTR_ERR(info)) {
+		if (IS_ERR(info)) {
 			/*
 			 * We failed to decode the MPX instruction.  Act as if
 			 * the exception was not caused by MPX.
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4c540c4..0de1fae 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
 
 	return (void *)xsave + xstate_comp_offsets[feature];
 }
+EXPORT_SYMBOL_GPL(get_xsave_addr);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2..08f790d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
 
-kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o cpuid.o pmu.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index e05000e..6eb5c20 100644
--- a/virt/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -20,6 +20,32 @@
 #include <linux/namei.h>
 #include <linux/fs.h>
 #include "irq.h"
+#include "assigned-dev.h"
+
+struct kvm_assigned_dev_kernel {	/* state kept for one assigned device */
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct list_head list;	/* presumably links the entry into the list searched by kvm_find_assigned_dev() -- confirm */
+	int assigned_dev_id;	/* kvm_find_assigned_dev() takes an id of this name as its lookup key */
+	int host_segnr;
+	int host_busnr;
+	int host_devfn;
+	unsigned int entries_nr;
+	int host_irq;
+	bool host_irq_disabled;
+	bool pci_2_3;
+	struct msix_entry *host_msix_entries;
+	int guest_irq;
+	struct msix_entry *guest_msix_entries;
+	unsigned long irq_requested_type;
+	int irq_source_id;
+	int flags;
+	struct pci_dev *dev;	/* the pointer passed to kvm_assign_device() / kvm_deassign_device() */
+	struct kvm *kvm;
+	spinlock_t intx_lock;
+	spinlock_t intx_mask_lock;
+	char irq_name[32];
+	struct pci_saved_state *pci_saved_state;
+};
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
@@ -748,7 +774,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		if (r)
 			goto out_list_del;
 	}
-	r = kvm_assign_device(kvm, match);
+	r = kvm_assign_device(kvm, match->dev);
 	if (r)
 		goto out_list_del;
 
@@ -790,7 +816,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
 		goto out;
 	}
 
-	kvm_deassign_device(kvm, match);
+	kvm_deassign_device(kvm, match->dev);
 
 	kvm_free_assigned_device(kvm, match);
 
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
new file mode 100644
index 0000000..a428c1a
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.h
@@ -0,0 +1,32 @@
+#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
+#define ARCH_X86_KVM_ASSIGNED_DEV_H
+
+#include <linux/kvm_host.h>
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT	/* real implementations are declared only when the option is set */
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
+
+int kvm_iommu_map_guest(struct kvm *kvm);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg);
+
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+#else /* stubs for three entry points only; NOTE(review): the other three have none -- confirm their callers are built only with the option set */
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;	/* stub: reports success */
+}
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+						unsigned long arg)
+{
+	return -ENOTTY;	/* stub: every ioctl routed here is rejected */
+}
+
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}	/* stub: nothing to free */
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a5..8a80737 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,7 +23,7 @@
 #include "mmu.h"
 #include "trace.h"
 
-static u32 xstate_required_size(u64 xstate_bv)
+static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv)
 	xstate_bv &= XSTATE_EXTEND_MASK;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
-		        u32 eax, ebx, ecx, edx;
+		        u32 eax, ebx, ecx, edx, offset;
 		        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
-			ret = max(ret, eax + ebx);
+			offset = compacted ? ret : ebx;
+			ret = max(ret, offset + eax);
 		}
 
 		xstate_bv >>= 1;
@@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void)
 	return xcr0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 
 	/* Update OSXSAVE bit */
 	if (cpu_has_xsave && best->function == 0x1) {
-		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		best->ecx &= ~F(OSXSAVE);
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+			best->ecx |= F(OSXSAVE);
 	}
 
 	if (apic) {
-		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+		if (best->ecx & F(TSC_DEADLINE_TIMER))
 			apic->lapic_timer.timer_mode_mask = 3 << 17;
 		else
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 			(best->eax | ((u64)best->edx << 32)) &
 			kvm_supported_xcr0();
 		vcpu->arch.guest_xstate_size = best->ebx =
-			xstate_required_size(vcpu->arch.xcr0);
+			xstate_required_size(vcpu->arch.xcr0, false);
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
 	 * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
 			break;
 		}
 	}
-	if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
-		entry->edx &= ~bit(X86_FEATURE_NX);
+	if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
+		entry->edx &= ~F(NX);
 		printk(KERN_INFO "kvm: guest NX capability removed\n");
 	}
 }
@@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
-#define F(x) bit(X86_FEATURE_##x)
-
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
 				   u32 func, u32 index, int *nent, int maxnent)
 {
@@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
 	unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP);
+		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+		F(AVX512CD);
+
+	/* cpuid 0xD.1.eax */
+	const u32 kvm_supported_word10_x86_features =
+		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		u64 supported = kvm_supported_xcr0();
 
 		entry->eax &= supported;
+		entry->ebx = xstate_required_size(supported, false);
+		entry->ecx = entry->ebx;
 		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		if (!supported)
+			break;
+
 		for (idx = 1, i = 1; idx < 64; ++idx) {
 			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !(supported & mask))
-				continue;
+			if (idx == 1) {
+				entry[i].eax &= kvm_supported_word10_x86_features;
+				entry[i].ebx = 0;
+				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
+					entry[i].ebx =
+						xstate_required_size(supported,
+								     true);
+			} else {
+				if (entry[i].eax == 0 || !(supported & mask))
+					continue;
+				if (WARN_ON_ONCE(entry[i].ecx & 1))
+					continue;
+			}
+			entry[i].ecx = 0;
+			entry[i].edx = 0;
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9f8a2fa..169b09d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -123,6 +123,7 @@
 #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
+#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 /* Generic ModRM decode. */
 #define ModRM       (1<<19)
@@ -166,6 +167,8 @@
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 #define NoBigReal   ((u64)1 << 50)  /* No big real mode */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
+#define NearBranch  ((u64)1 << 52)  /* Near branches */
+#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -209,6 +212,7 @@ struct opcode {
 		const struct group_dual *gdual;
 		const struct gprefix *gprefix;
 		const struct escape *esc;
+		const struct instr_dual *idual;
 		void (*fastop)(struct fastop *fake);
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@ struct escape {
 	struct opcode high[64];
 };
 
+struct instr_dual {	/* pair of opcode entries referenced through opcode.u.idual */
+	struct opcode mod012;	/* presumably the entry for ModRM mod values 0, 1 and 2 -- confirm in the decoder */
+	struct opcode mod3;	/* alternate decoding for mod == 3 (see the InstrDual flag) */
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	ON64(FOP2E(op##q, rax, cl)) \
 	FOP_END
 
+/* 2 operand, src and dest are reversed: FOP2E is given (dl, al), (dx, ax), (edx, eax) and, via ON64, (rdx, rax) */
+#define FASTOP2R(op, name) \
+	FOP_START(name) \
+	FOP2E(op##b, dl, al) \
+	FOP2E(op##w, dx, ax) \
+	FOP2E(op##l, edx, eax) \
+	ON64(FOP2E(op##q, rdx, rax)) \
+	FOP_END
+
 #define FOP3E(op,  dst, src, src2) \
 	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
 
@@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 }
 
 static inline unsigned long
-register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+register_address(struct x86_emulate_ctxt *ctxt, int reg)
 {
-	return address_mask(ctxt, reg);
+	return address_mask(ctxt, reg_read(ctxt, reg));	/* reg is what reg_read() expects; the value read is then masked */
 }
 
 static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
 }
 
 static inline void
-register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
+register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
 {
 	ulong mask;
 
@@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
 		mask = ~0UL;
 	else
 		mask = ad_mask(ctxt);
-	masked_increment(reg, mask, inc);
+	masked_increment(reg_rmw(ctxt, reg), mask, inc);
 }
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
-static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-			       int cs_l)
-{
-	switch (ctxt->op_bytes) {
-	case 2:
-		ctxt->_eip = (u16)dst;
-		break;
-	case 4:
-		ctxt->_eip = (u32)dst;
-		break;
-#ifdef CONFIG_X86_64
-	case 8:
-		if ((cs_l && is_noncanonical_address(dst)) ||
-		    (!cs_l && (dst >> 32) != 0))
-			return emulate_gp(ctxt, 0);
-		ctxt->_eip = dst;
-		break;
-#endif
-	default:
-		WARN(1, "unsupported eip assignment size\n");
-	}
-	return X86EMUL_CONTINUE;
-}
-
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
-{
-	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
-}
-
-static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	return assign_eip_near(ctxt, ctxt->_eip + rel);
-}
-
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
 		return true;
 }
 
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-		     struct segmented_address addr,
-		     unsigned *max_size, unsigned size,
-		     bool write, bool fetch,
-		     ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+				       struct segmented_address addr,
+				       unsigned *max_size, unsigned size,
+				       bool write, bool fetch,
+				       enum x86emul_mode mode, ulong *linear)
 {
 	struct desc_struct desc;
 	bool usable;
 	ulong la;
 	u32 lim;
 	u16 sel;
-	unsigned cpl;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	*max_size = 0;
-	switch (ctxt->mode) {
+	switch (mode) {
 	case X86EMUL_MODE_PROT64:
-		if (((signed long)la << 16) >> 16 != la)
-			return emulate_gp(ctxt, 0);
+		if (is_noncanonical_address(la))
+			goto bad;
 
 		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
 		if (size > *max_size)
@@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 		if (!fetch && (desc.type & 8) && !(desc.type & 2))
 			goto bad;
 		lim = desc_limit_scaled(&desc);
-		if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
-		    (ctxt->d & NoBigReal)) {
-			/* la is between zero and 0xffff */
-			if (la > 0xffff)
-				goto bad;
-			*max_size = 0x10000 - la;
-		} else if ((desc.type & 8) || !(desc.type & 4)) {
-			/* expand-up segment */
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-		} else {
+		if (!(desc.type & 8) && (desc.type & 4)) {
 			/* expand-down segment */
 			if (addr.ea <= lim)
 				goto bad;
 			lim = desc.d ? 0xffffffff : 0xffff;
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		}
+		if (addr.ea > lim)
+			goto bad;
+		*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		if (size > *max_size)
 			goto bad;
-		cpl = ctxt->ops->cpl(ctxt);
-		if (!(desc.type & 8)) {
-			/* data segment */
-			if (cpl > desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && !(desc.type & 4)) {
-			/* nonconforming code segment */
-			if (cpl != desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && (desc.type & 4)) {
-			/* conforming code segment */
-			if (cpl < desc.dpl)
-				goto bad;
-		}
+		la &= (u32)-1;
 		break;
 	}
-	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-		la &= (u32)-1;
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
 		return emulate_gp(ctxt, 0);
 	*linear = la;
@@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
 		     ulong *linear)
 {
 	unsigned max_size;
-	return __linearize(ctxt, addr, &max_size, size, write, false, linear);
+	return __linearize(ctxt, addr, &max_size, size, write, false,
+			   ctxt->mode, linear);
+}
+
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+			     enum x86emul_mode mode)
+{
+	ulong linear;	/* output slot for __linearize(); not used afterwards */
+	int rc;
+	unsigned max_size;	/* output slot for __linearize(); not used afterwards */
+	struct segmented_address addr = { .seg = VCPU_SREG_CS,
+					   .ea = dst };
+
+	if (ctxt->op_bytes != sizeof(unsigned long))	/* a full-width shift below would be undefined, so skip the mask then */
+		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);	/* keep only the low op_bytes bytes of dst */
+	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);	/* check CS:ea as a 1-byte fetch (write=false, fetch=true) under 'mode' */
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->_eip = addr.ea;	/* _eip is updated only when the check passed */
+	return rc;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip(ctxt, dst, ctxt->mode);	/* near transfer: validate dst under the current emulation mode */
 }
 
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+			  const struct desc_struct *cs_desc)
+{
+	enum x86emul_mode mode = ctxt->mode;	/* mode used to validate dst; refined from cs_desc below */
+
+#ifdef CONFIG_X86_64
+	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {	/* descriptor has its L bit set */
+		u64 efer = 0;	/* pre-zeroed so the test below sees 0 unless get_msr() stores a value */
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);	/* return value is not checked */
+		if (efer & EFER_LMA)	/* L bit together with EFER.LMA selects 64-bit mode */
+			mode = X86EMUL_MODE_PROT64;
+	}
+#endif
+	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;	/* the descriptor's D bit picks 32- vs 16-bit */
+	return assign_eip(ctxt, dst, mode);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);	/* target = current _eip plus the signed offset rel */
+}
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 			      struct segmented_address addr,
@@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 	 * boundary check itself.  Instead, we use max_size to check
 	 * against op_size.
 	 */
-	rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
+	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
+			 &linear);
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return rc;
 
@@ -911,6 +915,8 @@ FASTOP2W(btc);
 
 FASTOP2(xadd);
 
+FASTOP2R(cmp, cmp_r);
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			if (index_reg != 4)
 				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
+			modrm_ea += insn_fetch(s32, ctxt);
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
@@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
-		case 0:
-			if (ctxt->modrm_rm == 5)
-				modrm_ea += insn_fetch(s32, ctxt);
-			break;
 		case 1:
 			modrm_ea += insn_fetch(s8, ctxt);
 			break;
@@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
 		else
 			sv = (s64)ctxt->src.val & (s64)mask;
 
-		ctxt->dst.addr.mem.ea += (sv >> 3);
+		ctxt->dst.addr.mem.ea = address_mask(ctxt,
+					   ctxt->dst.addr.mem.ea + (sv >> 3));
 	}
 
 	/* only subword offset */
@@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				sizeof(base3), &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
+		if (is_noncanonical_address(get_desc_base(&seg_desc) |
+					     ((u64)base3 << 32)))
+			return emulate_gp(ctxt, 0);
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
 	int seg = ctxt->src2.val;
 
 	ctxt->src.val = get_segment_selector(ctxt, seg);
+	if (ctxt->op_bytes == 4) {
+		rsp_increment(ctxt, -2);
+		ctxt->op_bytes = 2;
+	}
 
 	return em_push(ctxt);
 }
@@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val =  (unsigned long)ctxt->eflags;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
 	return em_push(ctxt);
 }
 
@@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		/* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
 {
-	int rc = X86EMUL_CONTINUE;
+	return assign_eip_near(ctxt, ctxt->src.val);	/* jump target is taken from src.val */
+}
 
-	switch (ctxt->modrm_reg) {
-	case 2: /* call near abs */ {
-		long int old_eip;
-		old_eip = ctxt->_eip;
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		if (rc != X86EMUL_CONTINUE)
-			break;
-		ctxt->src.val = old_eip;
-		rc = em_push(ctxt);
-		break;
-	}
-	case 4: /* jmp abs */
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		break;
-	case 5: /* jmp far */
-		rc = em_jmp_far(ctxt);
-		break;
-	case 6:	/* push */
-		rc = em_push(ctxt);
-		break;
-	}
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)	/* near call to the address held in src.val */
+{
+	int rc;
+	long int old_eip;
+
+	old_eip = ctxt->_eip;	/* saved before assign_eip_near() can overwrite _eip */
+	rc = assign_eip_near(ctxt, ctxt->src.val);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;	/* target rejected: nothing is pushed */
+	ctxt->src.val = old_eip;	/* presumably em_push() pushes src.val, i.e. the old _eip -- confirm */
+	rc = em_push(ctxt);
 	return rc;
 }
 
@@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	rc = assign_eip_far(ctxt, eip, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~msr_data;
+		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
 #endif
 	} else {
 		/* legacy mode */
@@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	    && !vendor_intel(ctxt))
 		return emulate_ud(ctxt);
 
-	/* XXX sysenter/sysexit have not been tested in 64bit mode.
-	* Therefore, we inject an #UD.
-	*/
+	/* sysenter/sysexit have not been tested in 64bit mode. */
 	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		return emulate_ud(ctxt);
+		return X86EMUL_UNHANDLEABLE;
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		if ((msr_data & 0xfffc) == 0x0)
 			return emulate_gp(ctxt, 0);
 		ss_sel = (u16)(msr_data + 24);
+		rcx = (u32)rcx;
+		rdx = (u32)rdx;
 		break;
 	case X86EMUL_MODE_PROT64:
 		cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			     &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 				     sizeof tss_seg.prev_task_link,
 				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
-			/* FIXME: need to provide precise fault address */
 			return ret;
 	}
 
@@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 	 *
 	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
 	 * 2. Exception/IRQ/iret: No check is performed
-	 * 3. jmp/call to TSS: Check against DPL of the TSS
+	 * 3. jmp/call to TSS/task-gate: No check is performed since the
+	 *    hardware checks it before exiting.
 	 */
 	if (reason == TASK_SWITCH_GATE) {
 		if (idt_index != -1) {
@@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
 				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
 		}
-	} else if (reason != TASK_SWITCH_IRET) {
-		int dpl = next_tss_desc.dpl;
-		if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
-			return emulate_gp(ctxt, tss_selector);
 	}
 
-
 	desc_limit = desc_limit_scaled(&next_tss_desc);
 	if (!next_tss_desc.p ||
 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
+	register_address_increment(ctxt, reg, df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg);
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		goto fail;
 
@@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
 		return emulate_ud(ctxt);
 
 	ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
+		ctxt->dst.bytes = 2;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
 	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
 }
 
-static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 {
 	struct desc_ptr desc_ptr;
 	int rc;
@@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
 			     ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
+	    is_noncanonical_address(desc_ptr.address))
+		return emulate_gp(ctxt, 0);
+	if (lgdt)
+		ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	else
+		ctxt->ops->set_idt(ctxt, &desc_ptr);
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
 	return X86EMUL_CONTINUE;
 }
 
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+	return em_lgdt_lidt(ctxt, true);
+}
+
 static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
@@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
-	struct desc_ptr desc_ptr;
-	int rc;
-
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ctxt->op_bytes = 8;
-	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
-			     &desc_ptr.size, &desc_ptr.address,
-			     ctxt->op_bytes);
-	if (rc != X86EMUL_CONTINUE)
-		return rc;
-	ctxt->ops->set_idt(ctxt, &desc_ptr);
-	/* Disable writeback. */
-	ctxt->dst.type = OP_NONE;
-	return X86EMUL_CONTINUE;
+	return em_lgdt_lidt(ctxt, false);
 }
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
 	int rc = X86EMUL_CONTINUE;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 		if (efer & EFER_LMA)
-			rsvd = CR3_L_MODE_RESERVED_BITS;
+			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
 
 		if (new_val & rsvd)
 			return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
 	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
 		return emulate_ud(ctxt);
 
-	if (check_dr7_gd(ctxt))
+	if (check_dr7_gd(ctxt)) {
+		ulong dr6;
+
+		ctxt->ops->get_dr(ctxt, 6, &dr6);
+		dr6 &= ~15;
+		dr6 |= DR6_BD | DR6_RTM;
+		ctxt->ops->set_dr(ctxt, 6, dr6);
 		return emulate_db(ctxt);
+	}
 
 	return X86EMUL_CONTINUE;
 }
@@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@ static const struct opcode group4[] = {
 static const struct opcode group5[] = {
 	F(DstMem | SrcNone | Lock,		em_inc),
 	F(DstMem | SrcNone | Lock,		em_dec),
-	I(SrcMem | Stack,			em_grp45),
+	I(SrcMem | NearBranch,			em_call_near_abs),
 	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
-	I(SrcMem | Stack,			em_grp45),
-	I(SrcMemFAddr | ImplicitOps,		em_grp45),
-	I(SrcMem | Stack,			em_grp45), D(Undefined),
+	I(SrcMem | NearBranch,			em_jmp_abs),
+	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
+	I(SrcMem | Stack,			em_push), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
+static const struct instr_dual instr_dual_0f_2b = {
+	I(0, em_mov), N
+};
+
 static const struct gprefix pfx_0f_2b = {
-	I(0, em_mov), I(0, em_mov), N, N,
+	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
 };
 
 static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { {
 	N, N, N, N, N, N, N, N,
 } };
 
+static const struct instr_dual instr_dual_0f_c3 = {
+	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
+};
+
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = {
 	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
-	X16(D(SrcImmByte)),
+	X16(D(SrcImmByte | NearBranch)),
 	/* 0x80 - 0x87 */
 	G(ByteOp | DstMem | SrcImm, group1),
 	G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = {
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
-	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xA8 - 0xAF */
 	F2bv(DstAcc | SrcImm | NoWrite, em_test),
 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xB0 - 0xB7 */
 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
 	/* 0xB8 - 0xBF */
 	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
 	/* 0xC0 - 0xC7 */
 	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
-	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
-	I(ImplicitOps | Stack, em_ret),
+	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+	I(ImplicitOps | NearBranch, em_ret),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
 	G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = {
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
-	X3(I(SrcImmByte, em_loop)),
-	I(SrcImmByte, em_jcxz),
+	X3(I(SrcImmByte | NearBranch, em_loop)),
+	I(SrcImmByte | NearBranch, em_jcxz),
 	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
-	I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+	I(SrcImmFAddr | No64, em_jmp_far),
+	D(SrcImmByte | ImplicitOps | NearBranch),
 	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N,
 	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
-	X16(D(SrcImm)),
+	X16(D(SrcImm | NearBranch)),
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = {
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
 	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
-	N, D(DstMem | SrcReg | ModRM | Mov),
+	N, ID(0, &instr_dual_0f_c3),
 	N, N, N, GD(0, &group9),
 	/* 0xC8 - 0xCF */
 	X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = {
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct instr_dual instr_dual_0f_38_f0 = {
+	I(DstReg | SrcMem | Mov, em_movbe), N
+};
+
+static const struct instr_dual instr_dual_0f_38_f1 = {
+	I(DstMem | SrcReg | Mov, em_movbe), N
+};
+
 static const struct gprefix three_byte_0f_38_f0 = {
-	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f0), N, N, N
 };
 
 static const struct gprefix three_byte_0f_38_f1 = {
-	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f1), N, N, N
 };
 
 /*
@@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
 	/* 0x80 - 0xef */
 	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
 	/* 0xf0 - 0xf1 */
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
 	/* 0xf2 - 0xff */
 	N, N, X4(N), X8(N)
 };
@@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
+			register_address(ctxt, VCPU_REGS_RDI);
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		op->count = 1;
@@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
+			register_address(ctxt, VCPU_REGS_RSI);
 		op->addr.mem.seg = ctxt->seg_override;
 		op->val = 0;
 		op->count = 1;
@@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt,
+			address_mask(ctxt,
 				reg_read(ctxt, VCPU_REGS_RBX) +
 				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
 		op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@ done_prefixes:
 
 	/* vex-prefix instructions are not implemented */
 	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
-	    (mode == X86EMUL_MODE_PROT64 ||
-	    (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
 		ctxt->d = NotImpl;
 	}
 
@@ -4549,6 +4568,12 @@ done_prefixes:
 			else
 				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
 			break;
+		case InstrDual:
+			if ((ctxt->modrm >> 6) == 3)
+				opcode = opcode.u.idual->mod3;
+			else
+				opcode = opcode.u.idual->mod012;
+			break;
 		default:
 			return EMULATION_FAILED;
 		}
@@ -4567,7 +4592,8 @@ done_prefixes:
 		return EMULATION_FAILED;
 
 	if (unlikely(ctxt->d &
-		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
+	     No16))) {
 		/*
 		 * These are copied unconditionally here, and checked unconditionally
 		 * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@ done_prefixes:
 		if (ctxt->d & NotImpl)
 			return EMULATION_FAILED;
 
-		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-			ctxt->op_bytes = 8;
+		if (mode == X86EMUL_MODE_PROT64) {
+			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+				ctxt->op_bytes = 8;
+			else if (ctxt->d & NearBranch)
+				ctxt->op_bytes = 8;
+		}
 
 		if (ctxt->d & Op3264) {
 			if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@ done_prefixes:
 				ctxt->op_bytes = 4;
 		}
 
+		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
+			ctxt->op_bytes = 4;
+
 		if (ctxt->d & Sse)
 			ctxt->op_bytes = 16;
 		else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@ done_prefixes:
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 	if (ctxt->rip_relative)
-		ctxt->memopp->addr.mem.ea += ctxt->_eip;
+		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
+					ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 				goto done;
 		}
 
+		/* Instruction can only be executed in protected mode */
+		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
 		/* Privileged instruction can be executed only in CPL=0 */
 		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
 			if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 			goto done;
 		}
 
-		/* Instruction can only be executed in protected mode */
-		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-			rc = emulate_ud(ctxt);
-			goto done;
-		}
-
 		/* Do instruction specific permission checks */
 		if (ctxt->d & CheckPerm) {
 			rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@ writeback:
 			count = ctxt->src.count;
 		else
 			count = ctxt->dst.count;
-		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-				-count);
+		register_address_increment(ctxt, VCPU_REGS_RCX, -count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
@@ -5053,11 +5086,6 @@ twobyte_insn:
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
 							(s16) ctxt->src.val;
 		break;
-	case 0xc3:		/* movnti */
-		ctxt->dst.bytes = ctxt->op_bytes;
-		ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
-							(u32) ctxt->src.val;
-		break;
 	default:
 		goto cannot_emulate;
 	}
diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 0ba4057..b1947e0 100644
--- a/virt/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -270,7 +270,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 	spin_unlock(&ioapic->lock);
 }
 
-#ifdef CONFIG_X86
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
@@ -279,12 +278,6 @@ void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 		return;
 	kvm_make_scan_ioapic_request(kvm);
 }
-#else
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
-{
-	return;
-}
-#endif
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
@@ -586,11 +579,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	case IOAPIC_REG_WINDOW:
 		ioapic_write_indirect(ioapic, data);
 		break;
-#ifdef	CONFIG_IA64
-	case IOAPIC_REG_EOI:
-		__kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
-		break;
-#endif
 
 	default:
 		break;
diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index e23b706..3c91955 100644
--- a/virt/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -19,7 +19,6 @@ struct kvm_vcpu;
 /* Direct registers. */
 #define IOAPIC_REG_SELECT  0x00
 #define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
 
 /* Indirect registers. */
 #define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
@@ -45,6 +44,23 @@ struct rtc_status {
 	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
 };
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
 struct kvm_ioapic {
 	u64 base_address;
 	u32 ioregsel;
@@ -83,7 +99,7 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode);
+		int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 			int trigger_mode);
@@ -97,7 +113,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 			u32 *tmr);
 
diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c
index c1e6ae9..17b73ee 100644
--- a/virt/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -31,6 +31,7 @@
 #include <linux/dmar.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include "assigned-dev.h"
 
 static bool allow_unsafe_assigned_interrupts;
 module_param_named(allow_unsafe_assigned_interrupts,
@@ -169,10 +170,8 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	return r;
 }
 
-int kvm_assign_device(struct kvm *kvm,
-		      struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	int r;
 	bool noncoherent;
@@ -181,7 +180,6 @@ int kvm_assign_device(struct kvm *kvm,
 	if (!domain)
 		return 0;
 
-	pdev = assigned_dev->dev;
 	if (pdev == NULL)
 		return -ENODEV;
 
@@ -212,17 +210,14 @@ out_unmap:
 	return r;
 }
 
-int kvm_deassign_device(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
 {
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	struct pci_dev *pdev = NULL;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
-	pdev = assigned_dev->dev;
 	if (pdev == NULL)
 		return -ENODEV;
 
diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 963b899..72298b3 100644
--- a/virt/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -26,9 +26,6 @@
 #include <trace/events/kvm.h>
 
 #include <asm/msidef.h>
-#ifdef CONFIG_IA64
-#include <asm/iosapic.h>
-#endif
 
 #include "irq.h"
 
@@ -38,12 +35,8 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
 			   struct kvm *kvm, int irq_source_id, int level,
 			   bool line_status)
 {
-#ifdef CONFIG_X86
 	struct kvm_pic *pic = pic_irqchip(kvm);
 	return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
-#else
-	return -1;
-#endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
@@ -57,12 +50,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 {
-#ifdef CONFIG_IA64
-	return irq->delivery_mode ==
-		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
-#else
 	return irq->delivery_mode == APIC_DM_LOWEST;
-#endif
 }
 
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
@@ -202,9 +190,7 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	}
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
 	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
 	set_bit(irq_source_id, bitmap);
 unlock:
 	mutex_unlock(&kvm->irq_lock);
@@ -215,9 +201,7 @@ unlock:
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
 	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
 
 	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
@@ -230,9 +214,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		goto unlock;
 
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
-#ifdef CONFIG_X86
 	kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
-#endif
 unlock:
 	mutex_unlock(&kvm->irq_lock);
 }
@@ -242,7 +224,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 {
 	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
-	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
+	hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -264,7 +246,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
+		hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
@@ -322,16 +304,11 @@ out:
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
 #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
 
-#ifdef CONFIG_X86
-#  define PIC_ROUTING_ENTRY(irq) \
+#define PIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
-#  define ROUTING_ENTRY2(irq) \
+#define ROUTING_ENTRY2(irq) \
 	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
-#else
-#  define ROUTING_ENTRY2(irq) \
-	IOAPIC_ROUTING_ENTRY(irq)
-#endif
 
 static const struct kvm_irq_routing_entry default_routing[] = {
 	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
@@ -346,20 +323,6 @@ static const struct kvm_irq_routing_entry default_routing[] = {
 	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
 	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
 	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
-#ifdef CONFIG_IA64
-	ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
-	ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
-	ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
-	ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
-	ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
-	ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
-	ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
-	ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
-	ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
-	ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
-	ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
-	ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
-#endif
 };
 
 int kvm_setup_default_irq_routing(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd..4f0c0b9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
 #define MAX_APIC_VECTOR			256
 #define APIC_VECTORS_PER_REG		32
 
+#define APIC_BROADCAST			0xFF
+#define X2APIC_BROADCAST		0xFFFFFFFFul
+
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
@@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-#define KVM_X2APIC_CID_BITS 0
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm)
 	new->cid_shift = 8;
 	new->cid_mask = 0;
 	new->lid_mask = 0xff;
+	new->broadcast = APIC_BROADCAST;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
-		u16 cid, lid;
-		u32 ldr;
 
 		if (!kvm_apic_present(vcpu))
 			continue;
 
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+			new->broadcast = X2APIC_BROADCAST;
+		} else if (kvm_apic_get_reg(apic, APIC_LDR)) {
+			if (kvm_apic_get_reg(apic, APIC_DFR) ==
+							APIC_DFR_CLUSTER) {
+				new->cid_shift = 4;
+				new->cid_mask = 0xf;
+				new->lid_mask = 0xf;
+			} else {
+				new->cid_shift = 8;
+				new->cid_mask = 0;
+				new->lid_mask = 0xff;
+			}
+		}
+
 		/*
 		 * All APICs have to be configured in the same mode by an OS.
 		 * We take advatage of this while building logical id loockup
-		 * table. After reset APICs are in xapic/flat mode, so if we
-		 * find apic with different setting we assume this is the mode
+		 * table. After reset APICs are in software disabled mode, so if
+		 * we find apic with different setting we assume this is the mode
 		 * OS wants all apics to be in; build lookup table accordingly.
 		 */
-		if (apic_x2apic_mode(apic)) {
-			new->ldr_bits = 32;
-			new->cid_shift = 16;
-			new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
-			new->lid_mask = 0xffff;
-		} else if (kvm_apic_sw_enabled(apic) &&
-				!new->cid_mask /* flat mode */ &&
-				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
-			new->cid_shift = 4;
-			new->cid_mask = 0xf;
-			new->lid_mask = 0xf;
-		}
+		if (kvm_apic_sw_enabled(apic))
+			break;
+	}
 
-		new->phys_map[kvm_apic_id(apic)] = apic;
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr, aid;
 
+		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
 		cid = apic_cluster_id(new, ldr);
 		lid = apic_logical_id(new, ldr);
 
-		if (lid)
+		if (aid < ARRAY_SIZE(new->phys_map))
+			new->phys_map[aid] = apic;
+		if (lid && cid < ARRAY_SIZE(new->logical_map))
 			new->logical_map[cid][ffs(lid) - 1] = apic;
 	}
 out:
@@ -201,11 +216,13 @@ out:
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+	bool enabled = val & APIC_SPIV_APIC_ENABLED;
 
 	apic_set_reg(apic, APIC_SPIV, val);
-	if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
-		if (val & APIC_SPIV_APIC_ENABLED) {
+
+	if (enabled != apic->sw_enabled) {
+		apic->sw_enabled = enabled;
+		if (enabled) {
 			static_key_slow_dec_deferred(&apic_sw_disabled);
 			recalculate_apic_map(apic->vcpu->kvm);
 		} else
@@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) ==
-			APIC_LVT_TIMER_TSCDEADLINE);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 {
-	apic->irr_pending = true;
 	apic_set_vector(vec, apic->regs + APIC_IRR);
+	/*
+	 * irr_pending must be true if any interrupt is pending; set it after
+	 * APIC_IRR to avoid race with apic_clear_irr
+	 */
+	apic->irr_pending = true;
 }
 
 static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
 	vcpu = apic->vcpu;
 
-	apic_clear_vector(vec, apic->regs + APIC_IRR);
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
 		/* try to update RVI */
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-	else {
-		vec = apic_search_irr(apic);
-		apic->irr_pending = (vec != -1);
+	} else {
+		apic->irr_pending = false;
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		if (apic_search_irr(apic) != -1)
+			apic->irr_pending = true;
 	}
 }
 
@@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 	apic_update_ppr(apic);
 }
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+{
+	return dest == (apic_x2apic_mode(apic) ?
+			X2APIC_BROADCAST : APIC_BROADCAST);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
 {
-	return dest == 0xff || kvm_apic_id(apic) == dest;
+	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 }
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
 	int result = 0;
 	u32 logical_id;
 
+	if (kvm_apic_broadcast(apic, mda))
+		return 1;
+
 	if (apic_x2apic_mode(apic)) {
 		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 		return logical_id & mda;
@@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 }
 
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-			   int short_hand, int dest, int dest_mode)
+			   int short_hand, unsigned int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	if (!map)
 		goto out;
 
+	if (irq->dest_id == map->broadcast)
+		goto out;
+
+	ret = true;
+
 	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff)
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 			goto out;
-		dst = &map->phys_map[irq->dest_id & 0xff];
+
+		dst = &map->phys_map[irq->dest_id];
 	} else {
 		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+		u16 cid = apic_cluster_id(map, mda);
+
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
 
-		dst = map->logical_map[apic_cluster_id(map, mda)];
+		dst = map->logical_map[cid];
 
 		bitmap = apic_logical_id(map, mda);
 
@@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 			*r = 0;
 		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}
-
-	ret = true;
 out:
 	rcu_read_unlock();
 	return ret;
@@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic)
 				   apic->divide_count);
 }
 
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/*
+	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+	 * vcpu_enter_guest.
+	 */
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	atomic_inc(&apic->lapic_timer.pending);
+	/* FIXME: this code should not know anything about vcpus */
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+}
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
@@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
-		}
-		hrtimer_start(&apic->lapic_timer.timer,
-			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+			hrtimer_start(&apic->lapic_timer.timer,
+				ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+		} else
+			apic_timer_expired(apic);
 
 		local_irq_restore(flags);
 	}
@@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 		break;
 
-	case APIC_LVTT:
-		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		    apic->lapic_timer.timer_mode_mask) !=
-		   (val & apic->lapic_timer.timer_mode_mask))
+	case APIC_LVTT: {
+		u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+		if (apic->lapic_timer.timer_mode != timer_mode) {
+			apic->lapic_timer.timer_mode = timer_mode;
 			hrtimer_cancel(&apic->lapic_timer.timer);
+		}
 
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
 		apic_set_reg(apic, APIC_LVTT, val);
 		break;
+	}
 
 	case APIC_TMICT:
 		if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+	if (!apic->sw_enabled)
 		static_key_slow_dec_deferred(&apic_sw_disabled);
 
 	if (apic->regs)
@@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 		return;
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
-	/* Inject here so clearing tscdeadline won't override new value */
-	if (apic_has_pending_timer(vcpu))
-		kvm_inject_apic_timer_irqs(vcpu);
 	apic->lapic_timer.tscdeadline = data;
 	start_apic_timer(apic);
 }
@@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
+	if ((value & MSR_IA32_APICBASE_ENABLE) &&
+	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
+		pr_warn_once("APIC base relocation is unsupported by KVM\n");
+
 	/* with FSB delivery interrupt, we can restart APIC functionality */
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
 		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+	apic->lapic_timer.timer_mode = 0;
 	apic_set_reg(apic, APIC_LVT0,
 		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
@@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (!atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
 
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	apic_timer_expired(apic);
 
 	if (lapic_is_periodic(apic)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
 				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
+	if (kvm_x86_ops->hwapic_irr_update)
+		kvm_x86_ops->hwapic_irr_update(vcpu,
+				apic_find_highest_irr(apic));
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_ICR2)
+		return 1;
+
 	/* if this is ICR write vector before command */
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
 	return apic_reg_write(apic, reg, (u32)data);
 }
@@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_DFR || reg == APIC_ICR2) {
+		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+			   reg);
+		return 1;
+	}
+
 	if (apic_reg_read(apic, reg, 4, &low))
 		return 1;
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_read(apic, APIC_ICR2, 4, &high);
 
 	*data = (((u64)high) << 32) | low;
@@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	unsigned int sipi_vector;
+	u8 sipi_vector;
 	unsigned long pe;
 
 	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..c674fce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
+	u32 timer_mode;
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
@@ -22,6 +23,7 @@ struct kvm_lapic {
 	struct kvm_timer lapic_timer;
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
+	bool sw_enabled;
 	bool irr_pending;
 	/* Number of bits set in ISR. */
 	s16 isr_count;
@@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
 
 extern struct static_key_deferred apic_sw_disabled;
 
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
 {
 	if (static_key_false(&apic_sw_disabled.key))
-		return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-	return APIC_SPIV_APIC_ENABLED;
+		return apic->sw_enabled;
+	return true;
 }
 
 static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
 	ldr >>= 32 - map->ldr_bits;
 	cid = (ldr >> map->cid_shift) & map->cid_mask;
 
-	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
-
 	return cid;
 }
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 978f402..10fbed1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_GEN_LOW_SHIFT		10
 #define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
 	u64 mask;
 
-	WARN_ON(gen > MMIO_MAX_GEN);
+	WARN_ON(gen & ~MMIO_GEN_MASK);
 
 	mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
 	mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) & ~PAGE_MASK;
 }
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cef..41dd038 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	WARN_ON(adjustment < 0);
-	if (host)
-		adjustment = svm_scale_tsc(vcpu, adjustment);
+	if (host) {
+		if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+			WARN_ON(adjustment < 0);
+		adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+	}
 
 	svm->vmcb->control.tsc_offset += adjustment;
 	if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm)
 {
 	int reg, dr;
 	unsigned long val;
-	int err;
 
 	if (svm->vcpu.guest_debug == 0) {
 		/*
@@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm)
 	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
 
 	if (dr >= 16) { /* mov to DRn */
+		if (!kvm_require_dr(&svm->vcpu, dr - 16))
+			return 1;
 		val = kvm_register_read(&svm->vcpu, reg);
 		kvm_set_dr(&svm->vcpu, dr - 16, val);
 	} else {
-		err = kvm_get_dr(&svm->vcpu, dr, &val);
-		if (!err)
-			kvm_register_write(&svm->vcpu, reg, val);
+		if (!kvm_require_dr(&svm->vcpu, dr))
+			return 1;
+		kvm_get_dr(&svm->vcpu, dr, &val);
+		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
 	skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void)
 	return false;
 }
 
+static bool svm_xsaves_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.rdtscp_supported = svm_rdtscp_supported,
 	.invpcid_supported = svm_invpcid_supported,
 	.mpx_supported = svm_mpx_supported,
+	.xsaves_supported = svm_xsaves_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6b06ab8..c2a34bb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -5,6 +5,7 @@
 #include <asm/vmx.h>
 #include <asm/svm.h>
 #include <asm/clocksource.h>
+#include <asm/pvclock-abi.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window,
 #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
 	trace_kvm_ple_window(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_pvclock_update,
+	TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
+	TP_ARGS(vcpu_id, pvclock),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id			)
+		__field(	__u32,		version			)
+		__field(	__u64,		tsc_timestamp		)
+		__field(	__u64,		system_time		)
+		__field(	__u32,		tsc_to_system_mul	)
+		__field(	__s8,		tsc_shift		)
+		__field(	__u8,		flags			)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	   = vcpu_id;
+		__entry->version	   = pvclock->version;
+		__entry->tsc_timestamp	   = pvclock->tsc_timestamp;
+		__entry->system_time	   = pvclock->system_time;
+		__entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
+		__entry->tsc_shift	   = pvclock->tsc_shift;
+		__entry->flags		   = pvclock->flags;
+	),
+
+	TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
+		  "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
+		  "flags 0x%x }",
+		  __entry->vcpu_id,
+		  __entry->version,
+		  __entry->tsc_timestamp,
+		  __entry->system_time,
+		  __entry->tsc_to_system_mul,
+		  __entry->tsc_shift,
+		  __entry->flags)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..feb852b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-	 | X86_CR4_OSXMMEXCPT)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 ept_pointer;
+	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-	if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-		return -1;
+	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+	    vmcs_field_to_offset_table[field] == 0)
+		return -ENOENT;
+
 	return vmcs_field_to_offset_table[field];
 }
 
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+		vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+	/*
+	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
+	 * On CPUs that support "load IA32_EFER", always switch EFER
+	 * atomically, since it's faster than switching it manually.
+	 */
+	if (cpu_has_load_ia32_efer ||
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+		if (guest_efer != host_efer)
+			add_atomic_switch_msr(vmx, MSR_EFER,
+					      guest_efer, host_efer);
 		return false;
 	}
 
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
-		SECONDARY_EXEC_WBINVD_EXITING;
+		SECONDARY_EXEC_WBINVD_EXITING |
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return 1; /* they are read-only */
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		/*
+		 * The only supported bit as of Skylake is bit 8, but
+		 * it is not supported on KVM.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		if (vcpu->arch.ia32_xss != host_xss)
+			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+				vcpu->arch.ia32_xss, host_xss);
+		else
+			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS;
+			SECONDARY_EXEC_SHADOW_VMCS |
+			SECONDARY_EXEC_XSAVES;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		}
 	}
 
+	if (cpu_has_xsaves)
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	return 0;
 }
 
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
 	return 0;
 }
 
-static __init int hardware_setup(void)
-{
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
-
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
-		enable_ept = 0;
-		enable_unrestricted_guest = 0;
-		enable_ept_ad_bits = 0;
-	}
-
-	if (!cpu_has_vmx_ept_ad_bits())
-		enable_ept_ad_bits = 0;
-
-	if (!cpu_has_vmx_unrestricted_guest())
-		enable_unrestricted_guest = 0;
-
-	if (!cpu_has_vmx_flexpriority()) {
-		flexpriority_enabled = 0;
-
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
-
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
-
-	if (enable_ept && !cpu_has_vmx_ept_2m_page())
-		kvm_disable_largepages();
-
-	if (!cpu_has_vmx_ple())
-		ple_gap = 0;
-
-	if (!cpu_has_vmx_apicv())
-		enable_apicv = 0;
-
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
-	return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-	free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx_xsaves_supported())
+		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
 	return 0;
 }
 
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int dr, reg;
+	int dr, dr7, reg;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+	/* First, if DR does not exist, trigger UD */
+	if (!kvm_require_dr(vcpu, dr))
+		return 1;
 
 	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
+	dr7 = vmcs_readl(GUEST_DR7);
+	if (dr7 & DR7_GD) {
 		/*
 		 * As the vm-exit takes precedence over the debug trap, we
 		 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 			vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-			vcpu->run->debug.arch.dr7 = dr;
-			vcpu->run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
+			vcpu->run->debug.arch.dr7 = dr7;
+			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 			vcpu->run->debug.arch.exception = DB_VECTOR;
 			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
 			return 0;
 		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 &= ~15;
 			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	trace_kvm_page_fault(gpa, exit_qualification);
 
 	/* It is a write fault? */
-	error_code = exit_qualification & (1U << 1);
+	error_code = exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
-	error_code |= (exit_qualification & (1U << 2)) << 2;
+	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & 0x1;
+	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
 			                    ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According to the SDM, in x2apic mode the whole ID register
+		 * is used, but KVM only uses the highest eight bits, so reads
+		 * of it must be intercepted. */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
+	if (!cpu_has_vmx_ept() ||
+	    !cpu_has_vmx_ept_4levels()) {
+		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+		enable_ept_ad_bits = 0;
+	}
+
+	if (!cpu_has_vmx_ept_ad_bits())
+		enable_ept_ad_bits = 0;
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
+
+	if (!cpu_has_vmx_flexpriority()) {
+		flexpriority_enabled = 0;
+
+		/*
+		 * set_apic_access_page_addr() is used to reload apic access
+		 * page upon invalidation.  No need to do anything if the
+		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
+		 */
+		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	}
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
+	if (!cpu_has_vmx_ple())
+		ple_gap = 0;
+
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
+
+	if (enable_apicv)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else {
+		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
+
+	if (nested)
+		nested_vmx_setup_ctls_msrs();
+
+	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+	return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
+	free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-					unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+				  unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
 	if (offset < 0)
-		return 0;
+		return offset;
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*ret = *((natural_width *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U16:
 		*ret = *((u16 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*ret = *((u32 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*ret = *((u64 *)p);
-		return 1;
+		return 0;
 	default:
-		return 0; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-				    unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+				   unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
 	char *p = ((char *) get_vmcs12(vcpu)) + offset;
 	if (offset < 0)
-		return false;
+		return offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_U16:
 		*(u16 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*(u32 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*(u64 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*(natural_width *)p = field_value;
-		return true;
+		return 0;
 	default:
-		return false; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 
 }
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			field_value = vmcs_readl(field);
 			break;
+		default:
+			WARN_ON(1);
+			continue;
 		}
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 				vmcs_writel(field, (long)field_value);
 				break;
+			default:
+				WARN_ON(1);
+				break;
 			}
 		}
 	}
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
+	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (!vmcs12_write_any(vcpu, field, field_value)) {
+	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_XSAVES]                  = handle_xsaves,
+	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
+	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+		/*
+		 * This should never happen, since it is not possible to
+		 * set XSS to a non-zero value---neither in L1 nor in L2.
+		 * If it were, XSS would have to be checked against
+		 * the XSS exit bitmap in vmcs12.
+		 */
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
 		return 1;
 	}
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
 	u16 status;
 	u8 old;
 
+	if (vector == -1)
+		vector = 0;
+
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = (u8)status & 0xff;
 	if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
+	if (!is_guest_mode(vcpu)) {
+		vmx_set_rvi(max_irr);
+		return;
+	}
+
 	if (max_irr == -1)
 		return;
 
 	/*
-	 * If a vmexit is needed, vmx_check_nested_events handles it.
+	 * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+	 * handles it.
 	 */
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+	if (nested_exit_on_intr(vcpu))
 		return;
 
-	if (!is_guest_mode(vcpu)) {
-		vmx_set_rvi(max_irr);
-		return;
-	}
-
 	/*
-	 * Fall back to pre-APICv interrupt injection since L2
+	 * Else, fall back to pre-APICv interrupt injection since L2
 	 * is run without virtual interrupt delivery.
 	 */
 	if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
 	if (vmx_mpx_supported())
 		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
 	/* update exit information fields: */
 
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+	.xsaves_supported = vmx_xsaves_supported,
 
 	.check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..c259814 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,6 +27,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "cpuid.h"
+#include "assigned-dev.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	if (!vcpu->arch.exception.pending) {
 	queue:
+		if (has_error && !is_protmode(vcpu))
+			has_error = false;
 		vcpu->arch.exception.pending = true;
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
 }
 EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+	if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+		return true;
+
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
 /*
  * This function will be used to read from the physical memory of the currently
  * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
 		return 1;
 
+	if (xcr0 & XSTATE_AVX512) {
+		if (!(xcr0 & XSTATE_YMM))
+			return 1;
+		if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+			return 1;
+	}
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
@@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+#ifdef CONFIG_X86_64
+	cr3 &= ~CR3_PCID_INVD;
+#endif
+
 	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 			vcpu->arch.eff_db[dr] = val;
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	case 6:
 		if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		kvm_update_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	default: /* 7 */
 		if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
-	int res;
-
-	res = __kvm_set_dr(vcpu, dr, val);
-	if (res > 0)
-		kvm_queue_exception(vcpu, UD_VECTOR);
-	else if (res < 0)
+	if (__kvm_set_dr(vcpu, dr, val)) {
 		kvm_inject_gp(vcpu, 0);
-
-	return res;
+		return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_dr);
 
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 {
 	switch (dr) {
 	case 0 ... 3:
 		*val = vcpu->arch.db[dr];
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	case 6:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 			*val = kvm_x86_ops->get_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	default: /* 7 */
 		*val = vcpu->arch.dr7;
 		break;
 	}
-
-	return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
-	if (_kvm_get_dr(vcpu, dr, val)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
 	bool vcpus_matched;
-	bool do_request = false;
 	struct kvm_arch *ka = &vcpu->kvm->arch;
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 
 	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
 			 atomic_read(&vcpu->kvm->online_vcpus));
 
-	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
-		if (!ka->use_master_clock)
-			do_request = 1;
-
-	if (!vcpus_matched && ka->use_master_clock)
-			do_request = 1;
-
-	if (do_request)
+	/*
+	 * Once the masterclock is enabled, always perform request in
+	 * order to update it.
+	 *
+	 * In order to enable masterclock, the host clocksource must be TSC
+	 * and the vcpus need to have matched TSCs.  When that happens,
+	 * perform request to enable masterclock.
+	 */
+	if (ka->use_master_clock ||
+	    (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
 		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
 	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_guest_tsc = tsc_timestamp;
 
+	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+		&guest_hv_clock, sizeof(guest_hv_clock))))
+		return 0;
+
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
 	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version += 2;
-
-	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-		&guest_hv_clock, sizeof(guest_hv_clock))))
-		return 0;
+	vcpu->hv_clock.version = guest_hv_clock.version + 2;
 
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	vcpu->hv_clock.flags = pvclock_flags;
 
+	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
 				sizeof(vcpu->hv_clock));
@@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC_ADJUST:
 		if (guest_cpuid_has_tsc_adjust(vcpu)) {
 			if (!msr_info->host_initiated) {
-				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
 	unsigned long val;
 
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-	_kvm_get_dr(vcpu, 6, &val);
+	kvm_get_dr(vcpu, 6, &val);
 	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
 	dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV */
+	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+	/*
+	 * Copy each region from the possibly compacted offset to the
+	 * non-compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *src = get_xsave_addr(xsave, feature);
+
+		if (src) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest + offset, src, size);
+		}
+
+		valid -= feature;
+	}
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV and possibly XCOMP_BV.  */
+	xsave->xsave_hdr.xstate_bv = xstate_bv;
+	if (cpu_has_xsaves)
+		xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+	/*
+	 * Copy each region from the non-compacted offset to the
+	 * possibly compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *dest = get_xsave_addr(xsave, feature);
+
+		if (dest) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest, src + offset, size);
+		} else
+			WARN_ON_ONCE(1);
+
+		valid -= feature;
+	}
+}
+
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
 	if (cpu_has_xsave) {
-		memcpy(guest_xsave->region,
-			&vcpu->arch.guest_fpu.state->xsave,
-			vcpu->arch.guest_xstate_size);
-		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-			vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
 		memcpy(guest_xsave->region,
 			&vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 		 */
 		if (xstate_bv & ~kvm_supported_xcr0())
 			return -EINVAL;
-		memcpy(&vcpu->arch.guest_fpu.state->xsave,
-			guest_xsave->region, vcpu->arch.guest_xstate_size);
+		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
 		if (xstate_bv & ~XSTATE_FPSSE)
 			return -EINVAL;
@@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	default:
-		;
+		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;
@@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+	return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
 
 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 {
-	struct kvm_run *kvm_run = vcpu->run;
-	unsigned long eip = vcpu->arch.emulate_ctxt.eip;
-	u32 dr6 = 0;
-
 	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
 	    (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		struct kvm_run *kvm_run = vcpu->run;
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.guest_debug_dr7,
 					   vcpu->arch.eff_db);
 
 		if (dr6 != 0) {
 			kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
-			kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
-				get_segment_base(vcpu, VCPU_SREG_CS);
-
+			kvm_run->debug.arch.pc = eip;
 			kvm_run->debug.arch.exception = DB_VECTOR;
 			kvm_run->exit_reason = KVM_EXIT_DEBUG;
 			*r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 
 	if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
 	    !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.dr7,
 					   vcpu->arch.db);
 
@@ -5365,7 +5439,9 @@ restart:
 		kvm_rip_write(vcpu, ctxt->eip);
 		if (r == EMULATE_DONE)
 			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
-		__kvm_set_rflags(vcpu, ctxt->eflags);
+		if (!ctxt->have_exception ||
+		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+			__kvm_set_rflags(vcpu, ctxt->eflags);
 
 		/*
 		 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
 
+		if (vcpu->arch.exception.nr == DB_VECTOR &&
+		    (vcpu->arch.dr7 & DR7_GD)) {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			kvm_update_dr7(vcpu);
+		}
+
 		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
 					  vcpu->arch.exception.has_error_code,
 					  vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu)
 		return err;
 
 	fpu_finit(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsaves)
+		vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
 	 * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
 	struct kvm_segment cs;
 
@@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (type)
 		return -EINVAL;
 
+	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
-bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
-	unsigned long current_rip = kvm_rip_read(vcpu) +
-		get_segment_base(vcpu, VCPU_SREG_CS);
+	if (is_64_bit_mode(vcpu))
+		return kvm_rip_read(vcpu);
+	return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
+		     kvm_rip_read(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
 
-	return current_rip == linear_rip;
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+{
+	return kvm_get_linear_rip(vcpu) == linear_rip;
 }
 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7cb9c45..cc1d61a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-				| XSTATE_BNDREGS | XSTATE_BNDCSR)
+				| XSTATE_BNDREGS | XSTATE_BNDCSR \
+				| XSTATE_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index aae9413..c1c1544 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -841,7 +841,7 @@ static void __init lguest_init_IRQ(void)
 {
 	unsigned int i;
 
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b74a7e1..38dcec4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1247,7 +1247,7 @@ good_area:
 		}
 
 		/* User mode? Just return to handle the fatal exception */
-		if (fault & FAULT_FLAG_USER)
+		if (flags & FAULT_FLAG_USER)
 			return;
 
 		/* Not returning to user mode? Handle exceptions or die: */
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index b9958c3..44b9271 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -210,6 +210,9 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 {
 	int polarity;
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
 		polarity = 0; /* active high */
 	else
@@ -224,13 +227,18 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
 		return -EBUSY;
 
+	dev->irq_managed = 1;
+
 	return 0;
 }
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0)
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
+		dev->irq_managed = 0;
+	}
 }
 
 struct pci_ops intel_mid_pci_ops = {
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index eb500c2..5dc6ca5 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1200,11 +1200,12 @@ static int pirq_enable_irq(struct pci_dev *dev)
 #ifdef CONFIG_X86_IO_APIC
 			struct pci_dev *temp_dev;
 			int irq;
-			struct io_apic_irq_attr irq_attr;
+
+			if (dev->irq_managed && dev->irq > 0)
+				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn),
-						pin - 1, &irq_attr);
+						PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1218,7 +1219,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn),
-						pin - 1, &irq_attr);
+						pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1228,6 +1229,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				dev->irq_managed = 1;
 				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,11 +1256,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
 	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
-	    dev->irq) {
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index b233681..0ce6736 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 		       unsigned long mmr_offset, int limit)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
@@ -198,13 +198,13 @@ static int
 uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 		    bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest;
 	unsigned long mmr_value, mmr_offset;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
+	if (apic_set_affinity(data, mask, &dest))
 		return -1;
 
 	mmr_value = 0;
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 7556e7c..9b693d5 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -305,60 +305,6 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
 	 */
 
 	/*
-	 * Lenovo has a mix of systems OSI(Linux) situations
-	 * and thus we can not wildcard the vendor.
-	 *
-	 * _OSI(Linux) helps sound
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad R61"),
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T61"),
-	 * T400, T500
-	 * _OSI(Linux) has Linux specific hooks
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"),
-	 * _OSI(Linux) is a NOP:
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "3000 N100"),
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "LENOVO3000 V100"),
-	 */
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad R61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad R61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad X61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T400",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T400"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T500",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T500"),
-		},
-	},
-	/*
 	 * Without this this EEEpc exports a non working WMI interface, with
 	 * this it exports a working "good old" eeepc_laptop interface, fixing
 	 * both brightness control, and rfkill not working.
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 8976401..c2daa85 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -680,13 +680,21 @@ static int acpi_device_wakeup(struct acpi_device *adev, u32 target_state,
 		if (error)
 			return error;
 
+		if (adev->wakeup.flags.enabled)
+			return 0;
+
 		res = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number);
-		if (ACPI_FAILURE(res)) {
+		if (ACPI_SUCCESS(res)) {
+			adev->wakeup.flags.enabled = 1;
+		} else {
 			acpi_disable_wakeup_device_power(adev);
 			return -EIO;
 		}
 	} else {
-		acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+		if (adev->wakeup.flags.enabled) {
+			acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+			adev->wakeup.flags.enabled = 0;
+		}
 		acpi_disable_wakeup_device_power(adev);
 	}
 	return 0;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 5f9b74b..1b5853f 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -844,6 +844,8 @@ static int ec_install_handlers(struct acpi_ec *ec)
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
+	if (!test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
+		return;
 	acpi_disable_gpe(NULL, ec->gpe);
 	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
 				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index caf9b76..7a36f02 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -325,6 +325,7 @@ static int acpi_fan_probe(struct platform_device *pdev)
 	struct thermal_cooling_device *cdev;
 	struct acpi_fan *fan;
 	struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+	char *name;
 
 	fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL);
 	if (!fan) {
@@ -346,7 +347,12 @@ static int acpi_fan_probe(struct platform_device *pdev)
 		}
 	}
 
-	cdev = thermal_cooling_device_register("Fan", device,
+	if (!strncmp(pdev->name, "PNP0C0B", strlen("PNP0C0B")))
+		name = "Fan";
+	else
+		name = acpi_device_bid(device);
+
+	cdev = thermal_cooling_device_register(name, device,
 						&fan_cooling_ops);
 	if (IS_ERR(cdev)) {
 		result = PTR_ERR(cdev);
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 7cc4e33..5277a0e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -413,6 +413,9 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		return 0;
 	}
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry) {
 		/*
@@ -456,6 +459,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		return rc;
 	}
 	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -478,7 +482,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin)
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
 	/* Keep IOAPIC pin configuration when suspending */
@@ -506,6 +510,9 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	 */
 
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
-	if (gsi >= 0 && dev->irq > 0)
+	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
+		dev->irq = 0;
+		dev->irq_managed = 0;
+	}
 }
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index ef58f46..342942f 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -125,13 +125,12 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
 	}
 
 	header = (struct acpi_subtable_header *)obj->buffer.pointer;
-	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
+	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
 		map_lapic_id(header, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
 		map_lsapic_id(header, type, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
 		map_x2apic_id(header, type, acpi_id, &apic_id);
-	}
 
 exit:
 	kfree(buffer.pointer);
@@ -164,7 +163,7 @@ int acpi_map_cpuid(int apic_id, u32 acpi_id)
 		 * For example,
 		 *
 		 * Scope (_PR)
-                 * {
+		 * {
 		 *     Processor (CPU0, 0x00, 0x00000410, 0x06) {}
 		 *     Processor (CPU1, 0x01, 0x00000410, 0x06) {}
 		 *     Processor (CPU2, 0x02, 0x00000410, 0x06) {}
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 2ba8f02..782a0d1 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -200,7 +200,7 @@ bool acpi_dev_resource_address_space(struct acpi_resource *ares,
 
 	status = acpi_resource_to_address64(ares, &addr);
 	if (ACPI_FAILURE(status))
-		return true;
+		return false;
 
 	res->start = addr.minimum;
 	res->end = addr.maximum;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1b1cf55..16914cc 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2214,7 +2214,7 @@ static void acpi_device_dep_initialize(struct acpi_device *adev)
 	status = acpi_evaluate_reference(adev->handle, "_DEP", NULL,
 					&dep_devices);
 	if (ACPI_FAILURE(status)) {
-		dev_err(&adev->dev, "Failed to evaluate _DEP.\n");
+		dev_dbg(&adev->dev, "Failed to evaluate _DEP.\n");
 		return;
 	}
 
@@ -2224,7 +2224,7 @@ static void acpi_device_dep_initialize(struct acpi_device *adev)
 
 		status = acpi_get_object_info(dep_devices.handles[i], &info);
 		if (ACPI_FAILURE(status)) {
-			dev_err(&adev->dev, "Error reading device info\n");
+			dev_dbg(&adev->dev, "Error reading _DEP device info\n");
 			continue;
 		}
 
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index dd8ff63..cd49a39 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -346,22 +346,16 @@ acpi_evaluate_reference(acpi_handle handle,
 	package = buffer.pointer;
 
 	if ((buffer.length == 0) || !package) {
-		printk(KERN_ERR PREFIX "No return object (len %X ptr %p)\n",
-			    (unsigned)buffer.length, package);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
 	}
 	if (package->type != ACPI_TYPE_PACKAGE) {
-		printk(KERN_ERR PREFIX "Expecting a [Package], found type %X\n",
-			    package->type);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
 	}
 	if (!package->package.count) {
-		printk(KERN_ERR PREFIX "[Package] has zero elements (%p)\n",
-			    package);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
@@ -380,17 +374,13 @@ acpi_evaluate_reference(acpi_handle handle,
 
 		if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
 			status = AE_BAD_DATA;
-			printk(KERN_ERR PREFIX
-				    "Expecting a [Reference] package element, found type %X\n",
-				    element->type);
 			acpi_util_eval_error(handle, pathname, status);
 			break;
 		}
 
 		if (!element->reference.handle) {
-			printk(KERN_WARNING PREFIX "Invalid reference in"
-			       " package %s\n", pathname);
 			status = AE_NULL_ENTRY;
+			acpi_util_eval_error(handle, pathname, status);
 			break;
 		}
 		/* Get the  acpi_handle. */
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 185a57d..1eaadff 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -155,6 +155,7 @@ struct acpi_video_bus {
 	u8 dos_setting;
 	struct acpi_video_enumerated_device *attached_array;
 	u8 attached_count;
+	u8 child_count;
 	struct acpi_video_bus_cap cap;
 	struct acpi_video_bus_flags flags;
 	struct list_head video_device_list;
@@ -1159,8 +1160,12 @@ static bool acpi_video_device_in_dod(struct acpi_video_device *device)
 	struct acpi_video_bus *video = device->video;
 	int i;
 
-	/* If we have a broken _DOD, no need to test */
-	if (!video->attached_count)
+	/*
+	 * If we have a broken _DOD, or more than 8 output devices under the
+	 * graphics controller node (which the operation region code cannot
+	 * properly deal with currently), there is no need to test.
+	 */
+	if (!video->attached_count || video->child_count > 8)
 		return true;
 
 	for (i = 0; i < video->attached_count; i++) {
@@ -1413,6 +1418,7 @@ acpi_video_bus_get_devices(struct acpi_video_bus *video,
 			dev_err(&dev->dev, "Can't attach device\n");
 			break;
 		}
+		video->child_count++;
 	}
 	return status;
 }
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index cd4cccb..a3a1360 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -61,7 +61,7 @@ config ATA_ACPI
 
 config SATA_ZPODD
 	bool "SATA Zero Power Optical Disc Drive (ZPODD) support"
-	depends on ATA_ACPI && PM_RUNTIME
+	depends on ATA_ACPI && PM
 	default n
 	help
 	  This option adds support for SATA Zero Power Optical Disc
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 2d195f3..d24dd614a 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -84,7 +84,11 @@ struct dev_pm_opp {
  *
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
- * meant for book keeping and private to OPP library
+ * meant for book keeping and private to OPP library.
+ *
+ * Because the opp structures can be used from both rcu and srcu readers, we
+ * need to wait for the grace period of both of them before freeing any
+ * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
  */
 struct device_opp {
 	struct list_head node;
@@ -382,12 +386,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
+/*
+ * Allocate a device_opp for @dev and add it to dev_opp_list; NULL on OOM.
+ */
+static struct device_opp *add_device_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/* Infrequent path: the caller found no device_opp for @dev yet. */
+	dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
+	if (!dev_opp)
+		return NULL;
+
+	dev_opp->dev = dev;
+	srcu_init_notifier_head(&dev_opp->srcu_head);
+	INIT_LIST_HEAD(&dev_opp->opp_list);
+
+	/* Secure the device list modification */
+	list_add_rcu(&dev_opp->node, &dev_opp_list);
+	return dev_opp;
+}
+
 static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 				  unsigned long u_volt, bool dynamic)
 {
 	struct device_opp *dev_opp = NULL;
 	struct dev_pm_opp *opp, *new_opp;
 	struct list_head *head;
+	int ret;
 
 	/* allocate new OPP node */
 	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
@@ -400,7 +426,6 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 	mutex_lock(&dev_opp_list_lock);
 
 	/* populate the opp table */
-	new_opp->dev_opp = dev_opp;
 	new_opp->rate = freq;
 	new_opp->u_volt = u_volt;
 	new_opp->available = true;
@@ -409,27 +434,12 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
-		/*
-		 * Allocate a new device OPP table. In the infrequent case
-		 * where a new device is needed to be added, we pay this
-		 * penalty.
-		 */
-		dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
+		dev_opp = add_device_opp(dev);
 		if (!dev_opp) {
-			mutex_unlock(&dev_opp_list_lock);
-			kfree(new_opp);
-			dev_warn(dev,
-				"%s: Unable to create device OPP structure\n",
-				__func__);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto free_opp;
 		}
 
-		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->srcu_head);
-		INIT_LIST_HEAD(&dev_opp->opp_list);
-
-		/* Secure the device list modification */
-		list_add_rcu(&dev_opp->node, &dev_opp_list);
 		head = &dev_opp->opp_list;
 		goto list_add;
 	}
@@ -448,18 +458,17 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 
 	/* Duplicate OPPs ? */
 	if (new_opp->rate == opp->rate) {
-		int ret = opp->available && new_opp->u_volt == opp->u_volt ?
+		ret = opp->available && new_opp->u_volt == opp->u_volt ?
 			0 : -EEXIST;
 
 		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
 			 __func__, opp->rate, opp->u_volt, opp->available,
 			 new_opp->rate, new_opp->u_volt, new_opp->available);
-		mutex_unlock(&dev_opp_list_lock);
-		kfree(new_opp);
-		return ret;
+		goto free_opp;
 	}
 
 list_add:
+	new_opp->dev_opp = dev_opp;
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
@@ -469,6 +478,11 @@ list_add:
 	 */
 	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
+
+free_opp:
+	mutex_unlock(&dev_opp_list_lock);
+	kfree(new_opp);
+	return ret;
 }
 
 /**
@@ -511,10 +525,11 @@ static void kfree_device_rcu(struct rcu_head *head)
 {
 	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
 
-	kfree(device_opp);
+	kfree_rcu(device_opp, rcu_head);
 }
 
-void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
+static void __dev_pm_opp_remove(struct device_opp *dev_opp,
+				struct dev_pm_opp *opp)
 {
 	/*
 	 * Notify the changes in the availability of the operable
@@ -592,7 +607,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
 static int opp_set_availability(struct device *dev, unsigned long freq,
 		bool availability_req)
 {
-	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
+	struct device_opp *dev_opp;
 	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
@@ -606,12 +621,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 	mutex_lock(&dev_opp_list_lock);
 
 	/* Find the device_opp */
-	list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
-		if (dev == tmp_dev_opp->dev) {
-			dev_opp = tmp_dev_opp;
-			break;
-		}
-	}
+	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
 		r = PTR_ERR(dev_opp);
 		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
@@ -768,7 +778,7 @@ EXPORT_SYMBOL_GPL(of_init_opp_table);
  */
 void of_free_opp_table(struct device *dev)
 {
-	struct device_opp *dev_opp = find_device_opp(dev);
+	struct device_opp *dev_opp;
 	struct dev_pm_opp *opp, *tmp;
 
 	/* Check for existing list for 'dev' */
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 27b71a0..3ec85df 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2370,8 +2370,12 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
 		opcode = CEPH_OSD_OP_READ;
 	}
 
-	osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length,
-				0, 0);
+	if (opcode == CEPH_OSD_OP_DELETE)
+		osd_req_op_init(osd_request, num_ops, opcode);
+	else
+		osd_req_op_extent_init(osd_request, num_ops, opcode,
+				       offset, length, 0, 0);
+
 	if (obj_request->type == OBJ_REQUEST_BIO)
 		osd_req_op_extent_osd_data_bio(osd_request, num_ops,
 					obj_request->bio_list, length);
@@ -3405,8 +3409,7 @@ err_rq:
 	if (result)
 		rbd_warn(rbd_dev, "%s %llx at %llx result %d",
 			 obj_op_name(op_type), length, offset, result);
-	if (snapc)
-		ceph_put_snap_context(snapc);
+	ceph_put_snap_context(snapc);
 	blk_end_request_all(rq, result);
 }
 
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index fce7588..1ee27ac 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -87,6 +87,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x04CA, 0x3007) },
 	{ USB_DEVICE(0x04CA, 0x3008) },
 	{ USB_DEVICE(0x04CA, 0x300b) },
+	{ USB_DEVICE(0x04CA, 0x3010) },
 	{ USB_DEVICE(0x0930, 0x0219) },
 	{ USB_DEVICE(0x0930, 0x0220) },
 	{ USB_DEVICE(0x0930, 0x0227) },
@@ -140,6 +141,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 31dd24a..19cf2cf 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -167,6 +167,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1405b39..742eefb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -199,7 +199,14 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
 
 	pid->integral += fp_error;
 
-	/* limit the integral term */
+	/*
+	 * We limit the integral here so that it will never
+	 * get higher than 30.  This prevents it from becoming
+	 * too large an input over long periods of time and allows
+	 * it to get factored out sooner.
+	 *
+	 * The value of 30 was chosen through experimentation.
+	 */
 	integral_limit = int_tofp(30);
 	if (pid->integral > integral_limit)
 		pid->integral = integral_limit;
@@ -616,6 +623,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	if (limits.no_turbo || limits.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
+	/*
+	 * performance can be limited by user through sysfs, by cpufreq
+	 * policy, or by cpu specific default values determined through
+	 * experimentation.
+	 */
 	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
 	*max = clamp_t(int, max_perf_adj,
 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
@@ -717,11 +729,29 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 	u32 duration_us;
 	u32 sample_time;
 
+	/*
+	 * core_busy is the ratio of actual performance to max
+	 * max_pstate is the max non turbo pstate available
+	 * current_pstate was the pstate that was requested during
+	 * 	the last sample period.
+	 *
+	 * We normalize core_busy, which was our actual percent
+	 * performance to what we requested during the last sample
+	 * period. The result will be a percentage of busy at a
+	 * specified pstate.
+	 */
 	core_busy = cpu->sample.core_pct_busy;
 	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
+	/*
+	 * Since we have a deferred timer, it will not fire unless
+	 * we are in C0.  So, determine if the actual elapsed time
+	 * is significantly greater (3x) than our sample interval.  If it
+	 * is, then we were idle for a long enough period of time
+	 * to adjust our busyness.
+	 */
 	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
 	duration_us = (u32) ktime_us_delta(cpu->sample.time,
 					   cpu->last_sample_time);
@@ -948,6 +978,7 @@ static struct cpufreq_driver intel_pstate_driver = {
 
 static int __initdata no_load;
 static int __initdata no_hwp;
+static unsigned int force_load;
 
 static int intel_pstate_msrs_not_valid(void)
 {
@@ -1094,7 +1125,8 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void)
 			case PSS:
 				return intel_pstate_no_acpi_pss();
 			case PPC:
-				return intel_pstate_has_acpi_ppc();
+				return intel_pstate_has_acpi_ppc() &&
+					(!force_load);
 			}
 	}
 
@@ -1175,6 +1207,8 @@ static int __init intel_pstate_setup(char *str)
 		no_load = 1;
 	if (!strcmp(str, "no_hwp"))
 		no_hwp = 1;
+	if (!strcmp(str, "force"))
+		force_load = 1;
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c913906..0f6b229 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
+ *  (C) 2001-2004  Dave Jones.
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1008,7 +1008,7 @@ MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 module_param(enable, int, 0644);
 MODULE_PARM_DESC(enable, "Enable driver");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index f910272..e6f24b2 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -300,7 +300,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones, "
 		"Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index e61e224..37c5742 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -1,7 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
  *  (C) 2003 Dave Jones on behalf of SuSE Labs.
- *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
  *  Based upon datasheets & sample CPUs kindly provided by AMD.
@@ -701,7 +700,7 @@ static void __exit powernow_exit(void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 1a07b59..e56d632 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -378,8 +378,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Dave Jones, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
 		"with ICH-M southbridges.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e9248bb..aedec09 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -16,13 +16,10 @@
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
+#include <asm/opal.h>
 #include <asm/runlatch.h>
 
-/* Flags and constants used in PowerNV platform */
-
 #define MAX_POWERNV_IDLE_STATES	8
-#define IDLE_USE_INST_NAP	0x00010000 /* Use nap instruction */
-#define IDLE_USE_INST_SLEEP	0x00020000 /* Use sleep instruction */
 
 struct cpuidle_driver powernv_idle_driver = {
 	.name             = "powernv_idle",
@@ -197,7 +194,7 @@ static int powernv_add_idle_states(void)
 		 * target residency to be 10x exit_latency
 		 */
 		latency_ns = be32_to_cpu(idle_state_latency[i]);
-		if (flags & IDLE_USE_INST_NAP) {
+		if (flags & OPAL_PM_NAP_ENABLED) {
 			/* Add NAP state */
 			strcpy(powernv_states[nr_idle_states].name, "Nap");
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
@@ -210,7 +207,8 @@ static int powernv_add_idle_states(void)
 			nr_idle_states++;
 		}
 
-		if (flags & IDLE_USE_INST_SLEEP) {
+		if (flags & OPAL_PM_SLEEP_ENABLED ||
+			flags & OPAL_PM_SLEEP_ENABLED_ER1) {
 			/* Add FASTSLEEP state */
 			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 6753fd9..fe41d5a 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -177,6 +177,10 @@ static struct attribute *lm75_attrs[] = {
 };
 ATTRIBUTE_GROUPS(lm75);
 
+static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = {
+	.get_temp = lm75_read_temp,
+};
+
 /*-----------------------------------------------------------------------*/
 
 /* device probe and removal */
@@ -296,10 +300,9 @@ lm75_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (IS_ERR(data->hwmon_dev))
 		return PTR_ERR(data->hwmon_dev);
 
-	data->tz = thermal_zone_of_sensor_register(data->hwmon_dev,
-						   0,
+	data->tz = thermal_zone_of_sensor_register(data->hwmon_dev, 0,
 						   data->hwmon_dev,
-						   lm75_read_temp, NULL);
+						   &lm75_of_thermal_ops);
 	if (IS_ERR(data->tz))
 		data->tz = NULL;
 
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index fd9a945..112e4d4 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -486,6 +486,10 @@ static const struct attribute_group ntc_attr_group = {
 	.attrs = ntc_attributes,
 };
 
+static const struct thermal_zone_of_device_ops ntc_of_thermal_ops = {
+	.get_temp = ntc_read_temp,
+};
+
 static int ntc_thermistor_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *of_id =
@@ -579,7 +583,7 @@ static int ntc_thermistor_probe(struct platform_device *pdev)
 								pdev_id->name);
 
 	data->tz = thermal_zone_of_sensor_register(data->dev, 0, data->dev,
-						ntc_read_temp, NULL);
+						   &ntc_of_thermal_ops);
 	if (IS_ERR(data->tz)) {
 		dev_dbg(&pdev->dev, "Failed to register to thermal fw.\n");
 		data->tz = NULL;
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 5171995..ba9f478 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -158,6 +158,10 @@ ATTRIBUTE_GROUPS(tmp102);
 #define TMP102_CONFIG  (TMP102_CONF_TM | TMP102_CONF_EM | TMP102_CONF_CR1)
 #define TMP102_CONFIG_RD_ONLY (TMP102_CONF_R0 | TMP102_CONF_R1 | TMP102_CONF_AL)
 
+static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = {
+	.get_temp = tmp102_read_temp,
+};
+
 static int tmp102_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
@@ -215,7 +219,7 @@ static int tmp102_probe(struct i2c_client *client,
 	}
 	tmp102->hwmon_dev = hwmon_dev;
 	tmp102->tz = thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev,
-						     tmp102_read_temp, NULL);
+						     &tmp102_of_thermal_ops);
 	if (IS_ERR(tmp102->tz))
 		tmp102->tz = NULL;
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index f08dd20..31e8308 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1073,4 +1073,15 @@ config SCx200_ACB
 	  This support is also available as a module.  If so, the module
 	  will be called scx200_acb.
 
+config I2C_OPAL
+	tristate "IBM OPAL I2C driver"
+	depends on PPC_POWERNV
+	default y
+	help
+	  This exposes the PowerNV platform i2c busses to the Linux i2c layer.
+	  The driver is based on the OPAL interfaces.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called i2c-opal.
+
 endmenu
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 5e6c822..56388f6 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_I2C_ACORN)		+= i2c-acorn.o
 obj-$(CONFIG_I2C_BCM_KONA)	+= i2c-bcm-kona.o
 obj-$(CONFIG_I2C_CROS_EC_TUNNEL)	+= i2c-cros-ec-tunnel.o
 obj-$(CONFIG_I2C_ELEKTOR)	+= i2c-elektor.o
+obj-$(CONFIG_I2C_OPAL)		+= i2c-opal.o
 obj-$(CONFIG_I2C_PCA_ISA)	+= i2c-pca-isa.o
 obj-$(CONFIG_I2C_SIBYTE)	+= i2c-sibyte.o
 obj-$(CONFIG_SCx200_ACB)	+= scx200_acb.o
diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c
new file mode 100644
index 0000000..16f90b1
--- /dev/null
+++ b/drivers/i2c/busses/i2c-opal.c
@@ -0,0 +1,294 @@
+/*
+ * IBM OPAL I2C driver
+ * Copyright (C) 2014 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/opal.h>
+
+/* Map an OPAL return code onto a Linux errno value. */
+static int i2c_opal_translate_error(int rc)
+{
+	if (rc == OPAL_NO_MEM)
+		return -ENOMEM;
+	if (rc == OPAL_PARAMETER)
+		return -EINVAL;
+	if (rc == OPAL_I2C_ARBT_LOST)
+		return -EAGAIN;
+	if (rc == OPAL_I2C_TIMEOUT)
+		return -ETIMEDOUT;
+	if (rc == OPAL_I2C_NACK_RCVD)
+		return -ENXIO;
+	if (rc == OPAL_I2C_STOP_ERR)
+		return -EBUSY;
+
+	/* Any code not listed above is reported as a generic I/O error */
+	return -EIO;
+}
+
+/*
+ * Hand @req to OPAL for bus @bus_id and wait for the asynchronous reply.
+ * Returns OPAL_SUCCESS when the transfer completed, else an error code.
+ */
+static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
+{
+	struct opal_msg reply;
+	int tok, ret;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		if (tok != -ERESTARTSYS)
+			pr_err("Failed to get the async token\n");
+		return tok;
+	}
+
+	ret = opal_i2c_request(tok, bus_id, req);
+	if (ret == OPAL_ASYNC_COMPLETION) {
+		ret = opal_async_wait_response(tok, &reply);
+		if (!ret) {
+			/* The OPAL status of the transfer is in params[1] */
+			ret = be64_to_cpu(reply.params[1]);
+			if (ret != OPAL_SUCCESS)
+				ret = i2c_opal_translate_error(ret);
+		}
+	} else {
+		ret = i2c_opal_translate_error(ret);
+	}
+
+	/* The token is released on every path once it has been obtained */
+	opal_async_release_token(tok);
+	return ret;
+}
+
+/*
+ * master_xfer hook: issue one message, or a write-then-read pair, as a
+ * single OPAL request. Returns @num on success, otherwise an error code.
+ */
+static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+				int num)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	struct opal_i2c_request req;
+	int rc, i;
+
+	/* Only one message, or two forming a write-then-read, are supported */
+	memset(&req, 0, sizeof(req));
+	switch(num) {
+	case 0:
+		return 0;
+	case 1:
+		req.type = (msgs[0].flags & I2C_M_RD) ?
+			OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+		req.addr = cpu_to_be16(msgs[0].addr);
+		req.size = cpu_to_be32(msgs[0].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf));
+		break;
+	case 2:
+		/* The write (at most 4 bytes) is sent as the sub-address of
+		 * an OPAL_I2C_SM_READ; two writes would need a bounce buffer.
+		 */
+		if ((msgs[0].flags & I2C_M_RD) || !(msgs[1].flags & I2C_M_RD))
+			return -EOPNOTSUPP;
+		if (msgs[0].len > 4)
+			return -EOPNOTSUPP;
+		if (msgs[0].addr != msgs[1].addr)
+			return -EOPNOTSUPP;
+		req.type = OPAL_I2C_SM_READ;
+		req.addr = cpu_to_be16(msgs[0].addr);
+		req.subaddr_sz = msgs[0].len;
+		for (i = 0; i < msgs[0].len; i++)
+			req.subaddr = (req.subaddr << 8) | msgs[0].buf[i];
+		req.subaddr = cpu_to_be32(req.subaddr);
+		req.size = cpu_to_be32(msgs[1].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf));
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	rc = i2c_opal_send_request(opal_id, &req);
+	if (rc)
+		return rc;
+
+	return num;
+}
+
+/*
+ * smbus_xfer hook: encode one SMBus transaction as an OPAL i2c request.
+ * Returns i2c_opal_send_request()'s result, or -EINVAL for other @size.
+ */
+static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			       unsigned short flags, char read_write,
+			       u8 command, int size, union i2c_smbus_data *data)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	struct opal_i2c_request req;
+	u8 word_buf[2];	/* WORD_DATA payload, low byte first */
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = cpu_to_be16(addr);
+
+	if (size == I2C_SMBUS_QUICK || size == I2C_SMBUS_BYTE) {
+		/* Raw transfer without sub-address; QUICK has no payload */
+		if (size == I2C_SMBUS_BYTE) {
+			req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+			req.size = cpu_to_be32(1);
+		}
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+	} else {
+		switch (size) {
+		case I2C_SMBUS_BYTE_DATA:
+			req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+			req.size = cpu_to_be32(1);
+			break;
+		case I2C_SMBUS_WORD_DATA:
+			if (!read_write) {
+				word_buf[0] = data->word & 0xff;
+				word_buf[1] = (data->word >> 8) & 0xff;
+			}
+			req.buffer_ra = cpu_to_be64(__pa(word_buf));
+			req.size = cpu_to_be32(2);
+			break;
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			req.buffer_ra = cpu_to_be64(__pa(&data->block[1]));
+			req.size = cpu_to_be32(data->block[0]);
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		/* All of the above use @command as a 1-byte sub-address */
+		req.subaddr = cpu_to_be32(command);
+		req.subaddr_sz = 1;
+		req.type = (read_write == I2C_SMBUS_READ) ?
+			OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+	}
+
+	ret = i2c_opal_send_request(opal_id, &req);
+	if (!ret && read_write && size == I2C_SMBUS_WORD_DATA) {
+		/* Word read: reassemble the value from the bounce buffer */
+		data->word = word_buf[0] | (((u16)word_buf[1]) << 8);
+	}
+
+	return ret;
+}
+
+static u32 i2c_opal_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_I2C_BLOCK;	/* types the xfer hooks handle */
+}
+
+static const struct i2c_algorithm i2c_opal_algo = {
+	.master_xfer	= i2c_opal_master_xfer,	/* one or two i2c messages */
+	.smbus_xfer	= i2c_opal_smbus_xfer,	/* SMBus transactions */
+	.functionality	= i2c_opal_func,	/* capability mask */
+};
+
+/* Probe: register an i2c adapter for the OPAL bus named by "ibm,opal-id". */
+static int i2c_opal_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct i2c_adapter *adap;
+	const char *port;
+	u32 bus_id;
+	int ret;
+
+	if (!np)
+		return -ENODEV;
+
+	if (of_property_read_u32(np, "ibm,opal-id", &bus_id)) {
+		dev_err(&pdev->dev, "Missing ibm,opal-id property !\n");
+		return -EIO;
+	}
+
+	adap = devm_kzalloc(&pdev->dev, sizeof(*adap), GFP_KERNEL);
+	if (!adap)
+		return -ENOMEM;
+
+	/* The xfer hooks read the OPAL bus id back out of algo_data */
+	adap->algo_data = (void *)(unsigned long)bus_id;
+	adap->algo = &i2c_opal_algo;
+	adap->dev.parent = &pdev->dev;
+	adap->dev.of_node = of_node_get(np);
+	/* Use the "ibm,port-name" property as the name when it exists */
+	port = of_get_property(np, "ibm,port-name", NULL);
+	strlcpy(adap->name, port ? port : "opal", sizeof(adap->name));
+
+	platform_set_drvdata(pdev, adap);
+	ret = i2c_add_adapter(adap);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to register the i2c adapter\n");
+
+	return ret;
+}
+
+/* Unbind: unregister the adapter that i2c_opal_probe() stored as drvdata. */
+static int i2c_opal_remove(struct platform_device *pdev)
+{
+	/* The adapter pointer was saved with platform_set_drvdata() */
+	i2c_del_adapter(platform_get_drvdata(pdev));
+
+	return 0;
+}
+
+/* Device-tree compatible strings handled by this driver. */
+static const struct of_device_id i2c_opal_of_match[] = {
+	{ .compatible = "ibm,opal-i2c" },
+	/* empty terminating entry */
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_opal_of_match);
+
+static struct platform_driver i2c_opal_driver = {
+	.probe	= i2c_opal_probe,	/* registers the i2c adapter */
+	.remove	= i2c_opal_remove,	/* unregisters it again */
+	.driver	= {
+		.name		= "i2c-opal",
+		.of_match_table	= i2c_opal_of_match,	/* "ibm,opal-i2c" */
+	},
+};
+
+/* Module entry: register the driver only when the firmware offers OPAL. */
+static int __init i2c_opal_init(void)
+{
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		return platform_driver_register(&i2c_opal_driver);
+	return -ENODEV;
+}
+module_init(i2c_opal_init);
+
+static void __exit i2c_opal_exit(void)
+{
+	platform_driver_unregister(&i2c_opal_driver);	/* returns void */
+}
+module_exit(i2c_opal_exit);
+
+MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM OPAL I2C driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 7708939..b899531 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -38,6 +38,17 @@ config INFINIBAND_USER_MEM
 	depends on INFINIBAND_USER_ACCESS != n
 	default y
 
+config INFINIBAND_ON_DEMAND_PAGING
+	bool "InfiniBand on-demand paging support"
+	depends on INFINIBAND_USER_MEM
+	select MMU_NOTIFIER
+	default y
+	---help---
+	  On demand paging support for the InfiniBand subsystem.
+	  Together with driver support this allows registration of
+	  memory regions without pinning their pages, fetching the
+	  pages on demand instead.
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ffd0af6..acf7367 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
 
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8172d37..f80da50 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -176,8 +176,8 @@ static void set_timeout(unsigned long time)
 	unsigned long delay;
 
 	delay = time - jiffies;
-	if ((long)delay <= 0)
-		delay = 1;
+	if ((long)delay < 0)
+		delay = 0;
 
 	mod_delayed_work(addr_wq, &work, delay);
 }
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d2360a8..fa17b55 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -525,17 +525,22 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
 	if (status)
 		process_join_error(group, status);
 	else {
+		int mgids_changed, is_mgid0;
 		ib_find_pkey(group->port->dev->device, group->port->port_num,
 			     be16_to_cpu(rec->pkey), &pkey_index);
 
 		spin_lock_irq(&group->port->lock);
-		group->rec = *rec;
 		if (group->state == MCAST_BUSY &&
 		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
 			group->pkey_index = pkey_index;
-		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+		mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+				       sizeof(group->rec.mgid));
+		group->rec = *rec;
+		if (mgids_changed) {
 			rb_erase(&group->node, &group->port->table);
-			mcast_insert(group->port, group, 1);
+			is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+					   sizeof(mgid0));
+			mcast_insert(group->port, group, is_mgid0);
 		}
 		spin_unlock_irq(&group->port->lock);
 	}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index df0c4f6..aec7a6a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
 #include <linux/hugetlb.h>
 #include <linux/dma-attrs.h>
 #include <linux/slab.h>
+#include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
 
@@ -69,6 +70,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
+ *
+ * If access flags indicate ODP memory, avoid pinning. Instead, stores
+ * the mm for future page fault handling in conjunction with MMU notifiers.
+ *
  * @context: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
@@ -103,17 +108,30 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	umem->context   = context;
 	umem->length    = size;
-	umem->offset    = addr & ~PAGE_MASK;
+	umem->address   = addr;
 	umem->page_size = PAGE_SIZE;
 	umem->pid       = get_task_pid(current, PIDTYPE_PID);
 	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set.  "Local write" and "remote write"
+	 * We ask for writable memory if any of the following
+	 * access flags are set.  "Local write" and "remote write"
 	 * obviously require write access.  "Remote atomic" can do
 	 * things like fetch and add, which will modify memory, and
 	 * "MW bind" can change permissions by binding a window.
 	 */
-	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+	umem->writable  = !!(access &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+
+	if (access & IB_ACCESS_ON_DEMAND) {
+		ret = ib_umem_odp_get(context, umem);
+		if (ret) {
+			kfree(umem);
+			return ERR_PTR(ret);
+		}
+		return umem;
+	}
+
+	umem->odp_data = NULL;
 
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
@@ -132,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	if (!vma_list)
 		umem->hugetlb = 0;
 
-	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+	npages = ib_umem_num_pages(umem);
 
 	down_write(&current->mm->mmap_sem);
 
@@ -235,6 +253,11 @@ void ib_umem_release(struct ib_umem *umem)
 	struct task_struct *task;
 	unsigned long diff;
 
+	if (umem->odp_data) {
+		ib_umem_odp_release(umem);
+		return;
+	}
+
 	__ib_umem_release(umem->context->device, umem, 1);
 
 	task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -246,7 +269,7 @@ void ib_umem_release(struct ib_umem *umem)
 	if (!mm)
 		goto out;
 
-	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+	diff = ib_umem_num_pages(umem);
 
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
@@ -283,6 +306,9 @@ int ib_umem_page_count(struct ib_umem *umem)
 	int n;
 	struct scatterlist *sg;
 
+	if (umem->odp_data)
+		return ib_umem_num_pages(umem);
+
 	shift = ilog2(umem->page_size);
 
 	n = 0;
@@ -292,3 +318,37 @@ int ib_umem_page_count(struct ib_umem *umem)
 	return n;
 }
 EXPORT_SYMBOL(ib_umem_page_count);
+
+/*
+ * Copy from the given ib_umem's pages to the given buffer.
+ *
+ * dst - destination buffer
+ * umem - the umem to copy from
+ * offset - byte offset, relative to the start of the umem, to copy from
+ * length - number of bytes to copy into dst
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length)
+{
+	size_t end = offset + length;
+	int ret;
+
+	if (offset > umem->length || length > umem->length - offset) {
+		pr_err("ib_umem_copy_from not in range. offset: %zu umem length: %zu end: %zu\n",
+		       offset, umem->length, end);
+		return -EINVAL;
+	}
+
+	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+				 offset + ib_umem_offset(umem));
+
+	if (ret < 0)
+		return ret;
+	else if (ret != length)
+		return -EINVAL;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(ib_umem_copy_from);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
new file mode 100644
index 0000000..6095872
--- /dev/null
+++ b/drivers/infiniband/core/umem_odp.c
@@ -0,0 +1,668 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+static void ib_umem_notifier_start_account(struct ib_umem *item)
+{
+	mutex_lock(&item->odp_data->umem_mutex);
+
+	/* Only update private counters for this umem if it has them.
+	 * Otherwise skip it. All page faults will be delayed for this umem. */
+	if (item->odp_data->mn_counters_active) {
+		int notifiers_count = item->odp_data->notifiers_count++;
+
+		if (notifiers_count == 0)
+			/* Initialize the completion object for waiting on
+			 * notifiers. Since notifiers_count was zero, no one
+			 * should be waiting right now. */
+			reinit_completion(&item->odp_data->notifier_completion);
+	}
+	mutex_unlock(&item->odp_data->umem_mutex);
+}
+
+static void ib_umem_notifier_end_account(struct ib_umem *item)
+{
+	mutex_lock(&item->odp_data->umem_mutex);
+
+	/* Only update private counters for this umem if it has them.
+	 * Otherwise skip it. All page faults will be delayed for this umem. */
+	if (item->odp_data->mn_counters_active) {
+		/*
+		 * This sequence increase will notify the QP page fault that
+		 * the page that is going to be mapped in the spte could have
+		 * been freed.
+		 */
+		++item->odp_data->notifiers_seq;
+		if (--item->odp_data->notifiers_count == 0)
+			complete_all(&item->odp_data->notifier_completion);
+	}
+	mutex_unlock(&item->odp_data->umem_mutex);
+}
+
+/* Account for a new mmu notifier in an ib_ucontext. */
+static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+{
+	atomic_inc(&context->notifier_count);
+}
+
+/* Account for a terminating mmu notifier in an ib_ucontext.
+ *
+ * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
+ * the function takes the semaphore itself. */
+static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+{
+	int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
+
+	if (zero_notifiers &&
+	    !list_empty(&context->no_private_counters)) {
+		/* No currently running mmu notifiers. Now is the chance to
+		 * add private accounting to all previously added umems. */
+		struct ib_umem_odp *odp_data, *next;
+
+		/* Prevent concurrent mmu notifiers from working on the
+		 * no_private_counters list. */
+		down_write(&context->umem_rwsem);
+
+		/* Read the notifier_count again, with the umem_rwsem
+		 * semaphore taken for write. */
+		if (!atomic_read(&context->notifier_count)) {
+			list_for_each_entry_safe(odp_data, next,
+						 &context->no_private_counters,
+						 no_private_counters) {
+				mutex_lock(&odp_data->umem_mutex);
+				odp_data->mn_counters_active = true;
+				list_del(&odp_data->no_private_counters);
+				complete_all(&odp_data->notifier_completion);
+				mutex_unlock(&odp_data->umem_mutex);
+			}
+		}
+
+		up_write(&context->umem_rwsem);
+	}
+}
+
+static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
+					       u64 end, void *cookie) {
+	/*
+	 * Increase the number of notifiers running, to
+	 * prevent any further fault handling on this MR.
+	 */
+	ib_umem_notifier_start_account(item);
+	item->odp_data->dying = 1;
+	/* Make sure that the fact the umem is dying is out before we release
+	 * all pending page faults. */
+	smp_wmb();
+	complete_all(&item->odp_data->notifier_completion);
+	item->context->invalidate_range(item, ib_umem_start(item),
+					ib_umem_end(item));
+	return 0;
+}
+
+static void ib_umem_notifier_release(struct mmu_notifier *mn,
+				     struct mm_struct *mm)
+{
+	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+	if (!context->invalidate_range)
+		return;
+
+	ib_ucontext_notifier_start_account(context);
+	down_read(&context->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
+				      ULLONG_MAX,
+				      ib_umem_notifier_release_trampoline,
+				      NULL);
+	up_read(&context->umem_rwsem);
+}
+
+static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+				      u64 end, void *cookie)
+{
+	ib_umem_notifier_start_account(item);
+	item->context->invalidate_range(item, start, start + PAGE_SIZE);
+	ib_umem_notifier_end_account(item);
+	return 0;
+}
+
+static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long address)
+{
+	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+	if (!context->invalidate_range)
+		return;
+
+	ib_ucontext_notifier_start_account(context);
+	down_read(&context->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
+				      address + PAGE_SIZE,
+				      invalidate_page_trampoline, NULL);
+	up_read(&context->umem_rwsem);
+	ib_ucontext_notifier_end_account(context);
+}
+
+static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
+					     u64 end, void *cookie)
+{
+	ib_umem_notifier_start_account(item);
+	item->context->invalidate_range(item, start, end);
+	return 0;
+}
+
+static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						    struct mm_struct *mm,
+						    unsigned long start,
+						    unsigned long end)
+{
+	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+	if (!context->invalidate_range)
+		return;
+
+	ib_ucontext_notifier_start_account(context);
+	down_read(&context->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+				      end,
+				      invalidate_range_start_trampoline, NULL);
+	up_read(&context->umem_rwsem);
+}
+
+static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+					   u64 end, void *cookie)
+{
+	ib_umem_notifier_end_account(item);
+	return 0;
+}
+
+static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end)
+{
+	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+
+	if (!context->invalidate_range)
+		return;
+
+	down_read(&context->umem_rwsem);
+	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+				      end,
+				      invalidate_range_end_trampoline, NULL);
+	up_read(&context->umem_rwsem);
+	ib_ucontext_notifier_end_account(context);
+}
+
+static struct mmu_notifier_ops ib_umem_notifiers = {
+	.release                    = ib_umem_notifier_release,
+	.invalidate_page            = ib_umem_notifier_invalidate_page,
+	.invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
+	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
+};
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+{
+	int ret_val;
+	struct pid *our_pid;
+	struct mm_struct *mm = get_task_mm(current);
+
+	if (!mm)
+		return -EINVAL;
+
+	/* Prevent creating ODP MRs in child processes */
+	rcu_read_lock();
+	our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+	rcu_read_unlock();
+	put_pid(our_pid);
+	if (context->tgid != our_pid) {
+		ret_val = -EINVAL;
+		goto out_mm;
+	}
+
+	umem->hugetlb = 0;
+	umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
+	if (!umem->odp_data) {
+		ret_val = -ENOMEM;
+		goto out_mm;
+	}
+	umem->odp_data->umem = umem;
+
+	mutex_init(&umem->odp_data->umem_mutex);
+
+	init_completion(&umem->odp_data->notifier_completion);
+
+	umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+					    sizeof(*umem->odp_data->page_list));
+	if (!umem->odp_data->page_list) {
+		ret_val = -ENOMEM;
+		goto out_odp_data;
+	}
+
+	umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+					  sizeof(*umem->odp_data->dma_list));
+	if (!umem->odp_data->dma_list) {
+		ret_val = -ENOMEM;
+		goto out_page_list;
+	}
+
+	/*
+	 * When using MMU notifiers, we will get a
+	 * notification before the "current" task (and MM) is
+	 * destroyed. We use the umem_rwsem semaphore to synchronize.
+	 */
+	down_write(&context->umem_rwsem);
+	context->odp_mrs_count++;
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_insert(&umem->odp_data->interval_tree,
+				   &context->umem_tree);
+	if (likely(!atomic_read(&context->notifier_count)))
+		umem->odp_data->mn_counters_active = true;
+	else
+		list_add(&umem->odp_data->no_private_counters,
+			 &context->no_private_counters);
+	downgrade_write(&context->umem_rwsem);
+
+	if (context->odp_mrs_count == 1) {
+		/*
+		 * Note that at this point, no MMU notifier is running
+		 * for this context!
+		 */
+		atomic_set(&context->notifier_count, 0);
+		INIT_HLIST_NODE(&context->mn.hlist);
+		context->mn.ops = &ib_umem_notifiers;
+		/*
+		 * Lock-dep detects a false positive for mmap_sem vs.
+		 * umem_rwsem, due to not grasping downgrade_write correctly.
+		 */
+		lockdep_off();
+		ret_val = mmu_notifier_register(&context->mn, mm);
+		lockdep_on();
+		if (ret_val) {
+			pr_err("Failed to register mmu_notifier %d\n", ret_val);
+			ret_val = -EBUSY;
+			goto out_mutex;
+		}
+	}
+
+	up_read(&context->umem_rwsem);
+
+	/*
+	 * Note that doing an mmput can cause a notifier for the relevant mm.
+	 * If the notifier is called while we hold the umem_rwsem, this will
+	 * cause a deadlock. Therefore, we release the reference only after we
+	 * released the semaphore.
+	 */
+	mmput(mm);
+	return 0;
+
+out_mutex:
+	up_read(&context->umem_rwsem);
+	vfree(umem->odp_data->dma_list);
+out_page_list:
+	vfree(umem->odp_data->page_list);
+out_odp_data:
+	kfree(umem->odp_data);
+out_mm:
+	mmput(mm);
+	return ret_val;
+}
+
+void ib_umem_odp_release(struct ib_umem *umem)
+{
+	struct ib_ucontext *context = umem->context;
+
+	/*
+	 * Ensure that no more pages are mapped in the umem.
+	 *
+	 * It is the driver's responsibility to ensure, before calling us,
+	 * that the hardware will not attempt to access the MR any more.
+	 */
+	ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+				    ib_umem_end(umem));
+
+	down_write(&context->umem_rwsem);
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_remove(&umem->odp_data->interval_tree,
+				   &context->umem_tree);
+	context->odp_mrs_count--;
+	if (!umem->odp_data->mn_counters_active) {
+		list_del(&umem->odp_data->no_private_counters);
+		complete_all(&umem->odp_data->notifier_completion);
+	}
+
+	/*
+	 * Downgrade the lock to a read lock. This ensures that the notifiers
+	 * (who take umem_rwsem for reading) will be able to finish, and we
+	 * will be able to eventually obtain the mmu notifiers SRCU. Note
+	 * that since we are doing it atomically, no other user could register
+	 * and unregister while we do the check.
+	 */
+	downgrade_write(&context->umem_rwsem);
+	if (!context->odp_mrs_count) {
+		struct task_struct *owning_process = NULL;
+		struct mm_struct *owning_mm        = NULL;
+
+		owning_process = get_pid_task(context->tgid,
+					      PIDTYPE_PID);
+		if (owning_process == NULL)
+			/*
+			 * The process is already dead, notifiers were removed
+			 * already.
+			 */
+			goto out;
+
+		owning_mm = get_task_mm(owning_process);
+		if (owning_mm == NULL)
+			/*
+			 * The process' mm is already dead, notifiers were
+			 * removed already.
+			 */
+			goto out_put_task;
+		mmu_notifier_unregister(&context->mn, owning_mm);
+
+		mmput(owning_mm);
+
+out_put_task:
+		put_task_struct(owning_process);
+	}
+out:
+	up_read(&context->umem_rwsem);
+
+	vfree(umem->odp_data->dma_list);
+	vfree(umem->odp_data->page_list);
+	kfree(umem->odp_data);
+	kfree(umem);
+}
+
+/*
+ * Map for DMA and insert a single page into the on-demand paging page tables.
+ *
+ * @umem: the umem to insert the page to.
+ * @page_index: index of the page, counted from ib_umem_start(umem).
+ * @base_virt_addr: unused; the page address is derived from @page_index.
+ * @page: the page struct to map and add.
+ * @access_mask: access permissions needed for this page.
+ * @current_seq: sequence number for synchronization with invalidations,
+ *               taken from umem->odp_data->notifiers_seq.
+ *
+ * The function returns -EFAULT if the DMA mapping operation fails. It returns
+ * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ *
+ * The page is released via put_page even if the operation failed. For
+ * on-demand pinning, the page is released whenever it isn't stored in the
+ * umem.
+ */
+static int ib_umem_odp_map_dma_single_page(
+		struct ib_umem *umem,
+		int page_index,
+		u64 base_virt_addr,
+		struct page *page,
+		u64 access_mask,
+		unsigned long current_seq)
+{
+	struct ib_device *dev = umem->context->device;
+	dma_addr_t dma_addr;
+	int stored_page = 0;
+	int remove_existing_mapping = 0;
+	int ret = 0;
+
+	mutex_lock(&umem->odp_data->umem_mutex);
+	/*
+	 * Note: we avoid writing if seq is different from the initial seq, to
+	 * handle case of a racing notifier. This check also allows us to bail
+	 * early if we have a notifier running in parallel with us.
+	 */
+	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	if (!(umem->odp_data->dma_list[page_index])) {
+		dma_addr = ib_dma_map_page(dev,
+					   page,
+					   0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (ib_dma_mapping_error(dev, dma_addr)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
+		umem->odp_data->page_list[page_index] = page;
+		stored_page = 1;
+	} else if (umem->odp_data->page_list[page_index] == page) {
+		umem->odp_data->dma_list[page_index] |= access_mask;
+	} else {
+		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
+		       umem->odp_data->page_list[page_index], page);
+		/* Better remove the mapping now, to prevent any further
+		 * damage. */
+		remove_existing_mapping = 1;
+	}
+
+out:
+	mutex_unlock(&umem->odp_data->umem_mutex);
+
+	/* On Demand Paging - avoid pinning the page */
+	if (umem->context->invalidate_range || !stored_page)
+		put_page(page);
+
+	if (remove_existing_mapping && umem->context->invalidate_range) {
+		invalidate_page_trampoline(
+			umem,
+			ib_umem_start(umem) + (page_index * PAGE_SIZE),
+			ib_umem_start(umem) + ((page_index+1)*PAGE_SIZE),
+			NULL);
+		ret = -EAGAIN;
+	}
+
+	return ret;
+}
+
+/**
+ * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ *
+ * Pins the range of pages passed in the argument, and maps them to
+ * DMA addresses. The DMA addresses of the mapped pages are updated in
+ * umem->odp_data->dma_list.
+ *
+ * Returns the number of pages mapped on success, negative error code
+ * for failure.
+ * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
+ * the function from completing its task.
+ *
+ * @umem: the umem to map and pin
+ * @user_virt: the address from which we need to map.
+ * @bcnt: the minimal number of bytes to pin and map. The mapping might be
+ *        bigger due to alignment, and may also be smaller in case of an error
+ *        pinning or mapping a page. The actual pages mapped is returned in
+ *        the return value.
+ * @access_mask: bit mask of the requested access permissions for the given
+ *               range.
+ * @current_seq: the MMU notifiers sequence value for synchronization with
+ *               invalidations. The sequence number is read from
+ *               umem->odp_data->notifiers_seq before calling this function
+ */
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq)
+{
+	struct task_struct *owning_process  = NULL;
+	struct mm_struct   *owning_mm       = NULL;
+	struct page       **local_page_list = NULL;
+	u64 off;
+	int j, k, ret = 0, start_idx, npages = 0;
+	u64 base_virt_addr;
+
+	if (access_mask == 0)
+		return -EINVAL;
+
+	if (user_virt < ib_umem_start(umem) ||
+	    user_virt + bcnt > ib_umem_end(umem))
+		return -EFAULT;
+
+	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
+	if (!local_page_list)
+		return -ENOMEM;
+
+	off = user_virt & (~PAGE_MASK);
+	user_virt = user_virt & PAGE_MASK;
+	base_virt_addr = user_virt;
+	bcnt += off; /* Charge for the first page offset as well. */
+
+	owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
+	if (owning_process == NULL) {
+		ret = -EINVAL;
+		goto out_no_task;
+	}
+
+	owning_mm = get_task_mm(owning_process);
+	if (owning_mm == NULL) {
+		ret = -EINVAL;
+		goto out_put_task;
+	}
+
+	start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+	k = start_idx;
+
+	while (bcnt > 0) {
+		const size_t gup_num_pages =
+			min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
+			      PAGE_SIZE / sizeof(struct page *));
+
+		down_read(&owning_mm->mmap_sem);
+		/*
+		 * Note: this might result in redundant page getting. We can
+		 * avoid this by checking dma_list to be 0 before calling
+		 * get_user_pages. However, this makes the code much more
+		 * complex (and doesn't gain us much performance in most use
+		 * cases).
+		 */
+		npages = get_user_pages(owning_process, owning_mm, user_virt,
+					gup_num_pages,
+					access_mask & ODP_WRITE_ALLOWED_BIT, 0,
+					local_page_list, NULL);
+		up_read(&owning_mm->mmap_sem);
+
+		if (npages < 0)
+			break;
+
+		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
+		user_virt += npages << PAGE_SHIFT;
+		for (j = 0; j < npages; ++j) {
+			ret = ib_umem_odp_map_dma_single_page(
+				umem, k, base_virt_addr, local_page_list[j],
+				access_mask, current_seq);
+			if (ret < 0)
+				break;
+			k++;
+		}
+
+		if (ret < 0) {
+			/* Release left over pages when handling errors. */
+			for (++j; j < npages; ++j)
+				put_page(local_page_list[j]);
+			break;
+		}
+	}
+
+	if (ret >= 0) {
+		if (npages < 0 && k == start_idx)
+			ret = npages;
+		else
+			ret = k - start_idx;
+	}
+
+	mmput(owning_mm);
+out_put_task:
+	put_task_struct(owning_process);
+out_no_task:
+	free_page((unsigned long)local_page_list);
+	return ret;
+}
+EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+				 u64 bound)
+{
+	int idx;
+	u64 addr;
+	struct ib_device *dev = umem->context->device;
+
+	virt  = max_t(u64, virt,  ib_umem_start(umem));
+	bound = min_t(u64, bound, ib_umem_end(umem));
+	/* Note that during the run of this function, the
+	 * notifiers_count of the MR is > 0, preventing any racing
+	 * faults from completion. We might be racing with other
+	 * invalidations, so we must make sure we free each page only
+	 * once. */
+	for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
+		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+		mutex_lock(&umem->odp_data->umem_mutex);
+		if (umem->odp_data->page_list[idx]) {
+			struct page *page = umem->odp_data->page_list[idx];
+			struct page *head_page = compound_head(page);
+			dma_addr_t dma = umem->odp_data->dma_list[idx];
+			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+
+			WARN_ON(!dma_addr);
+
+			ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					  DMA_BIDIRECTIONAL);
+			if (dma & ODP_WRITE_ALLOWED_BIT)
+				/*
+				 * set_page_dirty prefers being called with
+				 * the page lock. However, MMU notifiers are
+				 * called sometimes with and sometimes without
+				 * the lock. We rely on the umem_mutex instead
+				 * to prevent other mmu notifiers from
+				 * continuing and allowing the page mapping to
+				 * be removed.
+				 */
+				set_page_dirty(head_page);
+			/* on demand pinning support */
+			if (!umem->context->invalidate_range)
+				put_page(page);
+			umem->odp_data->page_list[idx] = NULL;
+			umem->odp_data->dma_list[idx] = 0;
+		}
+		mutex_unlock(&umem->odp_data->umem_mutex);
+	}
+}
+EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
new file mode 100644
index 0000000..727d788
--- /dev/null
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <rdma/ib_umem_odp.h>
+
+/*
+ * The ib_umem interval tree keeps track of memory regions for which the
+ * HW device requests to receive notifications when the related memory
+ * mapping is changed.
+ *
+ * The ib_ucontext's umem_rwsem protects the tree.
+ */
+
+static inline u64 node_start(struct umem_odp_node *n)
+{
+	struct ib_umem_odp *umem_odp =
+			container_of(n, struct ib_umem_odp, interval_tree);
+
+	return ib_umem_start(umem_odp->umem);
+}
+
+/* Note that the representation of the intervals in the interval tree
+ * considers the ending point as contained in the interval, while the
+ * function ib_umem_end returns the first address which is not contained
+ * in the umem.
+ */
+static inline u64 node_last(struct umem_odp_node *n)
+{
+	struct ib_umem_odp *umem_odp =
+			container_of(n, struct ib_umem_odp, interval_tree);
+
+	return ib_umem_end(umem_odp->umem) - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
+		     node_start, node_last, , rbt_ib_umem)
+
+/* @last is not a part of the interval. See comment for function
+ * node_last.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root,
+				  u64 start, u64 last,
+				  umem_call_back cb,
+				  void *cookie)
+{
+	int ret_val = 0;
+	struct umem_odp_node *node;
+	struct ib_umem_odp *umem;
+
+	if (unlikely(start == last))
+		return ret_val;
+
+	for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
+			node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+		umem = container_of(node, struct ib_umem_odp, interval_tree);
+		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+	}
+
+	return ret_val;
+}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 643c08a..b716b08 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -258,5 +258,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 IB_UVERBS_DECLARE_EX_CMD(create_flow);
 IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
+IB_UVERBS_DECLARE_EX_CMD(query_device);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5ba2a86..532d8eba8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -36,6 +36,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 
@@ -288,6 +289,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	struct ib_uverbs_get_context_resp resp;
 	struct ib_udata                   udata;
 	struct ib_device                 *ibdev = file->device->ib_dev;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct ib_device_attr		  dev_attr;
+#endif
 	struct ib_ucontext		 *ucontext;
 	struct file			 *filp;
 	int ret;
@@ -325,8 +329,25 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	INIT_LIST_HEAD(&ucontext->ah_list);
 	INIT_LIST_HEAD(&ucontext->xrcd_list);
 	INIT_LIST_HEAD(&ucontext->rule_list);
+	rcu_read_lock();
+	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+	rcu_read_unlock();
 	ucontext->closing = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	ucontext->umem_tree = RB_ROOT;
+	init_rwsem(&ucontext->umem_rwsem);
+	ucontext->odp_mrs_count = 0;
+	INIT_LIST_HEAD(&ucontext->no_private_counters);
+
+	ret = ib_query_device(ibdev, &dev_attr);
+	if (ret)
+		goto err_free;
+	if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+		ucontext->invalidate_range = NULL;
+
+#endif
+
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
 	ret = get_unused_fd_flags(O_CLOEXEC);
@@ -371,6 +392,7 @@ err_fd:
 	put_unused_fd(resp.async_fd);
 
 err_free:
+	put_pid(ucontext->tgid);
 	ibdev->dealloc_ucontext(ucontext);
 
 err:
@@ -378,6 +400,52 @@ err:
 	return ret;
 }
 
+static void copy_query_dev_fields(struct ib_uverbs_file *file,
+				  struct ib_uverbs_query_device_resp *resp,
+				  struct ib_device_attr *attr)
+{
+	resp->fw_ver		= attr->fw_ver;
+	resp->node_guid		= file->device->ib_dev->node_guid;
+	resp->sys_image_guid	= attr->sys_image_guid;
+	resp->max_mr_size	= attr->max_mr_size;
+	resp->page_size_cap	= attr->page_size_cap;
+	resp->vendor_id		= attr->vendor_id;
+	resp->vendor_part_id	= attr->vendor_part_id;
+	resp->hw_ver		= attr->hw_ver;
+	resp->max_qp		= attr->max_qp;
+	resp->max_qp_wr		= attr->max_qp_wr;
+	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->max_sge		= attr->max_sge;
+	resp->max_sge_rd	= attr->max_sge_rd;
+	resp->max_cq		= attr->max_cq;
+	resp->max_cqe		= attr->max_cqe;
+	resp->max_mr		= attr->max_mr;
+	resp->max_pd		= attr->max_pd;
+	resp->max_qp_rd_atom	= attr->max_qp_rd_atom;
+	resp->max_ee_rd_atom	= attr->max_ee_rd_atom;
+	resp->max_res_rd_atom	= attr->max_res_rd_atom;
+	resp->max_qp_init_rd_atom	= attr->max_qp_init_rd_atom;
+	resp->max_ee_init_rd_atom	= attr->max_ee_init_rd_atom;
+	resp->atomic_cap		= attr->atomic_cap;
+	resp->max_ee			= attr->max_ee;
+	resp->max_rdd			= attr->max_rdd;
+	resp->max_mw			= attr->max_mw;
+	resp->max_raw_ipv6_qp		= attr->max_raw_ipv6_qp;
+	resp->max_raw_ethy_qp		= attr->max_raw_ethy_qp;
+	resp->max_mcast_grp		= attr->max_mcast_grp;
+	resp->max_mcast_qp_attach	= attr->max_mcast_qp_attach;
+	resp->max_total_mcast_qp_attach	= attr->max_total_mcast_qp_attach;
+	resp->max_ah			= attr->max_ah;
+	resp->max_fmr			= attr->max_fmr;
+	resp->max_map_per_fmr		= attr->max_map_per_fmr;
+	resp->max_srq			= attr->max_srq;
+	resp->max_srq_wr		= attr->max_srq_wr;
+	resp->max_srq_sge		= attr->max_srq_sge;
+	resp->max_pkeys			= attr->max_pkeys;
+	resp->local_ca_ack_delay	= attr->local_ca_ack_delay;
+	resp->phys_port_cnt		= file->device->ib_dev->phys_port_cnt;
+}
+
 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 			       const char __user *buf,
 			       int in_len, int out_len)
@@ -398,47 +466,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 		return ret;
 
 	memset(&resp, 0, sizeof resp);
-
-	resp.fw_ver 		       = attr.fw_ver;
-	resp.node_guid 		       = file->device->ib_dev->node_guid;
-	resp.sys_image_guid 	       = attr.sys_image_guid;
-	resp.max_mr_size 	       = attr.max_mr_size;
-	resp.page_size_cap 	       = attr.page_size_cap;
-	resp.vendor_id 		       = attr.vendor_id;
-	resp.vendor_part_id 	       = attr.vendor_part_id;
-	resp.hw_ver 		       = attr.hw_ver;
-	resp.max_qp 		       = attr.max_qp;
-	resp.max_qp_wr 		       = attr.max_qp_wr;
-	resp.device_cap_flags 	       = attr.device_cap_flags;
-	resp.max_sge 		       = attr.max_sge;
-	resp.max_sge_rd 	       = attr.max_sge_rd;
-	resp.max_cq 		       = attr.max_cq;
-	resp.max_cqe 		       = attr.max_cqe;
-	resp.max_mr 		       = attr.max_mr;
-	resp.max_pd 		       = attr.max_pd;
-	resp.max_qp_rd_atom 	       = attr.max_qp_rd_atom;
-	resp.max_ee_rd_atom 	       = attr.max_ee_rd_atom;
-	resp.max_res_rd_atom 	       = attr.max_res_rd_atom;
-	resp.max_qp_init_rd_atom       = attr.max_qp_init_rd_atom;
-	resp.max_ee_init_rd_atom       = attr.max_ee_init_rd_atom;
-	resp.atomic_cap 	       = attr.atomic_cap;
-	resp.max_ee 		       = attr.max_ee;
-	resp.max_rdd 		       = attr.max_rdd;
-	resp.max_mw 		       = attr.max_mw;
-	resp.max_raw_ipv6_qp 	       = attr.max_raw_ipv6_qp;
-	resp.max_raw_ethy_qp 	       = attr.max_raw_ethy_qp;
-	resp.max_mcast_grp 	       = attr.max_mcast_grp;
-	resp.max_mcast_qp_attach       = attr.max_mcast_qp_attach;
-	resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
-	resp.max_ah 		       = attr.max_ah;
-	resp.max_fmr 		       = attr.max_fmr;
-	resp.max_map_per_fmr 	       = attr.max_map_per_fmr;
-	resp.max_srq 		       = attr.max_srq;
-	resp.max_srq_wr 	       = attr.max_srq_wr;
-	resp.max_srq_sge 	       = attr.max_srq_sge;
-	resp.max_pkeys 		       = attr.max_pkeys;
-	resp.local_ca_ack_delay        = attr.local_ca_ack_delay;
-	resp.phys_port_cnt	       = file->device->ib_dev->phys_port_cnt;
+	copy_query_dev_fields(file, &resp, &attr);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
@@ -947,6 +975,18 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 		goto err_free;
 	}
 
+	if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+		struct ib_device_attr attr;
+
+		ret = ib_query_device(pd->device, &attr);
+		if (ret || !(attr.device_cap_flags &
+				IB_DEVICE_ON_DEMAND_PAGING)) {
+			pr_debug("ODP support not available\n");
+			ret = -EINVAL;
+			goto err_put;
+		}
+	}
+
 	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
 				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
@@ -3253,3 +3293,52 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 
 	return ret ? ret : in_len;
 }
+
+/* Extended QUERY_DEVICE: base attributes plus, on request, the ODP caps. */
+int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
+			      struct ib_udata *ucore,
+			      struct ib_udata *uhw)
+{
+	struct ib_uverbs_ex_query_device_resp resp;
+	struct ib_uverbs_ex_query_device  cmd;
+	struct ib_device_attr attr;
+	struct ib_device *device = file->device->ib_dev;
+	int err;
+
+	if (ucore->inlen < sizeof(cmd))
+		return -EINVAL;
+
+	/* Refuse a response buffer that cannot hold the whole reply. */
+	if (ucore->outlen < sizeof(resp))
+		return -ENOSPC;
+
+	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (err)
+		return err;
+
+	if (cmd.reserved)
+		return -EINVAL;
+
+	err = device->query_device(device, &attr);
+	if (err)
+		return err;
+
+	/* memset also leaves resp.comp_mask at 0 until a cap is reported. */
+	memset(&resp, 0, sizeof(resp));
+	copy_query_dev_fields(file, &resp.base, &attr);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (cmd.comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP) {
+		resp.odp_caps.general_caps = attr.odp_caps.general_caps;
+		resp.odp_caps.per_transport_caps.rc_odp_caps =
+			attr.odp_caps.per_transport_caps.rc_odp_caps;
+		resp.odp_caps.per_transport_caps.uc_odp_caps =
+			attr.odp_caps.per_transport_caps.uc_odp_caps;
+		resp.odp_caps.per_transport_caps.ud_odp_caps =
+			attr.odp_caps.per_transport_caps.ud_odp_caps;
+		resp.comp_mask |= IB_USER_VERBS_EX_QUERY_DEVICE_ODP;
+	}
+#endif
+
+	return ib_copy_to_udata(ucore, &resp, sizeof(resp));
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 71ab83f..e6c23b9 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -122,7 +122,8 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 				    struct ib_udata *ucore,
 				    struct ib_udata *uhw) = {
 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
-	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow
+	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
+	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -296,6 +297,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uobj);
 	}
 
+	put_pid(context->tgid);
+
 	return context->device->dealloc_ucontext(context);
 }
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c2b89cc..f93eb8d 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -879,7 +879,8 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
 		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
 			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
 			rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
-			qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+			if (!(*qp_attr_mask & IB_QP_VID))
+				qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
 		} else {
 			ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
 					qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 2d5cbf4..bdf3507 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -476,7 +476,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 					 c2mr->umem->page_size,
 					 i,
 					 length,
-					 c2mr->umem->offset,
+					 ib_umem_offset(c2mr->umem),
 					 &kva,
 					 c2_convert_access(acc),
 					 c2mr);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 4b8c611..9edc200 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1640,7 +1640,8 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 		__state_set(&ep->com, MPA_REQ_RCVD);
 
 		/* drive upcall */
-		mutex_lock(&ep->parent_ep->com.mutex);
+		mutex_lock_nested(&ep->parent_ep->com.mutex,
+				  SINGLE_DEPTH_NESTING);
 		if (ep->parent_ep->com.state != DEAD) {
 			if (connect_request_upcall(ep))
 				abort_connection(ep, skb, GFP_KERNEL);
@@ -3126,6 +3127,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 		err = c4iw_wait_for_reply(&ep->com.dev->rdev,
 					  &ep->com.wr_wait,
 					  0, 0, __func__);
+	else if (err > 0)
+		err = net_xmit_errno(err);
 	if (err)
 		pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
 		       err, ep->stid,
@@ -3159,6 +3162,8 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 			err = c4iw_wait_for_reply(&ep->com.dev->rdev,
 						  &ep->com.wr_wait,
 						  0, 0, __func__);
+		else if (err > 0)
+			err = net_xmit_errno(err);
 	}
 	if (err)
 		pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 72f1f05..eb5df4e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -670,7 +670,7 @@ static int ep_open(struct inode *inode, struct file *file)
 	idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 	spin_unlock_irq(&epd->devp->lock);
 
-	epd->bufsize = count * 160;
+	epd->bufsize = count * 240;
 	epd->buf = vmalloc(epd->bufsize);
 	if (!epd->buf) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 0744455..cb43c22 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -50,6 +50,13 @@ static int inline_threshold = C4IW_INLINE_THRESHOLD;
 module_param(inline_threshold, int, 0644);
 MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
 
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+	return length >= (8ULL << 30) &&	/* 8GB, T4 and T5 only */
+	       (is_t4(dev->rdev.lldi.adapter_type) ||
+		is_t5(dev->rdev.lldi.adapter_type));
+}
+
 static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
 				       u32 len, dma_addr_t data, int wait)
 {
@@ -369,9 +376,11 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
 	int ret;
 
 	ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-			      FW_RI_STAG_NSMR, mhp->attr.perms,
+			      FW_RI_STAG_NSMR, mhp->attr.len ?
+			      mhp->attr.perms : 0,
 			      mhp->attr.mw_bind_enable, mhp->attr.zbva,
-			      mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+			      mhp->attr.va_fbo, mhp->attr.len ?
+			      mhp->attr.len : -1, shift - 12,
 			      mhp->attr.pbl_size, mhp->attr.pbl_addr);
 	if (ret)
 		return ret;
@@ -536,6 +545,11 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
 			return ret;
 	}
 
+	if (mr_exceeds_hw_limits(rhp, total_size)) {
+		kfree(page_list);
+		return -EINVAL;
+	}
+
 	ret = reregister_mem(rhp, php, &mh, shift, npages);
 	kfree(page_list);
 	if (ret)
@@ -596,6 +610,12 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
 	if (ret)
 		goto err;
 
+	if (mr_exceeds_hw_limits(rhp, total_size)) {
+		kfree(page_list);
+		ret = -EINVAL;
+		goto err;
+	}
+
 	ret = alloc_pbl(mhp, npages);
 	if (ret) {
 		kfree(page_list);
@@ -699,6 +719,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
+
+	if (mr_exceeds_hw_limits(rhp, length))
+		return ERR_PTR(-EINVAL);
+
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 2ed3ece..bb85d47 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1538,9 +1538,9 @@ err:
 	set_state(qhp, C4IW_QP_STATE_ERROR);
 	free = 1;
 	abort = 1;
-	wake_up(&qhp->wait);
 	BUG_ON(!ep);
 	flush_qp(qhp);
+	wake_up(&qhp->wait);
 out:
 	mutex_unlock(&qhp->mutex);
 
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 3488e8c..f914b30 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -399,7 +399,7 @@ reg_user_mr_fallback:
 	pginfo.num_kpages = num_kpages;
 	pginfo.num_hwpages = num_hwpages;
 	pginfo.u.usr.region = e_mr->umem;
-	pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
 	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
 	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
 			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 5e61e9b..c7278f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -214,7 +214,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.user_base = start;
 	mr->mr.iova = virt_addr;
 	mr->mr.length = length;
-	mr->mr.offset = umem->offset;
+	mr->mr.offset = ib_umem_offset(umem);
 	mr->mr.access_flags = mr_access_flags;
 	mr->mr.max_segs = n;
 	mr->umem = umem;
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8f9325c..c36ccbd 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -223,7 +223,6 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 
 	if (flags & IB_MR_REREG_TRANS) {
 		int shift;
-		int err;
 		int n;
 
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4ea0135..27a7015 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1ba6c42..8a87404 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -244,6 +244,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 					   props->max_mcast_grp;
 	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+		props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+	props->odp_caps = dev->odp_caps;
+#endif
+
 out:
 	kfree(in_mad);
 	kfree(out_mad);
@@ -568,6 +574,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 			goto out_count;
 	}
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
+#endif
+
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
@@ -858,7 +868,7 @@ static ssize_t show_reg_pages(struct device *device,
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
-	return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1321,6 +1331,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
 		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+	dev->ib_dev.uverbs_ex_cmd_mask =
+		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
 
 	dev->ib_dev.query_device	= mlx5_ib_query_device;
 	dev->ib_dev.query_port		= mlx5_ib_query_port;
@@ -1366,6 +1378,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 
+	mlx5_ib_internal_query_odp_caps(dev);
+
 	if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
 		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
 		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -1379,16 +1393,19 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_eqs;
 
 	mutex_init(&dev->cap_mask_mutex);
-	spin_lock_init(&dev->mr_lock);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
 		goto err_eqs;
 
-	err = ib_register_device(&dev->ib_dev, NULL);
+	err = mlx5_ib_odp_init_one(dev);
 	if (err)
 		goto err_rsrc;
 
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
+		goto err_odp;
+
 	err = create_umr_res(dev);
 	if (err)
 		goto err_dev;
@@ -1410,6 +1427,9 @@ err_umrc:
 err_dev:
 	ib_unregister_device(&dev->ib_dev);
 
+err_odp:
+	mlx5_ib_odp_remove_one(dev);
+
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
@@ -1425,8 +1445,10 @@ err_dealloc:
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 	struct mlx5_ib_dev *dev = context;
+
 	ib_unregister_device(&dev->ib_dev);
 	destroy_umrc_res(dev);
+	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
@@ -1440,15 +1462,30 @@ static struct mlx5_interface mlx5_ib_interface = {
 
 static int __init mlx5_ib_init(void)
 {
+	int err;
+
 	if (deprecated_prof_sel != 2)
 		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 
-	return mlx5_register_interface(&mlx5_ib_interface);
+	err = mlx5_ib_odp_init();
+	if (err)
+		return err;
+
+	err = mlx5_register_interface(&mlx5_ib_interface);
+	if (err)
+		goto clean_odp;
+
+	return err;
+
+clean_odp:
+	mlx5_ib_odp_cleanup();
+	return err;
 }
 
 static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
+	mlx5_ib_odp_cleanup();
 }
 
 module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index dae07ea..b56e4c5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -32,6 +32,7 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
 
 /* @umem: umem object to scan
@@ -57,6 +58,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 	int entry;
 	unsigned long page_shift = ilog2(umem->page_size);
 
+	/* With ODP we must always match OS page size. */
+	if (umem->odp_data) {
+		*count = ib_umem_page_count(umem);
+		*shift = PAGE_SHIFT;
+		*ncont = *count;
+		if (order)
+			*order = ilog2(roundup_pow_of_two(*count));
+
+		return;
+	}
+
 	addr = addr >> page_shift;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, sizeof(tmp));
@@ -108,8 +120,36 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 	*count = i;
 }
 
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr)
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/* Build an MTT entry from an ODP dma address: keep the address bits and map
+ * the ODP read/write permission bits onto the MLX5_IB_MTT_* flags.
+ */
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
+{
+	u64 access = 0;
+
+	access |= (umem_dma & ODP_READ_ALLOWED_BIT) ? MLX5_IB_MTT_READ : 0;
+	access |= (umem_dma & ODP_WRITE_ALLOWED_BIT) ? MLX5_IB_MTT_WRITE : 0;
+	return (umem_dma & ODP_DMA_ADDR_MASK) | access;
+}
+#endif
+
+/*
+ * Populate the given array with bus addresses from the umem.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
+ * pas - bus addresses array to fill
+ * access_flags - access flags to set on all present pages;
+ *                use enum mlx5_ib_mtt_access_flags for this.
+ */
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags)
 {
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
@@ -120,6 +160,21 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	int len;
 	struct scatterlist *sg;
 	int entry;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	const bool odp = umem->odp_data != NULL;
+
+	if (odp) {
+		WARN_ON(shift != 0);
+		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
+
+		for (i = 0; i < num_pages; ++i) {
+			dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+
+			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+		}
+		return;
+	}
+#endif
 
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -128,8 +183,7 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 		for (k = 0; k < len; k++) {
 			if (!(i & mask)) {
 				cur = base + (k << umem_page_shift);
-				if (umr)
-					cur |= 3;
+				cur |= access_flags;
 
 				pas[i >> shift] = cpu_to_be64(cur);
 				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
@@ -142,6 +196,13 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	}
 }
 
+/* Fill pas[] for the entire umem: offset 0, ib_umem_num_pages() pages. */
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int access_flags)
+{
+	__mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+			       ib_umem_num_pages(umem), pas, access_flags);
+}
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
 	u64 page_size;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 386780f..83f22fe 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -111,6 +111,8 @@ struct mlx5_ib_pd {
  */
 
 #define MLX5_IB_SEND_UMR_UNREG	IB_SEND_RESERVED_START
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
 #define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
 #define MLX5_IB_WR_UMR		IB_WR_RESERVED1
 
@@ -147,6 +149,29 @@ enum {
 	MLX5_QP_EMPTY
 };
 
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+	MLX5_IB_PAGEFAULT_RESPONDER_READ,
+	MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+	MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+	MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+static inline enum mlx5_ib_pagefault_context
+	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+	struct work_struct	work;
+	struct mlx5_pagefault	mpfault;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx5_core_qp	mqp;
@@ -192,6 +217,21 @@ struct mlx5_ib_qp {
 
 	/* Store signature errors */
 	bool			signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * A flag that is true for QP's that are in a state that doesn't
+	 * allow page faults, and shouldn't schedule any more faults.
+	 */
+	int                     disable_page_faults;
+	/*
+	 * The disable_page_faults_lock protects a QP's disable_page_faults
+	 * field, allowing for a thread to atomically check whether the QP
+	 * allows page faults, and if so schedule a page fault.
+	 */
+	spinlock_t              disable_page_faults_lock;
+	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
 };
 
 struct mlx5_ib_cq_buf {
@@ -206,6 +246,19 @@ enum mlx5_ib_qp_flags {
 	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
 };
 
+struct mlx5_umr_wr {
+	union {
+		u64			virt_addr;
+		u64			offset;
+	} target;
+	struct ib_pd		       *pd;
+	unsigned int			page_shift;
+	unsigned int			npages;
+	u32				length;
+	int				access_flags;
+	u32				mkey;
+};
+
 struct mlx5_shared_mr_info {
 	int mr_id;
 	struct ib_umem		*umem;
@@ -253,6 +306,13 @@ struct mlx5_ib_xrcd {
 	u32			xrcdn;
 };
 
+enum mlx5_ib_mtt_access_flags {
+	MLX5_IB_MTT_READ  = (1 << 0),
+	MLX5_IB_MTT_WRITE = (1 << 1),
+};
+
+#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
 	struct mlx5_core_mr	mmr;
@@ -261,12 +321,11 @@ struct mlx5_ib_mr {
 	struct list_head	list;
 	int			order;
 	int			umred;
-	__be64			*pas;
-	dma_addr_t		dma;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
 	struct mlx5_create_mkey_mbox_out out;
 	struct mlx5_core_sig_ctx    *sig;
+	int			live;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -372,11 +431,18 @@ struct mlx5_ib_dev {
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
-	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	int				fill_delay;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct ib_odp_caps	odp_caps;
+	/*
+	 * Sleepable RCU that prevents destruction of MRs while they are still
+	 * being used by a page fault handler.
+	 */
+	struct srcu_struct      mr_srcu;
+#endif
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -490,6 +556,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		      struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+			  void *buffer, u32 length);
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 				int vector, struct ib_ucontext *context,
 				struct ib_udata *udata);
@@ -502,6 +570,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+		       int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -533,8 +603,11 @@ int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 			int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr);
+			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -544,6 +617,38 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+			      unsigned long end);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+	return 0;
+}
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)		{}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)	{}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void)				{}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
@@ -561,4 +666,7 @@ static inline u8 convert_access(int acc)
 	       MLX5_PERM_LOCAL_READ;
 }
 
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5a80dd9..32a28bd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -37,21 +37,34 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/ib_verbs.h>
 #include "mlx5_ib.h"
 
 enum {
 	MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-	MLX5_UMR_ALIGN	= 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+	__aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
+
+static int clean_mr(struct mlx5_ib_mr *mr);
 
-static __be64 *mr_align(__be64 *ptr, int align)
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	unsigned long mask = align - 1;
+	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 
-	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/* Wait until all page fault handlers using the mr complete. */
+	synchronize_srcu(&dev->mr_srcu);
+#endif
+
+	return err;
 }
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -146,7 +159,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		mr->order = ent->order;
 		mr->umred = 1;
 		mr->dev = dev;
-		in->seg.status = 1 << 6;
+		in->seg.status = MLX5_MKEY_STATUS_FREE;
 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
@@ -191,7 +204,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		ent->cur--;
 		ent->size--;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
 		else
@@ -482,7 +495,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 		ent->cur--;
 		ent->size--;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
 		else
@@ -668,7 +681,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
 
 static int use_umr(int order)
 {
-	return order <= 17;
+	return order <= MLX5_MAX_UMR_SHIFT;
 }
 
 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -678,6 +691,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct ib_mr *mr = dev->umrc.mr;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 
 	sg->addr = dma;
 	sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -692,21 +706,24 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 		wr->num_sge = 0;
 
 	wr->opcode = MLX5_IB_WR_UMR;
-	wr->wr.fast_reg.page_list_len = n;
-	wr->wr.fast_reg.page_shift = page_shift;
-	wr->wr.fast_reg.rkey = key;
-	wr->wr.fast_reg.iova_start = virt_addr;
-	wr->wr.fast_reg.length = len;
-	wr->wr.fast_reg.access_flags = access_flags;
-	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+
+	umrwr->npages = n;
+	umrwr->page_shift = page_shift;
+	umrwr->mkey = key;
+	umrwr->target.virt_addr = virt_addr;
+	umrwr->length = len;
+	umrwr->access_flags = access_flags;
+	umrwr->pd = pd;
 }
 
 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 			       struct ib_send_wr *wr, u32 key)
 {
-	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
+	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 	wr->opcode = MLX5_IB_WR_UMR;
-	wr->wr.fast_reg.rkey = key;
+	umrwr->mkey = key;
 }
 
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
@@ -742,7 +759,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	struct ib_send_wr wr, *bad;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
-	int size = sizeof(u64) * npages;
+	int size;
+	__be64 *mr_pas;
+	__be64 *pas;
+	dma_addr_t dma;
 	int err = 0;
 	int i;
 
@@ -761,25 +781,31 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	if (!mr)
 		return ERR_PTR(-EAGAIN);
 
-	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
-	if (!mr->pas) {
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+	 * To avoid copying garbage after the pas array, we allocate
+	 * a little more. */
+	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+	if (!mr_pas) {
 		err = -ENOMEM;
 		goto free_mr;
 	}
 
-	mlx5_ib_populate_pas(dev, umem, page_shift,
-			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+	/* Clear padding after the actual pages. */
+	memset(pas + npages, 0, size - npages * sizeof(u64));
 
-	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
-				 DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, mr->dma)) {
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
 		err = -ENOMEM;
 		goto free_pas;
 	}
 
 	memset(&wr, 0, sizeof(wr));
 	wr.wr_id = (u64)(unsigned long)&umr_context;
-	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+	prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
+			 virt_addr, len, access_flags);
 
 	mlx5_ib_init_umr_context(&umr_context);
 	down(&umrc->sem);
@@ -799,12 +825,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	mr->mmr.size = len;
 	mr->mmr.pd = to_mpd(pd)->pdn;
 
+	mr->live = 1;
+
 unmap_dma:
 	up(&umrc->sem);
-	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
 free_pas:
-	kfree(mr->pas);
+	kfree(mr_pas);
 
 free_mr:
 	if (err) {
@@ -815,6 +843,128 @@ free_mr:
 	return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+		       int zap)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
+	struct ib_umem *umem = mr->umem;
+	int size;
+	__be64 *pas;
+	dma_addr_t dma;
+	struct ib_send_wr wr, *bad;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+	struct ib_sge sg;
+	int err = 0;
+	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+	const int page_index_mask = page_index_alignment - 1;
+	size_t pages_mapped = 0;
+	size_t pages_to_map = 0;
+	size_t pages_iter = 0;
+	int use_emergency_buf = 0;
+
+	/* Write (zap: clear) npages MTTs of mr via UMR. UMR copies MTTs in
+	 * MLX5_UMR_MTT_ALIGNMENT-byte units, so widen the range to match. */
+	if (start_page_index & page_index_mask) {
+		npages += start_page_index & page_index_mask;
+		start_page_index &= ~page_index_mask;
+	}
+
+	pages_to_map = ALIGN(npages, page_index_alignment);
+
+	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+		return -EINVAL;
+
+	size = sizeof(u64) * pages_to_map;
+	size = min_t(int, PAGE_SIZE, size);
+	/* Work in chunks of at most one page. GFP_ATOMIC avoids recursing
+	 * into page-reclaim code when called from an invalidation. The pas
+	 * buffer must be 2k-aligned for Connect-IB. */
+	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+	if (!pas) {
+		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+		pas = mlx5_ib_update_mtt_emergency_buffer;
+		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+		use_emergency_buf = 1;
+		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+		memset(pas, 0, size);
+	}
+	pages_iter = size / sizeof(u64);	/* MTT entries per UMR */
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+		err = -ENOMEM;
+		goto free_pas;
+	}
+
+	for (pages_mapped = 0;
+	     pages_mapped < pages_to_map && !err;
+	     pages_mapped += pages_iter, start_page_index += pages_iter) {
+		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+		npages = min_t(size_t,
+			       pages_iter,
+			       ib_umem_num_pages(umem) - start_page_index);
+
+		if (!zap) {
+			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+					       start_page_index, npages, pas,
+					       MLX5_IB_MTT_PRESENT);
+			/* Zero the tail of the buffer beyond the npages
+			 * entries just filled in from the umem. */
+			memset(pas + npages, 0, size - npages * sizeof(u64));
+		}
+
+		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+		memset(&wr, 0, sizeof(wr));
+		wr.wr_id = (u64)(unsigned long)&umr_context;
+
+		sg.addr = dma;
+		sg.length = ALIGN(npages * sizeof(u64),
+				MLX5_UMR_MTT_ALIGNMENT);
+		sg.lkey = dev->umrc.mr->lkey;
+
+		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+				MLX5_IB_SEND_UMR_UPDATE_MTT;
+		wr.sg_list = &sg;
+		wr.num_sge = 1;
+		wr.opcode = MLX5_IB_WR_UMR;
+		umrwr->npages = sg.length / sizeof(u64);
+		umrwr->page_shift = PAGE_SHIFT;
+		umrwr->mkey = mr->mmr.key;
+		umrwr->target.offset = start_page_index;
+
+		mlx5_ib_init_umr_context(&umr_context);
+		down(&umrc->sem);
+		err = ib_post_send(umrc->qp, &wr, &bad);
+		if (err) {
+			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+		} else {
+			wait_for_completion(&umr_context.done);
+			if (umr_context.status != IB_WC_SUCCESS) {
+				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+					    umr_context.status);
+				err = -EFAULT;
+			}
+		}
+		up(&umrc->sem);
+	}
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+	if (!use_emergency_buf)
+		free_page((unsigned long)pas);
+	else
+		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+	return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 				     u64 length, struct ib_umem *umem,
 				     int npages, int page_shift,
@@ -825,6 +975,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	struct mlx5_ib_mr *mr;
 	int inlen;
 	int err;
+	bool pg_cap = !!(dev->mdev->caps.gen.flags &
+			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
@@ -836,8 +988,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 		err = -ENOMEM;
 		goto err_1;
 	}
-	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
+	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+	 * in the page list submitted with the command. */
+	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
 	in->seg.flags = convert_access(access_flags) |
 		MLX5_ACCESS_MODE_MTT;
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -856,6 +1012,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 		goto err_2;
 	}
 	mr->umem = umem;
+	mr->live = 1;
 	kvfree(in);
 
 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
@@ -910,6 +1067,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			mlx5_ib_dbg(dev, "cache empty for order %d", order);
 			mr = NULL;
 		}
+	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
+		err = -EINVAL;
+		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+		goto error;
 	}
 
 	if (!mr)
@@ -925,16 +1086,51 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mr->umem = umem;
 	mr->npages = npages;
-	spin_lock(&dev->mr_lock);
-	dev->mdev->priv.reg_pages += npages;
-	spin_unlock(&dev->mr_lock);
+	atomic_add(npages, &dev->mdev->priv.reg_pages);
 	mr->ibmr.lkey = mr->mmr.key;
 	mr->ibmr.rkey = mr->mmr.key;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem->odp_data) {
+		/*
+		 * This barrier prevents the compiler from moving the
+		 * setting of umem->odp_data->private to point to our
+		 * MR, before reg_umr finished, to ensure that the MR
+		 * initialization have finished before starting to
+		 * handle invalidations.
+		 */
+		smp_wmb();
+		mr->umem->odp_data->private = mr;
+		/*
+		 * Make sure we will see the new
+		 * umem->odp_data->private value in the invalidation
+		 * routines, before we can get page faults on the
+		 * MR. Page faults can happen once we put the MR in
+		 * the tree, below this line. Without the barrier,
+		 * there can be a fault handling and an invalidation
+		 * before umem->odp_data->private == mr is visible to
+		 * the invalidation handler.
+		 */
+		smp_wmb();
+	}
+#endif
+
 	return &mr->ibmr;
 
 error:
+	/*
+	 * Destroy the umem *before* destroying the MR, to ensure we
+	 * will not have any in-flight notifiers when destroying the
+	 * MR.
+	 *
+	 * As the MR is completely invalid to begin with, and this
+	 * error path is only taken if we can't push the mr entry into
+	 * the pagefault tree, this is safe.
+	 */
+
 	ib_umem_release(umem);
+	/* Kill the MR, and return an error code. */
+	clean_mr(mr);
 	return ERR_PTR(err);
 }
 
@@ -971,17 +1167,14 @@ error:
 	return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
-	struct ib_umem *umem = mr->umem;
-	int npages = mr->npages;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int umred = mr->umred;
 	int err;
 
 	if (!umred) {
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err) {
 			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
 				     mr->mmr.key, err);
@@ -996,15 +1189,47 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 		free_cached_mr(dev, mr);
 	}
 
-	if (umem) {
+	if (!umred)
+		kfree(mr);
+
+	return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	int npages = mr->npages;
+	struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem && umem->odp_data) {
+		/* Prevent new page faults from succeeding */
+		mr->live = 0;
+		/* Wait for all running page-fault handlers to finish. */
+		synchronize_srcu(&dev->mr_srcu);
+		/* Destroy all page mappings */
+		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+					 ib_umem_end(umem));
+		/*
+		 * We kill the umem before the MR for ODP,
+		 * so that there will not be any invalidations in
+		 * flight, looking at the *mr struct.
+		 */
 		ib_umem_release(umem);
-		spin_lock(&dev->mr_lock);
-		dev->mdev->priv.reg_pages -= npages;
-		spin_unlock(&dev->mr_lock);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
+
+		/* Avoid double-freeing the umem. */
+		umem = NULL;
 	}
+#endif
 
-	if (!umred)
-		kfree(mr);
+	clean_mr(mr);
+
+	if (umem) {
+		ib_umem_release(umem);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
+	}
 
 	return 0;
 }
@@ -1028,7 +1253,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
 		goto err_free;
 	}
 
-	in->seg.status = 1 << 6; /* free */
+	in->seg.status = MLX5_MKEY_STATUS_FREE;
 	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -1113,7 +1338,7 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
 		kfree(mr->sig);
 	}
 
-	err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+	err = destroy_mkey(dev, mr);
 	if (err) {
 		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
 			     mr->mmr.key, err);
@@ -1143,7 +1368,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 		goto err_free;
 	}
 
-	in->seg.status = 1 << 6; /* free */
+	in->seg.status = MLX5_MKEY_STATUS_FREE;
 	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
new file mode 100644
index 0000000..a2c541c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+#include "mlx5_ib.h"
+
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
+/* Timeout in ms to wait for an active mmu notifier to complete when handling
+ * a pagefault. */
+#define MMU_NOTIFIER_TIMEOUT 1000
+
+struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+			      unsigned long end)
+{
+	struct mlx5_ib_mr *mr;
+	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+	u64 idx = 0, blk_start_idx = 0;
+	int in_block = 0;
+	u64 addr;
+
+	if (!umem || !umem->odp_data) {
+		pr_err("invalidation called on NULL umem or non-ODP umem\n");
+		return;
+	}
+
+	mr = umem->odp_data->private;
+
+	if (!mr || !mr->ibmr.pd)
+		return;
+
+	start = max_t(u64, ib_umem_start(umem), start);
+	end = min_t(u64, ib_umem_end(umem), end);
+
+	/*
+	 * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
+	 * while we are doing the invalidation, no page fault will attempt to
+	 * overwrite the same MTTs.  Concurrent invalidations might race us,
+	 * but they will write 0s as well, so no difference in the end result.
+	 */
+
+	for (addr = start; addr < end; addr += (u64)umem->page_size) {
+		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+		/*
+		 * Strive to write the MTTs in chunks, but avoid overwriting
+		 * non-existing MTTs. The heuristic here can be improved to
+		 * estimate the cost of another UMR vs. the cost of bigger
+		 * UMR.
+		 */
+		if (umem->odp_data->dma_list[idx] &
+		    (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+			if (!in_block) {
+				blk_start_idx = idx;
+				in_block = 1;
+			}
+		} else {
+			u64 umr_offset = idx & umr_block_mask;
+
+			if (in_block && umr_offset == 0) {
+				mlx5_ib_update_mtt(mr, blk_start_idx,
+						   idx - blk_start_idx, 1);
+				in_block = 0;
+			}
+		}
+	}
+	if (in_block)	/* flush the block still open at the end of the range */
+		mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
+				   1);
+
+	/*
+	 * We are now sure that the device will not access the
+	 * memory. We can safely unmap it, and mark it as dirty if
+	 * needed.
+	 */
+
+	ib_umem_odp_unmap_dma_pages(umem, start, end);
+}
+
+#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {	\
+	if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name)	\
+		ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name;	\
+} while (0)
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+	struct ib_odp_caps *dst = &dev->odp_caps;
+	struct mlx5_odp_caps fw_caps;
+	int rc;
+
+	/* Start from "nothing supported"; bits are only ever added below. */
+	memset(dst, 0, sizeof(*dst));
+	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+		return 0;
+
+	rc = mlx5_query_odp_caps(dev->mdev, &fw_caps);
+	if (rc)
+		return rc;
+
+	/* Translate each reported MLX5_ODP_SUPPORT_* bit to its IB twin. */
+	dst->general_caps = IB_ODP_SUPPORT;
+	COPY_ODP_BIT_MLX_TO_IB(fw_caps, dst, per_transport_caps.ud_odp_caps,
+			       SEND);
+	COPY_ODP_BIT_MLX_TO_IB(fw_caps, dst, per_transport_caps.rc_odp_caps,
+			       SEND);
+	COPY_ODP_BIT_MLX_TO_IB(fw_caps, dst, per_transport_caps.rc_odp_caps,
+			       RECV);
+	COPY_ODP_BIT_MLX_TO_IB(fw_caps, dst, per_transport_caps.rc_odp_caps,
+			       WRITE);
+	COPY_ODP_BIT_MLX_TO_IB(fw_caps, dst, per_transport_caps.rc_odp_caps,
+			       READ);
+
+	return 0;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
+						   u32 key)
+{
+	struct mlx5_core_mr *mmr;
+	struct mlx5_ib_mr *mr;
+
+	mmr = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+	if (!mmr || mmr->key != key)
+		return NULL;
+	mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+	return mr->live ? mr : NULL;	/* MRs that are not live are hidden */
+}
+
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
+				      struct mlx5_ib_pfault *pfault, int error)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+
+	/* Resume the faulting QP, passing along the caller's error flag. */
+	if (mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+					pfault->mpfault.flags, error))
+		pr_err("Failed to resolve the page fault on QP 0x%x\n",
+		       qp->mqp.qpn);
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE: map the pages backing
+ * [io_virt, io_virt + bcnt) of the MR named by key and write their MTTs.
+ * If bytes_mapped is non-NULL, the number of bytes now usable is added to it.
+ *
+ * Returns the number of pages retrieved on success, or:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when the MR is missing, dying or in another PD, or when mapping
+ *  fails. The caller will abort the page fault handling and possibly move
+ *  the QP to an error state; other errors are treated the same way.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
+					 struct mlx5_ib_pfault *pfault,
+					 u32 key, u64 io_virt, size_t bcnt,
+					 u32 *bytes_mapped)
+{
+	struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
+	int srcu_key;
+	unsigned int current_seq;
+	u64 start_idx;
+	int npages = 0, ret = 0;
+	struct mlx5_ib_mr *mr;
+	u64 access_mask = ODP_READ_ALLOWED_BIT;
+
+	srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+	/*
+	 * If we didn't find the MR, it means the MR was closed while we were
+	 * handling the ODP event. In this case we return -EFAULT so that the
+	 * QP will be closed.
+	 */
+	if (!mr || !mr->ibmr.pd) {
+		pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+		       key);
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+	if (!mr->umem->odp_data) {
+		pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			 key);
+		if (bytes_mapped)
+			*bytes_mapped +=
+				(bcnt - pfault->mpfault.bytes_committed);
+		goto srcu_unlock;
+	}
+	if (mr->ibmr.pd != qp->ibqp.pd) {
+		pr_err("Page-fault with different PDs for QP and MR.\n");
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+
+	current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+	/*
+	 * Ensure the sequence number is valid for some time before we call
+	 * gup.
+	 */
+	smp_rmb();
+
+	/*
+	 * Avoid branches - this code will perform correctly
+	 * in all iterations (in iteration 2 and above,
+	 * bytes_committed == 0).
+	 */
+	io_virt += pfault->mpfault.bytes_committed;
+	bcnt -= pfault->mpfault.bytes_committed;
+
+	start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+
+	if (mr->umem->writable)
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
+	npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
+					   access_mask, current_seq);
+	if (npages < 0) {
+		ret = npages;
+		goto srcu_unlock;
+	}
+
+	if (npages > 0) {
+		mutex_lock(&mr->umem->odp_data->umem_mutex);
+		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+			/*
+			 * No need to check whether the MTTs really belong to
+			 * this MR, since ib_umem_odp_map_dma_pages already
+			 * checks this.
+			 */
+			ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+		} else {
+			ret = -EAGAIN;
+		}
+		mutex_unlock(&mr->umem->odp_data->umem_mutex);
+		if (ret < 0) {
+			if (ret != -EAGAIN)
+				pr_err("Failed to update mkey page tables\n");
+			goto srcu_unlock;
+		}
+
+		if (bytes_mapped) {
+			u32 new_mappings = npages * PAGE_SIZE -
+				(io_virt - round_down(io_virt, PAGE_SIZE));
+			*bytes_mapped += min_t(u32, new_mappings, bcnt);
+		}
+	}
+
+srcu_unlock:
+	if (ret == -EAGAIN) {
+		if (!mr->umem->odp_data->dying) {
+			struct ib_umem_odp *odp_data = mr->umem->odp_data;
+			unsigned long timeout =
+				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+			if (!wait_for_completion_timeout(
+					&odp_data->notifier_completion,
+					timeout)) {
+				pr_warn("timeout waiting for mmu notifier completion\n");
+			}
+		} else {
+			/* The MR is being killed, kill the QP as well. */
+			ret = -EFAULT;
+		}
+	}
+	srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+	pfault->mpfault.bytes_committed = 0;
+	return ret ? ret : npages;
+}
+
+/**
+ * pagefault_data_segments() - parse a WQE's data segments for a page fault.
+ *
+ * @qp: the QP on which the fault occurred.
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ *                map. This allows the caller to decide whether enough memory
+ *                was mapped to resolve the page fault successfully (e.g.
+ *                enough for the next MTU, or the entire WQE). May be NULL.
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ *                   the committed bytes). May be NULL.
+ * @receive_queue: non-zero when the WQE was read from a receive queue.
+ *
+ * Returns the number of pages loaded if positive, zero for an empty WQE, or a
+ * negative error code.
+ */
+static int pagefault_data_segments(struct mlx5_ib_qp *qp,
+				   struct mlx5_ib_pfault *pfault, void *wqe,
+				   void *wqe_end, u32 *bytes_mapped,
+				   u32 *total_wqe_bytes, int receive_queue)
+{
+	int ret = 0, npages = 0;
+	u64 io_virt;
+	u32 key;
+	u32 byte_count;
+	size_t bcnt;
+	int inline_segment;
+
+	/* Skip SRQ next-WQE segment. */
+	if (receive_queue && qp->ibqp.srq)
+		wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+	if (bytes_mapped)
+		*bytes_mapped = 0;
+	if (total_wqe_bytes)
+		*total_wqe_bytes = 0;
+
+	while (wqe < wqe_end) {
+		struct mlx5_wqe_data_seg *dseg = wqe;
+
+		io_virt = be64_to_cpu(dseg->addr);
+		key = be32_to_cpu(dseg->lkey);
+		byte_count = be32_to_cpu(dseg->byte_count);
+		inline_segment = !!(byte_count &  MLX5_INLINE_SEG);
+		bcnt	       = byte_count & ~MLX5_INLINE_SEG;
+
+		if (inline_segment) {
+			bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK;
+			wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt,
+				     16);
+		} else {
+			wqe += sizeof(*dseg);
+		}
+
+		/* receive WQE end of sg list. */
+		if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+		    io_virt == 0)
+			break;
+
+		if (!inline_segment && total_wqe_bytes) {
+			*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
+					pfault->mpfault.bytes_committed);
+		}
+
+		/* A zero length data segment designates a length of 2GB. */
+		if (bcnt == 0)
+			bcnt = 1U << 31;
+
+		if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
+			pfault->mpfault.bytes_committed -=
+				min_t(size_t, bcnt,
+				      pfault->mpfault.bytes_committed);
+			continue;
+		}
+
+		ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
+						    bcnt, bytes_mapped);
+		if (ret < 0)
+			break;
+		npages += ret;
+	}
+
+	return ret < 0 ? ret : npages;
+}
+
+/*
+ * Parse initiator WQE: advance *wqe to the scatter-gather list and set
+ * *wqe_end to the WQE's end. Returns 0, or -EFAULT on a bad/unsupported WQE.
+ */
+static int mlx5_ib_mr_initiator_pfault_handler(
+	struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+	void **wqe, void **wqe_end, int wqe_length)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
+	u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+	unsigned ds, opcode;
+#if defined(DEBUG)
+	u32 ctrl_wqe_index, ctrl_qpn;
+#endif
+
+	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+	if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
+		mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n",
+			    ds, wqe_length);
+		return -EFAULT;
+	}
+
+	if (ds == 0) {
+		mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
+			    wqe_index, qp->mqp.qpn);
+		return -EFAULT;
+	}
+
+#if defined(DEBUG)
+	ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
+			MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
+			MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
+	if (wqe_index != ctrl_wqe_index) {
+		mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
+			    wqe_index, qp->mqp.qpn,
+			    ctrl_wqe_index);
+		return -EFAULT;
+	}
+
+	ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
+		MLX5_WQE_CTRL_QPN_SHIFT;
+	if (qp->mqp.qpn != ctrl_qpn) {
+		mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
+			    wqe_index, qp->mqp.qpn,
+			    ctrl_qpn);
+		return -EFAULT;
+	}
+#endif /* DEBUG */
+
+	*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
+	*wqe += sizeof(*ctrl);
+
+	opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
+		 MLX5_WQE_CTRL_OPCODE_MASK;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		switch (opcode) {
+		case MLX5_OPCODE_SEND:
+		case MLX5_OPCODE_SEND_IMM:
+		case MLX5_OPCODE_SEND_INVAL:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_SEND))
+				goto invalid_transport_or_opcode;
+			break;
+		case MLX5_OPCODE_RDMA_WRITE:
+		case MLX5_OPCODE_RDMA_WRITE_IMM:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_WRITE))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_raddr_seg);
+			break;
+		case MLX5_OPCODE_RDMA_READ:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_READ))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_raddr_seg);
+			break;
+		default:
+			goto invalid_transport_or_opcode;
+		}
+		break;
+	case IB_QPT_UD:
+		switch (opcode) {
+		case MLX5_OPCODE_SEND:
+		case MLX5_OPCODE_SEND_IMM:
+			if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
+			      IB_ODP_SUPPORT_SEND))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_datagram_seg);
+			break;
+		default:
+			goto invalid_transport_or_opcode;
+		}
+		break;
+	default:
+invalid_transport_or_opcode:
+		mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
+			    qp->ibqp.qp_type, opcode);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse responder WQE: check that the fault can be handled and set *wqe_end
+ * to the WQE's end (*wqe is left untouched). Returns 0, or -EFAULT otherwise.
+ */
+static int mlx5_ib_mr_responder_pfault_handler(
+	struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+	void **wqe, void **wqe_end, int wqe_length)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	struct mlx5_ib_wq *wq = &qp->rq;
+	int wqe_size = 1 << wq->wqe_shift;
+
+	if (qp->ibqp.srq) {
+		mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+		return -EFAULT;
+	}
+
+	if (qp->wq_sig) {
+		mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
+		return -EFAULT;
+	}
+
+	if (wqe_size > wqe_length) {
+		mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
+		return -EFAULT;
+	}
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+		      IB_ODP_SUPPORT_RECV))
+			goto invalid_transport_or_opcode;
+		break;
+	default:
+invalid_transport_or_opcode:
+		mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
+			    qp->ibqp.qp_type);
+		return -EFAULT;
+	}
+
+	*wqe_end = *wqe + wqe_size;
+
+	return 0;
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
+					  struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	int ret;
+	void *wqe, *wqe_end;
+	u32 bytes_mapped, total_wqe_bytes;
+	char *buffer = NULL;
+	int resume_with_error = 0;
+	u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+	int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+
+	buffer = (char *)__get_free_page(GFP_KERNEL);
+	if (!buffer) {
+		mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
+				    PAGE_SIZE);
+	if (ret < 0) {
+		mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
+			    -ret, wqe_index, qp->mqp.qpn);
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	wqe = buffer;	/* ret is passed on below as the WQE length */
+	if (requestor)
+		ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+							  &wqe_end, ret);
+	else
+		ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+							  &wqe_end, ret);
+	if (ret < 0) {
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	if (wqe >= wqe_end) {
+		mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
+				      &total_wqe_bytes, !requestor);
+	if (ret == -EAGAIN) {
+		goto resolve_page_fault; /* resume with no error flagged */
+	} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
+		mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
+			    -ret);
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+resolve_page_fault:
+	mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
+	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
+		    qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+
+	free_page((unsigned long)buffer);
+}
+
+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length  = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault)
+{
+	u8 event_subtype = pfault->mpfault.event_subtype;
+
+	switch (event_subtype) {
+	case MLX5_PFAULT_SUBTYPE_WQE:
+		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
+	default:
+		pr_warn("Invalid page fault event subtype: 0x%x\n",
+			event_subtype);
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		break;
+	}
+}
+
+static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+{
+	struct mlx5_ib_pfault *pfault = container_of(work,
+						     struct mlx5_ib_pfault,
+						     work);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(&pfault->mpfault);
+	struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
+					     pagefaults[context]);
+	mlx5_ib_mr_pfault_handler(qp, pfault);
+}
+
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 1;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+
+	/*
+	 * Note that at this point, we are guaranteed that no more
+	 * work queue elements will be posted to the work queue with
+	 * the QP we are closing.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+}
+
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 0;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+}
+
+static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
+				   struct mlx5_pagefault *pfault)
+{
+	/*
+	 * Note that we will only get one fault event per QP per context
+	 * (responder/initiator, read/write), until we resolve the page fault
+	 * with the mlx5_ib_page_fault_resume command. Since this function is
+	 * called from within the work element, there is no risk of missing
+	 * events.
+	 */
+	struct mlx5_ib_qp *mibqp = to_mibqp(qp);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(pfault);
+	struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+
+	qp_pfault->mpfault = *pfault;
+
+	/* No need to stop interrupts here since we are in an interrupt */
+	spin_lock(&mibqp->disable_page_faults_lock);
+	if (!mibqp->disable_page_faults)
+		queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
+	spin_unlock(&mibqp->disable_page_faults_lock);
+}
+
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+{
+	int i;
+
+	qp->disable_page_faults = 1;
+	spin_lock_init(&qp->disable_page_faults_lock);
+
+	qp->mqp.pfault_handler	= mlx5_ib_pfault_handler;
+
+	for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
+		INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+{
+	int ret;
+
+	ret = init_srcu_struct(&ibdev->mr_srcu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+{
+	cleanup_srcu_struct(&ibdev->mr_srcu);
+}
+
+int __init mlx5_ib_odp_init(void)
+{
+	mlx5_ib_page_fault_wq =
+		create_singlethread_workqueue("mlx5_ib_page_faults");
+	if (!mlx5_ib_page_fault_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_ib_odp_cleanup(void)
+{
+	destroy_workqueue(mlx5_ib_page_fault_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1cae1c7..be0cd35 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -70,15 +70,6 @@ static const u32 mlx5_ib_opcode[] = {
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
 };
 
-struct umr_wr {
-	u64				virt_addr;
-	struct ib_pd		       *pd;
-	unsigned int			page_shift;
-	unsigned int			npages;
-	u32				length;
-	int				access_flags;
-	u32				mkey;
-};
 
 static int is_qp0(enum ib_qp_type qp_type)
 {
@@ -110,6 +101,77 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
 	return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
 }
 
+/**
+ * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ *
+ * @qp: QP to copy from.
+ * @send: copy from the send queue when non-zero, use the receive queue
+ *	  otherwise.
+ * @wqe_index:  index to start copying from. For send work queues, the
+ *		wqe_index is in units of MLX5_SEND_WQE_BB.
+ *		For receive work queue, it is the number of work queue
+ *		element in the queue.
+ * @buffer: destination buffer.
+ * @length: maximum number of bytes to copy.
+ *
+ * Copies at least a single WQE, but may copy more data. Never writes more
+ * than @length bytes to @buffer; a WQE larger than that fails with -EINVAL.
+ *
+ * Return: the number of bytes copied, or an error code.
+ */
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+			  void *buffer, u32 length)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+	struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
+	struct ib_umem *umem = qp->umem;
+	size_t offset, wq_end;
+	u32 first_copy_length;
+	int wqe_length, ret;
+
+	if (wq->wqe_cnt == 0) {
+		mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
+			    qp->ibqp.qp_type);
+		return -EINVAL;
+	}
+
+	offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
+	wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+
+	if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+		return -EINVAL;
+
+	if (offset > umem->length ||
+	    (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+		return -EINVAL;
+
+	first_copy_length = min_t(u32, offset + length, wq_end) - offset;
+	ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+	if (ret)
+		return ret;
+
+	if (send) {
+		struct mlx5_wqe_ctrl_seg *ctrl = buffer;
+		int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+
+		wqe_length = ds * MLX5_WQE_DS_UNITS;
+	} else {
+		wqe_length = 1 << wq->wqe_shift;
+	}
+
+	if (wqe_length <= first_copy_length)
+		return first_copy_length;
+
+	/* Do not read the remainder unless the whole WQE fits in @length. */
+	if (wqe_length > length)
+		return -EINVAL;
+
+	ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
+				wqe_length - first_copy_length);
+
+	return ret ? ret : wqe_length;
+}
+
 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
 {
 	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -814,6 +876,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	int inlen = sizeof(*in);
 	int err;
 
+	mlx5_ib_odp_create_qp(qp);
+
 	gen = &dev->mdev->caps.gen;
 	mutex_init(&qp->mutex);
 	spin_lock_init(&qp->sq.lock);
@@ -1098,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 	in = kzalloc(sizeof(*in), GFP_KERNEL);
 	if (!in)
 		return;
-	if (qp->state != IB_QPS_RESET)
+	if (qp->state != IB_QPS_RESET) {
+		mlx5_ib_qp_disable_pagefaults(qp);
 		if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
 					MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
 			mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
 				     qp->mqp.qpn);
+	}
 
 	get_cqs(qp, &send_cq, &recv_cq);
 
@@ -1650,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (mlx5_st < 0)
 		goto out;
 
+	/* If moving to a reset or error state, we must disable page faults on
+	 * this QP and flush all current page faults. Otherwise a stale page
+	 * fault may attempt to work on this QP after it is reset and moved
+	 * again to RTS, and may cause the driver and the device to get out of
+	 * sync. */
+	if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+	    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+		mlx5_ib_qp_disable_pagefaults(qp);
+
 	optpar = ib_mask_to_mlx5_opt(attr_mask);
 	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
 	in->optparam = cpu_to_be32(optpar);
@@ -1659,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (err)
 		goto out;
 
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		mlx5_ib_qp_enable_pagefaults(qp);
+
 	qp->state = new_state;
 
 	if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -1848,37 +1926,70 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 	umr->mkey_mask = frwr_mkey_mask();
 }
 
+static __be64 get_umr_reg_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LEN		|
+		 MLX5_MKEY_MASK_PAGE_SIZE	|
+		 MLX5_MKEY_MASK_START_ADDR	|
+		 MLX5_MKEY_MASK_PD		|
+		 MLX5_MKEY_MASK_LR		|
+		 MLX5_MKEY_MASK_LW		|
+		 MLX5_MKEY_MASK_KEY		|
+		 MLX5_MKEY_MASK_RR		|
+		 MLX5_MKEY_MASK_RW		|
+		 MLX5_MKEY_MASK_A		|
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_unreg_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_mtt_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 				struct ib_send_wr *wr)
 {
-	struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
-	u64 mask;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 
 	memset(umr, 0, sizeof(*umr));
 
+	if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+		umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
+	else
+		umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+
 	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
-		umr->flags = 1 << 5; /* fail if not free */
 		umr->klm_octowords = get_klm_octo(umrwr->npages);
-		mask =  MLX5_MKEY_MASK_LEN		|
-			MLX5_MKEY_MASK_PAGE_SIZE	|
-			MLX5_MKEY_MASK_START_ADDR	|
-			MLX5_MKEY_MASK_PD		|
-			MLX5_MKEY_MASK_LR		|
-			MLX5_MKEY_MASK_LW		|
-			MLX5_MKEY_MASK_KEY		|
-			MLX5_MKEY_MASK_RR		|
-			MLX5_MKEY_MASK_RW		|
-			MLX5_MKEY_MASK_A		|
-			MLX5_MKEY_MASK_FREE;
-		umr->mkey_mask = cpu_to_be64(mask);
+		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
+			umr->mkey_mask = get_umr_update_mtt_mask();
+			umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
+			umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+		} else {
+			umr->mkey_mask = get_umr_reg_mr_mask();
+		}
 	} else {
-		umr->flags = 2 << 5; /* fail if free */
-		mask = MLX5_MKEY_MASK_FREE;
-		umr->mkey_mask = cpu_to_be64(mask);
+		umr->mkey_mask = get_umr_unreg_mr_mask();
 	}
 
 	if (!wr->num_sge)
-		umr->flags |= (1 << 7); /* inline */
+		umr->flags |= MLX5_UMR_INLINE;
 }
 
 static u8 get_umr_flags(int acc)
@@ -1895,7 +2006,7 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
 {
 	memset(seg, 0, sizeof(*seg));
 	if (li) {
-		seg->status = 1 << 6;
+		seg->status = MLX5_MKEY_STATUS_FREE;
 		return;
 	}
 
@@ -1912,19 +2023,23 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
 
 static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
 {
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
 	memset(seg, 0, sizeof(*seg));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
-		seg->status = 1 << 6;
+		seg->status = MLX5_MKEY_STATUS_FREE;
 		return;
 	}
 
-	seg->flags = convert_access(wr->wr.fast_reg.access_flags);
-	seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
-	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
-	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
-	seg->log2_page_size = wr->wr.fast_reg.page_shift;
+	seg->flags = convert_access(umrwr->access_flags);
+	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
+		seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+		seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
+	}
+	seg->len = cpu_to_be64(umrwr->length);
+	seg->log2_page_size = umrwr->page_shift;
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
-				       mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+				       mlx5_mkey_variant(umrwr->mkey));
 }
 
 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -2927,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 	int mlx5_state;
 	int err = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * Wait for any outstanding page faults, in case the user frees memory
+	 * based upon this query's result.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
 	mutex_lock(&qp->mutex);
 	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
 	if (!outb) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index fef067c..c0d0296 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2341,9 +2341,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
 			" offset = %u, page size = %u.\n",
 			(unsigned long int)start, (unsigned long int)virt, (u32)length,
-			region->offset, region->page_size);
+			ib_umem_offset(region), region->page_size);
 
-	skip_pages = ((u32)region->offset) >> 12;
+	skip_pages = ((u32)ib_umem_offset(region)) >> 12;
 
 	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
 		ib_umem_release(region);
@@ -2408,7 +2408,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				region_length -= skip_pages << 12;
 				for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
 					skip_pages = 0;
-					if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+					if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
 						goto enough_pages;
 					if ((page_count&0x01FF) == 0) {
 						if (page_count >= 1024 * 512) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ac02ce4..f3cc8c9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -96,7 +96,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 	union ib_gid sgid;
-	u8 zmac[ETH_ALEN];
 
 	if (!(attr->ah_flags & IB_AH_GRH))
 		return ERR_PTR(-EINVAL);
@@ -118,9 +117,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 		goto av_conf_err;
 	}
 
-	memset(&zmac, 0, ETH_ALEN);
-	if (pd->uctx &&
-	    memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+	if (pd->uctx) {
 		status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
                                         attr->dmac, &attr->vlan_id);
 		if (status) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 4c68305..fb8d8c4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -805,7 +805,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 		goto umem_err;
 
 	mr->hwmr.pbe_size = mr->umem->page_size;
-	mr->hwmr.fbo = mr->umem->offset;
+	mr->hwmr.fbo = ib_umem_offset(mr->umem);
 	mr->hwmr.va = usr_addr;
 	mr->hwmr.len = len;
 	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -1410,6 +1410,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
 	mutex_unlock(&dev->dev_lock);
 	if (status)
 		goto mbx_err;
+	if (qp->qp_type == IB_QPT_UD)
+		qp_attr->qkey = params.qkey;
 	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
 	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
 	qp_attr->path_mtu =
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 9bbb553..a77fb4f 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -258,7 +258,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mr->mr.user_base = start;
 	mr->mr.iova = virt_addr;
 	mr->mr.length = length;
-	mr->mr.offset = umem->offset;
+	mr->mr.offset = ib_umem_offset(umem);
 	mr->mr.access_flags = mr_access_flags;
 	mr->umem = umem;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562be..8ba80a6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -98,9 +98,15 @@ enum {
 
 	IPOIB_MCAST_FLAG_FOUND	  = 0,	/* used in set_multicast_list */
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
-	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
+	/*
+	 * For IPOIB_MCAST_FLAG_BUSY
+	 * When set, in flight join and mcast->mc is unreliable
+	 * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+	 *   haven't started yet
+	 * When clear and mcast->mc is valid pointer, join was successful
+	 */
+	IPOIB_MCAST_FLAG_BUSY	  = 2,
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
-	IPOIB_MCAST_JOIN_STARTED  = 4,
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
@@ -317,6 +323,7 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
+	struct workqueue_struct *wq;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
 	struct work_struct flush_light;
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efce..56959ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 	}
 
 	spin_lock_irq(&priv->lock);
-	queue_delayed_work(ipoib_workqueue,
+	queue_delayed_work(priv->wq,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	/* Add this entry to passive ids list head, but do not re-add it
 	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 			spin_lock_irqsave(&priv->lock, flags);
 			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 			ipoib_cm_start_rx_drain(priv);
-			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+			queue_work(priv->wq, &priv->cm.rx_reap_task);
 			spin_unlock_irqrestore(&priv->lock, flags);
 		} else
 			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 				spin_lock_irqsave(&priv->lock, flags);
 				list_move(&p->list, &priv->cm.rx_reap_list);
 				spin_unlock_irqrestore(&priv->lock, flags);
-				queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+				queue_work(priv->wq, &priv->cm.rx_reap_task);
 			}
 			return;
 		}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 	tx->dev = dev;
 	list_add(&tx->list, &priv->cm.start_list);
 	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-	queue_work(ipoib_workqueue, &priv->cm.start_task);
+	queue_work(priv->wq, &priv->cm.start_task);
 	return tx;
 }
 
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		queue_work(priv->wq, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->daddr + 4);
 		tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
-		queue_work(ipoib_workqueue, &priv->cm.skb_task);
+		queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
 	}
 
 	if (!list_empty(&priv->cm.passive_ids))
-		queue_delayed_work(ipoib_workqueue,
+		queue_delayed_work(priv->wq,
 				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	spin_unlock_irq(&priv->lock);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c34..fe65abb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -655,7 +655,7 @@ void ipoib_reap_ah(struct work_struct *work)
 	__ipoib_reap_ah(dev);
 
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+		queue_delayed_work(priv->wq, &priv->ah_reap_task,
 				   round_jiffies_relative(HZ));
 }
 
@@ -664,7 +664,7 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
 	drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
@@ -696,7 +696,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+	queue_delayed_work(priv->wq, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
 dev_stop:
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
 		napi_enable(&priv->napi);
-	ipoib_ib_dev_stop(dev, flush);
+	ipoib_ib_dev_stop(dev);
 	return -1;
 }
 
@@ -738,7 +738,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
 	return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -747,7 +747,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
 	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 	netif_carrier_off(dev);
 
-	ipoib_mcast_stop_thread(dev, flush);
+	ipoib_mcast_stop_thread(dev);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@ void ipoib_drain_cq(struct net_device *dev)
 	local_bh_enable();
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -880,8 +880,7 @@ timeout:
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	if (flush)
-		flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 	begin = jiffies;
 
@@ -918,7 +917,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		    (unsigned long) dev);
 
 	if (dev->flags & IFF_UP) {
-		if (ipoib_ib_dev_open(dev, 1)) {
+		if (ipoib_ib_dev_open(dev)) {
 			ipoib_transport_dev_cleanup(dev);
 			return -ENODEV;
 		}
@@ -1040,12 +1039,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 	}
 
 	if (level >= IPOIB_FLUSH_NORMAL)
-		ipoib_ib_dev_down(dev, 0);
+		ipoib_ib_dev_down(dev);
 
 	if (level == IPOIB_FLUSH_HEAVY) {
 		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-			ipoib_ib_dev_stop(dev, 0);
-		if (ipoib_ib_dev_open(dev, 0) != 0)
+			ipoib_ib_dev_stop(dev);
+		if (ipoib_ib_dev_open(dev) != 0)
 			return;
 		if (netif_queue_stopped(dev))
 			netif_start_queue(dev);
@@ -1097,7 +1096,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 	 */
 	ipoib_flush_paths(dev);
 
-	ipoib_mcast_stop_thread(dev, 1);
+	ipoib_mcast_stop_thread(dev);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_transport_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3..6bad17d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
 
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-	if (ipoib_ib_dev_open(dev, 1)) {
+	if (ipoib_ib_dev_open(dev)) {
 		if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
 			return 0;
 		goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
 	return 0;
 
 err_stop:
-	ipoib_ib_dev_stop(dev, 1);
+	ipoib_ib_dev_stop(dev);
 
 err_disable:
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev, 0);
+	ipoib_ib_dev_down(dev);
+	ipoib_ib_dev_stop(dev);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
 		return;
 	}
 
-	queue_work(ipoib_workqueue, &priv->restart_task);
+	queue_work(priv->wq, &priv->restart_task);
 }
 
 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
 	__ipoib_reap_neigh(priv);
 
 	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 				   arp_tbl.gc_interval);
 }
 
@@ -1133,7 +1133,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
 	/* start garbage collection */
 	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
 	return 0;
@@ -1262,15 +1262,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (ipoib_neigh_hash_init(priv) < 0)
-		goto out;
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
-		goto out_neigh_hash_cleanup;
+		goto out;
 	}
 
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1283,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;
 
+	/*
+	 * Must be after ipoib_ib_dev_init so we can allocate a per
+	 * device wq there and use it here
+	 */
+	if (ipoib_neigh_hash_init(priv) < 0)
+		goto out_dev_uninit;
+
 	return 0;
 
+out_dev_uninit:
+	ipoib_ib_dev_cleanup(dev);
+
 out_tx_ring_cleanup:
 	vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
-out_neigh_hash_cleanup:
-	ipoib_neigh_hash_uninit(dev);
 out:
 	return -ENOMEM;
 }
@@ -1317,6 +1323,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
 	}
 	unregister_netdevice_many(&head);
 
+	/*
+	 * Must be before ipoib_ib_dev_cleanup or we delete an in use
+	 * work queue
+	 */
+	ipoib_neigh_hash_uninit(dev);
+
 	ipoib_ib_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1324,8 +1336,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
-
-	ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -1636,7 +1646,7 @@ register_failed:
 	/* Stop GC if started before flush */
 	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1707,7 +1717,7 @@ static void ipoib_remove_one(struct ib_device *device)
 		/* Stop GC */
 		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 		cancel_delayed_work(&priv->neigh_reap_task);
-		flush_workqueue(ipoib_workqueue);
+		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
 		free_netdev(priv->dev);
@@ -1748,8 +1758,13 @@ static int __init ipoib_init_module(void)
 	 * unregister_netdev() and linkwatch_event take the rtnl lock,
 	 * so flush_scheduled_work() can deadlock during device
 	 * removal.
+	 *
+	 * In addition, bringing one device up and another down at the
+	 * same time can deadlock a single workqueue, so we have this
+	 * global fallback workqueue, but we also attempt to open a
+	 * per device workqueue each time we bring an interface up
 	 */
-	ipoib_workqueue = create_singlethread_workqueue("ipoib");
+	ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5..bc50dd0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -190,12 +190,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		spin_unlock_irq(&priv->lock);
 		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
 		set_qkey = 1;
-
-		if (!ipoib_cm_admin_enabled(dev)) {
-			rtnl_lock();
-			dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-			rtnl_unlock();
-		}
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -277,16 +271,27 @@ ipoib_mcast_sendonly_join_complete(int status,
 	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 
+	/*
+	 * We have to take the mutex to force mcast_sendonly_join to
+	 * return from ib_sa_join_multicast and set mcast->mc to a
+	 * valid value.  Otherwise we were racing with ourselves in
+	 * that we might fail here, but get a valid return from
+	 * ib_sa_join_multicast after we had cleared mcast->mc here,
+	 * resulting in mis-matched joins and leaves and a deadlock
+	 */
+	mutex_lock(&mcast_mutex);
+
 	/* We trap for port events ourselves. */
 	if (status == -ENETRESET)
-		return 0;
+		goto out;
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (status) {
 		if (mcast->logcount++ < 20)
-			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
+			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
+					"join failed for %pI6, status %d\n",
 					mcast->mcmember.mgid.raw, status);
 
 		/* Flush out any queued packets */
@@ -296,11 +301,15 @@ ipoib_mcast_sendonly_join_complete(int status,
 			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 		}
 		netif_tx_unlock_bh(dev);
-
-		/* Clear the busy flag so we try again */
-		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-					    &mcast->flags);
 	}
+out:
+	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	if (status)
+		mcast->mc = NULL;
+	complete(&mcast->done);
+	if (status == -ENETRESET)
+		status = 0;
+	mutex_unlock(&mcast_mutex);
 	return status;
 }
 
@@ -318,12 +327,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	int ret = 0;
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
+		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
+				"multicast joins\n");
 		return -ENODEV;
 	}
 
-	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
+	if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+		ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
+				"sendonly join\n");
 		return -EBUSY;
 	}
 
@@ -331,6 +342,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	rec.port_gid = priv->local_gid;
 	rec.pkey     = cpu_to_be16(priv->pkey);
 
+	mutex_lock(&mcast_mutex);
+	init_completion(&mcast->done);
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
 					 priv->port, &rec,
 					 IB_SA_MCMEMBER_REC_MGID	|
@@ -343,12 +357,14 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	if (IS_ERR(mcast->mc)) {
 		ret = PTR_ERR(mcast->mc);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-			   ret);
+		complete(&mcast->done);
+		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
+			   "failed (ret = %d)\n", ret);
 	} else {
-		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-				mcast->mcmember.mgid.raw);
+		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
+				"sendonly join\n", mcast->mcmember.mgid.raw);
 	}
+	mutex_unlock(&mcast_mutex);
 
 	return ret;
 }
@@ -359,18 +375,29 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 						   carrier_on_task);
 	struct ib_port_attr attr;
 
-	/*
-	 * Take rtnl_lock to avoid racing with ipoib_stop() and
-	 * turning the carrier back on while a device is being
-	 * removed.
-	 */
 	if (ib_query_port(priv->ca, priv->port, &attr) ||
 	    attr.state != IB_PORT_ACTIVE) {
 		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
 		return;
 	}
 
-	rtnl_lock();
+	/*
+	 * Take rtnl_lock to avoid racing with ipoib_stop() and
+	 * turning the carrier back on while a device is being
+	 * removed.  However, ipoib_stop() will attempt to flush
+	 * the workqueue while holding the rtnl lock, so loop
+	 * on trylock until either we get the lock or we see
+	 * FLAG_ADMIN_UP go away as that signals that we are bailing
+	 * and can safely ignore the carrier on work.
+	 */
+	while (!rtnl_trylock()) {
+		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			return;
+		else
+			msleep(20);
+	}
+	if (!ipoib_cm_admin_enabled(priv->dev))
+		dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
 	netif_carrier_on(priv->dev);
 	rtnl_unlock();
 }
@@ -385,60 +412,63 @@ static int ipoib_mcast_join_complete(int status,
 	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
 			mcast->mcmember.mgid.raw, status);
 
+	/*
+	 * We have to take the mutex to force mcast_join to
+	 * return from ib_sa_join_multicast and set mcast->mc to a
+	 * valid value.  Otherwise we were racing with ourselves in
+	 * that we might fail here, but get a valid return from
+	 * ib_sa_join_multicast after we had cleared mcast->mc here,
+	 * resulting in mis-matched joins and leaves and a deadlock
+	 */
+	mutex_lock(&mcast_mutex);
+
 	/* We trap for port events ourselves. */
-	if (status == -ENETRESET) {
-		status = 0;
+	if (status == -ENETRESET)
 		goto out;
-	}
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (!status) {
 		mcast->backoff = 1;
-		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task, 0);
-		mutex_unlock(&mcast_mutex);
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 
 		/*
-		 * Defer carrier on work to ipoib_workqueue to avoid a
+		 * Defer carrier on work to priv->wq to avoid a
 		 * deadlock on rtnl_lock here.
 		 */
 		if (mcast == priv->broadcast)
-			queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-		status = 0;
-		goto out;
-	}
-
-	if (mcast->logcount++ < 20) {
-		if (status == -ETIMEDOUT || status == -EAGAIN) {
-			ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-					mcast->mcmember.mgid.raw, status);
-		} else {
-			ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-				   mcast->mcmember.mgid.raw, status);
+			queue_work(priv->wq, &priv->carrier_on_task);
+	} else {
+		if (mcast->logcount++ < 20) {
+			if (status == -ETIMEDOUT || status == -EAGAIN) {
+				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+						mcast->mcmember.mgid.raw, status);
+			} else {
+				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+					   mcast->mcmember.mgid.raw, status);
+			}
 		}
-	}
-
-	mcast->backoff *= 2;
-	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-	/* Clear the busy flag so we try again */
-	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-	mutex_lock(&mcast_mutex);
+		mcast->backoff *= 2;
+		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+	}
+out:
 	spin_lock_irq(&priv->lock);
-	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	if (status)
+		mcast->mc = NULL;
+	complete(&mcast->done);
+	if (status == -ENETRESET)
+		status = 0;
+	if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		queue_delayed_work(priv->wq, &priv->mcast_task,
 				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
-out:
-	complete(&mcast->done);
+
 	return status;
 }
 
@@ -487,10 +517,9 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
-	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	mutex_lock(&mcast_mutex);
 	init_completion(&mcast->done);
-	set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
@@ -504,13 +533,11 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task,
+			queue_delayed_work(priv->wq, &priv->mcast_task,
 					   mcast->backoff * HZ);
-		mutex_unlock(&mcast_mutex);
 	}
+	mutex_unlock(&mcast_mutex);
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -547,8 +574,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			ipoib_warn(priv, "failed to allocate broadcast group\n");
 			mutex_lock(&mcast_mutex);
 			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-				queue_delayed_work(ipoib_workqueue,
-						   &priv->mcast_task, HZ);
+				queue_delayed_work(priv->wq, &priv->mcast_task,
+						   HZ);
 			mutex_unlock(&mcast_mutex);
 			return;
 		}
@@ -563,7 +590,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
 			ipoib_mcast_join(dev, priv->broadcast, 0);
 		return;
 	}
@@ -571,23 +599,33 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	while (1) {
 		struct ipoib_mcast *mcast = NULL;
 
+		/*
+		 * Need the mutex so our flags are consistent, need the
+		 * priv->lock so we don't race with list removals in either
+		 * mcast_dev_flush or mcast_restart_task
+		 */
+		mutex_lock(&mcast_mutex);
 		spin_lock_irq(&priv->lock);
 		list_for_each_entry(mcast, &priv->multicast_list, list) {
-			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+			if (IS_ERR_OR_NULL(mcast->mc) &&
+			    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+			    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
 				/* Found the next unjoined group */
 				break;
 			}
 		}
 		spin_unlock_irq(&priv->lock);
+		mutex_unlock(&mcast_mutex);
 
 		if (&mcast->list == &priv->multicast_list) {
 			/* All done */
 			break;
 		}
 
-		ipoib_mcast_join(dev, mcast, 1);
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+			ipoib_mcast_sendonly_join(mcast);
+		else
+			ipoib_mcast_join(dev, mcast, 1);
 		return;
 	}
 
@@ -604,13 +642,13 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -621,8 +659,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 	cancel_delayed_work(&priv->mcast_task);
 	mutex_unlock(&mcast_mutex);
 
-	if (flush)
-		flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 	return 0;
 }
@@ -633,6 +670,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 	int ret = 0;
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+		ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+	if (!IS_ERR_OR_NULL(mcast->mc))
 		ib_sa_free_multicast(mcast->mc);
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -685,6 +725,8 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
 		__ipoib_mcast_add(dev, mcast);
 		list_add_tail(&mcast->list, &priv->multicast_list);
+		if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	}
 
 	if (!mcast->ah) {
@@ -698,8 +740,6 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			ipoib_dbg_mcast(priv, "no address vector, "
 					"but multicast join already started\n");
-		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-			ipoib_mcast_sendonly_join(mcast);
 
 		/*
 		 * If lookup completes between here and out:, don't
@@ -759,9 +799,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* seperate between the wait to the leave*/
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			wait_for_completion(&mcast->done);
 
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -794,8 +837,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 
 	ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
-	ipoib_mcast_stop_thread(dev, 0);
-
 	local_irq_save(flags);
 	netif_addr_lock(dev);
 	spin_lock(&priv->lock);
@@ -880,14 +921,38 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	netif_addr_unlock(dev);
 	local_irq_restore(flags);
 
-	/* We have to cancel outside of the spinlock */
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
+	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+			wait_for_completion(&mcast->done);
+
+	/*
+	 * We have to cancel outside of the spinlock, but we have to
+	 * take the rtnl lock or else we race with the removal of
+	 * entries from the remove list in mcast_dev_flush as part
+	 * of ipoib_stop().  We detect the drop of the ADMIN_UP flag
+	 * to signal that we have hit this particular race, and we
+	 * return since we know we don't need to do anything else
+	 * anyway.
+	 */
+	while (!rtnl_trylock()) {
+		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			return;
+		else
+			msleep(20);
+	}
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
-
-	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-		ipoib_mcast_start_thread(dev);
+	/*
+	 * Restart our join task if needed
+	 */
+	ipoib_mcast_start_thread(dev);
+	rtnl_unlock();
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d4..b72a753 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -145,10 +145,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	int ret, size;
 	int i;
 
+	/*
+	 * the various IPoIB tasks assume they will never race against
+	 * themselves, so always use a single thread workqueue
+	 */
+	priv->wq = create_singlethread_workqueue("ipoib_wq");
+	if (!priv->wq) {
+		printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+		return -ENODEV;
+	}
+
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
 		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-		return -ENODEV;
+		goto out_free_wq;
 	}
 
 	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@ out_free_mr:
 
 out_free_pd:
 	ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+	destroy_workqueue(priv->wq);
+	priv->wq = NULL;
 	return -ENODEV;
 }
 
@@ -270,6 +284,12 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
 	if (ib_dealloc_pd(priv->pd))
 		ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+	if (priv->wq) {
+		flush_workqueue(priv->wq);
+		destroy_workqueue(priv->wq);
+		priv->wq = NULL;
+	}
 }
 
 void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 20ca6a6..6a594aa 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,7 +97,7 @@ module_param_named(pi_enable, iser_pi_enable, bool, 0644);
 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
 
 module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
 
 static struct workqueue_struct *release_wq;
 struct iser_global ig;
@@ -164,18 +164,42 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 	return 0;
 }
 
-int iser_initialize_task_headers(struct iscsi_task *task,
-						struct iser_tx_desc *tx_desc)
+/**
+ * iser_initialize_task_headers() - Initialize task headers
+ * @task:       iscsi task
+ * @tx_desc:    iser tx descriptor
+ *
+ * Notes:
+ * This routine may race with iser teardown flow for scsi
+ * error handling TMFs. So for TMF we should acquire the
+ * state mutex to avoid dereferencing the IB device which
+ * may have already been terminated.
+ */
+int
+iser_initialize_task_headers(struct iscsi_task *task,
+			     struct iser_tx_desc *tx_desc)
 {
-	struct iser_conn       *iser_conn   = task->conn->dd_data;
+	struct iser_conn *iser_conn = task->conn->dd_data;
 	struct iser_device *device = iser_conn->ib_conn.device;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	u64 dma_addr;
+	const bool mgmt_task = !task->sc && !in_interrupt();
+	int ret = 0;
+
+	if (unlikely(mgmt_task))
+		mutex_lock(&iser_conn->state_mutex);
+
+	if (unlikely(iser_conn->state != ISER_CONN_UP)) {
+		ret = -ENODEV;
+		goto out;
+	}
 
 	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
 				ISER_HEADERS_LEN, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(device->ib_device, dma_addr))
-		return -ENOMEM;
+	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	tx_desc->dma_addr = dma_addr;
 	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
@@ -183,7 +207,11 @@ int iser_initialize_task_headers(struct iscsi_task *task,
 	tx_desc->tx_sg[0].lkey   = device->mr->lkey;
 
 	iser_task->iser_conn = iser_conn;
-	return 0;
+out:
+	if (unlikely(mgmt_task))
+		mutex_unlock(&iser_conn->state_mutex);
+
+	return ret;
 }
 
 /**
@@ -199,9 +227,14 @@ static int
 iscsi_iser_task_init(struct iscsi_task *task)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
+	int ret;
 
-	if (iser_initialize_task_headers(task, &iser_task->desc))
-			return -ENOMEM;
+	ret = iser_initialize_task_headers(task, &iser_task->desc);
+	if (ret) {
+		iser_err("Failed to init task %p, err = %d\n",
+			 iser_task, ret);
+		return ret;
+	}
 
 	/* mgmt task */
 	if (!task->sc)
@@ -508,8 +541,8 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 	 */
 	if (iser_conn) {
 		mutex_lock(&iser_conn->state_mutex);
-		iscsi_conn_stop(cls_conn, flag);
 		iser_conn_terminate(iser_conn);
+		iscsi_conn_stop(cls_conn, flag);
 
 		/* unbind */
 		iser_conn->iscsi_conn = NULL;
@@ -541,12 +574,13 @@ iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
 static inline unsigned int
 iser_dif_prot_caps(int prot_caps)
 {
-	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
-						      SHOST_DIX_TYPE1_PROTECTION : 0) |
-	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
-						      SHOST_DIX_TYPE2_PROTECTION : 0) |
-	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
-						      SHOST_DIX_TYPE3_PROTECTION : 0);
+	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
+		SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
+		SHOST_DIX_TYPE1_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
+		SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
+		SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
 }
 
 /**
@@ -569,6 +603,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	struct Scsi_Host *shost;
 	struct iser_conn *iser_conn = NULL;
 	struct ib_conn *ib_conn;
+	u16 max_cmds;
 
 	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
 	if (!shost)
@@ -586,26 +621,41 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	 */
 	if (ep) {
 		iser_conn = ep->dd_data;
+		max_cmds = iser_conn->max_cmds;
+
+		mutex_lock(&iser_conn->state_mutex);
+		if (iser_conn->state != ISER_CONN_UP) {
+			iser_err("iser conn %p already started teardown\n",
+				 iser_conn);
+			mutex_unlock(&iser_conn->state_mutex);
+			goto free_host;
+		}
+
 		ib_conn = &iser_conn->ib_conn;
 		if (ib_conn->pi_support) {
 			u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
 
 			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
-			if (iser_pi_guard)
-				scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
-			else
-				scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
+						   SHOST_DIX_GUARD_CRC);
 		}
-	}
 
-	if (iscsi_host_add(shost, ep ?
-			   ib_conn->device->ib_device->dma_device : NULL))
-		goto free_host;
+		if (iscsi_host_add(shost,
+				   ib_conn->device->ib_device->dma_device)) {
+			mutex_unlock(&iser_conn->state_mutex);
+			goto free_host;
+		}
+		mutex_unlock(&iser_conn->state_mutex);
+	} else {
+		max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+		if (iscsi_host_add(shost, NULL))
+			goto free_host;
+	}
 
-	if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+	if (cmds_max > max_cmds) {
 		iser_info("cmds_max changed from %u to %u\n",
-			  cmds_max, ISER_DEF_XMIT_CMDS_MAX);
-		cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+			  cmds_max, max_cmds);
+		cmds_max = max_cmds;
 	}
 
 	cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index cd4174c..5ce2681 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,34 +69,31 @@
 
 #define DRV_NAME	"iser"
 #define PFX		DRV_NAME ": "
-#define DRV_VER		"1.4.8"
+#define DRV_VER		"1.5"
 
 #define iser_dbg(fmt, arg...)				 \
 	do {						 \
-		if (iser_debug_level > 2)		 \
+		if (unlikely(iser_debug_level > 2))	 \
 			printk(KERN_DEBUG PFX "%s: " fmt,\
 				__func__ , ## arg);	 \
 	} while (0)
 
 #define iser_warn(fmt, arg...)				\
 	do {						\
-		if (iser_debug_level > 0)		\
+		if (unlikely(iser_debug_level > 0))	\
 			pr_warn(PFX "%s: " fmt,		\
 				__func__ , ## arg);	\
 	} while (0)
 
 #define iser_info(fmt, arg...)				\
 	do {						\
-		if (iser_debug_level > 1)		\
+		if (unlikely(iser_debug_level > 1))	\
 			pr_info(PFX "%s: " fmt,		\
 				__func__ , ## arg);	\
 	} while (0)
 
-#define iser_err(fmt, arg...)				\
-	do {						\
-		printk(KERN_ERR PFX "%s: " fmt,		\
-		       __func__ , ## arg);		\
-	} while (0)
+#define iser_err(fmt, arg...) \
+	pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
 #define SHIFT_4K	12
 #define SIZE_4K	(1ULL << SHIFT_4K)
@@ -144,6 +141,11 @@
 					ISER_MAX_TX_MISC_PDUS         + \
 					ISER_MAX_RX_MISC_PDUS)
 
+#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr			\
+					 - ISER_MAX_TX_MISC_PDUS	\
+					 - ISER_MAX_RX_MISC_PDUS) /	\
+					 (1 + ISER_INFLIGHT_DATAOUTS))
+
 #define ISER_WC_BATCH_COUNT   16
 #define ISER_SIGNAL_CMD_COUNT 32
 
@@ -247,7 +249,6 @@ struct iscsi_endpoint;
  * @va:           MR start address (buffer va)
  * @len:          MR length
  * @mem_h:        pointer to registration context (FMR/Fastreg)
- * @is_mr:        indicates weather we registered the buffer
  */
 struct iser_mem_reg {
 	u32  lkey;
@@ -255,7 +256,6 @@ struct iser_mem_reg {
 	u64  va;
 	u64  len;
 	void *mem_h;
-	int  is_mr;
 };
 
 /**
@@ -323,8 +323,6 @@ struct iser_rx_desc {
 	char		             pad[ISER_RX_PAD_SIZE];
 } __attribute__((packed));
 
-#define ISER_MAX_CQ 4
-
 struct iser_conn;
 struct ib_conn;
 struct iscsi_iser_task;
@@ -375,7 +373,7 @@ struct iser_device {
 	struct list_head             ig_list;
 	int                          refcount;
 	int			     comps_used;
-	struct iser_comp	     comps[ISER_MAX_CQ];
+	struct iser_comp	     *comps;
 	int                          (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
 								unsigned cmds_max);
 	void                         (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
@@ -432,6 +430,7 @@ struct fast_reg_descriptor {
  * @cma_id:              rdma_cm connection maneger handle
  * @qp:                  Connection Queue-pair
  * @post_recv_buf_count: post receive counter
+ * @sig_count:           send work request signal count
  * @rx_wr:               receive work request for batch posts
  * @device:              reference to iser device
  * @comp:                iser completion context
@@ -452,6 +451,7 @@ struct ib_conn {
 	struct rdma_cm_id           *cma_id;
 	struct ib_qp	            *qp;
 	int                          post_recv_buf_count;
+	u8                           sig_count;
 	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX];
 	struct iser_device          *device;
 	struct iser_comp	    *comp;
@@ -482,6 +482,7 @@ struct ib_conn {
  *                    to max number of post recvs
  * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
  * @min_posted_rx:    (qp_max_recv_dtos >> 2)
+ * @max_cmds:         maximum cmds allowed for this connection
  * @name:             connection peer portal
  * @release_work:     deffered work for release job
  * @state_mutex:      protects iser onnection state
@@ -507,6 +508,7 @@ struct iser_conn {
 	unsigned		     qp_max_recv_dtos;
 	unsigned		     qp_max_recv_dtos_mask;
 	unsigned		     min_posted_rx;
+	u16                          max_cmds;
 	char 			     name[ISER_OBJECT_NAME_SIZE];
 	struct work_struct	     release_work;
 	struct mutex		     state_mutex;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5a489ea..3821633 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -369,7 +369,7 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 	return 0;
 }
 
-static inline bool iser_signal_comp(int sig_count)
+static inline bool iser_signal_comp(u8 sig_count)
 {
 	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
 }
@@ -388,7 +388,7 @@ int iser_send_command(struct iscsi_conn *conn,
 	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 	struct scsi_cmnd *sc  =  task->sc;
 	struct iser_tx_desc *tx_desc = &iser_task->desc;
-	static unsigned sig_count;
+	u8 sig_count = ++iser_conn->ib_conn.sig_count;
 
 	edtl = ntohl(hdr->data_length);
 
@@ -435,7 +435,7 @@ int iser_send_command(struct iscsi_conn *conn,
 	iser_task->status = ISER_TASK_STATUS_STARTED;
 
 	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
-			     iser_signal_comp(++sig_count));
+			     iser_signal_comp(sig_count));
 	if (!err)
 		return 0;
 
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 6c5ce35..abce933 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,7 +73,6 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 
 	if (cmd_dir == ISER_DIR_OUT) {
 		/* copy the unaligned sg the buffer which is used for RDMA */
-		int i;
 		char *p, *from;
 
 		sgl = (struct scatterlist *)data->buf;
@@ -409,7 +408,6 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 		regd_buf->reg.rkey = device->mr->rkey;
 		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
 		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-		regd_buf->reg.is_mr = 0;
 
 		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
 			 "va: 0x%08lX sz: %ld]\n",
@@ -440,13 +438,13 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 	return 0;
 }
 
-static inline void
+static void
 iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
 		    struct ib_sig_domain *domain)
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
-	domain->sig.dif.pi_interval = sc->device->sector_size;
-	domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
+	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
@@ -454,8 +452,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
 	domain->sig.dif.apptag_check_mask = 0xffff;
 	domain->sig.dif.app_escape = true;
 	domain->sig.dif.ref_escape = true;
-	if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
-	    scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
 		domain->sig.dif.ref_remap = true;
 };
 
@@ -473,26 +470,16 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
 	case SCSI_PROT_WRITE_STRIP:
 		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-		/*
-		 * At the moment we use this modparam to tell what is
-		 * the memory bg_type, in the future we will take it
-		 * from sc.
-		 */
-		sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-						 IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+						IB_T10DIF_CSUM : IB_T10DIF_CRC;
 		break;
 	case SCSI_PROT_READ_PASS:
 	case SCSI_PROT_WRITE_PASS:
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
 		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-		/*
-		 * At the moment we use this modparam to tell what is
-		 * the memory bg_type, in the future we will take it
-		 * from sc.
-		 */
-		sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-						 IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+						IB_T10DIF_CSUM : IB_T10DIF_CRC;
 		break;
 	default:
 		iser_err("Unsupported PI operation %d\n",
@@ -503,26 +490,28 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
 	return 0;
 }
 
-static int
+static inline void
 iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
 {
-	switch (scsi_get_prot_type(sc)) {
-	case SCSI_PROT_DIF_TYPE0:
-		break;
-	case SCSI_PROT_DIF_TYPE1:
-	case SCSI_PROT_DIF_TYPE2:
-		*mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
-		break;
-	case SCSI_PROT_DIF_TYPE3:
-		*mask = ISER_CHECK_GUARD;
-		break;
-	default:
-		iser_err("Unsupported protection type %d\n",
-			 scsi_get_prot_type(sc));
-		return -EINVAL;
-	}
+	*mask = 0;
+	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
+		*mask |= ISER_CHECK_REFTAG;
+	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
+		*mask |= ISER_CHECK_GUARD;
+}
 
-	return 0;
+/*
+ * Fill @inv_wr as a local-invalidate work request for @mr's current rkey,
+ * then update @mr's key via ib_inc_rkey()/ib_update_fast_reg_key().
+ */
+static void
+iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+	memset(inv_wr, 0, sizeof(*inv_wr));
+	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->opcode = IB_WR_LOCAL_INV;
+	inv_wr->ex.invalidate_rkey = mr->rkey;
+	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 }
 
 static int
@@ -536,26 +525,17 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	struct ib_sig_attrs sig_attrs;
 	int ret;
-	u32 key;
 
 	memset(&sig_attrs, 0, sizeof(sig_attrs));
 	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
 	if (ret)
 		goto err;
 
-	ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
-	if (ret)
-		goto err;
+	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
 
 	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
 	}
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
@@ -585,12 +565,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
 	sig_sge->lkey = pi_ctx->sig_mr->lkey;
 	sig_sge->addr = 0;
-	sig_sge->length = data_sge->length + prot_sge->length;
-	if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
-	    scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
-		sig_sge->length += (data_sge->length /
-				   iser_task->sc->device->sector_size) * 8;
-	}
+	sig_sge->length = scsi_transfer_length(iser_task->sc);
 
 	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
 		 sig_sge->addr, sig_sge->length,
@@ -613,7 +588,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 	struct ib_fast_reg_page_list *frpl;
 	struct ib_send_wr fastreg_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
-	u8 key;
 	int ret, offset, size, plen;
 
 	/* if there a single dma entry, dma mr suffices */
@@ -645,14 +619,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 	}
 
 	if (!(desc->reg_indicators & ind)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.ex.invalidate_rkey = mr->rkey;
+		iser_inv_rkey(&inv_wr, mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
@@ -770,15 +738,11 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
 		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
 		regd_buf->reg.va = sig_sge.addr;
 		regd_buf->reg.len = sig_sge.length;
-		regd_buf->reg.is_mr = 1;
 	} else {
-		if (desc) {
+		if (desc)
 			regd_buf->reg.rkey = desc->data_mr->rkey;
-			regd_buf->reg.is_mr = 1;
-		} else {
+		else
 			regd_buf->reg.rkey = device->mr->rkey;
-			regd_buf->reg.is_mr = 0;
-		}
 
 		regd_buf->reg.lkey = data_sge.lkey;
 		regd_buf->reg.va = data_sge.addr;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 67225bb..695a270 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -76,7 +76,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
 static int iser_create_device_ib_res(struct iser_device *device)
 {
 	struct ib_device_attr *dev_attr = &device->dev_attr;
-	int ret, i;
+	int ret, i, max_cqe;
 
 	ret = ib_query_device(device->ib_device, dev_attr);
 	if (ret) {
@@ -104,11 +104,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
 		return -1;
 	}
 
-	device->comps_used = min(ISER_MAX_CQ,
+	device->comps_used = min_t(int, num_online_cpus(),
 				 device->ib_device->num_comp_vectors);
-	iser_info("using %d CQs, device %s supports %d vectors\n",
+
+	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
+				GFP_KERNEL);
+	if (!device->comps)
+		goto comps_err;
+
+	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+
+	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
 		  device->comps_used, device->ib_device->name,
-		  device->ib_device->num_comp_vectors);
+		  device->ib_device->num_comp_vectors, max_cqe);
 
 	device->pd = ib_alloc_pd(device->ib_device);
 	if (IS_ERR(device->pd))
@@ -122,7 +130,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
 					iser_cq_callback,
 					iser_cq_event_callback,
 					(void *)comp,
-					ISER_MAX_CQ_LEN, i);
+					max_cqe, i);
 		if (IS_ERR(comp->cq)) {
 			comp->cq = NULL;
 			goto cq_err;
@@ -162,6 +170,8 @@ cq_err:
 	}
 	ib_dealloc_pd(device->pd);
 pd_err:
+	kfree(device->comps);
+comps_err:
 	iser_err("failed to allocate an IB resource\n");
 	return -1;
 }
@@ -187,6 +197,9 @@ static void iser_free_device_ib_res(struct iser_device *device)
 	(void)ib_dereg_mr(device->mr);
 	(void)ib_dealloc_pd(device->pd);
 
+	kfree(device->comps);
+	device->comps = NULL;
+
 	device->mr = NULL;
 	device->pd = NULL;
 }
@@ -425,7 +438,10 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
+	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+						   ib_conn);
 	struct iser_device	*device;
+	struct ib_device_attr *dev_attr;
 	struct ib_qp_init_attr	init_attr;
 	int			ret = -ENOMEM;
 	int index, min_index = 0;
@@ -433,6 +449,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 	BUG_ON(ib_conn->device == NULL);
 
 	device = ib_conn->device;
+	dev_attr = &device->dev_attr;
 
 	memset(&init_attr, 0, sizeof init_attr);
 
@@ -460,8 +477,20 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 	if (ib_conn->pi_support) {
 		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
 		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+		iser_conn->max_cmds =
+			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
 	} else {
-		init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+		if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+			init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+			iser_conn->max_cmds =
+				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
+		} else {
+			init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+			iser_conn->max_cmds =
+				ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+			iser_dbg("device %s supports max_send_wr %d\n",
+				 device->ib_device->name, dev_attr->max_qp_wr);
+		}
 	}
 
 	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -475,7 +504,11 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 	return ret;
 
 out_err:
+	mutex_lock(&ig.connlist_mutex);
+	ib_conn->comp->active_qps--;
+	mutex_unlock(&ig.connlist_mutex);
 	iser_err("unable to alloc mem or create resource, err %d\n", ret);
+
 	return ret;
 }
 
@@ -610,9 +643,11 @@ void iser_conn_release(struct iser_conn *iser_conn)
 	mutex_unlock(&ig.connlist_mutex);
 
 	mutex_lock(&iser_conn->state_mutex);
-	if (iser_conn->state != ISER_CONN_DOWN)
+	if (iser_conn->state != ISER_CONN_DOWN) {
 		iser_warn("iser conn %p state %d, expected state down.\n",
 			  iser_conn, iser_conn->state);
+		iser_conn->state = ISER_CONN_DOWN;
+	}
 	/*
 	 * In case we never got to bind stage, we still need to
 	 * release IB resources (which is safe to call more than once).
@@ -662,8 +697,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
 
 		/* post an indication that all flush errors were consumed */
 		err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
-		if (err)
+		if (err) {
 			iser_err("conn %p failed to post beacon", ib_conn);
+			return 1;
+		}
 
 		wait_for_completion(&ib_conn->flush_comp);
 	}
@@ -846,20 +883,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_ADDR_CHANGE:
-		iser_disconnected_handler(cma_id);
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		iser_cleanup_handler(cma_id, false);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		/*
 		 * we *must* destroy the device as we cannot rely
 		 * on iscsid to be around to initiate error handling.
-		 * also implicitly destroy the cma_id.
+		 * also if we are not in state DOWN implicitly destroy
+		 * the cma_id.
 		 */
 		iser_cleanup_handler(cma_id, true);
-		iser_conn->ib_conn.cma_id = NULL;
-		ret = 1;
-		break;
-	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		iser_cleanup_handler(cma_id, false);
+		if (iser_conn->state != ISER_CONN_DOWN) {
+			iser_conn->ib_conn.cma_id = NULL;
+			ret = 1;
+		}
 		break;
 	default:
 		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
@@ -981,7 +1019,6 @@ int iser_reg_page_vec(struct ib_conn *ib_conn,
 	mem_reg->rkey  = mem->fmr->rkey;
 	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
-	mem_reg->is_mr = 1;
 	mem_reg->mem_h = (void *)mem;
 
 	mem_reg->va   += page_vec->offset;
@@ -1008,7 +1045,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
 	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
 	int ret;
 
-	if (!reg->is_mr)
+	if (!reg->mem_h)
 		return;
 
 	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -1028,11 +1065,10 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	struct fast_reg_descriptor *desc = reg->mem_h;
 
-	if (!reg->is_mr)
+	if (!desc)
 		return;
 
 	reg->mem_h = NULL;
-	reg->is_mr = 0;
 	spin_lock_bh(&ib_conn->lock);
 	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
 	spin_unlock_bh(&ib_conn->lock);
@@ -1049,7 +1085,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
 	sge.length = ISER_RX_LOGIN_SIZE;
 	sge.lkey   = ib_conn->device->mr->lkey;
 
-	rx_wr.wr_id   = (unsigned long)iser_conn->login_resp_buf;
+	rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 	rx_wr.next    = NULL;
@@ -1073,7 +1109,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 
 	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc		= &iser_conn->rx_descs[my_rx_head];
-		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->wr_id	= (uintptr_t)rx_desc;
 		rx_wr->sg_list	= &rx_desc->rx_sg;
 		rx_wr->num_sge	= 1;
 		rx_wr->next	= rx_wr + 1;
@@ -1110,7 +1146,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 				      DMA_TO_DEVICE);
 
 	send_wr.next	   = NULL;
-	send_wr.wr_id	   = (unsigned long)tx_desc;
+	send_wr.wr_id	   = (uintptr_t)tx_desc;
 	send_wr.sg_list	   = tx_desc->tx_sg;
 	send_wr.num_sge	   = tx_desc->num_sge;
 	send_wr.opcode	   = IB_WR_SEND;
@@ -1160,6 +1196,7 @@ static void
 iser_handle_comp_error(struct ib_conn *ib_conn,
 		       struct ib_wc *wc)
 {
+	void *wr_id = (void *)(uintptr_t)wc->wr_id;
 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
 						   ib_conn);
 
@@ -1168,8 +1205,8 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
 			iscsi_conn_failure(iser_conn->iscsi_conn,
 					   ISCSI_ERR_CONN_FAILED);
 
-	if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
-		struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+	if (is_iser_tx_desc(iser_conn, wr_id)) {
+		struct iser_tx_desc *desc = wr_id;
 
 		if (desc->type == ISCSI_TX_DATAOUT)
 			kmem_cache_free(ig.desc_cache, desc);
@@ -1193,14 +1230,14 @@ static void iser_handle_wc(struct ib_wc *wc)
 	struct iser_rx_desc *rx_desc;
 
 	ib_conn = wc->qp->qp_context;
-	if (wc->status == IB_WC_SUCCESS) {
+	if (likely(wc->status == IB_WC_SUCCESS)) {
 		if (wc->opcode == IB_WC_RECV) {
-			rx_desc = (struct iser_rx_desc *)wc->wr_id;
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
 			iser_rcv_completion(rx_desc, wc->byte_len,
 					    ib_conn);
 		} else
 		if (wc->opcode == IB_WC_SEND) {
-			tx_desc = (struct iser_tx_desc *)wc->wr_id;
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
 			iser_snd_completion(tx_desc, ib_conn);
 		} else {
 			iser_err("Unknown wc opcode %d\n", wc->opcode);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 10641b7..dafb3c5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -22,7 +22,6 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <linux/llist.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 #include <target/target_core_base.h>
@@ -36,11 +35,17 @@
 #define	ISERT_MAX_CONN		8
 #define ISER_MAX_RX_CQ_LEN	(ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
 #define ISER_MAX_TX_CQ_LEN	(ISERT_QP_MAX_REQ_DTOS  * ISERT_MAX_CONN)
+#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
+				 ISERT_MAX_CONN)
+
+int isert_debug_level = 0;
+module_param_named(debug_level, isert_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)");
 
 static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(device_list);
-static struct workqueue_struct *isert_rx_wq;
 static struct workqueue_struct *isert_comp_wq;
+static struct workqueue_struct *isert_release_wq;
 
 static void
 isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
@@ -54,19 +59,32 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	       struct isert_rdma_wr *wr);
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
+static int
+isert_rdma_post_recvl(struct isert_conn *isert_conn);
+static int
+isert_rdma_accept(struct isert_conn *isert_conn);
+struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
+
+/* True when conn->pi_support is set and cmd has a non-NORMAL prot_op. */
+static inline bool
+isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
+{
+	return conn->pi_support && cmd->prot_op != TARGET_PROT_NORMAL;
+}
+
 
 static void
 isert_qp_event_callback(struct ib_event *e, void *context)
 {
 	struct isert_conn *isert_conn = (struct isert_conn *)context;
 
-	pr_err("isert_qp_event_callback event: %d\n", e->event);
+	isert_err("conn %p event: %d\n", isert_conn, e->event);
 	switch (e->event) {
 	case IB_EVENT_COMM_EST:
 		rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
 		break;
 	case IB_EVENT_QP_LAST_WQE_REACHED:
-		pr_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED:\n");
+		isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
 		break;
 	default:
 		break;
@@ -80,39 +98,41 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
 
 	ret = ib_query_device(ib_dev, devattr);
 	if (ret) {
-		pr_err("ib_query_device() failed: %d\n", ret);
+		isert_err("ib_query_device() failed: %d\n", ret);
 		return ret;
 	}
-	pr_debug("devattr->max_sge: %d\n", devattr->max_sge);
-	pr_debug("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
+	isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
+	isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
 
 	return 0;
 }
 
 static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
-		    u8 protection)
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
 {
 	struct isert_device *device = isert_conn->conn_device;
 	struct ib_qp_init_attr attr;
-	int ret, index, min_index = 0;
+	struct isert_comp *comp;
+	int ret, i, min = 0;
 
 	mutex_lock(&device_list_mutex);
-	for (index = 0; index < device->cqs_used; index++)
-		if (device->cq_active_qps[index] <
-		    device->cq_active_qps[min_index])
-			min_index = index;
-	device->cq_active_qps[min_index]++;
-	pr_debug("isert_conn_setup_qp: Using min_index: %d\n", min_index);
+	for (i = 0; i < device->comps_used; i++)
+		if (device->comps[i].active_qps <
+		    device->comps[min].active_qps)
+			min = i;
+	comp = &device->comps[min];
+	comp->active_qps++;
+	isert_info("conn %p, using comp %p min_index: %d\n",
+		   isert_conn, comp, min);
 	mutex_unlock(&device_list_mutex);
 
 	memset(&attr, 0, sizeof(struct ib_qp_init_attr));
 	attr.event_handler = isert_qp_event_callback;
 	attr.qp_context = isert_conn;
-	attr.send_cq = device->dev_tx_cq[min_index];
-	attr.recv_cq = device->dev_rx_cq[min_index];
+	attr.send_cq = comp->cq;
+	attr.recv_cq = comp->cq;
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
-	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS;
+	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	/*
 	 * FIXME: Use devattr.max_sge - 2 for max_send_sge as
 	 * work-around for RDMA_READs with ConnectX-2.
@@ -126,29 +146,29 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
-	if (protection)
+	if (device->pi_capable)
 		attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
 
-	pr_debug("isert_conn_setup_qp cma_id->device: %p\n",
-		 cma_id->device);
-	pr_debug("isert_conn_setup_qp conn_pd->device: %p\n",
-		 isert_conn->conn_pd->device);
-
 	ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
 	if (ret) {
-		pr_err("rdma_create_qp failed for cma_id %d\n", ret);
-		return ret;
+		isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+		goto err;
 	}
 	isert_conn->conn_qp = cma_id->qp;
-	pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n");
 
 	return 0;
+err:
+	mutex_lock(&device_list_mutex);
+	comp->active_qps--;
+	mutex_unlock(&device_list_mutex);
+
+	return ret;
 }
 
 static void
 isert_cq_event_callback(struct ib_event *e, void *context)
 {
-	pr_debug("isert_cq_event_callback event: %d\n", e->event);
+	isert_dbg("event: %d\n", e->event);
 }
 
 static int
@@ -182,6 +202,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
 	}
 
 	isert_conn->conn_rx_desc_head = 0;
+
 	return 0;
 
 dma_map_fail:
@@ -193,6 +214,8 @@ dma_map_fail:
 	kfree(isert_conn->conn_rx_descs);
 	isert_conn->conn_rx_descs = NULL;
 fail:
+	isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
+
 	return -ENOMEM;
 }
 
@@ -216,27 +239,23 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn)
 	isert_conn->conn_rx_descs = NULL;
 }
 
-static void isert_cq_tx_work(struct work_struct *);
-static void isert_cq_tx_callback(struct ib_cq *, void *);
-static void isert_cq_rx_work(struct work_struct *);
-static void isert_cq_rx_callback(struct ib_cq *, void *);
+static void isert_cq_work(struct work_struct *);
+static void isert_cq_callback(struct ib_cq *, void *);
 
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
 	struct ib_device *ib_dev = device->ib_device;
-	struct isert_cq_desc *cq_desc;
 	struct ib_device_attr *dev_attr;
-	int ret = 0, i, j;
-	int max_rx_cqe, max_tx_cqe;
+	int ret = 0, i;
+	int max_cqe;
 
 	dev_attr = &device->dev_attr;
 	ret = isert_query_device(ib_dev, dev_attr);
 	if (ret)
 		return ret;
 
-	max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe);
-	max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe);
+	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
 
 	/* asign function handlers */
 	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
@@ -254,55 +273,38 @@ isert_create_device_ib_res(struct isert_device *device)
 	device->pi_capable = dev_attr->device_cap_flags &
 			     IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
-	device->cqs_used = min_t(int, num_online_cpus(),
-				 device->ib_device->num_comp_vectors);
-	device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
-	pr_debug("Using %d CQs, device %s supports %d vectors support "
-		 "Fast registration %d pi_capable %d\n",
-		 device->cqs_used, device->ib_device->name,
-		 device->ib_device->num_comp_vectors, device->use_fastreg,
-		 device->pi_capable);
-	device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
-				device->cqs_used, GFP_KERNEL);
-	if (!device->cq_desc) {
-		pr_err("Unable to allocate device->cq_desc\n");
+	device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
+					device->ib_device->num_comp_vectors));
+	isert_info("Using %d CQs, %s supports %d vectors support "
+		   "Fast registration %d pi_capable %d\n",
+		   device->comps_used, device->ib_device->name,
+		   device->ib_device->num_comp_vectors, device->use_fastreg,
+		   device->pi_capable);
+
+	device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
+				GFP_KERNEL);
+	if (!device->comps) {
+		isert_err("Unable to allocate completion contexts\n");
 		return -ENOMEM;
 	}
-	cq_desc = device->cq_desc;
-
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc[i].device = device;
-		cq_desc[i].cq_index = i;
-
-		INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work);
-		device->dev_rx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_rx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_rx_cqe, i);
-		if (IS_ERR(device->dev_rx_cq[i])) {
-			ret = PTR_ERR(device->dev_rx_cq[i]);
-			device->dev_rx_cq[i] = NULL;
-			goto out_cq;
-		}
 
-		INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work);
-		device->dev_tx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_tx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_tx_cqe, i);
-		if (IS_ERR(device->dev_tx_cq[i])) {
-			ret = PTR_ERR(device->dev_tx_cq[i]);
-			device->dev_tx_cq[i] = NULL;
-			goto out_cq;
-		}
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP);
-		if (ret)
+		comp->device = device;
+		INIT_WORK(&comp->work, isert_cq_work);
+		comp->cq = ib_create_cq(device->ib_device,
+					isert_cq_callback,
+					isert_cq_event_callback,
+					(void *)comp,
+					max_cqe, i);
+		if (IS_ERR(comp->cq)) {
+			ret = PTR_ERR(comp->cq);
+			comp->cq = NULL;
 			goto out_cq;
+		}
 
-		ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP);
+		ret = ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
 		if (ret)
 			goto out_cq;
 	}
@@ -310,19 +312,15 @@ isert_create_device_ib_res(struct isert_device *device)
 	return 0;
 
 out_cq:
-	for (j = 0; j < i; j++) {
-		cq_desc = &device->cq_desc[j];
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		if (device->dev_rx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_rx_work);
-			ib_destroy_cq(device->dev_rx_cq[j]);
-		}
-		if (device->dev_tx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_tx_work);
-			ib_destroy_cq(device->dev_tx_cq[j]);
+		if (comp->cq) {
+			cancel_work_sync(&comp->work);
+			ib_destroy_cq(comp->cq);
 		}
 	}
-	kfree(device->cq_desc);
+	kfree(device->comps);
 
 	return ret;
 }
@@ -330,21 +328,18 @@ out_cq:
 static void
 isert_free_device_ib_res(struct isert_device *device)
 {
-	struct isert_cq_desc *cq_desc;
 	int i;
 
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc = &device->cq_desc[i];
+	isert_info("device %p\n", device);
 
-		cancel_work_sync(&cq_desc->cq_rx_work);
-		cancel_work_sync(&cq_desc->cq_tx_work);
-		ib_destroy_cq(device->dev_rx_cq[i]);
-		ib_destroy_cq(device->dev_tx_cq[i]);
-		device->dev_rx_cq[i] = NULL;
-		device->dev_tx_cq[i] = NULL;
-	}
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-	kfree(device->cq_desc);
+		cancel_work_sync(&comp->work);
+		ib_destroy_cq(comp->cq);
+		comp->cq = NULL;
+	}
+	kfree(device->comps);
 }
 
 static void
@@ -352,6 +347,7 @@ isert_device_try_release(struct isert_device *device)
 {
 	mutex_lock(&device_list_mutex);
 	device->refcount--;
+	isert_info("device %p refcount %d\n", device, device->refcount);
 	if (!device->refcount) {
 		isert_free_device_ib_res(device);
 		list_del(&device->dev_node);
@@ -370,6 +366,8 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
 	list_for_each_entry(device, &device_list, dev_node) {
 		if (device->ib_device->node_guid == cma_id->device->node_guid) {
 			device->refcount++;
+			isert_info("Found iser device %p refcount %d\n",
+				   device, device->refcount);
 			mutex_unlock(&device_list_mutex);
 			return device;
 		}
@@ -393,6 +391,8 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
 
 	device->refcount++;
 	list_add_tail(&device->dev_node, &device_list);
+	isert_info("Created a new iser device %p refcount %d\n",
+		   device, device->refcount);
 	mutex_unlock(&device_list_mutex);
 
 	return device;
@@ -407,7 +407,7 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
 	if (list_empty(&isert_conn->conn_fr_pool))
 		return;
 
-	pr_debug("Freeing conn %p fastreg pool", isert_conn);
+	isert_info("Freeing conn %p fastreg pool", isert_conn);
 
 	list_for_each_entry_safe(fr_desc, tmp,
 				 &isert_conn->conn_fr_pool, list) {
@@ -425,87 +425,97 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
 	}
 
 	if (i < isert_conn->conn_fr_pool_size)
-		pr_warn("Pool still has %d regions registered\n",
+		isert_warn("Pool still has %d regions registered\n",
 			isert_conn->conn_fr_pool_size - i);
 }
 
 static int
+isert_create_pi_ctx(struct fast_reg_descriptor *desc,
+		    struct ib_device *device,
+		    struct ib_pd *pd)
+{
+	struct ib_mr_init_attr mr_init_attr;
+	struct pi_context *pi_ctx;
+	int ret;
+
+	pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+	if (!pi_ctx) {
+		isert_err("Failed to allocate pi context\n");
+		return -ENOMEM;
+	}
+
+	pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
+					    ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_frpl)) {
+		isert_err("Failed to allocate prot frpl err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_frpl));
+		ret = PTR_ERR(pi_ctx->prot_frpl);
+		goto err_pi_ctx;
+	}
+
+	pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_mr)) {
+		isert_err("Failed to allocate prot frmr err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_mr));
+		ret = PTR_ERR(pi_ctx->prot_mr);
+		goto err_prot_frpl;
+	}
+	desc->ind |= ISERT_PROT_KEY_VALID;
+
+	memset(&mr_init_attr, 0, sizeof(mr_init_attr));
+	mr_init_attr.max_reg_descriptors = 2;
+	mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+	pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+	if (IS_ERR(pi_ctx->sig_mr)) {
+		isert_err("Failed to allocate signature enabled mr err=%ld\n",
+			  PTR_ERR(pi_ctx->sig_mr));
+		ret = PTR_ERR(pi_ctx->sig_mr);
+		goto err_prot_mr;
+	}
+
+	desc->pi_ctx = pi_ctx;
+	desc->ind |= ISERT_SIG_KEY_VALID;
+	desc->ind &= ~ISERT_PROTECTED;
+
+	return 0;
+
+err_prot_mr:
+	ib_dereg_mr(desc->pi_ctx->prot_mr);
+err_prot_frpl:
+	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+err_pi_ctx:
+	kfree(desc->pi_ctx);
+
+	return ret;
+}
+
+static int
 isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
-		     struct fast_reg_descriptor *fr_desc, u8 protection)
+		     struct fast_reg_descriptor *fr_desc)
 {
 	int ret;
 
 	fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
 							 ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_frpl)) {
-		pr_err("Failed to allocate data frpl err=%ld\n",
-		       PTR_ERR(fr_desc->data_frpl));
+		isert_err("Failed to allocate data frpl err=%ld\n",
+			  PTR_ERR(fr_desc->data_frpl));
 		return PTR_ERR(fr_desc->data_frpl);
 	}
 
 	fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_mr)) {
-		pr_err("Failed to allocate data frmr err=%ld\n",
-		       PTR_ERR(fr_desc->data_mr));
+		isert_err("Failed to allocate data frmr err=%ld\n",
+			  PTR_ERR(fr_desc->data_mr));
 		ret = PTR_ERR(fr_desc->data_mr);
 		goto err_data_frpl;
 	}
-	pr_debug("Create fr_desc %p page_list %p\n",
-		 fr_desc, fr_desc->data_frpl->page_list);
 	fr_desc->ind |= ISERT_DATA_KEY_VALID;
 
-	if (protection) {
-		struct ib_mr_init_attr mr_init_attr = {0};
-		struct pi_context *pi_ctx;
-
-		fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL);
-		if (!fr_desc->pi_ctx) {
-			pr_err("Failed to allocate pi context\n");
-			ret = -ENOMEM;
-			goto err_data_mr;
-		}
-		pi_ctx = fr_desc->pi_ctx;
-
-		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
-						    ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_frpl)) {
-			pr_err("Failed to allocate prot frpl err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_frpl));
-			ret = PTR_ERR(pi_ctx->prot_frpl);
-			goto err_pi_ctx;
-		}
-
-		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_mr)) {
-			pr_err("Failed to allocate prot frmr err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_mr));
-			ret = PTR_ERR(pi_ctx->prot_mr);
-			goto err_prot_frpl;
-		}
-		fr_desc->ind |= ISERT_PROT_KEY_VALID;
-
-		mr_init_attr.max_reg_descriptors = 2;
-		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
-		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
-		if (IS_ERR(pi_ctx->sig_mr)) {
-			pr_err("Failed to allocate signature enabled mr err=%ld\n",
-			       PTR_ERR(pi_ctx->sig_mr));
-			ret = PTR_ERR(pi_ctx->sig_mr);
-			goto err_prot_mr;
-		}
-		fr_desc->ind |= ISERT_SIG_KEY_VALID;
-	}
-	fr_desc->ind &= ~ISERT_PROTECTED;
+	isert_dbg("Created fr_desc %p\n", fr_desc);
 
 	return 0;
-err_prot_mr:
-	ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
-err_prot_frpl:
-	ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
-err_pi_ctx:
-	kfree(fr_desc->pi_ctx);
-err_data_mr:
-	ib_dereg_mr(fr_desc->data_mr);
+
 err_data_frpl:
 	ib_free_fast_reg_page_list(fr_desc->data_frpl);
 
@@ -513,7 +523,7 @@ err_data_frpl:
 }
 
 static int
-isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
+isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
 {
 	struct fast_reg_descriptor *fr_desc;
 	struct isert_device *device = isert_conn->conn_device;
@@ -531,16 +541,15 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
 	for (i = 0; i < tag_num; i++) {
 		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
 		if (!fr_desc) {
-			pr_err("Failed to allocate fast_reg descriptor\n");
+			isert_err("Failed to allocate fast_reg descriptor\n");
 			ret = -ENOMEM;
 			goto err;
 		}
 
 		ret = isert_create_fr_desc(device->ib_device,
-					   isert_conn->conn_pd, fr_desc,
-					   pi_support);
+					   isert_conn->conn_pd, fr_desc);
 		if (ret) {
-			pr_err("Failed to create fastreg descriptor err=%d\n",
+			isert_err("Failed to create fastreg descriptor err=%d\n",
 			       ret);
 			kfree(fr_desc);
 			goto err;
@@ -550,7 +559,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
 		isert_conn->conn_fr_pool_size++;
 	}
 
-	pr_debug("Creating conn %p fastreg pool size=%d",
+	isert_dbg("Creating conn %p fastreg pool size=%d",
 		 isert_conn, isert_conn->conn_fr_pool_size);
 
 	return 0;
@@ -563,47 +572,45 @@ err:
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-	struct iscsi_np *np = cma_id->context;
-	struct isert_np *isert_np = np->np_context;
+	struct isert_np *isert_np = cma_id->context;
+	struct iscsi_np *np = isert_np->np;
 	struct isert_conn *isert_conn;
 	struct isert_device *device;
 	struct ib_device *ib_dev = cma_id->device;
 	int ret = 0;
-	u8 pi_support;
 
 	spin_lock_bh(&np->np_thread_lock);
 	if (!np->enabled) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("iscsi_np is not enabled, reject connect request\n");
+		isert_dbg("iscsi_np is not enabled, reject connect request\n");
 		return rdma_reject(cma_id, NULL, 0);
 	}
 	spin_unlock_bh(&np->np_thread_lock);
 
-	pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n",
+	isert_dbg("cma_id: %p, portal: %p\n",
 		 cma_id, cma_id->context);
 
 	isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
 	if (!isert_conn) {
-		pr_err("Unable to allocate isert_conn\n");
+		isert_err("Unable to allocate isert_conn\n");
 		return -ENOMEM;
 	}
 	isert_conn->state = ISER_CONN_INIT;
 	INIT_LIST_HEAD(&isert_conn->conn_accept_node);
 	init_completion(&isert_conn->conn_login_comp);
+	init_completion(&isert_conn->login_req_comp);
 	init_completion(&isert_conn->conn_wait);
-	init_completion(&isert_conn->conn_wait_comp_err);
 	kref_init(&isert_conn->conn_kref);
 	mutex_init(&isert_conn->conn_mutex);
 	spin_lock_init(&isert_conn->conn_lock);
 	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
 
-	cma_id->context = isert_conn;
 	isert_conn->conn_cm_id = cma_id;
 
 	isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
 					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
 	if (!isert_conn->login_buf) {
-		pr_err("Unable to allocate isert_conn->login_buf\n");
+		isert_err("Unable to allocate isert_conn->login_buf\n");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -611,7 +618,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	isert_conn->login_req_buf = isert_conn->login_buf;
 	isert_conn->login_rsp_buf = isert_conn->login_buf +
 				    ISCSI_DEF_MAX_RECV_SEG_LEN;
-	pr_debug("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
+	isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
 		 isert_conn->login_buf, isert_conn->login_req_buf,
 		 isert_conn->login_rsp_buf);
 
@@ -621,7 +628,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
 		       ret);
 		isert_conn->login_req_dma = 0;
 		goto out_login_buf;
@@ -633,7 +640,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
 		       ret);
 		isert_conn->login_rsp_dma = 0;
 		goto out_req_dma_map;
@@ -649,13 +656,13 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	isert_conn->initiator_depth = min_t(u8,
 				event->param.conn.initiator_depth,
 				device->dev_attr.max_qp_init_rd_atom);
-	pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+	isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
 
 	isert_conn->conn_device = device;
 	isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
 	if (IS_ERR(isert_conn->conn_pd)) {
 		ret = PTR_ERR(isert_conn->conn_pd);
-		pr_err("ib_alloc_pd failed for conn %p: ret=%d\n",
+		isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_pd;
 	}
@@ -664,20 +671,20 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 					   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(isert_conn->conn_mr)) {
 		ret = PTR_ERR(isert_conn->conn_mr);
-		pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
+		isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_mr;
 	}
 
-	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
-	if (pi_support && !device->pi_capable) {
-		pr_err("Protection information requested but not supported, "
-		       "rejecting connect request\n");
-		ret = rdma_reject(cma_id, NULL, 0);
-		goto out_mr;
-	}
+	ret = isert_conn_setup_qp(isert_conn, cma_id);
+	if (ret)
+		goto out_conn_dev;
 
-	ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support);
+	ret = isert_rdma_post_recvl(isert_conn);
+	if (ret)
+		goto out_conn_dev;
+
+	ret = isert_rdma_accept(isert_conn);
 	if (ret)
 		goto out_conn_dev;
 
@@ -685,7 +692,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
 	mutex_unlock(&isert_np->np_accept_mutex);
 
-	pr_debug("isert_connect_request() up np_sem np: %p\n", np);
+	isert_info("np %p: Allow accept_np to continue\n", np);
 	up(&isert_np->np_sem);
 	return 0;
 
@@ -705,6 +712,7 @@ out_login_buf:
 	kfree(isert_conn->login_buf);
 out:
 	kfree(isert_conn);
+	rdma_reject(cma_id, NULL, 0);
 	return ret;
 }
 
@@ -713,24 +721,25 @@ isert_connect_release(struct isert_conn *isert_conn)
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_device *device = isert_conn->conn_device;
-	int cq_index;
 
-	pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p\n", isert_conn);
 
 	if (device && device->use_fastreg)
 		isert_conn_free_fastreg_pool(isert_conn);
 
+	isert_free_rx_descriptors(isert_conn);
+	rdma_destroy_id(isert_conn->conn_cm_id);
+
 	if (isert_conn->conn_qp) {
-		cq_index = ((struct isert_cq_desc *)
-			isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
-		pr_debug("isert_connect_release: cq_index: %d\n", cq_index);
-		isert_conn->conn_device->cq_active_qps[cq_index]--;
+		struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
 
-		rdma_destroy_qp(isert_conn->conn_cm_id);
-	}
+		isert_dbg("dec completion context %p active_qps\n", comp);
+		mutex_lock(&device_list_mutex);
+		comp->active_qps--;
+		mutex_unlock(&device_list_mutex);
 
-	isert_free_rx_descriptors(isert_conn);
-	rdma_destroy_id(isert_conn->conn_cm_id);
+		ib_destroy_qp(isert_conn->conn_qp);
+	}
 
 	ib_dereg_mr(isert_conn->conn_mr);
 	ib_dealloc_pd(isert_conn->conn_pd);
@@ -747,16 +756,24 @@ isert_connect_release(struct isert_conn *isert_conn)
 
 	if (device)
 		isert_device_try_release(device);
-
-	pr_debug("Leaving isert_connect_release >>>>>>>>>>>>\n");
 }
 
 static void
 isert_connected_handler(struct rdma_cm_id *cma_id)
 {
-	struct isert_conn *isert_conn = cma_id->context;
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
-	kref_get(&isert_conn->conn_kref);
+	isert_info("conn %p\n", isert_conn);
+
+	if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+		isert_warn("conn %p connect_release is running\n", isert_conn);
+		return;
+	}
+
+	mutex_lock(&isert_conn->conn_mutex);
+	if (isert_conn->state != ISER_CONN_FULL_FEATURE)
+		isert_conn->state = ISER_CONN_UP;
+	mutex_unlock(&isert_conn->conn_mutex);
 }
 
 static void
@@ -765,8 +782,8 @@ isert_release_conn_kref(struct kref *kref)
 	struct isert_conn *isert_conn = container_of(kref,
 				struct isert_conn, conn_kref);
 
-	pr_debug("Calling isert_connect_release for final kref %s/%d\n",
-		 current->comm, current->pid);
+	isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
+		   current->pid);
 
 	isert_connect_release(isert_conn);
 }
@@ -777,75 +794,111 @@ isert_put_conn(struct isert_conn *isert_conn)
 	kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
 }
 
+/**
+ * isert_conn_terminate() - Initiate connection termination
+ * @isert_conn: isert connection struct
+ *
+ * Notes:
+ * In case the connection state is FULL_FEATURE, move state
+ * to TERMINATING and start teardown sequence (rdma_disconnect).
+ * In case the connection state is UP, complete flush as well.
+ *
+ * This routine must be called with conn_mutex held. Thus it is
+ * safe to call multiple times.
+ */
 static void
-isert_disconnect_work(struct work_struct *work)
+isert_conn_terminate(struct isert_conn *isert_conn)
 {
-	struct isert_conn *isert_conn = container_of(work,
-				struct isert_conn, conn_logout_work);
+	int err;
 
-	pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
-	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->state == ISER_CONN_UP)
+	switch (isert_conn->state) {
+	case ISER_CONN_TERMINATING:
+		break;
+	case ISER_CONN_UP:
+	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
+		isert_info("Terminating conn %p state %d\n",
+			   isert_conn, isert_conn->state);
 		isert_conn->state = ISER_CONN_TERMINATING;
-
-	if (isert_conn->post_recv_buf_count == 0 &&
-	    atomic_read(&isert_conn->post_send_buf_count) == 0) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		goto wake_up;
-	}
-	if (!isert_conn->conn_cm_id) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		isert_put_conn(isert_conn);
-		return;
+		err = rdma_disconnect(isert_conn->conn_cm_id);
+		if (err)
+			isert_warn("Failed rdma_disconnect isert_conn %p\n",
+				   isert_conn);
+		break;
+	default:
+		isert_warn("conn %p teminating in state %d\n",
+			   isert_conn, isert_conn->state);
 	}
+}
 
-	if (isert_conn->disconnect) {
-		/* Send DREQ/DREP towards our initiator */
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
+static int
+isert_np_cma_handler(struct isert_np *isert_np,
+		     enum rdma_cm_event_type event)
+{
+	isert_dbg("isert np %p, handling event %d\n", isert_np, event);
 
-	mutex_unlock(&isert_conn->conn_mutex);
+	switch (event) {
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		isert_np->np_cm_id = NULL;
+		break;
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+		isert_np->np_cm_id = isert_setup_id(isert_np);
+		if (IS_ERR(isert_np->np_cm_id)) {
+			isert_err("isert np %p setup id failed: %ld\n",
+				  isert_np, PTR_ERR(isert_np->np_cm_id));
+			isert_np->np_cm_id = NULL;
+		}
+		break;
+	default:
+		isert_err("isert np %p Unexpected event %d\n",
+			  isert_np, event);
+	}
 
-wake_up:
-	complete(&isert_conn->conn_wait);
+	return -1;
 }
 
 static int
-isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
+isert_disconnected_handler(struct rdma_cm_id *cma_id,
+			   enum rdma_cm_event_type event)
 {
+	struct isert_np *isert_np = cma_id->context;
 	struct isert_conn *isert_conn;
 
-	if (!cma_id->qp) {
-		struct isert_np *isert_np = cma_id->context;
+	if (isert_np->np_cm_id == cma_id)
+		return isert_np_cma_handler(cma_id->context, event);
 
-		isert_np->np_cm_id = NULL;
-		return -1;
-	}
+	isert_conn = cma_id->qp->qp_context;
 
-	isert_conn = (struct isert_conn *)cma_id->context;
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn_terminate(isert_conn);
+	mutex_unlock(&isert_conn->conn_mutex);
 
-	isert_conn->disconnect = disconnect;
-	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
-	schedule_work(&isert_conn->conn_logout_work);
+	isert_info("conn %p completing conn_wait\n", isert_conn);
+	complete(&isert_conn->conn_wait);
 
 	return 0;
 }
 
+static void
+isert_connect_error(struct rdma_cm_id *cma_id)
+{
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
+
+	isert_put_conn(isert_conn);
+}
+
 static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	int ret = 0;
-	bool disconnect = false;
 
-	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
-		 event->event, event->status, cma_id->context, cma_id);
+	isert_info("event %d status %d id %p np %p\n", event->event,
+		   event->status, cma_id, cma_id->context);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		ret = isert_connect_request(cma_id, event);
 		if (ret)
-			pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n",
-				event->event, ret);
+			isert_err("failed handle connect request %d\n", ret);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		isert_connected_handler(cma_id);
@@ -853,13 +906,16 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
 	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
-		disconnect = true;
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
-		ret = isert_disconnected_handler(cma_id, disconnect);
+		ret = isert_disconnected_handler(cma_id, event->event);
 		break;
+	case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
+	case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_CONNECT_ERROR:
+		isert_connect_error(cma_id);
+		break;
 	default:
-		pr_err("Unhandled RDMA CMA event: %d\n", event->event);
+		isert_err("Unhandled RDMA CMA event: %d\n", event->event);
 		break;
 	}
 
@@ -876,7 +932,7 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
 
 	for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc		= &isert_conn->conn_rx_descs[rx_head];
-		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->wr_id	= (uintptr_t)rx_desc;
 		rx_wr->sg_list	= &rx_desc->rx_sg;
 		rx_wr->num_sge	= 1;
 		rx_wr->next	= rx_wr + 1;
@@ -890,10 +946,10 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
 	ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
 				&rx_wr_failed);
 	if (ret) {
-		pr_err("ib_post_recv() failed with ret: %d\n", ret);
+		isert_err("ib_post_recv() failed with ret: %d\n", ret);
 		isert_conn->post_recv_buf_count -= count;
 	} else {
-		pr_debug("isert_post_recv(): Posted %d RX buffers\n", count);
+		isert_dbg("isert_post_recv(): Posted %d RX buffers\n", count);
 		isert_conn->conn_rx_desc_head = rx_head;
 	}
 	return ret;
@@ -910,19 +966,15 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
 				      ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
 	send_wr.next	= NULL;
-	send_wr.wr_id	= (unsigned long)tx_desc;
+	send_wr.wr_id	= (uintptr_t)tx_desc;
 	send_wr.sg_list	= tx_desc->tx_sg;
 	send_wr.num_sge	= tx_desc->num_sge;
 	send_wr.opcode	= IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
-	if (ret) {
-		pr_err("ib_post_send() failed, ret: %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
-	}
+	if (ret)
+		isert_err("ib_post_send() failed, ret: %d\n", ret);
 
 	return ret;
 }
@@ -945,7 +997,7 @@ isert_create_send_desc(struct isert_conn *isert_conn,
 
 	if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
 		tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
-		pr_debug("tx_desc %p lkey mismatch, fixing\n", tx_desc);
+		isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
 	}
 }
 
@@ -959,7 +1011,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
 	dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
 			ISER_HEADERS_LEN, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(ib_dev, dma_addr)) {
-		pr_err("ib_dma_mapping_error() failed\n");
+		isert_err("ib_dma_mapping_error() failed\n");
 		return -ENOMEM;
 	}
 
@@ -968,40 +1020,24 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
 	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
 	tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("isert_init_tx_hdrs: Setup tx_sg[0].addr: 0x%llx length: %u"
-		 " lkey: 0x%08x\n", tx_desc->tx_sg[0].addr,
-		 tx_desc->tx_sg[0].length, tx_desc->tx_sg[0].lkey);
+	isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
+		  tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
+		  tx_desc->tx_sg[0].lkey);
 
 	return 0;
 }
 
 static void
 isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-		   struct ib_send_wr *send_wr, bool coalesce)
+		   struct ib_send_wr *send_wr)
 {
 	struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
 
 	isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	send_wr->opcode = IB_WR_SEND;
 	send_wr->sg_list = &tx_desc->tx_sg[0];
 	send_wr->num_sge = isert_cmd->tx_desc.num_sge;
-	/*
-	 * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED
-	 * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls.
-	 */
-	mutex_lock(&isert_conn->conn_mutex);
-	if (coalesce && isert_conn->state == ISER_CONN_UP &&
-	    ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) {
-		tx_desc->llnode_active = true;
-		llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist);
-		mutex_unlock(&isert_conn->conn_mutex);
-		return;
-	}
-	isert_conn->conn_comp_batch = 0;
-	tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist);
-	mutex_unlock(&isert_conn->conn_mutex);
-
 	send_wr->send_flags = IB_SEND_SIGNALED;
 }
 
@@ -1017,22 +1053,21 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
 	sge.length = ISER_RX_LOGIN_SIZE;
 	sge.lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("Setup sge: addr: %llx length: %d 0x%08x\n",
+	isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
 		sge.addr, sge.length, sge.lkey);
 
 	memset(&rx_wr, 0, sizeof(struct ib_recv_wr));
-	rx_wr.wr_id = (unsigned long)isert_conn->login_req_buf;
+	rx_wr.wr_id = (uintptr_t)isert_conn->login_req_buf;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
 	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
 	if (ret) {
-		pr_err("ib_post_recv() failed: %d\n", ret);
+		isert_err("ib_post_recv() failed: %d\n", ret);
 		isert_conn->post_recv_buf_count--;
 	}
 
-	pr_debug("ib_post_recv(): returned success >>>>>>>>>>>>>>>>>>>>>>>>\n");
 	return ret;
 }
 
@@ -1072,13 +1107,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 		if (login->login_complete) {
 			if (!conn->sess->sess_ops->SessionType &&
 			    isert_conn->conn_device->use_fastreg) {
-				/* Normal Session and fastreg is used */
-				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
-
-				ret = isert_conn_create_fastreg_pool(isert_conn,
-								     pi_support);
+				ret = isert_conn_create_fastreg_pool(isert_conn);
 				if (ret) {
-					pr_err("Conn: %p failed to create"
+					isert_err("Conn: %p failed to create"
 					       " fastreg pool\n", isert_conn);
 					return ret;
 				}
@@ -1092,7 +1123,10 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 			if (ret)
 				return ret;
 
-			isert_conn->state = ISER_CONN_UP;
+			/* Now we are in FULL_FEATURE phase */
+			mutex_lock(&isert_conn->conn_mutex);
+			isert_conn->state = ISER_CONN_FULL_FEATURE;
+			mutex_unlock(&isert_conn->conn_mutex);
 			goto post_send;
 		}
 
@@ -1109,18 +1143,17 @@ post_send:
 }
 
 static void
-isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
-		   struct isert_conn *isert_conn)
+isert_rx_login_req(struct isert_conn *isert_conn)
 {
+	struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf;
+	int rx_buflen = isert_conn->login_req_len;
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct iscsi_login *login = conn->conn_login;
 	int size;
 
-	if (!login) {
-		pr_err("conn->conn_login is NULL\n");
-		dump_stack();
-		return;
-	}
+	isert_info("conn %p\n", isert_conn);
+
+	WARN_ON_ONCE(!login);
 
 	if (login->first_request) {
 		struct iscsi_login_req *login_req =
@@ -1146,8 +1179,9 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
 	memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN);
 
 	size = min(rx_buflen, MAX_KEY_VALUE_PAIRS);
-	pr_debug("Using login payload size: %d, rx_buflen: %d MAX_KEY_VALUE_PAIRS: %d\n",
-		 size, rx_buflen, MAX_KEY_VALUE_PAIRS);
+	isert_dbg("Using login payload size: %d, rx_buflen: %d "
+		  "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen,
+		  MAX_KEY_VALUE_PAIRS);
 	memcpy(login->req_buf, &rx_desc->data[0], size);
 
 	if (login->first_request) {
@@ -1166,7 +1200,7 @@ static struct iscsi_cmd
 
 	cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);
 	if (!cmd) {
-		pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
+		isert_err("Unable to allocate iscsi_cmd + isert_cmd\n");
 		return NULL;
 	}
 	isert_cmd = iscsit_priv_cmd(cmd);
@@ -1209,8 +1243,8 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn,
 	sg = &cmd->se_cmd.t_data_sg[0];
 	sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
 
-	pr_debug("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
-		 sg, sg_nents, &rx_desc->data[0], imm_data_len);
+	isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
+		  sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
 	sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
@@ -1254,13 +1288,15 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
 	 * FIXME: Unexpected unsolicited_data out
 	 */
 	if (!cmd->unsolicited_data) {
-		pr_err("Received unexpected solicited data payload\n");
+		isert_err("Received unexpected solicited data payload\n");
 		dump_stack();
 		return -1;
 	}
 
-	pr_debug("Unsolicited DataOut unsol_data_len: %u, write_data_done: %u, data_length: %u\n",
-		 unsol_data_len, cmd->write_data_done, cmd->se_cmd.data_length);
+	isert_dbg("Unsolicited DataOut unsol_data_len: %u, "
+		  "write_data_done: %u, data_length: %u\n",
+		  unsol_data_len,  cmd->write_data_done,
+		  cmd->se_cmd.data_length);
 
 	sg_off = cmd->write_data_done / PAGE_SIZE;
 	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
@@ -1270,12 +1306,13 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
 	 * FIXME: Non page-aligned unsolicited_data out
 	 */
 	if (page_off) {
-		pr_err("Received unexpected non-page aligned data payload\n");
+		isert_err("unexpected non-page aligned data payload\n");
 		dump_stack();
 		return -1;
 	}
-	pr_debug("Copying DataOut: sg_start: %p, sg_off: %u sg_nents: %u from %p %u\n",
-		 sg_start, sg_off, sg_nents, &rx_desc->data[0], unsol_data_len);
+	isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u "
+		  "sg_nents: %u from %p %u\n", sg_start, sg_off,
+		  sg_nents, &rx_desc->data[0], unsol_data_len);
 
 	sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0],
 			    unsol_data_len);
@@ -1322,8 +1359,8 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd
 
 	text_in = kzalloc(payload_length, GFP_KERNEL);
 	if (!text_in) {
-		pr_err("Unable to allocate text_in of payload_length: %u\n",
-		       payload_length);
+		isert_err("Unable to allocate text_in of payload_length: %u\n",
+			  payload_length);
 		return -ENOMEM;
 	}
 	cmd->text_in_ptr = text_in;
@@ -1348,8 +1385,8 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 
 	if (sess->sess_ops->SessionType &&
 	   (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
-		pr_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
-		       " ignoring\n", opcode);
+		isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
+			  " ignoring\n", opcode);
 		return 0;
 	}
 
@@ -1395,10 +1432,6 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 			break;
 
 		ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr);
-		if (ret > 0)
-			wait_for_completion_timeout(&conn->conn_logout_comp,
-						    SECONDS_FOR_LOGOUT_COMP *
-						    HZ);
 		break;
 	case ISCSI_OP_TEXT:
 		cmd = isert_allocate_cmd(conn);
@@ -1410,7 +1443,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 					    rx_desc, (struct iscsi_text *)hdr);
 		break;
 	default:
-		pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
+		isert_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
 		dump_stack();
 		break;
 	}
@@ -1431,23 +1464,23 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 		if (iser_hdr->flags & ISER_RSV) {
 			read_stag = be32_to_cpu(iser_hdr->read_stag);
 			read_va = be64_to_cpu(iser_hdr->read_va);
-			pr_debug("ISER_RSV: read_stag: 0x%08x read_va: 0x%16llx\n",
-				 read_stag, (unsigned long long)read_va);
+			isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
+				  read_stag, (unsigned long long)read_va);
 		}
 		if (iser_hdr->flags & ISER_WSV) {
 			write_stag = be32_to_cpu(iser_hdr->write_stag);
 			write_va = be64_to_cpu(iser_hdr->write_va);
-			pr_debug("ISER_WSV: write__stag: 0x%08x write_va: 0x%16llx\n",
-				 write_stag, (unsigned long long)write_va);
+			isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
+				  write_stag, (unsigned long long)write_va);
 		}
 
-		pr_debug("ISER ISCSI_CTRL PDU\n");
+		isert_dbg("ISER ISCSI_CTRL PDU\n");
 		break;
 	case ISER_HELLO:
-		pr_err("iSER Hello message\n");
+		isert_err("iSER Hello message\n");
 		break;
 	default:
-		pr_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+		isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
 		break;
 	}
 
@@ -1457,7 +1490,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 
 static void
 isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
-		    unsigned long xfer_len)
+		    u32 xfer_len)
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_hdr *hdr;
@@ -1467,34 +1500,43 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
 	if ((char *)desc == isert_conn->login_req_buf) {
 		rx_dma = isert_conn->login_req_dma;
 		rx_buflen = ISER_RX_LOGIN_SIZE;
-		pr_debug("ISER login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	} else {
 		rx_dma = desc->dma_addr;
 		rx_buflen = ISER_RX_PAYLOAD_SIZE;
-		pr_debug("ISER req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	}
 
 	ib_dma_sync_single_for_cpu(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE);
 
 	hdr = &desc->iscsi_header;
-	pr_debug("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
+	isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
 		 hdr->opcode, hdr->itt, hdr->flags,
 		 (int)(xfer_len - ISER_HEADERS_LEN));
 
-	if ((char *)desc == isert_conn->login_req_buf)
-		isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN,
-				   isert_conn);
-	else
+	if ((char *)desc == isert_conn->login_req_buf) {
+		isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN;
+		if (isert_conn->conn) {
+			struct iscsi_login *login = isert_conn->conn->conn_login;
+
+			if (login && !login->first_request)
+				isert_rx_login_req(isert_conn);
+		}
+		mutex_lock(&isert_conn->conn_mutex);
+		complete(&isert_conn->login_req_comp);
+		mutex_unlock(&isert_conn->conn_mutex);
+	} else {
 		isert_rx_do_work(desc, isert_conn);
+	}
 
 	ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen,
 				      DMA_FROM_DEVICE);
 
 	isert_conn->post_recv_buf_count--;
-	pr_debug("iSERT: Decremented post_recv_buf_count: %d\n",
-		 isert_conn->post_recv_buf_count);
+	isert_dbg("Decremented post_recv_buf_count: %d\n",
+		  isert_conn->post_recv_buf_count);
 
 	if ((char *)desc == isert_conn->login_req_buf)
 		return;
@@ -1505,7 +1547,7 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
 				ISERT_MIN_POSTED_RX);
 		err = isert_post_recv(isert_conn, count);
 		if (err) {
-			pr_err("isert_post_recv() count: %d failed, %d\n",
+			isert_err("isert_post_recv() count: %d failed, %d\n",
 			       count, err);
 		}
 	}
@@ -1534,12 +1576,12 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 	data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
 					data->dma_dir);
 	if (unlikely(!data->dma_nents)) {
-		pr_err("Cmd: unable to dma map SGs %p\n", sg);
+		isert_err("Cmd: unable to dma map SGs %p\n", sg);
 		return -EINVAL;
 	}
 
-	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
-		 isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
+	isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+		  isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
 
 	return 0;
 }
@@ -1560,21 +1602,21 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 {
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
-	pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->data.sg) {
-		pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
 	if (wr->send_wr) {
-		pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd);
+		isert_dbg("Cmd %p free send_wr\n", isert_cmd);
 		kfree(wr->send_wr);
 		wr->send_wr = NULL;
 	}
 
 	if (wr->ib_sge) {
-		pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd);
+		isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
 		kfree(wr->ib_sge);
 		wr->ib_sge = NULL;
 	}
@@ -1586,11 +1628,10 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 	LIST_HEAD(unmap_list);
 
-	pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->fr_desc) {
-		pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n",
-			 isert_cmd, wr->fr_desc);
+		isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, wr->fr_desc);
 		if (wr->fr_desc->ind & ISERT_PROTECTED) {
 			isert_unmap_data_buf(isert_conn, &wr->prot);
 			wr->fr_desc->ind &= ~ISERT_PROTECTED;
@@ -1602,7 +1643,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 	}
 
 	if (wr->data.sg) {
-		pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
@@ -1618,7 +1659,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct isert_device *device = isert_conn->conn_device;
 
-	pr_debug("Entering isert_put_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
@@ -1668,7 +1709,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
 		 * associated cmd->se_cmd needs to be released.
 		 */
 		if (cmd->se_cmd.se_tfo != NULL) {
-			pr_debug("Calling transport_generic_free_cmd from"
+			isert_dbg("Calling transport_generic_free_cmd from"
 				 " isert_put_cmd for 0x%02x\n",
 				 cmd->iscsi_opcode);
 			transport_generic_free_cmd(&cmd->se_cmd, 0);
@@ -1687,7 +1728,7 @@ static void
 isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev)
 {
 	if (tx_desc->dma_addr != 0) {
-		pr_debug("Calling ib_dma_unmap_single for tx_desc->dma_addr\n");
+		isert_dbg("unmap single for tx_desc->dma_addr\n");
 		ib_dma_unmap_single(ib_dev, tx_desc->dma_addr,
 				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
 		tx_desc->dma_addr = 0;
@@ -1699,7 +1740,7 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,
 		     struct ib_device *ib_dev, bool comp_err)
 {
 	if (isert_cmd->pdu_buf_dma != 0) {
-		pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
+		isert_dbg("unmap single for isert_cmd->pdu_buf_dma\n");
 		ib_dma_unmap_single(ib_dev, isert_cmd->pdu_buf_dma,
 				    isert_cmd->pdu_buf_len, DMA_TO_DEVICE);
 		isert_cmd->pdu_buf_dma = 0;
@@ -1717,7 +1758,7 @@ isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr)
 
 	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
 	if (ret) {
-		pr_err("ib_check_mr_status failed, ret %d\n", ret);
+		isert_err("ib_check_mr_status failed, ret %d\n", ret);
 		goto fail_mr_status;
 	}
 
@@ -1740,12 +1781,12 @@ isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr)
 		do_div(sec_offset_err, block_size);
 		se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba;
 
-		pr_err("isert: PI error found type %d at sector 0x%llx "
-		       "expected 0x%x vs actual 0x%x\n",
-		       mr_status.sig_err.err_type,
-		       (unsigned long long)se_cmd->bad_sector,
-		       mr_status.sig_err.expected,
-		       mr_status.sig_err.actual);
+		isert_err("PI error found type %d at sector 0x%llx "
+			  "expected 0x%x vs actual 0x%x\n",
+			  mr_status.sig_err.err_type,
+			  (unsigned long long)se_cmd->bad_sector,
+			  mr_status.sig_err.expected,
+			  mr_status.sig_err.actual);
 		ret = 1;
 	}
 
@@ -1801,7 +1842,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
 	cmd->write_data_done = wr->data.len;
 	wr->send_wr_num = 0;
 
-	pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
+	isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
 	spin_lock_bh(&cmd->istate_lock);
 	cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
 	cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
@@ -1823,36 +1864,22 @@ isert_do_control_comp(struct work_struct *work)
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
+	isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
+
 	switch (cmd->i_state) {
 	case ISTATE_SEND_TASKMGTRSP:
-		pr_debug("Calling iscsit_tmr_post_handler >>>>>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_tmr_post_handler(cmd, cmd->conn);
-
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
-	case ISTATE_SEND_REJECT:
-		pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
-		atomic_dec(&isert_conn->post_send_buf_count);
-
+	case ISTATE_SEND_REJECT:   /* FALLTHRU */
+	case ISTATE_SEND_TEXTRSP:  /* FALLTHRU */
 		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+		isert_completion_put(&isert_cmd->tx_desc, isert_cmd,
+				     ib_dev, false);
 		break;
 	case ISTATE_SEND_LOGOUTRSP:
-		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_logout_post_handler(cmd, cmd->conn);
 		break;
-	case ISTATE_SEND_TEXTRSP:
-		atomic_dec(&isert_conn->post_send_buf_count);
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
 	default:
-		pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
+		isert_err("Unknown i_state %d\n", cmd->i_state);
 		dump_stack();
 		break;
 	}
@@ -1865,7 +1892,6 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
 			  struct ib_device *ib_dev)
 {
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
 	if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
 	    cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1878,267 +1904,151 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
 		return;
 	}
 
-	/**
-	 * If send_wr_num is 0 this means that we got
-	 * RDMA completion and we cleared it and we should
-	 * simply decrement the response post. else the
-	 * response is incorporated in send_wr_num, just
-	 * sub it.
-	 **/
-	if (wr->send_wr_num)
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	else
-		atomic_dec(&isert_conn->post_send_buf_count);
-
 	cmd->i_state = ISTATE_SENT_STATUS;
 	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
 }
 
 static void
-__isert_send_completion(struct iser_tx_desc *tx_desc,
-		        struct isert_conn *isert_conn)
+isert_send_completion(struct iser_tx_desc *tx_desc,
+		      struct isert_conn *isert_conn)
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
 	struct isert_rdma_wr *wr;
 
 	if (!isert_cmd) {
-		atomic_dec(&isert_conn->post_send_buf_count);
 		isert_unmap_tx_desc(tx_desc, ib_dev);
 		return;
 	}
 	wr = &isert_cmd->rdma_wr;
 
+	isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
+
 	switch (wr->iser_ib_op) {
 	case ISER_IB_RECV:
-		pr_err("isert_send_completion: Got ISER_IB_RECV\n");
+		isert_err("Got ISER_IB_RECV\n");
 		dump_stack();
 		break;
 	case ISER_IB_SEND:
-		pr_debug("isert_send_completion: Got ISER_IB_SEND\n");
 		isert_response_completion(tx_desc, isert_cmd,
 					  isert_conn, ib_dev);
 		break;
 	case ISER_IB_RDMA_WRITE:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
 		isert_completion_rdma_write(tx_desc, isert_cmd);
 		break;
 	case ISER_IB_RDMA_READ:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n");
-
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
 		isert_completion_rdma_read(tx_desc, isert_cmd);
 		break;
 	default:
-		pr_err("Unknown wr->iser_ib_op: 0x%02x\n", wr->iser_ib_op);
+		isert_err("Unknown wr->iser_ib_op: 0x%x\n", wr->iser_ib_op);
 		dump_stack();
 		break;
 	}
 }
 
-static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
-		      struct isert_conn *isert_conn)
-{
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
-	struct iser_tx_desc *t;
-	/*
-	 * Drain coalesced completion llist starting from comp_llnode_batch
-	 * setup in isert_init_send_wr(), and then complete trailing tx_desc.
-	 */
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		__isert_send_completion(t, isert_conn);
-	}
-	__isert_send_completion(tx_desc, isert_conn);
-}
-
-static void
-isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev)
+/**
+ * is_isert_tx_desc() - Indicate if the completion wr_id
+ *     is a TX descriptor or not.
+ * @isert_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
 {
-	struct llist_node *llnode;
-	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
+	void *start = isert_conn->conn_rx_descs;
+	int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
 
-	mutex_lock(&isert_conn->conn_mutex);
-	llnode = llist_del_all(&isert_conn->conn_comp_llist);
-	isert_conn->conn_comp_batch = 0;
-	mutex_unlock(&isert_conn->conn_mutex);
-
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
-
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
-		else
-			atomic_dec(&isert_conn->post_send_buf_count);
+	if (wr_id >= start && wr_id < start + len)
+		return false;
 
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
+	return true;
 }
 
 static void
-isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
+isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 {
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
-	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
-
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
+	if (wc->wr_id == ISER_BEACON_WRID) {
+		isert_info("conn %p completing conn_wait_comp_err\n",
+			   isert_conn);
+		complete(&isert_conn->conn_wait_comp_err);
+	} else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
+		struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+		struct isert_cmd *isert_cmd;
+		struct iser_tx_desc *desc;
 
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
+		desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+		isert_cmd = desc->isert_cmd;
+		if (!isert_cmd)
+			isert_unmap_tx_desc(desc, ib_dev);
 		else
-			atomic_dec(&isert_conn->post_send_buf_count);
-
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
-	tx_desc->comp_llnode_batch = NULL;
-
-	if (!isert_cmd)
-		isert_unmap_tx_desc(tx_desc, ib_dev);
-	else
-		isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
-}
-
-static void
-isert_cq_rx_comp_err(struct isert_conn *isert_conn)
-{
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct iscsi_conn *conn = isert_conn->conn;
-
-	if (isert_conn->post_recv_buf_count)
-		return;
-
-	isert_cq_drain_comp_llist(isert_conn, ib_dev);
-
-	if (conn->sess) {
-		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
-		target_wait_for_sess_cmds(conn->sess->se_sess);
+			isert_completion_put(desc, isert_cmd, ib_dev, true);
+	} else {
+		isert_conn->post_recv_buf_count--;
+		if (!isert_conn->post_recv_buf_count)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
 	}
-
-	while (atomic_read(&isert_conn->post_send_buf_count))
-		msleep(3000);
-
-	mutex_lock(&isert_conn->conn_mutex);
-	isert_conn->state = ISER_CONN_DOWN;
-	mutex_unlock(&isert_conn->conn_mutex);
-
-	iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
-
-	complete(&isert_conn->conn_wait_comp_err);
 }
 
 static void
-isert_cq_tx_work(struct work_struct *work)
+isert_handle_wc(struct ib_wc *wc)
 {
-	struct isert_cq_desc *cq_desc = container_of(work,
-				struct isert_cq_desc, cq_tx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *tx_cq = device->dev_tx_cq[cq_index];
 	struct isert_conn *isert_conn;
 	struct iser_tx_desc *tx_desc;
-	struct ib_wc wc;
-
-	while (ib_poll_cq(tx_cq, 1, &wc) == 1) {
-		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
+	struct iser_rx_desc *rx_desc;
 
-		if (wc.status == IB_WC_SUCCESS) {
-			isert_send_completion(tx_desc, isert_conn);
+	isert_conn = wc->qp->qp_context;
+	if (likely(wc->status == IB_WC_SUCCESS)) {
+		if (wc->opcode == IB_WC_RECV) {
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
+			isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
 		} else {
-			pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			pr_debug("TX wc.status: 0x%08x\n", wc.status);
-			pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err);
-
-			if (wc.wr_id != ISER_FASTREG_LI_WRID) {
-				if (tx_desc->llnode_active)
-					continue;
-
-				atomic_dec(&isert_conn->post_send_buf_count);
-				isert_cq_tx_comp_err(tx_desc, isert_conn);
-			}
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+			isert_send_completion(tx_desc, isert_conn);
 		}
-	}
-
-	ib_req_notify_cq(tx_cq, IB_CQ_NEXT_COMP);
-}
-
-static void
-isert_cq_tx_callback(struct ib_cq *cq, void *context)
-{
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
+	} else {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			isert_err("wr id %llx status %d vend_err %x\n",
+				  wc->wr_id, wc->status, wc->vendor_err);
+		else
+			isert_dbg("flush error: wr id %llx\n", wc->wr_id);
 
-	queue_work(isert_comp_wq, &cq_desc->cq_tx_work);
+		if (wc->wr_id != ISER_FASTREG_LI_WRID)
+			isert_cq_comp_err(isert_conn, wc);
+	}
 }
 
 static void
-isert_cq_rx_work(struct work_struct *work)
+isert_cq_work(struct work_struct *work)
 {
-	struct isert_cq_desc *cq_desc = container_of(work,
-			struct isert_cq_desc, cq_rx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *rx_cq = device->dev_rx_cq[cq_index];
-	struct isert_conn *isert_conn;
-	struct iser_rx_desc *rx_desc;
-	struct ib_wc wc;
-	unsigned long xfer_len;
+	enum { isert_poll_budget = 65536 };
+	struct isert_comp *comp = container_of(work, struct isert_comp,
+					       work);
+	struct ib_wc *const wcs = comp->wcs;
+	int i, n, completed = 0;
 
-	while (ib_poll_cq(rx_cq, 1, &wc) == 1) {
-		rx_desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
+	while ((n = ib_poll_cq(comp->cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+		for (i = 0; i < n; i++)
+			isert_handle_wc(&wcs[i]);
 
-		if (wc.status == IB_WC_SUCCESS) {
-			xfer_len = (unsigned long)wc.byte_len;
-			isert_rx_completion(rx_desc, isert_conn, xfer_len);
-		} else {
-			pr_debug("RX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			if (wc.status != IB_WC_WR_FLUSH_ERR) {
-				pr_debug("RX wc.status: 0x%08x\n", wc.status);
-				pr_debug("RX wc.vendor_err: 0x%08x\n",
-					 wc.vendor_err);
-			}
-			isert_conn->post_recv_buf_count--;
-			isert_cq_rx_comp_err(isert_conn);
-		}
+		completed += n;
+		if (completed >= isert_poll_budget)
+			break;
 	}
 
-	ib_req_notify_cq(rx_cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
 }
 
 static void
-isert_cq_rx_callback(struct ib_cq *cq, void *context)
+isert_cq_callback(struct ib_cq *cq, void *context)
 {
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
+	struct isert_comp *comp = context;
 
-	queue_work(isert_rx_wq, &cq_desc->cq_rx_work);
+	queue_work(isert_comp_wq, &comp->work);
 }
 
 static int
@@ -2147,13 +2057,10 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
 	struct ib_send_wr *wr_failed;
 	int ret;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
 			   &wr_failed);
 	if (ret) {
-		pr_err("ib_post_send failed with %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
+		isert_err("ib_post_send failed with %d\n", ret);
 		return ret;
 	}
 	return ret;
@@ -2200,9 +2107,9 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 		isert_cmd->tx_desc.num_sge = 2;
 	}
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("Posting SCSI Response\n");
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2231,8 +2138,16 @@ isert_get_sup_prot_ops(struct iscsi_conn *conn)
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	struct isert_device *device = isert_conn->conn_device;
 
-	if (device->pi_capable)
-		return TARGET_PROT_ALL;
+	if (conn->tpg->tpg_attrib.t10_pi) {
+		if (device->pi_capable) {
+			isert_info("conn %p PI offload enabled\n", isert_conn);
+			isert_conn->pi_support = true;
+			return TARGET_PROT_ALL;
+		}
+	}
+
+	isert_info("conn %p PI offload disabled\n", isert_conn);
+	isert_conn->pi_support = false;
 
 	return TARGET_PROT_NORMAL;
 }
@@ -2250,9 +2165,9 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
 			       &isert_cmd->tx_desc.iscsi_header,
 			       nopout_response);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting NOPIN Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2268,9 +2183,9 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 	iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)
 				&isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Logout Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2286,9 +2201,9 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 	iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)
 				  &isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Task Management Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2318,9 +2233,9 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 	tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 	isert_cmd->tx_desc.num_sge = 2;
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Reject\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2358,9 +2273,9 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 		tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 		isert_cmd->tx_desc.num_sge = 2;
 	}
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Text Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2383,30 +2298,31 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
 	send_wr->sg_list = ib_sge;
 	send_wr->num_sge = sg_nents;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	/*
 	 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
 	 */
 	for_each_sg(sg_start, tmp_sg, sg_nents, i) {
-		pr_debug("ISER RDMA from SGL dma_addr: 0x%16llx dma_len: %u, page_off: %u\n",
-			 (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length, page_off);
+		isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
+			  "page_off: %u\n",
+			  (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length, page_off);
 
 		ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
 		ib_sge->length = min_t(u32, data_left,
 				ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
 		ib_sge->lkey = isert_conn->conn_mr->lkey;
 
-		pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n",
-			 ib_sge->addr, ib_sge->length, ib_sge->lkey);
+		isert_dbg("RDMA ib_sge: addr: 0x%llx  length: %u lkey: %x\n",
+			  ib_sge->addr, ib_sge->length, ib_sge->lkey);
 		page_off = 0;
 		data_left -= ib_sge->length;
 		ib_sge++;
-		pr_debug("Incrementing ib_sge pointer to %p\n", ib_sge);
+		isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
 	}
 
-	pr_debug("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
-		 send_wr->sg_list, send_wr->num_sge);
+	isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
+		  send_wr->sg_list, send_wr->num_sge);
 
 	return sg_nents;
 }
@@ -2438,7 +2354,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
 	ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
 	if (!ib_sge) {
-		pr_warn("Unable to allocate ib_sge\n");
+		isert_warn("Unable to allocate ib_sge\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2448,7 +2364,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
 				GFP_KERNEL);
 	if (!wr->send_wr) {
-		pr_debug("Unable to allocate wr->send_wr\n");
+		isert_dbg("Unable to allocate wr->send_wr\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2512,9 +2428,9 @@ isert_map_fr_pagelist(struct ib_device *ib_dev,
 			chunk_start = start_addr;
 		end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
 
-		pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
-			 i, (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length);
+		isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
+			  i, (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length);
 
 		if ((end_addr & ~PAGE_MASK) && i < last_ent) {
 			new_chunk = 0;
@@ -2525,8 +2441,8 @@ isert_map_fr_pagelist(struct ib_device *ib_dev,
 		page = chunk_start & PAGE_MASK;
 		do {
 			fr_pl[n_pages++] = page;
-			pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
-				 n_pages - 1, page);
+			isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
+				  n_pages - 1, page);
 			page += PAGE_SIZE;
 		} while (page < end_addr);
 	}
@@ -2534,6 +2450,21 @@ isert_map_fr_pagelist(struct ib_device *ib_dev,
 	return n_pages;
 }
 
+static inline void
+isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+	u32 rkey;
+
+	memset(inv_wr, 0, sizeof(*inv_wr));
+	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->opcode = IB_WR_LOCAL_INV;
+	inv_wr->ex.invalidate_rkey = mr->rkey;
+
+	/* Bump the key */
+	rkey = ib_inc_rkey(mr->rkey);
+	ib_update_fast_reg_key(mr, rkey);
+}
+
 static int
 isert_fast_reg_mr(struct isert_conn *isert_conn,
 		  struct fast_reg_descriptor *fr_desc,
@@ -2548,15 +2479,13 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	int ret, pagelist_len;
 	u32 page_off;
-	u8 key;
 
 	if (mem->dma_nents == 1) {
 		sge->lkey = isert_conn->conn_mr->lkey;
 		sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
 		sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
-		pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-			 __func__, __LINE__, sge->addr, sge->length,
-			 sge->lkey);
+		isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+			 sge->addr, sge->length, sge->lkey);
 		return 0;
 	}
 
@@ -2572,21 +2501,15 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
 
 	page_off = mem->offset % PAGE_SIZE;
 
-	pr_debug("Use fr_desc %p sg_nents %d offset %u\n",
-		 fr_desc, mem->nents, mem->offset);
+	isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+		  fr_desc, mem->nents, mem->offset);
 
 	pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
 					     &frpl->page_list[0]);
 
-	if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.ex.invalidate_rkey = mr->rkey;
+	if (!(fr_desc->ind & ind)) {
+		isert_inv_rkey(&inv_wr, mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
@@ -2608,7 +2531,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		return ret;
 	}
 	fr_desc->ind &= ~ind;
@@ -2617,9 +2540,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
 	sge->addr = frpl->page_list[0] + page_off;
 	sge->length = mem->len;
 
-	pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 __func__, __LINE__, sge->addr, sge->length,
-		 sge->lkey);
+	isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  sge->addr, sge->length, sge->lkey);
 
 	return ret;
 }
@@ -2665,7 +2587,7 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
 		isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
 		break;
 	default:
-		pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
+		isert_err("Unsupported PI operation %d\n", se_cmd->prot_op);
 		return -EINVAL;
 	}
 
@@ -2681,17 +2603,16 @@ isert_set_prot_checks(u8 prot_checks)
 }
 
 static int
-isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
-		 struct fast_reg_descriptor *fr_desc,
-		 struct ib_sge *data_sge, struct ib_sge *prot_sge,
-		 struct ib_sge *sig_sge)
+isert_reg_sig_mr(struct isert_conn *isert_conn,
+		 struct se_cmd *se_cmd,
+		 struct isert_rdma_wr *rdma_wr,
+		 struct fast_reg_descriptor *fr_desc)
 {
 	struct ib_send_wr sig_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	struct pi_context *pi_ctx = fr_desc->pi_ctx;
 	struct ib_sig_attrs sig_attrs;
 	int ret;
-	u32 key;
 
 	memset(&sig_attrs, 0, sizeof(sig_attrs));
 	ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
@@ -2701,26 +2622,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
 	sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
 
 	if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
 	}
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
 	sig_wr.opcode = IB_WR_REG_SIG_MR;
 	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-	sig_wr.sg_list = data_sge;
+	sig_wr.sg_list = &rdma_wr->ib_sg[DATA];
 	sig_wr.num_sge = 1;
 	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
 	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
 	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
 	if (se_cmd->t_prot_sg)
-		sig_wr.wr.sig_handover.prot = prot_sge;
+		sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT];
 
 	if (!wr)
 		wr = &sig_wr;
@@ -2729,39 +2644,98 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		goto err;
 	}
 	fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
 
-	sig_sge->lkey = pi_ctx->sig_mr->lkey;
-	sig_sge->addr = 0;
-	sig_sge->length = se_cmd->data_length;
+	rdma_wr->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
+	rdma_wr->ib_sg[SIG].addr = 0;
+	rdma_wr->ib_sg[SIG].length = se_cmd->data_length;
 	if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
 	    se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
 		/*
 		 * We have protection guards on the wire
 		 * so we need to set a larget transfer
 		 */
-		sig_sge->length += se_cmd->prot_length;
+		rdma_wr->ib_sg[SIG].length += se_cmd->prot_length;
 
-	pr_debug("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 sig_sge->addr, sig_sge->length,
-		 sig_sge->lkey);
+	isert_dbg("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  rdma_wr->ib_sg[SIG].addr, rdma_wr->ib_sg[SIG].length,
+		  rdma_wr->ib_sg[SIG].lkey);
 err:
 	return ret;
 }
 
 static int
+isert_handle_prot_cmd(struct isert_conn *isert_conn,
+		      struct isert_cmd *isert_cmd,
+		      struct isert_rdma_wr *wr)
+{
+	struct isert_device *device = isert_conn->conn_device;
+	struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
+	int ret;
+
+	if (!wr->fr_desc->pi_ctx) {
+		ret = isert_create_pi_ctx(wr->fr_desc,
+					  device->ib_device,
+					  isert_conn->conn_pd);
+		if (ret) {
+			isert_err("conn %p failed to allocate pi_ctx\n",
+				  isert_conn);
+			return ret;
+		}
+	}
+
+	if (se_cmd->t_prot_sg) {
+		ret = isert_map_data_buf(isert_conn, isert_cmd,
+					 se_cmd->t_prot_sg,
+					 se_cmd->t_prot_nents,
+					 se_cmd->prot_length,
+					 0, wr->iser_ib_op, &wr->prot);
+		if (ret) {
+			isert_err("conn %p failed to map protection buffer\n",
+				  isert_conn);
+			return ret;
+		}
+
+		memset(&wr->ib_sg[PROT], 0, sizeof(wr->ib_sg[PROT]));
+		ret = isert_fast_reg_mr(isert_conn, wr->fr_desc, &wr->prot,
+					ISERT_PROT_KEY_VALID, &wr->ib_sg[PROT]);
+		if (ret) {
+			isert_err("conn %p failed to fast reg mr\n",
+				  isert_conn);
+			goto unmap_prot_cmd;
+		}
+	}
+
+	ret = isert_reg_sig_mr(isert_conn, se_cmd, wr, wr->fr_desc);
+	if (ret) {
+		isert_err("conn %p failed to fast reg mr\n",
+			  isert_conn);
+		goto unmap_prot_cmd;
+	}
+	wr->fr_desc->ind |= ISERT_PROTECTED;
+
+	return 0;
+
+unmap_prot_cmd:
+	if (se_cmd->t_prot_sg)
+		isert_unmap_data_buf(isert_conn, &wr->prot);
+
+	return ret;
+}
+
+static int
 isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	       struct isert_rdma_wr *wr)
 {
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
 	struct isert_conn *isert_conn = conn->context;
-	struct ib_sge data_sge;
-	struct ib_send_wr *send_wr;
 	struct fast_reg_descriptor *fr_desc = NULL;
+	struct ib_send_wr *send_wr;
+	struct ib_sge *ib_sg;
 	u32 offset;
 	int ret = 0;
 	unsigned long flags;
@@ -2775,8 +2749,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	if (ret)
 		return ret;
 
-	if (wr->data.dma_nents != 1 ||
-	    se_cmd->prot_op != TARGET_PROT_NORMAL) {
+	if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
 		fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
 					   struct fast_reg_descriptor, list);
@@ -2786,38 +2759,21 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	}
 
 	ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data,
-				ISERT_DATA_KEY_VALID, &data_sge);
+				ISERT_DATA_KEY_VALID, &wr->ib_sg[DATA]);
 	if (ret)
 		goto unmap_cmd;
 
-	if (se_cmd->prot_op != TARGET_PROT_NORMAL) {
-		struct ib_sge prot_sge, sig_sge;
-
-		if (se_cmd->t_prot_sg) {
-			ret = isert_map_data_buf(isert_conn, isert_cmd,
-						 se_cmd->t_prot_sg,
-						 se_cmd->t_prot_nents,
-						 se_cmd->prot_length,
-						 0, wr->iser_ib_op, &wr->prot);
-			if (ret)
-				goto unmap_cmd;
-
-			ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot,
-						ISERT_PROT_KEY_VALID, &prot_sge);
-			if (ret)
-				goto unmap_prot_cmd;
-		}
-
-		ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc,
-				       &data_sge, &prot_sge, &sig_sge);
+	if (isert_prot_cmd(isert_conn, se_cmd)) {
+		ret = isert_handle_prot_cmd(isert_conn, isert_cmd, wr);
 		if (ret)
-			goto unmap_prot_cmd;
+			goto unmap_cmd;
 
-		fr_desc->ind |= ISERT_PROTECTED;
-		memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge));
-	} else
-		memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge));
+		ib_sg = &wr->ib_sg[SIG];
+	} else {
+		ib_sg = &wr->ib_sg[DATA];
+	}
 
+	memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
 	wr->ib_sge = &wr->s_ib_sge;
 	wr->send_wr_num = 1;
 	memset(&wr->s_send_wr, 0, sizeof(*send_wr));
@@ -2827,12 +2783,12 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	send_wr = &isert_cmd->rdma_wr.s_send_wr;
 	send_wr->sg_list = &wr->s_ib_sge;
 	send_wr->num_sge = 1;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
 		send_wr->opcode = IB_WR_RDMA_WRITE;
 		send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
 		send_wr->wr.rdma.rkey = isert_cmd->read_stag;
-		send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ?
+		send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
 				      0 : IB_SEND_SIGNALED;
 	} else {
 		send_wr->opcode = IB_WR_RDMA_READ;
@@ -2842,9 +2798,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	}
 
 	return 0;
-unmap_prot_cmd:
-	if (se_cmd->t_prot_sg)
-		isert_unmap_data_buf(isert_conn, &wr->prot);
+
 unmap_cmd:
 	if (fr_desc) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
@@ -2867,16 +2821,17 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n",
+	isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
 		 isert_cmd, se_cmd->data_length);
+
 	wr->iser_ib_op = ISER_IB_RDMA_WRITE;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL) {
+	if (!isert_prot_cmd(isert_conn, se_cmd)) {
 		/*
 		 * Build isert_conn->tx_desc for iSCSI response PDU and attach
 		 */
@@ -2886,24 +2841,20 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 				     &isert_cmd->tx_desc.iscsi_header);
 		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 		isert_init_send_wr(isert_conn, isert_cmd,
-				   &isert_cmd->tx_desc.send_wr, false);
+				   &isert_cmd->tx_desc.send_wr);
 		isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
 		wr->send_wr_num += 1;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL)
-		pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
+	if (!isert_prot_cmd(isert_conn, se_cmd))
+		isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
 			 "READ\n", isert_cmd);
 	else
-		pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
+		isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
 			 isert_cmd);
 
 	return 1;
@@ -2920,23 +2871,20 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
+	isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
 		 isert_cmd, se_cmd->data_length, cmd->write_data_done);
 	wr->iser_ib_op = ISER_IB_RDMA_READ;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
-	pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
+
+	isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
 		 isert_cmd);
 
 	return 0;
@@ -2952,7 +2900,7 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 		ret = isert_put_nopin(cmd, conn, false);
 		break;
 	default:
-		pr_err("Unknown immediate state: 0x%02x\n", state);
+		isert_err("Unknown immediate state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -2963,15 +2911,14 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 static int
 isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 {
+	struct isert_conn *isert_conn = conn->context;
 	int ret;
 
 	switch (state) {
 	case ISTATE_SEND_LOGOUTRSP:
 		ret = isert_put_logout_rsp(cmd, conn);
-		if (!ret) {
-			pr_debug("Returning iSER Logout -EAGAIN\n");
-			ret = -EAGAIN;
-		}
+		if (!ret)
+			isert_conn->logout_posted = true;
 		break;
 	case ISTATE_SEND_NOPIN:
 		ret = isert_put_nopin(cmd, conn, true);
@@ -2993,7 +2940,7 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 		ret = isert_put_response(conn, cmd);
 		break;
 	default:
-		pr_err("Unknown response state: 0x%02x\n", state);
+		isert_err("Unknown response state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -3001,27 +2948,64 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 	return ret;
 }
 
+struct rdma_cm_id *
+isert_setup_id(struct isert_np *isert_np)
+{
+	struct iscsi_np *np = isert_np->np;
+	struct rdma_cm_id *id;
+	struct sockaddr *sa;
+	int ret;
+
+	sa = (struct sockaddr *)&np->np_sockaddr;
+	isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
+
+	id = rdma_create_id(isert_cma_handler, isert_np,
+			    RDMA_PS_TCP, IB_QPT_RC);
+	if (IS_ERR(id)) {
+		isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
+		ret = PTR_ERR(id);
+		goto out;
+	}
+	isert_dbg("id %p context %p\n", id, id->context);
+
+	ret = rdma_bind_addr(id, sa);
+	if (ret) {
+		isert_err("rdma_bind_addr() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+	if (ret) {
+		isert_err("rdma_listen() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	return id;
+out_id:
+	rdma_destroy_id(id);
+out:
+	return ERR_PTR(ret);
+}
+
 static int
 isert_setup_np(struct iscsi_np *np,
 	       struct __kernel_sockaddr_storage *ksockaddr)
 {
 	struct isert_np *isert_np;
 	struct rdma_cm_id *isert_lid;
-	struct sockaddr *sa;
 	int ret;
 
 	isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
 	if (!isert_np) {
-		pr_err("Unable to allocate struct isert_np\n");
+		isert_err("Unable to allocate struct isert_np\n");
 		return -ENOMEM;
 	}
 	sema_init(&isert_np->np_sem, 0);
 	mutex_init(&isert_np->np_accept_mutex);
 	INIT_LIST_HEAD(&isert_np->np_accept_list);
 	init_completion(&isert_np->np_login_comp);
+	isert_np->np = np;
 
-	sa = (struct sockaddr *)ksockaddr;
-	pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa);
 	/*
 	 * Setup the np->np_sockaddr from the passed sockaddr setup
 	 * in iscsi_target_configfs.c code..
@@ -3029,37 +3013,20 @@ isert_setup_np(struct iscsi_np *np,
 	memcpy(&np->np_sockaddr, ksockaddr,
 	       sizeof(struct __kernel_sockaddr_storage));
 
-	isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP,
-				IB_QPT_RC);
+	isert_lid = isert_setup_id(isert_np);
 	if (IS_ERR(isert_lid)) {
-		pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n",
-		       PTR_ERR(isert_lid));
 		ret = PTR_ERR(isert_lid);
 		goto out;
 	}
 
-	ret = rdma_bind_addr(isert_lid, sa);
-	if (ret) {
-		pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
-	ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG);
-	if (ret) {
-		pr_err("rdma_listen() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
 	isert_np->np_cm_id = isert_lid;
 	np->np_context = isert_np;
-	pr_debug("Setup isert_lid->context: %p\n", isert_lid->context);
 
 	return 0;
 
-out_lid:
-	rdma_destroy_id(isert_lid);
 out:
 	kfree(isert_np);
+
 	return ret;
 }
 
@@ -3075,16 +3042,12 @@ isert_rdma_accept(struct isert_conn *isert_conn)
 	cp.retry_count = 7;
 	cp.rnr_retry_count = 7;
 
-	pr_debug("Before rdma_accept >>>>>>>>>>>>>>>>>>>>.\n");
-
 	ret = rdma_accept(cm_id, &cp);
 	if (ret) {
-		pr_err("rdma_accept() failed with: %d\n", ret);
+		isert_err("rdma_accept() failed with: %d\n", ret);
 		return ret;
 	}
 
-	pr_debug("After rdma_accept >>>>>>>>>>>>>>>>>>>>>.\n");
-
 	return 0;
 }
 
@@ -3094,7 +3057,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	int ret;
 
-	pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn);
+	isert_info("before login_req comp conn: %p\n", isert_conn);
+	ret = wait_for_completion_interruptible(&isert_conn->login_req_comp);
+	if (ret) {
+		isert_err("isert_conn %p interrupted before got login req\n",
+			  isert_conn);
+		return ret;
+	}
+	reinit_completion(&isert_conn->login_req_comp);
+
 	/*
 	 * For login requests after the first PDU, isert_rx_login_req() will
 	 * kick schedule_delayed_work(&conn->login_work) as the packet is
@@ -3104,11 +3075,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
 	if (!login->first_request)
 		return 0;
 
+	isert_rx_login_req(isert_conn);
+
+	isert_info("before conn_login_comp conn: %p\n", conn);
 	ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
 	if (ret)
 		return ret;
 
-	pr_debug("isert_get_login_rx processing login->req: %p\n", login->req);
+	isert_info("processing login->req: %p\n", login->req);
+
 	return 0;
 }
 
@@ -3161,7 +3136,7 @@ accept_wait:
 	spin_lock_bh(&np->np_thread_lock);
 	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("np_thread_state %d for isert_accept_np\n",
+		isert_dbg("np_thread_state %d for isert_accept_np\n",
 			 np->np_thread_state);
 		/**
 		 * No point in stalling here when np_thread
@@ -3186,17 +3161,10 @@ accept_wait:
 	isert_conn->conn = conn;
 	max_accept = 0;
 
-	ret = isert_rdma_post_recvl(isert_conn);
-	if (ret)
-		return ret;
-
-	ret = isert_rdma_accept(isert_conn);
-	if (ret)
-		return ret;
-
 	isert_set_conn_info(np, conn, isert_conn);
 
-	pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn);
+	isert_dbg("Processing isert_conn: %p\n", isert_conn);
+
 	return 0;
 }
 
@@ -3204,25 +3172,103 @@ static void
 isert_free_np(struct iscsi_np *np)
 {
 	struct isert_np *isert_np = (struct isert_np *)np->np_context;
+	struct isert_conn *isert_conn, *n;
 
 	if (isert_np->np_cm_id)
 		rdma_destroy_id(isert_np->np_cm_id);
 
+	/*
+	 * FIXME: At this point we don't have a good way to ensure
+	 * that there are no hanging connections left that have
+	 * completed RDMA establishment but didn't start the iSCSI login
+	 * process. So work around this by cleaning up whatever piled
+	 * up in np_accept_list.
+	 */
+	mutex_lock(&isert_np->np_accept_mutex);
+	if (!list_empty(&isert_np->np_accept_list)) {
+		isert_info("Still have isert connections, cleaning up...\n");
+		list_for_each_entry_safe(isert_conn, n,
+					 &isert_np->np_accept_list,
+					 conn_accept_node) {
+			isert_info("cleaning isert_conn %p state (%d)\n",
+				   isert_conn, isert_conn->state);
+			isert_connect_release(isert_conn);
+		}
+	}
+	mutex_unlock(&isert_np->np_accept_mutex);
+
 	np->np_context = NULL;
 	kfree(isert_np);
 }
 
+static void isert_release_work(struct work_struct *work)
+{
+	struct isert_conn *isert_conn = container_of(work,
+						     struct isert_conn,
+						     release_work);
+
+	isert_info("Starting release conn %p\n", isert_conn);
+
+	wait_for_completion(&isert_conn->conn_wait);
+
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn->state = ISER_CONN_DOWN;
+	mutex_unlock(&isert_conn->conn_mutex);
+
+	isert_info("Destroying conn %p\n", isert_conn);
+	isert_put_conn(isert_conn);
+}
+
+static void
+isert_wait4logout(struct isert_conn *isert_conn)
+{
+	struct iscsi_conn *conn = isert_conn->conn;
+
+	isert_info("conn %p\n", isert_conn);
+
+	if (isert_conn->logout_posted) {
+		isert_info("conn %p wait for conn_logout_comp\n", isert_conn);
+		wait_for_completion_timeout(&conn->conn_logout_comp,
+					    SECONDS_FOR_LOGOUT_COMP * HZ);
+	}
+}
+
+static void
+isert_wait4cmds(struct iscsi_conn *conn)
+{
+	isert_info("iscsi_conn %p\n", conn);
+
+	if (conn->sess) {
+		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
+		target_wait_for_sess_cmds(conn->sess->se_sess);
+	}
+}
+
+static void
+isert_wait4flush(struct isert_conn *isert_conn)
+{
+	struct ib_recv_wr *bad_wr;
+
+	isert_info("conn %p\n", isert_conn);
+
+	init_completion(&isert_conn->conn_wait_comp_err);
+	isert_conn->beacon.wr_id = ISER_BEACON_WRID;
+	/* post an indication that all flush errors were consumed */
+	if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+		isert_err("conn %p failed to post beacon\n", isert_conn);
+		return;
+	}
+
+	wait_for_completion(&isert_conn->conn_wait_comp_err);
+}
+
 static void isert_wait_conn(struct iscsi_conn *conn)
 {
 	struct isert_conn *isert_conn = conn->context;
 
-	pr_debug("isert_wait_conn: Starting \n");
+	isert_info("Starting conn %p\n", isert_conn);
 
 	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->conn_cm_id && !isert_conn->disconnect) {
-		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
 	/*
 	 * Only wait for conn_wait_comp_err if the isert_conn made it
 	 * into full feature phase..
@@ -3231,14 +3277,15 @@ static void isert_wait_conn(struct iscsi_conn *conn)
 		mutex_unlock(&isert_conn->conn_mutex);
 		return;
 	}
-	if (isert_conn->state == ISER_CONN_UP)
-		isert_conn->state = ISER_CONN_TERMINATING;
+	isert_conn_terminate(isert_conn);
 	mutex_unlock(&isert_conn->conn_mutex);
 
-	wait_for_completion(&isert_conn->conn_wait_comp_err);
+	isert_wait4cmds(conn);
+	isert_wait4flush(isert_conn);
+	isert_wait4logout(isert_conn);
 
-	wait_for_completion(&isert_conn->conn_wait);
-	isert_put_conn(isert_conn);
+	INIT_WORK(&isert_conn->release_work, isert_release_work);
+	queue_work(isert_release_wq, &isert_conn->release_work);
 }
 
 static void isert_free_conn(struct iscsi_conn *conn)
@@ -3273,35 +3320,39 @@ static int __init isert_init(void)
 {
 	int ret;
 
-	isert_rx_wq = alloc_workqueue("isert_rx_wq", 0, 0);
-	if (!isert_rx_wq) {
-		pr_err("Unable to allocate isert_rx_wq\n");
+	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
+	if (!isert_comp_wq) {
+		isert_err("Unable to allocate isert_comp_wq\n");
+		ret = -ENOMEM;
 		return -ENOMEM;
 	}
 
-	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
-	if (!isert_comp_wq) {
-		pr_err("Unable to allocate isert_comp_wq\n");
+	isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!isert_release_wq) {
+		isert_err("Unable to allocate isert_release_wq\n");
 		ret = -ENOMEM;
-		goto destroy_rx_wq;
+		goto destroy_comp_wq;
 	}
 
 	iscsit_register_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Loaded iser_target_transport\n");
+
 	return 0;
 
-destroy_rx_wq:
-	destroy_workqueue(isert_rx_wq);
+destroy_comp_wq:
+	destroy_workqueue(isert_comp_wq);
+
 	return ret;
 }
 
 static void __exit isert_exit(void)
 {
 	flush_scheduled_work();
+	destroy_workqueue(isert_release_wq);
 	destroy_workqueue(isert_comp_wq);
-	destroy_workqueue(isert_rx_wq);
 	iscsit_unregister_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Released iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
 }
 
 MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 04f51f7..8dc8415 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -4,9 +4,37 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 
+#define DRV_NAME	"isert"
+#define PFX		DRV_NAME ": "
+
+#define isert_dbg(fmt, arg...)				 \
+	do {						 \
+		if (unlikely(isert_debug_level > 2))	 \
+			printk(KERN_DEBUG PFX "%s: " fmt,\
+				__func__ , ## arg);	 \
+	} while (0)
+
+#define isert_warn(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 0))	\
+			pr_warn(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_info(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 1))	\
+			pr_info(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_err(fmt, arg...) \
+	pr_err(PFX "%s: " fmt, __func__ , ## arg)
+
 #define ISERT_RDMA_LISTEN_BACKLOG	10
 #define ISCSI_ISER_SG_TABLESIZE		256
 #define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL
+#define ISER_BEACON_WRID               0xfffffffffffffffeULL
 
 enum isert_desc_type {
 	ISCSI_TX_CONTROL,
@@ -23,6 +51,7 @@ enum iser_ib_op_code {
 enum iser_conn_state {
 	ISER_CONN_INIT,
 	ISER_CONN_UP,
+	ISER_CONN_FULL_FEATURE,
 	ISER_CONN_TERMINATING,
 	ISER_CONN_DOWN,
 };
@@ -44,9 +73,6 @@ struct iser_tx_desc {
 	struct ib_sge	tx_sg[2];
 	int		num_sge;
 	struct isert_cmd *isert_cmd;
-	struct llist_node *comp_llnode_batch;
-	struct llist_node comp_llnode;
-	bool		llnode_active;
 	struct ib_send_wr send_wr;
 } __packed;
 
@@ -81,6 +107,12 @@ struct isert_data_buf {
 	enum dma_data_direction dma_dir;
 };
 
+enum {
+	DATA = 0,
+	PROT = 1,
+	SIG = 2,
+};
+
 struct isert_rdma_wr {
 	struct list_head	wr_list;
 	struct isert_cmd	*isert_cmd;
@@ -90,6 +122,7 @@ struct isert_rdma_wr {
 	int			send_wr_num;
 	struct ib_send_wr	*send_wr;
 	struct ib_send_wr	s_send_wr;
+	struct ib_sge		ib_sg[3];
 	struct isert_data_buf	data;
 	struct isert_data_buf	prot;
 	struct fast_reg_descriptor *fr_desc;
@@ -117,14 +150,15 @@ struct isert_device;
 struct isert_conn {
 	enum iser_conn_state	state;
 	int			post_recv_buf_count;
-	atomic_t		post_send_buf_count;
 	u32			responder_resources;
 	u32			initiator_depth;
+	bool			pi_support;
 	u32			max_sge;
 	char			*login_buf;
 	char			*login_req_buf;
 	char			*login_rsp_buf;
 	u64			login_req_dma;
+	int			login_req_len;
 	u64			login_rsp_dma;
 	unsigned int		conn_rx_desc_head;
 	struct iser_rx_desc	*conn_rx_descs;
@@ -132,13 +166,13 @@ struct isert_conn {
 	struct iscsi_conn	*conn;
 	struct list_head	conn_accept_node;
 	struct completion	conn_login_comp;
+	struct completion	login_req_comp;
 	struct iser_tx_desc	conn_login_tx_desc;
 	struct rdma_cm_id	*conn_cm_id;
 	struct ib_pd		*conn_pd;
 	struct ib_mr		*conn_mr;
 	struct ib_qp		*conn_qp;
 	struct isert_device	*conn_device;
-	struct work_struct	conn_logout_work;
 	struct mutex		conn_mutex;
 	struct completion	conn_wait;
 	struct completion	conn_wait_comp_err;
@@ -147,31 +181,38 @@ struct isert_conn {
 	int			conn_fr_pool_size;
 	/* lock to protect fastreg pool */
 	spinlock_t		conn_lock;
-#define ISERT_COMP_BATCH_COUNT	8
-	int			conn_comp_batch;
-	struct llist_head	conn_comp_llist;
-	bool                    disconnect;
+	struct work_struct	release_work;
+	struct ib_recv_wr       beacon;
+	bool                    logout_posted;
 };
 
 #define ISERT_MAX_CQ 64
 
-struct isert_cq_desc {
-	struct isert_device	*device;
-	int			cq_index;
-	struct work_struct	cq_rx_work;
-	struct work_struct	cq_tx_work;
+/**
+ * struct isert_comp - iSER completion context
+ *
+ * @device:     pointer to device handle
+ * @cq:         completion queue
+ * @wcs:        work completion array
+ * @active_qps: Number of active QPs attached
+ *              to completion context
+ * @work:       completion work handle
+ */
+struct isert_comp {
+	struct isert_device     *device;
+	struct ib_cq		*cq;
+	struct ib_wc		 wcs[16];
+	int                      active_qps;
+	struct work_struct	 work;
 };
 
 struct isert_device {
 	int			use_fastreg;
 	bool			pi_capable;
-	int			cqs_used;
 	int			refcount;
-	int			cq_active_qps[ISERT_MAX_CQ];
 	struct ib_device	*ib_device;
-	struct ib_cq		*dev_rx_cq[ISERT_MAX_CQ];
-	struct ib_cq		*dev_tx_cq[ISERT_MAX_CQ];
-	struct isert_cq_desc	*cq_desc;
+	struct isert_comp	*comps;
+	int                     comps_used;
 	struct list_head	dev_node;
 	struct ib_device_attr	dev_attr;
 	int			(*reg_rdma_mem)(struct iscsi_conn *conn,
@@ -182,6 +223,7 @@ struct isert_device {
 };
 
 struct isert_np {
+	struct iscsi_np         *np;
 	struct semaphore	np_sem;
 	struct rdma_cm_id	*np_cm_id;
 	struct mutex		np_accept_mutex;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 5461924..0747c05 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2740,7 +2740,6 @@ static struct scsi_host_template srp_template = {
 	.info				= srp_target_info,
 	.queuecommand			= srp_queuecommand,
 	.change_queue_depth             = srp_change_queue_depth,
-	.change_queue_type              = scsi_change_queue_type,
 	.eh_abort_handler		= srp_abort,
 	.eh_device_reset_handler	= srp_reset_device,
 	.eh_host_reset_handler		= srp_reset_host,
@@ -2929,7 +2928,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 		return -ENOMEM;
 
 	sep_opt = options;
-	while ((p = strsep(&sep_opt, ",")) != NULL) {
+	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
 		if (!*p)
 			continue;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index dc82968..eb694dd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1708,17 +1708,17 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
 
 	switch (srp_cmd->task_attr) {
 	case SRP_CMD_SIMPLE_Q:
-		cmd->sam_task_attr = MSG_SIMPLE_TAG;
+		cmd->sam_task_attr = TCM_SIMPLE_TAG;
 		break;
 	case SRP_CMD_ORDERED_Q:
 	default:
-		cmd->sam_task_attr = MSG_ORDERED_TAG;
+		cmd->sam_task_attr = TCM_ORDERED_TAG;
 		break;
 	case SRP_CMD_HEAD_OF_Q:
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		break;
 	case SRP_CMD_ACA:
-		cmd->sam_task_attr = MSG_ACA_TAG;
+		cmd->sam_task_attr = TCM_ACA_TAG;
 		break;
 	}
 
@@ -1733,7 +1733,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
 				       sizeof(srp_cmd->lun));
 	rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
 			&send_ioctx->sense_data[0], unpacked_lun, data_len,
-			MSG_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+			TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
 	if (rc != 0) {
 		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		goto send_sense;
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index e29c04e..e853a21 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -527,14 +527,14 @@ EXPORT_SYMBOL(gameport_set_phys);
  */
 static void gameport_init_port(struct gameport *gameport)
 {
-	static atomic_t gameport_no = ATOMIC_INIT(0);
+	static atomic_t gameport_no = ATOMIC_INIT(-1);
 
 	__module_get(THIS_MODULE);
 
 	mutex_init(&gameport->drv_mutex);
 	device_initialize(&gameport->dev);
 	dev_set_name(&gameport->dev, "gameport%lu",
-			(unsigned long)atomic_inc_return(&gameport_no) - 1);
+			(unsigned long)atomic_inc_return(&gameport_no));
 	gameport->dev.bus = &gameport_bus;
 	gameport->dev.release = gameport_release_port;
 	if (gameport->parent)
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 0f175f5..04217c2 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1775,7 +1775,7 @@ EXPORT_SYMBOL_GPL(input_class);
  */
 struct input_dev *input_allocate_device(void)
 {
-	static atomic_t input_no = ATOMIC_INIT(0);
+	static atomic_t input_no = ATOMIC_INIT(-1);
 	struct input_dev *dev;
 
 	dev = kzalloc(sizeof(struct input_dev), GFP_KERNEL);
@@ -1790,7 +1790,7 @@ struct input_dev *input_allocate_device(void)
 		INIT_LIST_HEAD(&dev->node);
 
 		dev_set_name(&dev->dev, "input%lu",
-			     (unsigned long) atomic_inc_return(&input_no) - 1);
+			     (unsigned long)atomic_inc_return(&input_no));
 
 		__module_get(THIS_MODULE);
 	}
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index fc55f0d..3aa2f3f 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -886,8 +886,8 @@ static void xpad_led_set(struct led_classdev *led_cdev,
 
 static int xpad_led_probe(struct usb_xpad *xpad)
 {
-	static atomic_t led_seq	= ATOMIC_INIT(0);
-	long led_no;
+	static atomic_t led_seq	= ATOMIC_INIT(-1);
+	unsigned long led_no;
 	struct xpad_led *led;
 	struct led_classdev *led_cdev;
 	int error;
@@ -899,9 +899,9 @@ static int xpad_led_probe(struct usb_xpad *xpad)
 	if (!led)
 		return -ENOMEM;
 
-	led_no = (long)atomic_inc_return(&led_seq) - 1;
+	led_no = atomic_inc_return(&led_seq);
 
-	snprintf(led->name, sizeof(led->name), "xpad%ld", led_no);
+	snprintf(led->name, sizeof(led->name), "xpad%lu", led_no);
 	led->xpad = xpad;
 
 	led_cdev = &led->led_cdev;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index a3958c6..96ee26c 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -665,14 +665,14 @@ config KEYBOARD_CROS_EC
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_ec_keyb.
 
-config KEYBOARD_CAP1106
-	tristate "Microchip CAP1106 touch sensor"
+config KEYBOARD_CAP11XX
+	tristate "Microchip CAP11XX based touch sensors"
 	depends on OF && I2C
 	select REGMAP_I2C
 	help
-	  Say Y here to enable the CAP1106 touch sensor driver.
+	  Say Y here to enable the CAP11XX touch sensor driver.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called cap1106.
+	  module will be called cap11xx.
 
 endif
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 0a33456..febafa5 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_KEYBOARD_AMIGA)		+= amikbd.o
 obj-$(CONFIG_KEYBOARD_ATARI)		+= atakbd.o
 obj-$(CONFIG_KEYBOARD_ATKBD)		+= atkbd.o
 obj-$(CONFIG_KEYBOARD_BFIN)		+= bf54x-keys.o
-obj-$(CONFIG_KEYBOARD_CAP1106)		+= cap1106.o
+obj-$(CONFIG_KEYBOARD_CAP11XX)		+= cap11xx.o
 obj-$(CONFIG_KEYBOARD_CLPS711X)		+= clps711x-keypad.o
 obj-$(CONFIG_KEYBOARD_CROS_EC)		+= cros_ec_keyb.o
 obj-$(CONFIG_KEYBOARD_DAVINCI)		+= davinci_keyscan.o
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index d3b8c58..e04a3b4 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -45,6 +45,7 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION("Amiga keyboard driver");
 MODULE_LICENSE("GPL");
 
+#ifdef CONFIG_HW_CONSOLE
 static unsigned char amikbd_keycode[0x78] __initdata = {
 	[0]	 = KEY_GRAVE,
 	[1]	 = KEY_1,
@@ -144,6 +145,32 @@ static unsigned char amikbd_keycode[0x78] __initdata = {
 	[103]	 = KEY_RIGHTMETA
 };
 
+static void __init amikbd_init_console_keymaps(void)
+{
+	/* We can spare 512 bytes on stack for temp_map in init path. */
+	unsigned short temp_map[NR_KEYS];
+	int i, j;
+
+	for (i = 0; i < MAX_NR_KEYMAPS; i++) {
+		if (!key_maps[i])
+			continue;
+		memset(temp_map, 0, sizeof(temp_map));
+		for (j = 0; j < 0x78; j++) {
+			if (!amikbd_keycode[j])
+				continue;
+			temp_map[j] = key_maps[i][amikbd_keycode[j]];
+		}
+		for (j = 0; j < NR_KEYS; j++) {
+			if (!temp_map[j])
+				temp_map[j] = 0xf200;
+		}
+		memcpy(key_maps[i], temp_map, sizeof(temp_map));
+	}
+}
+#else /* !CONFIG_HW_CONSOLE */
+static inline void amikbd_init_console_keymaps(void) {}
+#endif /* !CONFIG_HW_CONSOLE */
+
 static const char *amikbd_messages[8] = {
 	[0] = KERN_ALERT "amikbd: Ctrl-Amiga-Amiga reset warning!!\n",
 	[1] = KERN_WARNING "amikbd: keyboard lost sync\n",
@@ -186,7 +213,7 @@ static irqreturn_t amikbd_interrupt(int irq, void *data)
 static int __init amikbd_probe(struct platform_device *pdev)
 {
 	struct input_dev *dev;
-	int i, j, err;
+	int i, err;
 
 	dev = input_allocate_device();
 	if (!dev) {
@@ -207,22 +234,8 @@ static int __init amikbd_probe(struct platform_device *pdev)
 	for (i = 0; i < 0x78; i++)
 		set_bit(i, dev->keybit);
 
-	for (i = 0; i < MAX_NR_KEYMAPS; i++) {
-		static u_short temp_map[NR_KEYS] __initdata;
-		if (!key_maps[i])
-			continue;
-		memset(temp_map, 0, sizeof(temp_map));
-		for (j = 0; j < 0x78; j++) {
-			if (!amikbd_keycode[j])
-				continue;
-			temp_map[j] = key_maps[i][amikbd_keycode[j]];
-		}
-		for (j = 0; j < NR_KEYS; j++) {
-			if (!temp_map[j])
-				temp_map[j] = 0xf200;
-		}
-		memcpy(key_maps[i], temp_map, sizeof(temp_map));
-	}
+	amikbd_init_console_keymaps();
+
 	ciaa.cra &= ~0x41;	 /* serial data in, turn off TA */
 	err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
 			  dev);
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 6f5d795..e27a258 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -456,8 +456,9 @@ static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data,
 
 	keycode = atkbd->keycode[code];
 
-	if (keycode != ATKBD_KEY_NULL)
-		input_event(dev, EV_MSC, MSC_SCAN, code);
+	if (!(atkbd->release && test_bit(code, atkbd->force_release_mask)))
+		if (keycode != ATKBD_KEY_NULL)
+			input_event(dev, EV_MSC, MSC_SCAN, code);
 
 	switch (keycode) {
 	case ATKBD_KEY_NULL:
@@ -511,6 +512,7 @@ static irqreturn_t atkbd_interrupt(struct serio *serio, unsigned char data,
 		input_sync(dev);
 
 		if (value && test_bit(code, atkbd->force_release_mask)) {
+			input_event(dev, EV_MSC, MSC_SCAN, code);
 			input_report_key(dev, keycode, 0);
 			input_sync(dev);
 		}
diff --git a/drivers/input/keyboard/cap1106.c b/drivers/input/keyboard/cap1106.c
deleted file mode 100644
index d70b65a..0000000
--- a/drivers/input/keyboard/cap1106.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Input driver for Microchip CAP1106, 6 channel capacitive touch sensor
- *
- * http://www.microchip.com/wwwproducts/Devices.aspx?product=CAP1106
- *
- * (c) 2014 Daniel Mack <linux@zonque.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/input.h>
-#include <linux/of_irq.h>
-#include <linux/regmap.h>
-#include <linux/i2c.h>
-#include <linux/gpio/consumer.h>
-
-#define CAP1106_REG_MAIN_CONTROL	0x00
-#define CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT	(6)
-#define CAP1106_REG_MAIN_CONTROL_GAIN_MASK	(0xc0)
-#define CAP1106_REG_MAIN_CONTROL_DLSEEP		BIT(4)
-#define CAP1106_REG_GENERAL_STATUS	0x02
-#define CAP1106_REG_SENSOR_INPUT	0x03
-#define CAP1106_REG_NOISE_FLAG_STATUS	0x0a
-#define CAP1106_REG_SENOR_DELTA(X)	(0x10 + (X))
-#define CAP1106_REG_SENSITIVITY_CONTROL	0x1f
-#define CAP1106_REG_CONFIG		0x20
-#define CAP1106_REG_SENSOR_ENABLE	0x21
-#define CAP1106_REG_SENSOR_CONFIG	0x22
-#define CAP1106_REG_SENSOR_CONFIG2	0x23
-#define CAP1106_REG_SAMPLING_CONFIG	0x24
-#define CAP1106_REG_CALIBRATION		0x26
-#define CAP1106_REG_INT_ENABLE		0x27
-#define CAP1106_REG_REPEAT_RATE		0x28
-#define CAP1106_REG_MT_CONFIG		0x2a
-#define CAP1106_REG_MT_PATTERN_CONFIG	0x2b
-#define CAP1106_REG_MT_PATTERN		0x2d
-#define CAP1106_REG_RECALIB_CONFIG	0x2f
-#define CAP1106_REG_SENSOR_THRESH(X)	(0x30 + (X))
-#define CAP1106_REG_SENSOR_NOISE_THRESH	0x38
-#define CAP1106_REG_STANDBY_CHANNEL	0x40
-#define CAP1106_REG_STANDBY_CONFIG	0x41
-#define CAP1106_REG_STANDBY_SENSITIVITY	0x42
-#define CAP1106_REG_STANDBY_THRESH	0x43
-#define CAP1106_REG_CONFIG2		0x44
-#define CAP1106_REG_SENSOR_BASE_CNT(X)	(0x50 + (X))
-#define CAP1106_REG_SENSOR_CALIB	(0xb1 + (X))
-#define CAP1106_REG_SENSOR_CALIB_LSB1	0xb9
-#define CAP1106_REG_SENSOR_CALIB_LSB2	0xba
-#define CAP1106_REG_PRODUCT_ID		0xfd
-#define CAP1106_REG_MANUFACTURER_ID	0xfe
-#define CAP1106_REG_REVISION		0xff
-
-#define CAP1106_NUM_CHN 6
-#define CAP1106_PRODUCT_ID	0x55
-#define CAP1106_MANUFACTURER_ID	0x5d
-
-struct cap1106_priv {
-	struct regmap *regmap;
-	struct input_dev *idev;
-
-	/* config */
-	unsigned short keycodes[CAP1106_NUM_CHN];
-};
-
-static const struct reg_default cap1106_reg_defaults[] = {
-	{ CAP1106_REG_MAIN_CONTROL,		0x00 },
-	{ CAP1106_REG_GENERAL_STATUS,		0x00 },
-	{ CAP1106_REG_SENSOR_INPUT,		0x00 },
-	{ CAP1106_REG_NOISE_FLAG_STATUS,	0x00 },
-	{ CAP1106_REG_SENSITIVITY_CONTROL,	0x2f },
-	{ CAP1106_REG_CONFIG,			0x20 },
-	{ CAP1106_REG_SENSOR_ENABLE,		0x3f },
-	{ CAP1106_REG_SENSOR_CONFIG,		0xa4 },
-	{ CAP1106_REG_SENSOR_CONFIG2,		0x07 },
-	{ CAP1106_REG_SAMPLING_CONFIG,		0x39 },
-	{ CAP1106_REG_CALIBRATION,		0x00 },
-	{ CAP1106_REG_INT_ENABLE,		0x3f },
-	{ CAP1106_REG_REPEAT_RATE,		0x3f },
-	{ CAP1106_REG_MT_CONFIG,		0x80 },
-	{ CAP1106_REG_MT_PATTERN_CONFIG,	0x00 },
-	{ CAP1106_REG_MT_PATTERN,		0x3f },
-	{ CAP1106_REG_RECALIB_CONFIG,		0x8a },
-	{ CAP1106_REG_SENSOR_THRESH(0),		0x40 },
-	{ CAP1106_REG_SENSOR_THRESH(1),		0x40 },
-	{ CAP1106_REG_SENSOR_THRESH(2),		0x40 },
-	{ CAP1106_REG_SENSOR_THRESH(3),		0x40 },
-	{ CAP1106_REG_SENSOR_THRESH(4),		0x40 },
-	{ CAP1106_REG_SENSOR_THRESH(5),		0x40 },
-	{ CAP1106_REG_SENSOR_NOISE_THRESH,	0x01 },
-	{ CAP1106_REG_STANDBY_CHANNEL,		0x00 },
-	{ CAP1106_REG_STANDBY_CONFIG,		0x39 },
-	{ CAP1106_REG_STANDBY_SENSITIVITY,	0x02 },
-	{ CAP1106_REG_STANDBY_THRESH,		0x40 },
-	{ CAP1106_REG_CONFIG2,			0x40 },
-	{ CAP1106_REG_SENSOR_CALIB_LSB1,	0x00 },
-	{ CAP1106_REG_SENSOR_CALIB_LSB2,	0x00 },
-};
-
-static bool cap1106_volatile_reg(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case CAP1106_REG_MAIN_CONTROL:
-	case CAP1106_REG_SENSOR_INPUT:
-	case CAP1106_REG_SENOR_DELTA(0):
-	case CAP1106_REG_SENOR_DELTA(1):
-	case CAP1106_REG_SENOR_DELTA(2):
-	case CAP1106_REG_SENOR_DELTA(3):
-	case CAP1106_REG_SENOR_DELTA(4):
-	case CAP1106_REG_SENOR_DELTA(5):
-	case CAP1106_REG_PRODUCT_ID:
-	case CAP1106_REG_MANUFACTURER_ID:
-	case CAP1106_REG_REVISION:
-		return true;
-	}
-
-	return false;
-}
-
-static const struct regmap_config cap1106_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-
-	.max_register = CAP1106_REG_REVISION,
-	.reg_defaults = cap1106_reg_defaults,
-
-	.num_reg_defaults = ARRAY_SIZE(cap1106_reg_defaults),
-	.cache_type = REGCACHE_RBTREE,
-	.volatile_reg = cap1106_volatile_reg,
-};
-
-static irqreturn_t cap1106_thread_func(int irq_num, void *data)
-{
-	struct cap1106_priv *priv = data;
-	unsigned int status;
-	int ret, i;
-
-	/*
-	 * Deassert interrupt. This needs to be done before reading the status
-	 * registers, which will not carry valid values otherwise.
-	 */
-	ret = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL, 1, 0);
-	if (ret < 0)
-		goto out;
-
-	ret = regmap_read(priv->regmap, CAP1106_REG_SENSOR_INPUT, &status);
-	if (ret < 0)
-		goto out;
-
-	for (i = 0; i < CAP1106_NUM_CHN; i++)
-		input_report_key(priv->idev, priv->keycodes[i],
-				 status & (1 << i));
-
-	input_sync(priv->idev);
-
-out:
-	return IRQ_HANDLED;
-}
-
-static int cap1106_set_sleep(struct cap1106_priv *priv, bool sleep)
-{
-	return regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL,
-				  CAP1106_REG_MAIN_CONTROL_DLSEEP,
-				  sleep ? CAP1106_REG_MAIN_CONTROL_DLSEEP : 0);
-}
-
-static int cap1106_input_open(struct input_dev *idev)
-{
-	struct cap1106_priv *priv = input_get_drvdata(idev);
-
-	return cap1106_set_sleep(priv, false);
-}
-
-static void cap1106_input_close(struct input_dev *idev)
-{
-	struct cap1106_priv *priv = input_get_drvdata(idev);
-
-	cap1106_set_sleep(priv, true);
-}
-
-static int cap1106_i2c_probe(struct i2c_client *i2c_client,
-			     const struct i2c_device_id *id)
-{
-	struct device *dev = &i2c_client->dev;
-	struct cap1106_priv *priv;
-	struct device_node *node;
-	int i, error, irq, gain = 0;
-	unsigned int val, rev;
-	u32 gain32, keycodes[CAP1106_NUM_CHN];
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->regmap = devm_regmap_init_i2c(i2c_client, &cap1106_regmap_config);
-	if (IS_ERR(priv->regmap))
-		return PTR_ERR(priv->regmap);
-
-	error = regmap_read(priv->regmap, CAP1106_REG_PRODUCT_ID, &val);
-	if (error)
-		return error;
-
-	if (val != CAP1106_PRODUCT_ID) {
-		dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n",
-			val, CAP1106_PRODUCT_ID);
-		return -ENODEV;
-	}
-
-	error = regmap_read(priv->regmap, CAP1106_REG_MANUFACTURER_ID, &val);
-	if (error)
-		return error;
-
-	if (val != CAP1106_MANUFACTURER_ID) {
-		dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n",
-			val, CAP1106_MANUFACTURER_ID);
-		return -ENODEV;
-	}
-
-	error = regmap_read(priv->regmap, CAP1106_REG_REVISION, &rev);
-	if (error < 0)
-		return error;
-
-	dev_info(dev, "CAP1106 detected, revision 0x%02x\n", rev);
-	i2c_set_clientdata(i2c_client, priv);
-	node = dev->of_node;
-
-	if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) {
-		if (is_power_of_2(gain32) && gain32 <= 8)
-			gain = ilog2(gain32);
-		else
-			dev_err(dev, "Invalid sensor-gain value %d\n", gain32);
-	}
-
-	BUILD_BUG_ON(ARRAY_SIZE(keycodes) != ARRAY_SIZE(priv->keycodes));
-
-	/* Provide some useful defaults */
-	for (i = 0; i < ARRAY_SIZE(keycodes); i++)
-		keycodes[i] = KEY_A + i;
-
-	of_property_read_u32_array(node, "linux,keycodes",
-				   keycodes, ARRAY_SIZE(keycodes));
-
-	for (i = 0; i < ARRAY_SIZE(keycodes); i++)
-		priv->keycodes[i] = keycodes[i];
-
-	error = regmap_update_bits(priv->regmap, CAP1106_REG_MAIN_CONTROL,
-				   CAP1106_REG_MAIN_CONTROL_GAIN_MASK,
-				   gain << CAP1106_REG_MAIN_CONTROL_GAIN_SHIFT);
-	if (error)
-		return error;
-
-	/* Disable autorepeat. The Linux input system has its own handling. */
-	error = regmap_write(priv->regmap, CAP1106_REG_REPEAT_RATE, 0);
-	if (error)
-		return error;
-
-	priv->idev = devm_input_allocate_device(dev);
-	if (!priv->idev)
-		return -ENOMEM;
-
-	priv->idev->name = "CAP1106 capacitive touch sensor";
-	priv->idev->id.bustype = BUS_I2C;
-	priv->idev->evbit[0] = BIT_MASK(EV_KEY);
-
-	if (of_property_read_bool(node, "autorepeat"))
-		__set_bit(EV_REP, priv->idev->evbit);
-
-	for (i = 0; i < CAP1106_NUM_CHN; i++)
-		__set_bit(priv->keycodes[i], priv->idev->keybit);
-
-	__clear_bit(KEY_RESERVED, priv->idev->keybit);
-
-	priv->idev->keycode = priv->keycodes;
-	priv->idev->keycodesize = sizeof(priv->keycodes[0]);
-	priv->idev->keycodemax = ARRAY_SIZE(priv->keycodes);
-
-	priv->idev->id.vendor = CAP1106_MANUFACTURER_ID;
-	priv->idev->id.product = CAP1106_PRODUCT_ID;
-	priv->idev->id.version = rev;
-
-	priv->idev->open = cap1106_input_open;
-	priv->idev->close = cap1106_input_close;
-
-	input_set_drvdata(priv->idev, priv);
-
-	/*
-	 * Put the device in deep sleep mode for now.
-	 * ->open() will bring it back once the it is actually needed.
-	 */
-	cap1106_set_sleep(priv, true);
-
-	error = input_register_device(priv->idev);
-	if (error)
-		return error;
-
-	irq = irq_of_parse_and_map(node, 0);
-	if (!irq) {
-		dev_err(dev, "Unable to parse or map IRQ\n");
-		return -ENXIO;
-	}
-
-	error = devm_request_threaded_irq(dev, irq, NULL, cap1106_thread_func,
-					  IRQF_ONESHOT, dev_name(dev), priv);
-	if (error)
-		return error;
-
-	return 0;
-}
-
-static const struct of_device_id cap1106_dt_ids[] = {
-	{ .compatible = "microchip,cap1106", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, cap1106_dt_ids);
-
-static const struct i2c_device_id cap1106_i2c_ids[] = {
-	{ "cap1106", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, cap1106_i2c_ids);
-
-static struct i2c_driver cap1106_i2c_driver = {
-	.driver = {
-		.name	= "cap1106",
-		.owner	= THIS_MODULE,
-		.of_match_table = cap1106_dt_ids,
-	},
-	.id_table	= cap1106_i2c_ids,
-	.probe		= cap1106_i2c_probe,
-};
-
-module_i2c_driver(cap1106_i2c_driver);
-
-MODULE_ALIAS("platform:cap1106");
-MODULE_DESCRIPTION("Microchip CAP1106 driver");
-MODULE_AUTHOR("Daniel Mack <linux@zonque.org>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c
new file mode 100644
index 0000000..4f59f0b
--- /dev/null
+++ b/drivers/input/keyboard/cap11xx.c
@@ -0,0 +1,376 @@
+/*
+ * Input driver for Microchip CAP11xx based capacitive touch sensors
+ *
+ * (c) 2014 Daniel Mack <linux@zonque.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+#include <linux/gpio/consumer.h>
+
+#define CAP11XX_REG_MAIN_CONTROL	0x00
+#define CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT	(6)
+#define CAP11XX_REG_MAIN_CONTROL_GAIN_MASK	(0xc0)
+#define CAP11XX_REG_MAIN_CONTROL_DLSEEP		BIT(4)
+#define CAP11XX_REG_GENERAL_STATUS	0x02
+#define CAP11XX_REG_SENSOR_INPUT	0x03
+#define CAP11XX_REG_NOISE_FLAG_STATUS	0x0a
+#define CAP11XX_REG_SENOR_DELTA(X)	(0x10 + (X))
+#define CAP11XX_REG_SENSITIVITY_CONTROL	0x1f
+#define CAP11XX_REG_CONFIG		0x20
+#define CAP11XX_REG_SENSOR_ENABLE	0x21
+#define CAP11XX_REG_SENSOR_CONFIG	0x22
+#define CAP11XX_REG_SENSOR_CONFIG2	0x23
+#define CAP11XX_REG_SAMPLING_CONFIG	0x24
+#define CAP11XX_REG_CALIBRATION		0x26
+#define CAP11XX_REG_INT_ENABLE		0x27
+#define CAP11XX_REG_REPEAT_RATE		0x28
+#define CAP11XX_REG_MT_CONFIG		0x2a
+#define CAP11XX_REG_MT_PATTERN_CONFIG	0x2b
+#define CAP11XX_REG_MT_PATTERN		0x2d
+#define CAP11XX_REG_RECALIB_CONFIG	0x2f
+#define CAP11XX_REG_SENSOR_THRESH(X)	(0x30 + (X))
+#define CAP11XX_REG_SENSOR_NOISE_THRESH	0x38
+#define CAP11XX_REG_STANDBY_CHANNEL	0x40
+#define CAP11XX_REG_STANDBY_CONFIG	0x41
+#define CAP11XX_REG_STANDBY_SENSITIVITY	0x42
+#define CAP11XX_REG_STANDBY_THRESH	0x43
+#define CAP11XX_REG_CONFIG2		0x44
+#define CAP11XX_REG_CONFIG2_ALT_POL	BIT(6)
+#define CAP11XX_REG_SENSOR_BASE_CNT(X)	(0x50 + (X))
+#define CAP11XX_REG_SENSOR_CALIB(X)	(0xb1 + (X))	/* X: offset from 0xb1 */
+#define CAP11XX_REG_SENSOR_CALIB_LSB1	0xb9
+#define CAP11XX_REG_SENSOR_CALIB_LSB2	0xba
+#define CAP11XX_REG_PRODUCT_ID		0xfd
+#define CAP11XX_REG_MANUFACTURER_ID	0xfe
+#define CAP11XX_REG_REVISION		0xff
+
+#define CAP11XX_MANUFACTURER_ID	0x5d
+
+struct cap11xx_priv {
+	struct regmap *regmap;		/* register access, created in probe */
+	struct input_dev *idev;		/* input device for key reports */
+
+	/* config: one keycode per channel, allocated along with the struct */
+	u32 keycodes[];
+};
+
+struct cap11xx_hw_model {
+	u8 product_id;			/* expected PRODUCT_ID register value */
+	unsigned int num_channels;	/* keycode slots allocated/reported */
+};
+
+enum {
+	CAP1106,
+	CAP1126,
+	CAP1188,
+};
+
+static const struct cap11xx_hw_model cap11xx_devices[] = {
+	[CAP1106] = { .product_id = 0x55, .num_channels = 6 },
+	[CAP1126] = { .product_id = 0x53, .num_channels = 6 },
+	[CAP1188] = { .product_id = 0x50, .num_channels = 8 },
+};
+
+static const struct reg_default cap11xx_reg_defaults[] = {
+	{ CAP11XX_REG_MAIN_CONTROL,		0x00 },
+	{ CAP11XX_REG_GENERAL_STATUS,		0x00 },
+	{ CAP11XX_REG_SENSOR_INPUT,		0x00 },
+	{ CAP11XX_REG_NOISE_FLAG_STATUS,	0x00 },
+	{ CAP11XX_REG_SENSITIVITY_CONTROL,	0x2f },
+	{ CAP11XX_REG_CONFIG,			0x20 },
+	{ CAP11XX_REG_SENSOR_ENABLE,		0x3f },
+	{ CAP11XX_REG_SENSOR_CONFIG,		0xa4 },
+	{ CAP11XX_REG_SENSOR_CONFIG2,		0x07 },
+	{ CAP11XX_REG_SAMPLING_CONFIG,		0x39 },
+	{ CAP11XX_REG_CALIBRATION,		0x00 },
+	{ CAP11XX_REG_INT_ENABLE,		0x3f },
+	{ CAP11XX_REG_REPEAT_RATE,		0x3f },
+	{ CAP11XX_REG_MT_CONFIG,		0x80 },
+	{ CAP11XX_REG_MT_PATTERN_CONFIG,	0x00 },
+	{ CAP11XX_REG_MT_PATTERN,		0x3f },
+	{ CAP11XX_REG_RECALIB_CONFIG,		0x8a },
+	{ CAP11XX_REG_SENSOR_THRESH(0),		0x40 },
+	{ CAP11XX_REG_SENSOR_THRESH(1),		0x40 },
+	{ CAP11XX_REG_SENSOR_THRESH(2),		0x40 },
+	{ CAP11XX_REG_SENSOR_THRESH(3),		0x40 },
+	{ CAP11XX_REG_SENSOR_THRESH(4),		0x40 },
+	{ CAP11XX_REG_SENSOR_THRESH(5),		0x40 },
+	{ CAP11XX_REG_SENSOR_NOISE_THRESH,	0x01 },
+	{ CAP11XX_REG_STANDBY_CHANNEL,		0x00 },
+	{ CAP11XX_REG_STANDBY_CONFIG,		0x39 },
+	{ CAP11XX_REG_STANDBY_SENSITIVITY,	0x02 },
+	{ CAP11XX_REG_STANDBY_THRESH,		0x40 },
+	{ CAP11XX_REG_CONFIG2,			0x40 },
+	{ CAP11XX_REG_SENSOR_CALIB_LSB1,	0x00 },
+	{ CAP11XX_REG_SENSOR_CALIB_LSB2,	0x00 },
+};
+
+/*
+ * regmap callback: return true for registers that must always be read from
+ * the chip instead of the register cache. CAP1188 has eight channels, so
+ * all eight sensor delta registers are covered, not only the first six.
+ */
+static bool cap11xx_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CAP11XX_REG_MAIN_CONTROL:
+	case CAP11XX_REG_SENSOR_INPUT:
+	case CAP11XX_REG_SENOR_DELTA(0) ... CAP11XX_REG_SENOR_DELTA(7):
+	case CAP11XX_REG_PRODUCT_ID:
+	case CAP11XX_REG_MANUFACTURER_ID:
+	case CAP11XX_REG_REVISION:
+		return true;
+	}
+
+	return false;
+}
+
+static const struct regmap_config cap11xx_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = CAP11XX_REG_REVISION,
+	.reg_defaults = cap11xx_reg_defaults,
+
+	.num_reg_defaults = ARRAY_SIZE(cap11xx_reg_defaults),
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = cap11xx_volatile_reg,
+};
+
+/* Threaded IRQ handler: ack the interrupt, then report every channel. */
+static irqreturn_t cap11xx_thread_func(int irq_num, void *data)
+{
+	struct cap11xx_priv *priv = data;
+	struct regmap *regmap = priv->regmap;
+	struct input_dev *idev = priv->idev;
+	unsigned int status;
+	int chn;
+
+	/*
+	 * The interrupt has to be deasserted before the status register is
+	 * read; the status would not carry valid values otherwise.
+	 */
+	if (regmap_update_bits(regmap, CAP11XX_REG_MAIN_CONTROL, 1, 0) < 0)
+		return IRQ_HANDLED;
+
+	if (regmap_read(regmap, CAP11XX_REG_SENSOR_INPUT, &status) < 0)
+		return IRQ_HANDLED;
+
+	for (chn = 0; chn < idev->keycodemax; chn++)
+		input_report_key(idev, priv->keycodes[chn],
+				 status & (1 << chn));
+
+	input_sync(idev);
+
+	return IRQ_HANDLED;
+}
+
+static int cap11xx_set_sleep(struct cap11xx_priv *priv, bool sleep)
+{
+	return regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL,
+				  CAP11XX_REG_MAIN_CONTROL_DLSEEP,
+				  sleep ? CAP11XX_REG_MAIN_CONTROL_DLSEEP : 0);
+}
+
+static int cap11xx_input_open(struct input_dev *idev)
+{
+	struct cap11xx_priv *priv = input_get_drvdata(idev);
+
+	return cap11xx_set_sleep(priv, false);
+}
+
+static void cap11xx_input_close(struct input_dev *idev)
+{
+	struct cap11xx_priv *priv = input_get_drvdata(idev);
+
+	cap11xx_set_sleep(priv, true);
+}
+
+/*
+ * Probe: verify the chip's identity against the selected model, apply the
+ * device-tree configuration and register the input device and its IRQ.
+ */
+static int cap11xx_i2c_probe(struct i2c_client *i2c_client,
+			     const struct i2c_device_id *id)
+{
+	struct device *dev = &i2c_client->dev;
+	struct cap11xx_priv *priv;
+	struct device_node *node;
+	const struct cap11xx_hw_model *cap;
+	int i, error, irq, gain = 0;
+	unsigned int val, rev;
+	u32 gain32;
+
+	if (id->driver_data >= ARRAY_SIZE(cap11xx_devices)) {
+		dev_err(dev, "Invalid device ID %lu\n", id->driver_data);
+		return -EINVAL;
+	}
+
+	cap = &cap11xx_devices[id->driver_data];
+	if (!cap->num_channels) {
+		dev_err(dev, "Invalid device configuration\n");
+		return -EINVAL;
+	}
+	/* keycodes[] is a flexible array: size it for this model's channels */
+	priv = devm_kzalloc(dev,
+			    sizeof(*priv) +
+				cap->num_channels * sizeof(priv->keycodes[0]),
+			    GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->regmap = devm_regmap_init_i2c(i2c_client, &cap11xx_regmap_config);
+	if (IS_ERR(priv->regmap))
+		return PTR_ERR(priv->regmap);
+
+	error = regmap_read(priv->regmap, CAP11XX_REG_PRODUCT_ID, &val);
+	if (error)
+		return error;
+
+	if (val != cap->product_id) {
+		dev_err(dev, "Product ID: Got 0x%02x, expected 0x%02x\n",
+			val, cap->product_id);
+		return -ENXIO;
+	}
+
+	error = regmap_read(priv->regmap, CAP11XX_REG_MANUFACTURER_ID, &val);
+	if (error)
+		return error;
+
+	if (val != CAP11XX_MANUFACTURER_ID) {
+		dev_err(dev, "Manufacturer ID: Got 0x%02x, expected 0x%02x\n",
+			val, CAP11XX_MANUFACTURER_ID);
+		return -ENXIO;
+	}
+
+	error = regmap_read(priv->regmap, CAP11XX_REG_REVISION, &rev);
+	if (error < 0)
+		return error;
+
+	dev_info(dev, "CAP11XX detected, revision 0x%02x\n", rev);
+	i2c_set_clientdata(i2c_client, priv);
+	node = dev->of_node;
+
+	if (!of_property_read_u32(node, "microchip,sensor-gain", &gain32)) {
+		if (is_power_of_2(gain32) && gain32 <= 8)
+			gain = ilog2(gain32);
+		else
+			dev_err(dev, "Invalid sensor-gain value %u\n", gain32);
+	}
+
+	if (of_property_read_bool(node, "microchip,irq-active-high")) {
+		error = regmap_update_bits(priv->regmap, CAP11XX_REG_CONFIG2,
+					   CAP11XX_REG_CONFIG2_ALT_POL, 0);
+		if (error)
+			return error;
+	}
+
+	/* Provide some useful defaults */
+	for (i = 0; i < cap->num_channels; i++)
+		priv->keycodes[i] = KEY_A + i;
+	/* Keycodes from the device tree, if any, replace the defaults */
+	of_property_read_u32_array(node, "linux,keycodes",
+				   priv->keycodes, cap->num_channels);
+
+	error = regmap_update_bits(priv->regmap, CAP11XX_REG_MAIN_CONTROL,
+				   CAP11XX_REG_MAIN_CONTROL_GAIN_MASK,
+				   gain << CAP11XX_REG_MAIN_CONTROL_GAIN_SHIFT);
+	if (error)
+		return error;
+
+	/* Disable autorepeat. The Linux input system has its own handling. */
+	error = regmap_write(priv->regmap, CAP11XX_REG_REPEAT_RATE, 0);
+	if (error)
+		return error;
+
+	priv->idev = devm_input_allocate_device(dev);
+	if (!priv->idev)
+		return -ENOMEM;
+
+	priv->idev->name = "CAP11XX capacitive touch sensor";
+	priv->idev->id.bustype = BUS_I2C;
+	priv->idev->evbit[0] = BIT_MASK(EV_KEY);
+
+	if (of_property_read_bool(node, "autorepeat"))
+		__set_bit(EV_REP, priv->idev->evbit);
+
+	for (i = 0; i < cap->num_channels; i++)
+		__set_bit(priv->keycodes[i], priv->idev->keybit);
+
+	__clear_bit(KEY_RESERVED, priv->idev->keybit);
+
+	priv->idev->keycode = priv->keycodes;
+	priv->idev->keycodesize = sizeof(priv->keycodes[0]);
+	priv->idev->keycodemax = cap->num_channels;
+
+	priv->idev->id.vendor = CAP11XX_MANUFACTURER_ID;
+	priv->idev->id.product = cap->product_id;
+	priv->idev->id.version = rev;
+
+	priv->idev->open = cap11xx_input_open;
+	priv->idev->close = cap11xx_input_close;
+
+	input_set_drvdata(priv->idev, priv);
+
+	/*
+	 * Put the device in deep sleep mode for now.
+	 * ->open() will bring it back once it is actually needed.
+	 */
+	cap11xx_set_sleep(priv, true);
+
+	error = input_register_device(priv->idev);
+	if (error)
+		return error;
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (!irq) {
+		dev_err(dev, "Unable to parse or map IRQ\n");
+		return -ENXIO;
+	}
+
+	return devm_request_threaded_irq(dev, irq, NULL, cap11xx_thread_func,
+					 IRQF_ONESHOT, dev_name(dev), priv);
+}
+
+static const struct of_device_id cap11xx_dt_ids[] = {
+	{ .compatible = "microchip,cap1106", },
+	{ .compatible = "microchip,cap1126", },
+	{ .compatible = "microchip,cap1188", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, cap11xx_dt_ids);
+
+static const struct i2c_device_id cap11xx_i2c_ids[] = {
+	{ "cap1106", CAP1106 },
+	{ "cap1126", CAP1126 },
+	{ "cap1188", CAP1188 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, cap11xx_i2c_ids);
+
+static struct i2c_driver cap11xx_i2c_driver = {
+	.driver = {
+		.name	= "cap11xx",
+		.owner	= THIS_MODULE,
+		.of_match_table = cap11xx_dt_ids,
+	},
+	.id_table	= cap11xx_i2c_ids,
+	.probe		= cap11xx_i2c_probe,
+};
+
+module_i2c_driver(cap11xx_i2c_driver);
+
+MODULE_ALIAS("platform:cap11xx");
+MODULE_DESCRIPTION("Microchip CAP11XX driver");
+MODULE_AUTHOR("Daniel Mack <linux@zonque.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 8f3a24e..d4dd78a 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -29,6 +29,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/of_irq.h>
 #include <linux/spinlock.h>
 
 struct gpio_button_data {
@@ -617,27 +618,31 @@ gpio_keys_get_devtree_pdata(struct device *dev)
 
 	i = 0;
 	for_each_child_of_node(node, pp) {
-		int gpio;
+		int gpio = -1;
 		enum of_gpio_flags flags;
 
-		if (!of_find_property(pp, "gpios", NULL)) {
-			pdata->nbuttons--;
-			dev_warn(dev, "Found button without gpios\n");
-			continue;
-		}
+		button = &pdata->buttons[i++];
 
-		gpio = of_get_gpio_flags(pp, 0, &flags);
-		if (gpio < 0) {
-			error = gpio;
-			if (error != -EPROBE_DEFER)
-				dev_err(dev,
-					"Failed to get gpio flags, error: %d\n",
-					error);
-			return ERR_PTR(error);
+		if (!of_find_property(pp, "gpios", NULL)) {
+			button->irq = irq_of_parse_and_map(pp, 0);
+			if (button->irq == 0) {
+				i--;
+				pdata->nbuttons--;
+				dev_warn(dev, "Found button without gpios or irqs\n");
+				continue;
+			}
+		} else {
+			gpio = of_get_gpio_flags(pp, 0, &flags);
+			if (gpio < 0) {
+				error = gpio;
+				if (error != -EPROBE_DEFER)
+					dev_err(dev,
+						"Failed to get gpio flags, error: %d\n",
+						error);
+				return ERR_PTR(error);
+			}
 		}
 
-		button = &pdata->buttons[i++];
-
 		button->gpio = gpio;
 		button->active_low = flags & OF_GPIO_ACTIVE_LOW;
 
diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c
index cb32e2b..21bea52 100644
--- a/drivers/input/keyboard/lm8323.c
+++ b/drivers/input/keyboard/lm8323.c
@@ -616,6 +616,8 @@ static ssize_t lm8323_set_disable(struct device *dev,
 	unsigned int i;
 
 	ret = kstrtouint(buf, 10, &i);
+	if (ret)
+		return ret;
 
 	mutex_lock(&lm->lock);
 	lm->kp_enabled = !i;
diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c
index 8c07937..265d641 100644
--- a/drivers/input/keyboard/lpc32xx-keys.c
+++ b/drivers/input/keyboard/lpc32xx-keys.c
@@ -66,7 +66,6 @@
 struct lpc32xx_kscan_drv {
 	struct input_dev *input;
 	struct clk *clk;
-	struct resource *iores;
 	void __iomem *kscan_base;
 	unsigned int irq;
 
@@ -188,32 +187,27 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	kscandat = kzalloc(sizeof(struct lpc32xx_kscan_drv), GFP_KERNEL);
-	if (!kscandat) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	kscandat = devm_kzalloc(&pdev->dev, sizeof(*kscandat),
+				GFP_KERNEL);
+	if (!kscandat)
 		return -ENOMEM;
-	}
 
 	error = lpc32xx_parse_dt(&pdev->dev, kscandat);
 	if (error) {
 		dev_err(&pdev->dev, "failed to parse device tree\n");
-		goto err_free_mem;
+		return error;
 	}
 
 	keymap_size = sizeof(kscandat->keymap[0]) *
 				(kscandat->matrix_sz << kscandat->row_shift);
-	kscandat->keymap = kzalloc(keymap_size, GFP_KERNEL);
-	if (!kscandat->keymap) {
-		dev_err(&pdev->dev, "could not allocate memory for keymap\n");
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	kscandat->keymap = devm_kzalloc(&pdev->dev, keymap_size, GFP_KERNEL);
+	if (!kscandat->keymap)
+		return -ENOMEM;
 
-	kscandat->input = input = input_allocate_device();
+	kscandat->input = input = devm_input_allocate_device(&pdev->dev);
 	if (!input) {
 		dev_err(&pdev->dev, "failed to allocate input device\n");
-		error = -ENOMEM;
-		goto err_free_keymap;
+		return -ENOMEM;
 	}
 
 	/* Setup key input */
@@ -234,39 +228,26 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 					   kscandat->keymap, kscandat->input);
 	if (error) {
 		dev_err(&pdev->dev, "failed to build keymap\n");
-		goto err_free_input;
+		return error;
 	}
 
 	input_set_drvdata(kscandat->input, kscandat);
 
-	kscandat->iores = request_mem_region(res->start, resource_size(res),
-					     pdev->name);
-	if (!kscandat->iores) {
-		dev_err(&pdev->dev, "failed to request I/O memory\n");
-		error = -EBUSY;
-		goto err_free_input;
-	}
-
-	kscandat->kscan_base = ioremap(kscandat->iores->start,
-				       resource_size(kscandat->iores));
-	if (!kscandat->kscan_base) {
-		dev_err(&pdev->dev, "failed to remap I/O memory\n");
-		error = -EBUSY;
-		goto err_release_memregion;
-	}
+	kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(kscandat->kscan_base))
+		return PTR_ERR(kscandat->kscan_base);
 
 	/* Get the key scanner clock */
-	kscandat->clk = clk_get(&pdev->dev, NULL);
+	kscandat->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(kscandat->clk)) {
 		dev_err(&pdev->dev, "failed to get clock\n");
-		error = PTR_ERR(kscandat->clk);
-		goto err_unmap;
+		return PTR_ERR(kscandat->clk);
 	}
 
 	/* Configure the key scanner */
 	error = clk_prepare_enable(kscandat->clk);
 	if (error)
-		goto err_clk_put;
+		return error;
 
 	writel(kscandat->deb_clks, LPC32XX_KS_DEB(kscandat->kscan_base));
 	writel(kscandat->scan_delay, LPC32XX_KS_SCAN_CTL(kscandat->kscan_base));
@@ -277,52 +258,20 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 	writel(1, LPC32XX_KS_IRQ(kscandat->kscan_base));
 	clk_disable_unprepare(kscandat->clk);
 
-	error = request_irq(irq, lpc32xx_kscan_irq, 0, pdev->name, kscandat);
+	error = devm_request_irq(&pdev->dev, irq, lpc32xx_kscan_irq, 0,
+				 pdev->name, kscandat);
 	if (error) {
 		dev_err(&pdev->dev, "failed to request irq\n");
-		goto err_clk_put;
+		return error;
 	}
 
 	error = input_register_device(kscandat->input);
 	if (error) {
 		dev_err(&pdev->dev, "failed to register input device\n");
-		goto err_free_irq;
+		return error;
 	}
 
 	platform_set_drvdata(pdev, kscandat);
-	return 0;
-
-err_free_irq:
-	free_irq(irq, kscandat);
-err_clk_put:
-	clk_put(kscandat->clk);
-err_unmap:
-	iounmap(kscandat->kscan_base);
-err_release_memregion:
-	release_mem_region(kscandat->iores->start,
-			   resource_size(kscandat->iores));
-err_free_input:
-	input_free_device(kscandat->input);
-err_free_keymap:
-	kfree(kscandat->keymap);
-err_free_mem:
-	kfree(kscandat);
-
-	return error;
-}
-
-static int lpc32xx_kscan_remove(struct platform_device *pdev)
-{
-	struct lpc32xx_kscan_drv *kscandat = platform_get_drvdata(pdev);
-
-	free_irq(platform_get_irq(pdev, 0), kscandat);
-	clk_put(kscandat->clk);
-	iounmap(kscandat->kscan_base);
-	release_mem_region(kscandat->iores->start,
-			   resource_size(kscandat->iores));
-	input_unregister_device(kscandat->input);
-	kfree(kscandat->keymap);
-	kfree(kscandat);
 
 	return 0;
 }
@@ -378,7 +327,6 @@ MODULE_DEVICE_TABLE(of, lpc32xx_kscan_match);
 
 static struct platform_driver lpc32xx_kscan_driver = {
 	.probe		= lpc32xx_kscan_probe,
-	.remove		= lpc32xx_kscan_remove,
 	.driver		= {
 		.name	= DRV_NAME,
 		.pm	= &lpc32xx_kscan_pm_ops,
diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c
index 009c822..3aa2ec4 100644
--- a/drivers/input/keyboard/mpr121_touchkey.c
+++ b/drivers/input/keyboard/mpr121_touchkey.c
@@ -214,13 +214,14 @@ static int mpr_touchkey_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
-	mpr121 = kzalloc(sizeof(struct mpr121_touchkey), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!mpr121 || !input_dev) {
-		dev_err(&client->dev, "Failed to allocate memory\n");
-		error = -ENOMEM;
-		goto err_free_mem;
-	}
+	mpr121 = devm_kzalloc(&client->dev, sizeof(*mpr121),
+			      GFP_KERNEL);
+	if (!mpr121)
+		return -ENOMEM;
+
+	input_dev = devm_input_allocate_device(&client->dev);
+	if (!input_dev)
+		return -ENOMEM;
 
 	mpr121->client = client;
 	mpr121->input_dev = input_dev;
@@ -243,44 +244,26 @@ static int mpr_touchkey_probe(struct i2c_client *client,
 	error = mpr121_phys_init(pdata, mpr121, client);
 	if (error) {
 		dev_err(&client->dev, "Failed to init register\n");
-		goto err_free_mem;
+		return error;
 	}
 
-	error = request_threaded_irq(client->irq, NULL,
+	error = devm_request_threaded_irq(&client->dev, client->irq, NULL,
 				     mpr_touchkey_interrupt,
 				     IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				     client->dev.driver->name, mpr121);
 	if (error) {
 		dev_err(&client->dev, "Failed to register interrupt\n");
-		goto err_free_mem;
+		return error;
 	}
 
 	error = input_register_device(input_dev);
 	if (error)
-		goto err_free_irq;
+		return error;
 
 	i2c_set_clientdata(client, mpr121);
 	device_init_wakeup(&client->dev, pdata->wakeup);
 
 	return 0;
-
-err_free_irq:
-	free_irq(client->irq, mpr121);
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(mpr121);
-	return error;
-}
-
-static int mpr_touchkey_remove(struct i2c_client *client)
-{
-	struct mpr121_touchkey *mpr121 = i2c_get_clientdata(client);
-
-	free_irq(client->irq, mpr121);
-	input_unregister_device(mpr121->input_dev);
-	kfree(mpr121);
-
-	return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -327,7 +310,6 @@ static struct i2c_driver mpr_touchkey_driver = {
 	},
 	.id_table	= mpr121_id,
 	.probe		= mpr_touchkey_probe,
-	.remove		= mpr_touchkey_remove,
 };
 
 module_i2c_driver(mpr_touchkey_driver);
diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c
index 6ab3e7c..a90d6bd 100644
--- a/drivers/input/keyboard/pxa27x_keypad.c
+++ b/drivers/input/keyboard/pxa27x_keypad.c
@@ -741,37 +741,27 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	keypad = kzalloc(sizeof(struct pxa27x_keypad), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!keypad || !input_dev) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
-		error = -ENOMEM;
-		goto failed_free;
-	}
+	keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad),
+			      GFP_KERNEL);
+	if (!keypad)
+		return -ENOMEM;
+
+	input_dev = devm_input_allocate_device(&pdev->dev);
+	if (!input_dev)
+		return -ENOMEM;
 
 	keypad->pdata = pdata;
 	keypad->input_dev = input_dev;
 	keypad->irq = irq;
 
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "failed to request I/O memory\n");
-		error = -EBUSY;
-		goto failed_free;
-	}
-
-	keypad->mmio_base = ioremap(res->start, resource_size(res));
-	if (keypad->mmio_base == NULL) {
-		dev_err(&pdev->dev, "failed to remap I/O memory\n");
-		error = -ENXIO;
-		goto failed_free_mem;
-	}
+	keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(keypad->mmio_base))
+		return PTR_ERR(keypad->mmio_base);
 
-	keypad->clk = clk_get(&pdev->dev, NULL);
+	keypad->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(keypad->clk)) {
 		dev_err(&pdev->dev, "failed to get keypad clock\n");
-		error = PTR_ERR(keypad->clk);
-		goto failed_free_io;
+		return PTR_ERR(keypad->clk);
 	}
 
 	input_dev->name = pdev->name;
@@ -802,7 +792,7 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
 	}
 	if (error) {
 		dev_err(&pdev->dev, "failed to build keycode\n");
-		goto failed_put_clk;
+		return error;
 	}
 
 	keypad->row_shift = get_count_order(pdata->matrix_key_cols);
@@ -812,61 +802,26 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
 		input_dev->evbit[0] |= BIT_MASK(EV_REL);
 	}
 
-	error = request_irq(irq, pxa27x_keypad_irq_handler, 0,
-			    pdev->name, keypad);
+	error = devm_request_irq(&pdev->dev, irq, pxa27x_keypad_irq_handler,
+				 0, pdev->name, keypad);
 	if (error) {
 		dev_err(&pdev->dev, "failed to request IRQ\n");
-		goto failed_put_clk;
+		return error;
 	}
 
 	/* Register the input device */
 	error = input_register_device(input_dev);
 	if (error) {
 		dev_err(&pdev->dev, "failed to register input device\n");
-		goto failed_free_irq;
+		return error;
 	}
 
 	platform_set_drvdata(pdev, keypad);
 	device_init_wakeup(&pdev->dev, 1);
 
 	return 0;
-
-failed_free_irq:
-	free_irq(irq, keypad);
-failed_put_clk:
-	clk_put(keypad->clk);
-failed_free_io:
-	iounmap(keypad->mmio_base);
-failed_free_mem:
-	release_mem_region(res->start, resource_size(res));
-failed_free:
-	input_free_device(input_dev);
-	kfree(keypad);
-	return error;
 }
 
-static int pxa27x_keypad_remove(struct platform_device *pdev)
-{
-	struct pxa27x_keypad *keypad = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(keypad->irq, keypad);
-	clk_put(keypad->clk);
-
-	input_unregister_device(keypad->input_dev);
-	iounmap(keypad->mmio_base);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
-
-	kfree(keypad);
-
-	return 0;
-}
-
-/* work with hotplug and coldplug */
-MODULE_ALIAS("platform:pxa27x-keypad");
-
 #ifdef CONFIG_OF
 static const struct of_device_id pxa27x_keypad_dt_match[] = {
 	{ .compatible = "marvell,pxa27x-keypad" },
@@ -877,7 +832,6 @@ MODULE_DEVICE_TABLE(of, pxa27x_keypad_dt_match);
 
 static struct platform_driver pxa27x_keypad_driver = {
 	.probe		= pxa27x_keypad_probe,
-	.remove		= pxa27x_keypad_remove,
 	.driver		= {
 		.name	= "pxa27x-keypad",
 		.of_match_table = of_match_ptr(pxa27x_keypad_dt_match),
@@ -888,3 +842,5 @@ module_platform_driver(pxa27x_keypad_driver);
 
 MODULE_DESCRIPTION("PXA27x Keypad Controller Driver");
 MODULE_LICENSE("GPL");
+/* work with hotplug and coldplug */
+MODULE_ALIAS("platform:pxa27x-keypad");
diff --git a/drivers/input/misc/88pm860x_onkey.c b/drivers/input/misc/88pm860x_onkey.c
index cfdca6e..cc87443 100644
--- a/drivers/input/misc/88pm860x_onkey.c
+++ b/drivers/input/misc/88pm860x_onkey.c
@@ -112,8 +112,7 @@ static int pm860x_onkey_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int pm860x_onkey_suspend(struct device *dev)
+static int __maybe_unused pm860x_onkey_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
@@ -122,7 +121,7 @@ static int pm860x_onkey_suspend(struct device *dev)
 		chip->wakeup_flag |= 1 << PM8607_IRQ_ONKEY;
 	return 0;
 }
-static int pm860x_onkey_resume(struct device *dev)
+static int __maybe_unused pm860x_onkey_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
@@ -131,7 +130,6 @@ static int pm860x_onkey_resume(struct device *dev)
 		chip->wakeup_flag &= ~(1 << PM8607_IRQ_ONKEY);
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(pm860x_onkey_pm_ops, pm860x_onkey_suspend, pm860x_onkey_resume);
 
diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
index e0f5225..189bdc8 100644
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -13,17 +13,15 @@
 #include <linux/pm.h>
 #include "ad714x.h"
 
-#ifdef CONFIG_PM_SLEEP
-static int ad714x_i2c_suspend(struct device *dev)
+static int __maybe_unused ad714x_i2c_suspend(struct device *dev)
 {
 	return ad714x_disable(i2c_get_clientdata(to_i2c_client(dev)));
 }
 
-static int ad714x_i2c_resume(struct device *dev)
+static int __maybe_unused ad714x_i2c_resume(struct device *dev)
 {
 	return ad714x_enable(i2c_get_clientdata(to_i2c_client(dev)));
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume);
 
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index 3a90b710..a79e50b 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -16,17 +16,15 @@
 #define AD714x_SPI_CMD_PREFIX      0xE000   /* bits 15:11 */
 #define AD714x_SPI_READ            BIT(10)
 
-#ifdef CONFIG_PM_SLEEP
-static int ad714x_spi_suspend(struct device *dev)
+static int __maybe_unused ad714x_spi_suspend(struct device *dev)
 {
 	return ad714x_disable(spi_get_drvdata(to_spi_device(dev)));
 }
 
-static int ad714x_spi_resume(struct device *dev)
+static int __maybe_unused ad714x_spi_resume(struct device *dev)
 {
 	return ad714x_enable(spi_get_drvdata(to_spi_device(dev)));
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
diff --git a/drivers/input/misc/adxl34x-i2c.c b/drivers/input/misc/adxl34x-i2c.c
index 416f47d..470bfd6 100644
--- a/drivers/input/misc/adxl34x-i2c.c
+++ b/drivers/input/misc/adxl34x-i2c.c
@@ -105,8 +105,7 @@ static int adxl34x_i2c_remove(struct i2c_client *client)
 	return adxl34x_remove(ac);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int adxl34x_i2c_suspend(struct device *dev)
+static int __maybe_unused adxl34x_i2c_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct adxl34x *ac = i2c_get_clientdata(client);
@@ -116,7 +115,7 @@ static int adxl34x_i2c_suspend(struct device *dev)
 	return 0;
 }
 
-static int adxl34x_i2c_resume(struct device *dev)
+static int __maybe_unused adxl34x_i2c_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct adxl34x *ac = i2c_get_clientdata(client);
@@ -125,7 +124,6 @@ static int adxl34x_i2c_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(adxl34x_i2c_pm, adxl34x_i2c_suspend,
 			 adxl34x_i2c_resume);
diff --git a/drivers/input/misc/adxl34x-spi.c b/drivers/input/misc/adxl34x-spi.c
index 76dc067..da6e76b 100644
--- a/drivers/input/misc/adxl34x-spi.c
+++ b/drivers/input/misc/adxl34x-spi.c
@@ -94,8 +94,7 @@ static int adxl34x_spi_remove(struct spi_device *spi)
 	return adxl34x_remove(ac);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int adxl34x_spi_suspend(struct device *dev)
+static int __maybe_unused adxl34x_spi_suspend(struct device *dev)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct adxl34x *ac = spi_get_drvdata(spi);
@@ -105,7 +104,7 @@ static int adxl34x_spi_suspend(struct device *dev)
 	return 0;
 }
 
-static int adxl34x_spi_resume(struct device *dev)
+static int __maybe_unused adxl34x_spi_resume(struct device *dev)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct adxl34x *ac = spi_get_drvdata(spi);
@@ -114,7 +113,6 @@ static int adxl34x_spi_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(adxl34x_spi_pm, adxl34x_spi_suspend,
 			 adxl34x_spi_resume);
diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c
index cab87f5..a364e10 100644
--- a/drivers/input/misc/drv260x.c
+++ b/drivers/input/misc/drv260x.c
@@ -639,8 +639,7 @@ static int drv260x_probe(struct i2c_client *client,
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int drv260x_suspend(struct device *dev)
+static int __maybe_unused drv260x_suspend(struct device *dev)
 {
 	struct drv260x_data *haptics = dev_get_drvdata(dev);
 	int ret = 0;
@@ -672,7 +671,7 @@ out:
 	return ret;
 }
 
-static int drv260x_resume(struct device *dev)
+static int __maybe_unused drv260x_resume(struct device *dev)
 {
 	struct drv260x_data *haptics = dev_get_drvdata(dev);
 	int ret = 0;
@@ -702,7 +701,6 @@ out:
 	mutex_unlock(&haptics->input_dev->mutex);
 	return ret;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(drv260x_pm_ops, drv260x_suspend, drv260x_resume);
 
diff --git a/drivers/input/misc/drv2667.c b/drivers/input/misc/drv2667.c
index 0f43758..a021744 100644
--- a/drivers/input/misc/drv2667.c
+++ b/drivers/input/misc/drv2667.c
@@ -406,8 +406,7 @@ static int drv2667_probe(struct i2c_client *client,
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int drv2667_suspend(struct device *dev)
+static int __maybe_unused drv2667_suspend(struct device *dev)
 {
 	struct drv2667_data *haptics = dev_get_drvdata(dev);
 	int ret = 0;
@@ -436,7 +435,7 @@ out:
 	return ret;
 }
 
-static int drv2667_resume(struct device *dev)
+static int __maybe_unused drv2667_resume(struct device *dev)
 {
 	struct drv2667_data *haptics = dev_get_drvdata(dev);
 	int ret = 0;
@@ -464,7 +463,6 @@ out:
 	mutex_unlock(&haptics->input_dev->mutex);
 	return ret;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(drv2667_pm_ops, drv2667_suspend, drv2667_resume);
 
diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c
index de21e31..0ac176d 100644
--- a/drivers/input/misc/gp2ap002a00f.c
+++ b/drivers/input/misc/gp2ap002a00f.c
@@ -225,8 +225,7 @@ static int gp2a_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int gp2a_suspend(struct device *dev)
+static int __maybe_unused gp2a_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct gp2a_data *dt = i2c_get_clientdata(client);
@@ -244,7 +243,7 @@ static int gp2a_suspend(struct device *dev)
 	return retval;
 }
 
-static int gp2a_resume(struct device *dev)
+static int __maybe_unused gp2a_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct gp2a_data *dt = i2c_get_clientdata(client);
@@ -261,7 +260,6 @@ static int gp2a_resume(struct device *dev)
 
 	return retval;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(gp2a_pm, gp2a_suspend, gp2a_resume);
 
diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index afed8e2..ac1fa5f 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1851,7 +1851,7 @@ static int ims_pcu_identify_type(struct ims_pcu *pcu, u8 *device_id)
 
 static int ims_pcu_init_application_mode(struct ims_pcu *pcu)
 {
-	static atomic_t device_no = ATOMIC_INIT(0);
+	static atomic_t device_no = ATOMIC_INIT(-1);
 
 	const struct ims_pcu_device_info *info;
 	int error;
@@ -1882,7 +1882,7 @@ static int ims_pcu_init_application_mode(struct ims_pcu *pcu)
 	}
 
 	/* Device appears to be operable, complete initialization */
-	pcu->device_no = atomic_inc_return(&device_no) - 1;
+	pcu->device_no = atomic_inc_return(&device_no);
 
 	/*
 	 * PCU-B devices, both GEN_1 and GEN_2 do not have OFN sensor
diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c
index d708478..6e29349 100644
--- a/drivers/input/misc/kxtj9.c
+++ b/drivers/input/misc/kxtj9.c
@@ -615,8 +615,7 @@ static int kxtj9_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int kxtj9_suspend(struct device *dev)
+static int __maybe_unused kxtj9_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
@@ -631,7 +630,7 @@ static int kxtj9_suspend(struct device *dev)
 	return 0;
 }
 
-static int kxtj9_resume(struct device *dev)
+static int __maybe_unused kxtj9_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct kxtj9_data *tj9 = i2c_get_clientdata(client);
@@ -646,7 +645,6 @@ static int kxtj9_resume(struct device *dev)
 	mutex_unlock(&input_dev->mutex);
 	return retval;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume);
 
diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c
index 034093e..39e930c 100644
--- a/drivers/input/misc/max77693-haptic.c
+++ b/drivers/input/misc/max77693-haptic.c
@@ -309,8 +309,7 @@ static int max77693_haptic_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int max77693_haptic_suspend(struct device *dev)
+static int __maybe_unused max77693_haptic_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct max77693_haptic *haptic = platform_get_drvdata(pdev);
@@ -323,7 +322,7 @@ static int max77693_haptic_suspend(struct device *dev)
 	return 0;
 }
 
-static int max77693_haptic_resume(struct device *dev)
+static int __maybe_unused max77693_haptic_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct max77693_haptic *haptic = platform_get_drvdata(pdev);
@@ -335,7 +334,6 @@ static int max77693_haptic_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(max77693_haptic_pm_ops,
 			 max77693_haptic_suspend, max77693_haptic_resume);
diff --git a/drivers/input/misc/max8925_onkey.c b/drivers/input/misc/max8925_onkey.c
index 297e2a9..7c49b8d 100644
--- a/drivers/input/misc/max8925_onkey.c
+++ b/drivers/input/misc/max8925_onkey.c
@@ -133,8 +133,7 @@ static int max8925_onkey_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int max8925_onkey_suspend(struct device *dev)
+static int __maybe_unused max8925_onkey_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct max8925_onkey_info *info = platform_get_drvdata(pdev);
@@ -148,7 +147,7 @@ static int max8925_onkey_suspend(struct device *dev)
 	return 0;
 }
 
-static int max8925_onkey_resume(struct device *dev)
+static int __maybe_unused max8925_onkey_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct max8925_onkey_info *info = platform_get_drvdata(pdev);
@@ -161,7 +160,6 @@ static int max8925_onkey_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(max8925_onkey_pm_ops, max8925_onkey_suspend, max8925_onkey_resume);
 
diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index 5b3154e..d0f6872 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -378,8 +378,7 @@ static int max8997_haptic_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int max8997_haptic_suspend(struct device *dev)
+static int __maybe_unused max8997_haptic_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct max8997_haptic *chip = platform_get_drvdata(pdev);
@@ -388,7 +387,6 @@ static int max8997_haptic_suspend(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(max8997_haptic_pm_ops, max8997_haptic_suspend, NULL);
 
diff --git a/drivers/input/misc/palmas-pwrbutton.c b/drivers/input/misc/palmas-pwrbutton.c
index 066c5ab..1f9b5ee 100644
--- a/drivers/input/misc/palmas-pwrbutton.c
+++ b/drivers/input/misc/palmas-pwrbutton.c
@@ -260,7 +260,6 @@ static int palmas_pwron_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
 /**
  * palmas_pwron_suspend() - suspend handler
  * @dev:	power button device
@@ -269,7 +268,7 @@ static int palmas_pwron_remove(struct platform_device *pdev)
  *
  * Return: 0
  */
-static int palmas_pwron_suspend(struct device *dev)
+static int __maybe_unused palmas_pwron_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct palmas_pwron *pwron = platform_get_drvdata(pdev);
@@ -290,7 +289,7 @@ static int palmas_pwron_suspend(struct device *dev)
  *
  * Return: 0
  */
-static int palmas_pwron_resume(struct device *dev)
+static int __maybe_unused palmas_pwron_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct palmas_pwron *pwron = platform_get_drvdata(pdev);
@@ -300,7 +299,6 @@ static int palmas_pwron_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(palmas_pwron_pm,
 			 palmas_pwron_suspend, palmas_pwron_resume);
diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c
index e9c77a9..5113877 100644
--- a/drivers/input/misc/pm8xxx-vibrator.c
+++ b/drivers/input/misc/pm8xxx-vibrator.c
@@ -199,8 +199,7 @@ static int pm8xxx_vib_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int pm8xxx_vib_suspend(struct device *dev)
+static int __maybe_unused pm8xxx_vib_suspend(struct device *dev)
 {
 	struct pm8xxx_vib *vib = dev_get_drvdata(dev);
 
@@ -209,7 +208,6 @@ static int pm8xxx_vib_suspend(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(pm8xxx_vib_pm_ops, pm8xxx_vib_suspend, NULL);
 
diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c
index cb79917..c4ca20e 100644
--- a/drivers/input/misc/pmic8xxx-pwrkey.c
+++ b/drivers/input/misc/pmic8xxx-pwrkey.c
@@ -53,8 +53,7 @@ static irqreturn_t pwrkey_release_irq(int irq, void *_pwr)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int pmic8xxx_pwrkey_suspend(struct device *dev)
+static int __maybe_unused pmic8xxx_pwrkey_suspend(struct device *dev)
 {
 	struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev);
 
@@ -64,7 +63,7 @@ static int pmic8xxx_pwrkey_suspend(struct device *dev)
 	return 0;
 }
 
-static int pmic8xxx_pwrkey_resume(struct device *dev)
+static int __maybe_unused pmic8xxx_pwrkey_resume(struct device *dev)
 {
 	struct pmic8xxx_pwrkey *pwrkey = dev_get_drvdata(dev);
 
@@ -73,7 +72,6 @@ static int pmic8xxx_pwrkey_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(pm8xxx_pwr_key_pm_ops,
 		pmic8xxx_pwrkey_suspend, pmic8xxx_pwrkey_resume);
diff --git a/drivers/input/misc/pwm-beeper.c b/drivers/input/misc/pwm-beeper.c
index 294aa48..a28ee70 100644
--- a/drivers/input/misc/pwm-beeper.c
+++ b/drivers/input/misc/pwm-beeper.c
@@ -144,8 +144,7 @@ static int pwm_beeper_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int pwm_beeper_suspend(struct device *dev)
+static int __maybe_unused pwm_beeper_suspend(struct device *dev)
 {
 	struct pwm_beeper *beeper = dev_get_drvdata(dev);
 
@@ -155,7 +154,7 @@ static int pwm_beeper_suspend(struct device *dev)
 	return 0;
 }
 
-static int pwm_beeper_resume(struct device *dev)
+static int __maybe_unused pwm_beeper_resume(struct device *dev)
 {
 	struct pwm_beeper *beeper = dev_get_drvdata(dev);
 
@@ -170,6 +169,7 @@ static int pwm_beeper_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(pwm_beeper_pm_ops,
 			 pwm_beeper_suspend, pwm_beeper_resume);
 
+#ifdef CONFIG_PM_SLEEP
 #define PWM_BEEPER_PM_OPS (&pwm_beeper_pm_ops)
 #else
 #define PWM_BEEPER_PM_OPS NULL
diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c
index 4faf9f8d..9d5b89b 100644
--- a/drivers/input/misc/sirfsoc-onkey.c
+++ b/drivers/input/misc/sirfsoc-onkey.c
@@ -179,8 +179,7 @@ static int sirfsoc_pwrc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int sirfsoc_pwrc_resume(struct device *dev)
+static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev)
 {
 	struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev);
 	struct input_dev *input = pwrcdrv->input;
@@ -196,7 +195,6 @@ static int sirfsoc_pwrc_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume);
 
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index ccd6dd1..fc17b95 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -157,8 +157,7 @@ static void twl4030_vibra_close(struct input_dev *input)
 }
 
 /*** Module ***/
-#ifdef CONFIG_PM_SLEEP
-static int twl4030_vibra_suspend(struct device *dev)
+static int __maybe_unused twl4030_vibra_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct vibra_info *info = platform_get_drvdata(pdev);
@@ -169,12 +168,11 @@ static int twl4030_vibra_suspend(struct device *dev)
 	return 0;
 }
 
-static int twl4030_vibra_resume(struct device *dev)
+static int __maybe_unused twl4030_vibra_resume(struct device *dev)
 {
 	vibra_disable_leds();
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops,
 			 twl4030_vibra_suspend, twl4030_vibra_resume);
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index 96e0e0c..0e0d094 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -236,8 +236,7 @@ static void twl6040_vibra_close(struct input_dev *input)
 	mutex_unlock(&info->mutex);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int twl6040_vibra_suspend(struct device *dev)
+static int __maybe_unused twl6040_vibra_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct vibra_info *info = platform_get_drvdata(pdev);
@@ -251,7 +250,6 @@ static int twl6040_vibra_suspend(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(twl6040_vibra_pm_ops, twl6040_vibra_suspend, NULL);
 
diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 366fc7a..d8b46b0 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -215,6 +215,36 @@ config MOUSE_CYAPA
 	  To compile this driver as a module, choose M here: the module will be
 	  called cyapa.
 
+config MOUSE_ELAN_I2C
+	tristate "ELAN I2C Touchpad support"
+	depends on I2C
+	help
+	  This driver adds support for Elan I2C/SMBus Trackpads.
+
+	  Say Y here if you have an ELAN I2C/SMBus Touchpad.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called elan_i2c.
+
+config MOUSE_ELAN_I2C_I2C
+	bool "Enable I2C support"
+	depends on MOUSE_ELAN_I2C
+	default y
+	help
+	   Say Y here if the Elan Touchpad in your system is connected to
+	   a standard I2C controller.
+
+	   If unsure, say Y.
+
+config MOUSE_ELAN_I2C_SMBUS
+	bool "Enable SMbus support"
+	depends on MOUSE_ELAN_I2C
+	help
+	   Say Y here if the Elan Touchpad in your system is connected to
+	   an SMBus adapter.
+
+	   If unsure, say Y.
+
 config MOUSE_INPORT
 	tristate "InPort/MS/ATIXL busmouse"
 	depends on ISA
diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile
index dda507f..560003d 100644
--- a/drivers/input/mouse/Makefile
+++ b/drivers/input/mouse/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MOUSE_APPLETOUCH)		+= appletouch.o
 obj-$(CONFIG_MOUSE_ATARI)		+= atarimouse.o
 obj-$(CONFIG_MOUSE_BCM5974)		+= bcm5974.o
 obj-$(CONFIG_MOUSE_CYAPA)		+= cyapa.o
+obj-$(CONFIG_MOUSE_ELAN_I2C)		+= elan_i2c.o
 obj-$(CONFIG_MOUSE_GPIO)		+= gpio_mouse.o
 obj-$(CONFIG_MOUSE_INPORT)		+= inport.o
 obj-$(CONFIG_MOUSE_LOGIBM)		+= logibm.o
@@ -34,3 +35,7 @@ psmouse-$(CONFIG_MOUSE_PS2_SENTELIC)	+= sentelic.o
 psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT)	+= trackpoint.o
 psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT)	+= touchkit_ps2.o
 psmouse-$(CONFIG_MOUSE_PS2_CYPRESS)	+= cypress_ps2.o
+
+elan_i2c-objs := elan_i2c_core.o
+elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_I2C)	+= elan_i2c_i2c.o
+elan_i2c-$(CONFIG_MOUSE_ELAN_I2C_SMBUS)	+= elan_i2c_smbus.o
diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
index b409c3d..1bece8c 100644
--- a/drivers/input/mouse/cyapa.c
+++ b/drivers/input/mouse/cyapa.c
@@ -6,7 +6,7 @@
  *   Daniel Kurtz <djkurtz@chromium.org>
  *   Benson Leung <bleung@chromium.org>
  *
- * Copyright (C) 2011-2012 Cypress Semiconductor, Inc.
+ * Copyright (C) 2011-2014 Cypress Semiconductor, Inc.
  * Copyright (C) 2011-2012 Google, Inc.
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -206,7 +206,6 @@ struct cyapa {
 	struct i2c_client *client;
 	struct input_dev *input;
 	char phys[32];	/* device physical location */
-	int irq;
 	bool irq_wake;  /* irq wake is enabled */
 	bool smbus;
 
@@ -422,8 +421,8 @@ static ssize_t cyapa_read_block(struct cyapa *cyapa, u8 cmd_idx, u8 *values)
  */
 static int cyapa_get_state(struct cyapa *cyapa)
 {
-	int ret;
 	u8 status[BL_STATUS_SIZE];
+	int error;
 
 	cyapa->state = CYAPA_STATE_NO_DEVICE;
 
@@ -433,18 +432,18 @@ static int cyapa_get_state(struct cyapa *cyapa)
 	 * If the device is in operation mode, this will be the DATA regs.
 	 *
 	 */
-	ret = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE,
-				       status);
+	error = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE,
+				         status);
 
 	/*
 	 * On smbus systems in OP mode, the i2c_reg_read will fail with
 	 * -ETIMEDOUT.  In this case, try again using the smbus equivalent
 	 * command.  This should return a BL_HEAD indicating CYAPA_STATE_OP.
 	 */
-	if (cyapa->smbus && (ret == -ETIMEDOUT || ret == -ENXIO))
-		ret = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status);
+	if (cyapa->smbus && (error == -ETIMEDOUT || error == -ENXIO))
+		error = cyapa_read_block(cyapa, CYAPA_CMD_BL_STATUS, status);
 
-	if (ret != BL_STATUS_SIZE)
+	if (error != BL_STATUS_SIZE)
 		goto error;
 
 	if ((status[REG_OP_STATUS] & OP_STATUS_SRC) == OP_STATUS_SRC) {
@@ -454,7 +453,7 @@ static int cyapa_get_state(struct cyapa *cyapa)
 			cyapa->state = CYAPA_STATE_OP;
 			break;
 		default:
-			ret = -EAGAIN;
+			error = -EAGAIN;
 			goto error;
 		}
 	} else {
@@ -468,7 +467,7 @@ static int cyapa_get_state(struct cyapa *cyapa)
 
 	return 0;
 error:
-	return (ret < 0) ? ret : -EAGAIN;
+	return (error < 0) ? error : -EAGAIN;
 }
 
 /*
@@ -487,31 +486,31 @@ error:
  */
 static int cyapa_poll_state(struct cyapa *cyapa, unsigned int timeout)
 {
-	int ret;
+	int error;
 	int tries = timeout / 100;
 
-	ret = cyapa_get_state(cyapa);
-	while ((ret || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) {
+	error = cyapa_get_state(cyapa);
+	while ((error || cyapa->state >= CYAPA_STATE_BL_BUSY) && tries--) {
 		msleep(100);
-		ret = cyapa_get_state(cyapa);
+		error = cyapa_get_state(cyapa);
 	}
-	return (ret == -EAGAIN || ret == -ETIMEDOUT) ? -ETIMEDOUT : ret;
+	return (error == -EAGAIN || error == -ETIMEDOUT) ? -ETIMEDOUT : error;
 }
 
 static int cyapa_bl_deactivate(struct cyapa *cyapa)
 {
-	int ret;
+	int error;
 
-	ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate),
-					bl_deactivate);
-	if (ret < 0)
-		return ret;
+	error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_deactivate),
+					  bl_deactivate);
+	if (error)
+		return error;
 
 	/* wait for bootloader to switch to idle state; should take < 100ms */
 	msleep(100);
-	ret = cyapa_poll_state(cyapa, 500);
-	if (ret < 0)
-		return ret;
+	error = cyapa_poll_state(cyapa, 500);
+	if (error)
+		return error;
 	if (cyapa->state != CYAPA_STATE_BL_IDLE)
 		return -EAGAIN;
 	return 0;
@@ -532,11 +531,11 @@ static int cyapa_bl_deactivate(struct cyapa *cyapa)
  */
 static int cyapa_bl_exit(struct cyapa *cyapa)
 {
-	int ret;
+	int error;
 
-	ret = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit);
-	if (ret < 0)
-		return ret;
+	error = cyapa_i2c_reg_write_block(cyapa, 0, sizeof(bl_exit), bl_exit);
+	if (error)
+		return error;
 
 	/*
 	 * Wait for bootloader to exit, and operation mode to start.
@@ -548,9 +547,9 @@ static int cyapa_bl_exit(struct cyapa *cyapa)
 	 * updated to new firmware, it must first calibrate its sensors, which
 	 * can take up to an additional 2 seconds.
 	 */
-	ret = cyapa_poll_state(cyapa, 2000);
-	if (ret < 0)
-		return ret;
+	error = cyapa_poll_state(cyapa, 2000);
+	if (error < 0)
+		return error;
 	if (cyapa->state != CYAPA_STATE_OP)
 		return -EAGAIN;
 
@@ -577,10 +576,13 @@ static int cyapa_set_power_mode(struct cyapa *cyapa, u8 power_mode)
 	power = ret & ~PWR_MODE_MASK;
 	power |= power_mode & PWR_MODE_MASK;
 	ret = cyapa_write_byte(cyapa, CYAPA_CMD_POWER_MODE, power);
-	if (ret < 0)
+	if (ret < 0) {
 		dev_err(dev, "failed to set power_mode 0x%02x err = %d\n",
 			power_mode, ret);
-	return ret;
+		return ret;
+	}
+
+	return 0;
 }
 
 static int cyapa_get_query_data(struct cyapa *cyapa)
@@ -637,28 +639,28 @@ static int cyapa_check_is_operational(struct cyapa *cyapa)
 {
 	struct device *dev = &cyapa->client->dev;
 	static const char unique_str[] = "CYTRA";
-	int ret;
+	int error;
 
-	ret = cyapa_poll_state(cyapa, 2000);
-	if (ret < 0)
-		return ret;
+	error = cyapa_poll_state(cyapa, 2000);
+	if (error)
+		return error;
 	switch (cyapa->state) {
 	case CYAPA_STATE_BL_ACTIVE:
-		ret = cyapa_bl_deactivate(cyapa);
-		if (ret)
-			return ret;
+		error = cyapa_bl_deactivate(cyapa);
+		if (error)
+			return error;
 
 	/* Fallthrough state */
 	case CYAPA_STATE_BL_IDLE:
-		ret = cyapa_bl_exit(cyapa);
-		if (ret)
-			return ret;
+		error = cyapa_bl_exit(cyapa);
+		if (error)
+			return error;
 
 	/* Fallthrough state */
 	case CYAPA_STATE_OP:
-		ret = cyapa_get_query_data(cyapa);
-		if (ret < 0)
-			return ret;
+		error = cyapa_get_query_data(cyapa);
+		if (error)
+			return error;
 
 		/* only support firmware protocol gen3 */
 		if (cyapa->gen != CYAPA_GEN3) {
@@ -753,18 +755,42 @@ static u8 cyapa_check_adapter_functionality(struct i2c_client *client)
 	return ret;
 }
 
+static int cyapa_open(struct input_dev *input)
+{
+	struct cyapa *cyapa = input_get_drvdata(input);
+	struct i2c_client *client = cyapa->client;
+	int error;
+
+	error = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE);
+	if (error) {
+		dev_err(&client->dev, "set active power failed: %d\n", error);
+		return error;
+	}
+
+	enable_irq(client->irq);
+	return 0;
+}
+
+static void cyapa_close(struct input_dev *input)
+{
+	struct cyapa *cyapa = input_get_drvdata(input);
+
+	disable_irq(cyapa->client->irq);
+	cyapa_set_power_mode(cyapa, PWR_MODE_OFF);
+}
+
 static int cyapa_create_input_dev(struct cyapa *cyapa)
 {
 	struct device *dev = &cyapa->client->dev;
-	int ret;
 	struct input_dev *input;
+	int error;
 
 	if (!cyapa->physical_size_x || !cyapa->physical_size_y)
 		return -EINVAL;
 
-	input = cyapa->input = input_allocate_device();
+	input = devm_input_allocate_device(dev);
 	if (!input) {
-		dev_err(dev, "allocate memory for input device failed\n");
+		dev_err(dev, "failed to allocate memory for input device.\n");
 		return -ENOMEM;
 	}
 
@@ -772,14 +798,17 @@ static int cyapa_create_input_dev(struct cyapa *cyapa)
 	input->phys = cyapa->phys;
 	input->id.bustype = BUS_I2C;
 	input->id.version = 1;
-	input->id.product = 0;  /* means any product in eventcomm. */
+	input->id.product = 0;  /* Means any product in eventcomm. */
 	input->dev.parent = &cyapa->client->dev;
 
+	input->open = cyapa_open;
+	input->close = cyapa_close;
+
 	input_set_drvdata(input, cyapa);
 
 	__set_bit(EV_ABS, input->evbit);
 
-	/* finger position */
+	/* Finger position */
 	input_set_abs_params(input, ABS_MT_POSITION_X, 0, cyapa->max_abs_x, 0,
 			     0);
 	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, cyapa->max_abs_y, 0,
@@ -801,35 +830,25 @@ static int cyapa_create_input_dev(struct cyapa *cyapa)
 	if (cyapa->btn_capability == CAPABILITY_LEFT_BTN_MASK)
 		__set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
 
-	/* handle pointer emulation and unused slots in core */
-	ret = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS,
-				  INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED);
-	if (ret) {
-		dev_err(dev, "allocate memory for MT slots failed, %d\n", ret);
-		goto err_free_device;
+	/* Handle pointer emulation and unused slots in core */
+	error = input_mt_init_slots(input, CYAPA_MAX_MT_SLOTS,
+				    INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED);
+	if (error) {
+		dev_err(dev, "failed to initialize MT slots: %d\n", error);
+		return error;
 	}
 
-	/* Register the device in input subsystem */
-	ret = input_register_device(input);
-	if (ret) {
-		dev_err(dev, "input device register failed, %d\n", ret);
-		goto err_free_device;
-	}
+	cyapa->input = input;
 	return 0;
-
-err_free_device:
-	input_free_device(input);
-	cyapa->input = NULL;
-	return ret;
 }
 
 static int cyapa_probe(struct i2c_client *client,
 		       const struct i2c_device_id *dev_id)
 {
-	int ret;
-	u8 adapter_func;
-	struct cyapa *cyapa;
 	struct device *dev = &client->dev;
+	struct cyapa *cyapa;
+	u8 adapter_func;
+	int error;
 
 	adapter_func = cyapa_check_adapter_functionality(client);
 	if (adapter_func == CYAPA_ADAPTER_FUNC_NONE) {
@@ -837,11 +856,9 @@ static int cyapa_probe(struct i2c_client *client,
 		return -EIO;
 	}
 
-	cyapa = kzalloc(sizeof(struct cyapa), GFP_KERNEL);
-	if (!cyapa) {
-		dev_err(dev, "allocate memory for cyapa failed\n");
+	cyapa = devm_kzalloc(dev, sizeof(struct cyapa), GFP_KERNEL);
+	if (!cyapa)
 		return -ENOMEM;
-	}
 
 	cyapa->gen = CYAPA_GEN3;
 	cyapa->client = client;
@@ -852,67 +869,61 @@ static int cyapa_probe(struct i2c_client *client,
 	/* i2c isn't supported, use smbus */
 	if (adapter_func == CYAPA_ADAPTER_FUNC_SMBUS)
 		cyapa->smbus = true;
+
 	cyapa->state = CYAPA_STATE_NO_DEVICE;
-	ret = cyapa_check_is_operational(cyapa);
-	if (ret) {
-		dev_err(dev, "device not operational, %d\n", ret);
-		goto err_mem_free;
-	}
 
-	ret = cyapa_create_input_dev(cyapa);
-	if (ret) {
-		dev_err(dev, "create input_dev instance failed, %d\n", ret);
-		goto err_mem_free;
+	error = cyapa_check_is_operational(cyapa);
+	if (error) {
+		dev_err(dev, "device not operational, %d\n", error);
+		return error;
 	}
 
-	ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE);
-	if (ret) {
-		dev_err(dev, "set active power failed, %d\n", ret);
-		goto err_unregister_device;
+	/* Power down the device until we need it */
+	error = cyapa_set_power_mode(cyapa, PWR_MODE_OFF);
+	if (error) {
+		dev_err(dev, "failed to quiesce the device: %d\n", error);
+		return error;
 	}
 
-	cyapa->irq = client->irq;
-	ret = request_threaded_irq(cyapa->irq,
-				   NULL,
-				   cyapa_irq,
-				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				   "cyapa",
-				   cyapa);
-	if (ret) {
-		dev_err(dev, "IRQ request failed: %d\n, ", ret);
-		goto err_unregister_device;
+	error = cyapa_create_input_dev(cyapa);
+	if (error)
+		return error;
+
+	error = devm_request_threaded_irq(dev, client->irq,
+					  NULL, cyapa_irq,
+					  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					  "cyapa", cyapa);
+	if (error) {
+		dev_err(dev, "failed to request threaded irq: %d\n", error);
+		return error;
 	}
 
-	return 0;
+	/* Disable IRQ until the device is opened */
+	disable_irq(client->irq);
 
-err_unregister_device:
-	input_unregister_device(cyapa->input);
-err_mem_free:
-	kfree(cyapa);
-
-	return ret;
-}
-
-static int cyapa_remove(struct i2c_client *client)
-{
-	struct cyapa *cyapa = i2c_get_clientdata(client);
-
-	free_irq(cyapa->irq, cyapa);
-	input_unregister_device(cyapa->input);
-	cyapa_set_power_mode(cyapa, PWR_MODE_OFF);
-	kfree(cyapa);
+	/* Register the device in input subsystem */
+	error = input_register_device(cyapa->input);
+	if (error) {
+		dev_err(dev, "failed to register input device: %d\n", error);
+		return error;
+	}
 
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int cyapa_suspend(struct device *dev)
+static int __maybe_unused cyapa_suspend(struct device *dev)
 {
-	int ret;
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cyapa *cyapa = i2c_get_clientdata(client);
+	struct input_dev *input = cyapa->input;
 	u8 power_mode;
-	struct cyapa *cyapa = dev_get_drvdata(dev);
+	int error;
 
-	disable_irq(cyapa->irq);
+	error = mutex_lock_interruptible(&input->mutex);
+	if (error)
+		return error;
+
+	disable_irq(client->irq);	/* re-enabled in cyapa_resume() */
 
 	/*
 	 * Set trackpad device to idle mode if wakeup is allowed,
@@ -920,31 +931,44 @@ static int cyapa_suspend(struct device *dev)
 	 */
 	power_mode = device_may_wakeup(dev) ? PWR_MODE_IDLE
 					    : PWR_MODE_OFF;
-	ret = cyapa_set_power_mode(cyapa, power_mode);
-	if (ret < 0)
-		dev_err(dev, "set power mode failed, %d\n", ret);
+	error = cyapa_set_power_mode(cyapa, power_mode);
+	if (error)	/* logged only: suspend still returns 0 below */
+		dev_err(dev, "suspend: set power mode to %d failed: %d\n",
+			 power_mode, error);
 
 	if (device_may_wakeup(dev))
-		cyapa->irq_wake = (enable_irq_wake(cyapa->irq) == 0);
+		cyapa->irq_wake = (enable_irq_wake(client->irq) == 0);
+
+	mutex_unlock(&input->mutex);
+
 	return 0;
 }
 
-static int cyapa_resume(struct device *dev)
+static int __maybe_unused cyapa_resume(struct device *dev)
 {
-	int ret;
-	struct cyapa *cyapa = dev_get_drvdata(dev);
+	struct i2c_client *client = to_i2c_client(dev);
+	struct cyapa *cyapa = i2c_get_clientdata(client);
+	struct input_dev *input = cyapa->input;
+	u8 power_mode;
+	int error;
+
+	mutex_lock(&input->mutex);	/* keeps input->users stable below */
 
 	if (device_may_wakeup(dev) && cyapa->irq_wake)
-		disable_irq_wake(cyapa->irq);
+		disable_irq_wake(client->irq);
+
+	power_mode = input->users ? PWR_MODE_FULL_ACTIVE : PWR_MODE_OFF;
+	error = cyapa_set_power_mode(cyapa, power_mode);	/* off if unopened */
+	if (error)
+		dev_warn(dev, "resume: set power mode to %d failed: %d\n",
+			 power_mode, error);
+
+	enable_irq(client->irq);	/* pairs with disable_irq() in suspend */
 
-	ret = cyapa_set_power_mode(cyapa, PWR_MODE_FULL_ACTIVE);
-	if (ret)
-		dev_warn(dev, "resume active power failed, %d\n", ret);
+	mutex_unlock(&input->mutex);
 
-	enable_irq(cyapa->irq);
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
 static SIMPLE_DEV_PM_OPS(cyapa_pm_ops, cyapa_suspend, cyapa_resume);
 
@@ -962,7 +986,6 @@ static struct i2c_driver cyapa_driver = {
 	},
 
 	.probe = cyapa_probe,
-	.remove = cyapa_remove,
 	.id_table = cyapa_id_table,
 };
 
diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h
new file mode 100644
index 0000000..2e83862
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c.h
@@ -0,0 +1,86 @@
+/*
+ * Elan I2C/SMBus Touchpad driver
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
+ */
+
+#ifndef _ELAN_I2C_H
+#define _ELAN_I2C_H
+
+#include <linux/types.h>
+
+#define ETP_ENABLE_ABS		0x0001
+#define ETP_ENABLE_CALIBRATE	0x0002
+#define ETP_DISABLE_CALIBRATE	0x0000
+#define ETP_DISABLE_POWER	0x0001
+
+/* IAP firmware handling: image name, layout and status bits */
+#define ETP_FW_NAME		"elan_i2c.bin"
+#define ETP_IAP_START_ADDR	0x0083
+#define ETP_FW_IAP_PAGE_ERR	(1 << 5)
+#define ETP_FW_IAP_INTF_ERR	(1 << 4)
+#define ETP_FW_PAGE_SIZE	64
+#define ETP_FW_PAGE_COUNT	768
+#define ETP_FW_SIZE		(ETP_FW_PAGE_SIZE * ETP_FW_PAGE_COUNT)	/* 49152 */
+
+struct i2c_client;
+struct completion;
+
+enum tp_mode {
+	IAP_MODE = 1,
+	MAIN_MODE
+};
+
+struct elan_transport_ops {	/* per-bus (I2C or SMBus) operations table */
+	int (*initialize)(struct i2c_client *client);
+	int (*sleep_control)(struct i2c_client *, bool sleep);
+	int (*power_control)(struct i2c_client *, bool enable);
+	int (*set_mode)(struct i2c_client *client, u8 mode);
+
+	int (*calibrate)(struct i2c_client *client);
+	int (*calibrate_result)(struct i2c_client *client, u8 *val);
+
+	int (*get_baseline_data)(struct i2c_client *client,
+				 bool max_baseline, u8 *value);
+
+	int (*get_version)(struct i2c_client *client, bool iap, u8 *version);
+	int (*get_sm_version)(struct i2c_client *client, u8 *version);
+	int (*get_checksum)(struct i2c_client *client, bool iap, u16 *csum);
+	int (*get_product_id)(struct i2c_client *client, u8 *id);
+
+	int (*get_max)(struct i2c_client *client,
+		       unsigned int *max_x, unsigned int *max_y);
+	int (*get_resolution)(struct i2c_client *client,
+			      u8 *hw_res_x, u8 *hw_res_y);
+	int (*get_num_traces)(struct i2c_client *client,
+			      unsigned int *x_tracenum,
+			      unsigned int *y_tracenum);
+
+	int (*iap_get_mode)(struct i2c_client *client, enum tp_mode *mode);
+	int (*iap_reset)(struct i2c_client *client);
+
+	int (*prepare_fw_update)(struct i2c_client *client);
+	int (*write_fw_block)(struct i2c_client *client,
+			      const u8 *page, u16 checksum, int idx);
+	int (*finish_fw_update)(struct i2c_client *client,
+				struct completion *reset_done);
+
+	int (*get_report)(struct i2c_client *client, u8 *report);
+};
+
+extern const struct elan_transport_ops elan_smbus_ops, elan_i2c_ops;
+
+#endif /* _ELAN_I2C_H */
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
new file mode 100644
index 0000000..0cb2be4
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -0,0 +1,1137 @@
+/*
+ * Elan I2C/SMBus Touchpad driver
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
+ */
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/input/mt.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/input.h>
+#include <linux/uaccess.h>
+#include <linux/jiffies.h>
+#include <linux/completion.h>
+#include <linux/of.h>
+#include <linux/regulator/consumer.h>
+#include <asm/unaligned.h>
+
+#include "elan_i2c.h"
+
+#define DRIVER_NAME		"elan_i2c"
+#define ELAN_DRIVER_VERSION	"1.5.5"
+#define ETP_PRESSURE_OFFSET	25
+#define ETP_MAX_PRESSURE	255
+#define ETP_FWIDTH_REDUCE	90
+#define ETP_FINGER_WIDTH	15
+#define ETP_RETRY_COUNT		3
+
+#define ETP_MAX_FINGERS		5
+#define ETP_FINGER_DATA_LEN	5
+#define ETP_REPORT_ID		0x5D
+#define ETP_REPORT_ID_OFFSET	2
+#define ETP_TOUCH_INFO_OFFSET	3
+#define ETP_FINGER_DATA_OFFSET	4
+#define ETP_MAX_REPORT_LEN	34
+
+/* The main device structure: per-touchpad driver state */
+struct elan_tp_data {
+	struct i2c_client	*client;
+	struct input_dev	*input;
+	struct regulator	*vcc;	/* "vcc" supply obtained in probe */
+
+	const struct elan_transport_ops *ops;	/* I2C or SMBus transport */
+
+	/* for fw update */
+	struct completion	fw_completion;	/* completed from elan_isr() */
+	bool			in_fw_update;	/* ISR only signals while set */
+
+	struct mutex		sysfs_mutex;	/* held by the sysfs handlers that touch the device */
+
+	unsigned int		max_x;
+	unsigned int		max_y;
+	unsigned int		width_x;	/* max_x / number of x traces */
+	unsigned int		width_y;	/* max_y / number of y traces */
+	unsigned int		x_res;		/* dots/mm */
+	unsigned int		y_res;		/* dots/mm */
+
+	u8			product_id;
+	u8			fw_version;
+	u8			sm_version;
+	u8			iap_version;
+	u16			fw_checksum;
+
+	u8			mode;		/* ETP_ENABLE_* bits passed to set_mode */
+
+	bool			irq_wake;
+
+	u8			min_baseline;
+	u8			max_baseline;
+	bool			baseline_ready;	/* min/max hold a completed acquire */
+};
+
+static int elan_enable_power(struct elan_tp_data *data)
+{
+	int repeat = ETP_RETRY_COUNT;	/* power_control attempts */
+	int error;
+
+	error = regulator_enable(data->vcc);	/* supply first, then the chip */
+	if (error) {
+		dev_err(&data->client->dev,
+			"Failed to enable regulator: %d\n", error);
+		return error;
+	}
+
+	do {
+		error = data->ops->power_control(data->client, true);
+		if (error >= 0)
+			return 0;
+
+		msleep(30);	/* pause before the next attempt */
+	} while (--repeat > 0);
+
+	return error;	/* NOTE(review): vcc is left enabled on this path */
+}
+
+static int elan_disable_power(struct elan_tp_data *data)
+{
+	int repeat = ETP_RETRY_COUNT;	/* power_control attempts */
+	int error;
+
+	do {
+		error = data->ops->power_control(data->client, false);
+		if (!error) {	/* chip is off: now drop the supply */
+			error = regulator_disable(data->vcc);
+			if (error) {
+				dev_err(&data->client->dev,
+					"Failed to disable regulator: %d\n",
+					error);
+				/* Attempt to power the chip back up */
+				data->ops->power_control(data->client, true);
+				break;	/* no retry: return the regulator error */
+			}
+
+			return 0;
+		}
+
+		msleep(30);	/* pause before the next attempt */
+	} while (--repeat > 0);
+
+	return error;
+}
+
+static int elan_sleep(struct elan_tp_data *data)
+{
+	int attempt;
+	int rc = 0;
+
+	/* Retry the sleep request, pausing 30ms after every failed attempt. */
+	for (attempt = 0; attempt < ETP_RETRY_COUNT; attempt++) {
+		rc = data->ops->sleep_control(data->client, true);
+		if (rc == 0)
+			break;
+		msleep(30);
+	}
+
+	return rc;
+}
+
+static int __elan_initialize(struct elan_tp_data *data)
+{
+	struct i2c_client *client = data->client;
+	struct device *dev = &client->dev;
+	int rc;
+
+	/* One pass: transport initialize, enable absolute mode, end sleep. */
+	rc = data->ops->initialize(client);
+	if (rc) {
+		dev_err(dev, "device initialize failed: %d\n", rc);
+		return rc;
+	}
+
+	data->mode |= ETP_ENABLE_ABS;
+	rc = data->ops->set_mode(client, data->mode);
+	if (rc) {
+		dev_err(dev, "failed to switch to absolute mode: %d\n", rc);
+		return rc;
+	}
+
+	rc = data->ops->sleep_control(client, false);
+	if (rc) {
+		dev_err(dev, "failed to wake device up: %d\n", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int elan_initialize(struct elan_tp_data *data)
+{
+	int repeat = ETP_RETRY_COUNT;	/* total attempts, counted once per pass */
+	int error;
+
+	do {
+		error = __elan_initialize(data);
+		if (!error)
+			return 0;
+
+		/* Pause before retrying; the loop condition does the counting. */
+		msleep(30);
+	} while (--repeat > 0);
+
+	return error;
+}
+
+static int elan_query_device_info(struct elan_tp_data *data)
+{
+	struct i2c_client *client = data->client;
+	const struct elan_transport_ops *ops = data->ops;
+	int rc;
+
+	/* Cache the identification values; stop at the first failed query. */
+	rc = ops->get_product_id(client, &data->product_id);
+	if (rc)
+		return rc;
+
+	rc = ops->get_version(client, false, &data->fw_version);
+	if (rc)
+		return rc;
+
+	rc = ops->get_checksum(client, false, &data->fw_checksum);
+	if (rc)
+		return rc;
+
+	rc = ops->get_sm_version(client, &data->sm_version);
+	if (rc)
+		return rc;
+
+	/* The status of the last query is the overall result. */
+	return ops->get_version(client, true,
+				&data->iap_version);
+}
+
+static unsigned int elan_convert_resolution(u8 val)
+{
+	/*
+	 * (signed value from firmware) * 10 + 790 = dpi; the s8 cast keeps the
+	 * sign extension independent of the platform's plain-char signedness.
+	 * We also have to convert dpi to dots/mm (*10/254 to avoid floating
+	 * point).
+	 */
+
+	return ((int)(s8)val * 10 + 790) * 10 / 254;
+}
+
+static int elan_query_device_parameters(struct elan_tp_data *data)
+{
+	unsigned int x_traces, y_traces;
+	u8 hw_x_res, hw_y_res;
+	int error;
+
+	error = data->ops->get_max(data->client, &data->max_x, &data->max_y);
+	if (error)
+		return error;
+
+	error = data->ops->get_num_traces(data->client, &x_traces, &y_traces);
+	if (error || !x_traces || !y_traces)	/* 0 traces would divide by zero */
+		return error ?: -EINVAL;
+
+	data->width_x = data->max_x / x_traces;	/* width of one trace */
+	data->width_y = data->max_y / y_traces;
+
+	error = data->ops->get_resolution(data->client, &hw_x_res, &hw_y_res);
+	if (error)
+		return error;
+
+	data->x_res = elan_convert_resolution(hw_x_res);	/* dots/mm */
+	data->y_res = elan_convert_resolution(hw_y_res);
+
+	return 0;
+}
+
+/*
+ **********************************************************
+ * IAP firmware updater related routines
+ **********************************************************
+ */
+static int elan_write_fw_block(struct elan_tp_data *data,
+			       const u8 *page, u16 checksum, int idx)
+{
+	struct i2c_client *client = data->client;
+	int left;
+	int rc = 0;
+
+	/* Try the page write up to ETP_RETRY_COUNT times, logging failures. */
+	for (left = ETP_RETRY_COUNT; left > 0; left--) {
+		rc = data->ops->write_fw_block(client, page, checksum, idx);
+		if (!rc)
+			break;
+		dev_dbg(&client->dev,
+			"IAP retrying page %d (error: %d)\n", idx, rc);
+	}
+
+	return rc;
+}
+
+static int __elan_update_firmware(struct elan_tp_data *data,
+				  const struct firmware *fw)
+{
+	struct i2c_client *client = data->client;
+	struct device *dev = &client->dev;
+	int i, j;
+	int error;
+	u16 iap_start_addr;
+	u16 boot_page_count;
+	u16 sw_checksum = 0, fw_checksum = 0;	/* computed here vs. read back */
+
+	error = data->ops->prepare_fw_update(client);
+	if (error)
+		return error;
+
+	iap_start_addr = get_unaligned_le16(&fw->data[ETP_IAP_START_ADDR * 2]);
+
+	boot_page_count = (iap_start_addr * 2) / ETP_FW_PAGE_SIZE;
+	for (i = boot_page_count; i < ETP_FW_PAGE_COUNT; i++) {	/* pages below boot_page_count are not written */
+		u16 checksum = 0;
+		const u8 *page = &fw->data[i * ETP_FW_PAGE_SIZE];
+
+		for (j = 0; j < ETP_FW_PAGE_SIZE; j += 2)	/* sum little-endian 16-bit words */
+			checksum += ((page[j + 1] << 8) | page[j]);
+
+		error = elan_write_fw_block(data, page, checksum, i);
+		if (error) {
+			dev_err(dev, "write page %d fail: %d\n", i, error);
+			return error;
+		}
+
+		sw_checksum += checksum;	/* u16 running total over written pages */
+	}
+
+	/* Wait WDT reset and power on reset */
+	msleep(600);
+
+	error = data->ops->finish_fw_update(client, &data->fw_completion);
+	if (error)
+		return error;
+
+	error = data->ops->get_checksum(client, true, &fw_checksum);
+	if (error)
+		return error;
+
+	if (sw_checksum != fw_checksum) {
+		dev_err(dev, "checksum diff sw=[%04X], fw=[%04X]\n",
+			sw_checksum, fw_checksum);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int elan_update_firmware(struct elan_tp_data *data,
+				const struct firmware *fw)
+{
+	struct i2c_client *client = data->client;
+	int retval;
+
+	dev_dbg(&client->dev, "Starting firmware update....\n");
+
+	disable_irq(client->irq);
+	data->in_fw_update = true;	/* elan_isr() now only completes fw_completion */
+
+	retval = __elan_update_firmware(data, fw);
+	if (retval) {
+		dev_err(&client->dev, "firmware update failed: %d\n", retval);
+		data->ops->iap_reset(client);	/* result not checked */
+	} else {
+		/* Reinitialize TP after fw is updated (results are not checked) */
+		elan_initialize(data);
+		elan_query_device_info(data);
+	}
+
+	data->in_fw_update = false;
+	enable_irq(client->irq);
+
+	return retval;	/* status of the update itself only */
+}
+
+/*
+ *******************************************************************
+ * SYSFS attributes
+ *******************************************************************
+ */
+static ssize_t elan_sysfs_read_fw_checksum(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	/* Print the cached firmware checksum as four hex digits. */
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "0x%04x\n", tp->fw_checksum);
+}
+
+static ssize_t elan_sysfs_read_product_id(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	/* Print the cached product id in "<n>.0" form. */
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%d.0\n", tp->product_id);
+}
+
+static ssize_t elan_sysfs_read_fw_ver(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	/* Print the cached firmware version in "<n>.0" form. */
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%d.0\n", tp->fw_version);
+}
+
+static ssize_t elan_sysfs_read_sm_ver(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	/* Print the cached sample version in "<n>.0" form. */
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%d.0\n", tp->sm_version);
+}
+
+static ssize_t elan_sysfs_read_iap_ver(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	/* Print the cached IAP version in "<n>.0" form. */
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+
+	return sprintf(buf, "%d.0\n", tp->iap_version);
+}
+
+static ssize_t elan_sysfs_update_fw(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elan_tp_data *data = i2c_get_clientdata(client);
+	const struct firmware *fw;
+	int error;
+
+	error = request_firmware(&fw, ETP_FW_NAME, dev);	/* written data in buf is not used */
+	if (error) {
+		dev_err(dev, "cannot load firmware %s: %d\n",
+			ETP_FW_NAME, error);
+		return error;
+	}
+
+	/* Firmware must be exactly ETP_FW_PAGE_COUNT * ETP_FW_PAGE_SIZE bytes */
+	if (fw->size != ETP_FW_SIZE) {
+		dev_err(dev, "invalid firmware size = %zu, expected %d.\n",
+			fw->size, ETP_FW_SIZE);
+		error = -EBADF;
+		goto out_release_fw;
+	}
+
+	error = mutex_lock_interruptible(&data->sysfs_mutex);
+	if (error)
+		goto out_release_fw;
+
+	error = elan_update_firmware(data, fw);
+
+	mutex_unlock(&data->sysfs_mutex);
+
+out_release_fw:
+	release_firmware(fw);
+	return error ?: count;	/* whole write consumed on success */
+}
+
+static ssize_t calibrate_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elan_tp_data *data = i2c_get_clientdata(client);
+	int tries = 20;	/* 20 polls, 250ms apart */
+	int retval;
+	int error;
+	u8 val[3];
+
+	retval = mutex_lock_interruptible(&data->sysfs_mutex);
+	if (retval)
+		return retval;
+
+	disable_irq(client->irq);
+
+	data->mode |= ETP_ENABLE_CALIBRATE;
+	retval = data->ops->set_mode(client, data->mode);
+	if (retval) {
+		dev_err(dev, "failed to enable calibration mode: %d\n",
+			retval);
+		goto out;	/* NOTE(review): the calibrate bit stays set in data->mode */
+	}
+
+	retval = data->ops->calibrate(client);
+	if (retval) {
+		dev_err(dev, "failed to start calibration: %d\n",
+			retval);
+		goto out_disable_calibrate;
+	}
+
+	val[0] = 0xff;	/* non-zero, so a failed read never looks like "done" */
+	do {
+		/* Wait 250ms before checking if calibration has completed. */
+		msleep(250);
+
+		retval = data->ops->calibrate_result(client, val);
+		if (retval)
+			dev_err(dev, "failed to check calibration result: %d\n",
+				retval);
+		else if (val[0] == 0)
+			break; /* calibration done */
+
+	} while (--tries);
+
+	if (tries == 0) {	/* every poll failed or never reported done */
+		dev_err(dev, "failed to calibrate. Timeout.\n");
+		retval = -ETIMEDOUT;
+	}
+
+out_disable_calibrate:
+	data->mode &= ~ETP_ENABLE_CALIBRATE;
+	error = data->ops->set_mode(data->client, data->mode);
+	if (error) {
+		dev_err(dev, "failed to disable calibration mode: %d\n",
+			error);
+		if (!retval)	/* keep the first error */
+			retval = error;
+	}
+out:
+	enable_irq(client->irq);
+	mutex_unlock(&data->sysfs_mutex);
+	return retval ?: count;
+}
+
+static ssize_t elan_sysfs_read_mode(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+	enum tp_mode current_mode;
+	int rc;
+
+	/* Query the device under sysfs_mutex; format only on success. */
+	rc = mutex_lock_interruptible(&tp->sysfs_mutex);
+	if (rc != 0)
+		return rc;
+
+	rc = tp->ops->iap_get_mode(tp->client, &current_mode);
+
+	mutex_unlock(&tp->sysfs_mutex);
+
+	if (rc == 0)
+		rc = sprintf(buf, "%d\n", (int)current_mode);
+
+	return rc;
+}
+
+static DEVICE_ATTR(product_id, S_IRUGO, elan_sysfs_read_product_id, NULL);
+static DEVICE_ATTR(firmware_version, S_IRUGO, elan_sysfs_read_fw_ver, NULL);
+static DEVICE_ATTR(sample_version, S_IRUGO, elan_sysfs_read_sm_ver, NULL);
+static DEVICE_ATTR(iap_version, S_IRUGO, elan_sysfs_read_iap_ver, NULL);
+static DEVICE_ATTR(fw_checksum, S_IRUGO, elan_sysfs_read_fw_checksum, NULL);
+static DEVICE_ATTR(mode, S_IRUGO, elan_sysfs_read_mode, NULL);
+static DEVICE_ATTR(update_fw, S_IWUSR, NULL, elan_sysfs_update_fw);
+
+static DEVICE_ATTR_WO(calibrate);
+
+static struct attribute *elan_sysfs_entries[] = {
+	&dev_attr_product_id.attr,
+	&dev_attr_firmware_version.attr,
+	&dev_attr_sample_version.attr,
+	&dev_attr_iap_version.attr,
+	&dev_attr_fw_checksum.attr,
+	&dev_attr_calibrate.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_update_fw.attr,
+	NULL,
+};
+
+static const struct attribute_group elan_sysfs_group = {
+	.attrs = elan_sysfs_entries,
+};
+
+static ssize_t acquire_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elan_tp_data *data = i2c_get_clientdata(client);
+	int error;
+	int retval;
+
+	retval = mutex_lock_interruptible(&data->sysfs_mutex);
+	if (retval)
+		return retval;
+
+	disable_irq(client->irq);
+
+	data->baseline_ready = false;	/* invalidate any earlier reading */
+
+	data->mode |= ETP_ENABLE_CALIBRATE;
+	retval = data->ops->set_mode(data->client, data->mode);
+	if (retval) {
+		dev_err(dev, "Failed to enable calibration mode to get baseline: %d\n",
+			retval);
+		goto out;
+	}
+
+	msleep(250);
+
+	retval = data->ops->get_baseline_data(data->client, true,
+					      &data->max_baseline);
+	if (retval) {
+		dev_err(dev, "Failed to read max baseline from device: %d\n",
+			retval);
+		goto out_disable_calibrate;
+	}
+
+	retval = data->ops->get_baseline_data(data->client, false,
+					      &data->min_baseline);
+	if (retval) {
+		dev_err(dev, "Failed to read min baseline from device: %d\n",
+			retval);
+		goto out_disable_calibrate;
+	}
+
+	data->baseline_ready = true;	/* min_show()/max_show() may report now */
+
+out_disable_calibrate:
+	data->mode &= ~ETP_ENABLE_CALIBRATE;
+	error = data->ops->set_mode(data->client, data->mode);
+	if (error) {
+		dev_err(dev, "Failed to disable calibration mode after acquiring baseline: %d\n",
+			error);
+		if (!retval)	/* keep the first error */
+			retval = error;
+	}
+out:
+	enable_irq(client->irq);
+	mutex_unlock(&data->sysfs_mutex);
+	return retval ?: count;
+}
+
+static ssize_t min_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+	int len;
+
+	/*
+	 * Print the minimum baseline stored by the last successful
+	 * "acquire" write; -ENODATA until one has completed.
+	 */
+	len = mutex_lock_interruptible(&tp->sysfs_mutex);
+	if (len)
+		return len;
+
+	if (tp->baseline_ready)
+		len = snprintf(buf, PAGE_SIZE, "%d", tp->min_baseline);
+	else
+		len = -ENODATA;
+
+	mutex_unlock(&tp->sysfs_mutex);
+	return len;
+}
+
+static ssize_t max_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct elan_tp_data *tp = i2c_get_clientdata(to_i2c_client(dev));
+	int len;
+
+	/*
+	 * Print the maximum baseline stored by the last successful
+	 * "acquire" write; -ENODATA until one has completed.
+	 */
+	len = mutex_lock_interruptible(&tp->sysfs_mutex);
+	if (len)
+		return len;
+
+	if (tp->baseline_ready)
+		len = snprintf(buf, PAGE_SIZE, "%d", tp->max_baseline);
+	else
+		len = -ENODATA;
+
+	mutex_unlock(&tp->sysfs_mutex);
+	return len;
+}
+
+
+static DEVICE_ATTR_WO(acquire);
+static DEVICE_ATTR_RO(min);
+static DEVICE_ATTR_RO(max);
+
+static struct attribute *elan_baseline_sysfs_entries[] = {
+	&dev_attr_acquire.attr,
+	&dev_attr_min.attr,
+	&dev_attr_max.attr,
+	NULL,
+};
+
+static const struct attribute_group elan_baseline_sysfs_group = {
+	.name = "baseline",
+	.attrs = elan_baseline_sysfs_entries,
+};
+
+static const struct attribute_group *elan_sysfs_groups[] = {
+	&elan_sysfs_group,
+	&elan_baseline_sysfs_group,
+	NULL
+};
+
+/*
+ ******************************************************************
+ * Elan isr functions
+ ******************************************************************
+ */
+static void elan_report_contact(struct elan_tp_data *data,
+				int contact_num, bool contact_valid,
+				u8 *finger_data)
+{
+	struct input_dev *input = data->input;
+	unsigned int pos_x, pos_y;
+	unsigned int pressure, mk_x, mk_y;
+	unsigned int area_x, area_y, major, minor, new_pressure;
+
+	/* A valid contact is decoded from 5 bytes; otherwise free the slot. */
+	if (contact_valid) {
+		pos_x = ((finger_data[0] & 0xf0) << 4) |
+						finger_data[1];
+		pos_y = ((finger_data[0] & 0x0f) << 8) |
+						finger_data[2];
+		mk_x = (finger_data[3] & 0x0f);
+		mk_y = (finger_data[3] >> 4);
+		pressure = finger_data[4];
+
+		if (pos_x > data->max_x || pos_y > data->max_y) {
+			dev_dbg(input->dev.parent,
+				"[%d] x=%u y=%u over max (%u, %u)\n",
+				contact_num, pos_x, pos_y,
+				data->max_x, data->max_y);
+			return;	/* drop the out-of-range contact, slot untouched */
+		}
+
+		/*
+		 * To avoid treating a large finger as a palm, reduce the per-trace
+		 * width. NOTE(review): wraps if width < ETP_FWIDTH_REDUCE (unsigned).
+		 */
+		area_x = mk_x * (data->width_x - ETP_FWIDTH_REDUCE);
+		area_y = mk_y * (data->width_y - ETP_FWIDTH_REDUCE);
+
+		major = max(area_x, area_y);
+		minor = min(area_x, area_y);
+
+		new_pressure = pressure + ETP_PRESSURE_OFFSET;
+		if (new_pressure > ETP_MAX_PRESSURE)	/* clamp to the advertised range */
+			new_pressure = ETP_MAX_PRESSURE;
+
+		input_mt_slot(input, contact_num);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+		input_report_abs(input, ABS_MT_POSITION_X, pos_x);
+		input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y);	/* Y is flipped */
+		input_report_abs(input, ABS_MT_PRESSURE, new_pressure);
+		input_report_abs(input, ABS_TOOL_WIDTH, mk_x);
+		input_report_abs(input, ABS_MT_TOUCH_MAJOR, major);
+		input_report_abs(input, ABS_MT_TOUCH_MINOR, minor);
+	} else {
+		input_mt_slot(input, contact_num);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, false);
+	}
+}
+
+static void elan_report_absolute(struct elan_tp_data *data, u8 *packet)
+{
+	struct input_dev *input = data->input;
+	u8 *finger_data = &packet[ETP_FINGER_DATA_OFFSET];
+	int i;
+	u8 tp_info = packet[ETP_TOUCH_INFO_OFFSET];
+	bool contact_valid;
+
+	for (i = 0; i < ETP_MAX_FINGERS; i++) {
+		contact_valid = tp_info & (1U << (3 + i));	/* bits 3..7: contact i present */
+		elan_report_contact(data, i, contact_valid, finger_data);
+
+		if (contact_valid)	/* only present contacts consume a data record */
+			finger_data += ETP_FINGER_DATA_LEN;
+	}
+
+	input_report_key(input, BTN_LEFT, tp_info & 0x01);	/* bit 0: button state */
+	input_mt_report_pointer_emulation(input, true);
+	input_sync(input);
+}
+
+static irqreturn_t elan_isr(int irq, void *dev_id)
+{
+	struct elan_tp_data *data = dev_id;
+	struct device *dev = &data->client->dev;
+	int error;
+	u8 report[ETP_MAX_REPORT_LEN];
+
+	/*
+	 * When the device is connected to the i2c bus, the driver receives an
+	 * interrupt once all IAP page writes complete and must read 0000 to
+	 * confirm that IAP is finished; here we only signal fw_completion.
+	 */
+	if (data->in_fw_update) {
+		complete(&data->fw_completion);
+		goto out;
+	}
+
+	error = data->ops->get_report(data->client, report);
+	if (error)
+		goto out;	/* nothing to report; the read error is not logged here */
+
+	if (report[ETP_REPORT_ID_OFFSET] != ETP_REPORT_ID)
+		dev_err(dev, "invalid report id data (%x)\n",
+			report[ETP_REPORT_ID_OFFSET]);
+	else
+		elan_report_absolute(data, report);
+
+out:
+	return IRQ_HANDLED;	/* returned on every path */
+}
+
+/*
+ ******************************************************************
+ * Elan initialization functions
+ ******************************************************************
+ */
+static int elan_setup_input_device(struct elan_tp_data *data)
+{
+	struct device *dev = &data->client->dev;
+	struct input_dev *input;
+	unsigned int max_width = max(data->width_x, data->width_y);
+	unsigned int min_width = min(data->width_x, data->width_y);
+	int error;
+
+	input = devm_input_allocate_device(dev);	/* allocated only; not registered in this function */
+	if (!input)
+		return -ENOMEM;
+
+	input->name = "Elan Touchpad";
+	input->id.bustype = BUS_I2C;
+	input_set_drvdata(input, data);
+
+	error = input_mt_init_slots(input, ETP_MAX_FINGERS,
+				    INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED);
+	if (error) {
+		dev_err(dev, "failed to initialize MT slots: %d\n", error);
+		return error;
+	}
+
+	__set_bit(EV_ABS, input->evbit);
+	__set_bit(INPUT_PROP_POINTER, input->propbit);
+	__set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
+	__set_bit(BTN_LEFT, input->keybit);
+
+	/* Set up single-touch (ST) parameters */
+	input_set_abs_params(input, ABS_X, 0, data->max_x, 0, 0);
+	input_set_abs_params(input, ABS_Y, 0, data->max_y, 0, 0);
+	input_abs_set_res(input, ABS_X, data->x_res);	/* dots/mm */
+	input_abs_set_res(input, ABS_Y, data->y_res);
+	input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0);
+	input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0);
+
+	/* And multi-touch (MT) parameters */
+	input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0);
+	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, data->max_y, 0, 0);
+	input_abs_set_res(input, ABS_MT_POSITION_X, data->x_res);
+	input_abs_set_res(input, ABS_MT_POSITION_Y, data->y_res);
+	input_set_abs_params(input, ABS_MT_PRESSURE, 0,
+			     ETP_MAX_PRESSURE, 0, 0);
+	input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0,
+			     ETP_FINGER_WIDTH * max_width, 0, 0);
+	input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0,
+			     ETP_FINGER_WIDTH * min_width, 0, 0);
+
+	data->input = input;	/* published only after full setup */
+
+	return 0;
+}
+
+static void elan_disable_regulator(void *_data)
+{
+	struct elan_tp_data *data = _data;	/* cookie passed to devm_add_action() */
+
+	regulator_disable(data->vcc);	/* balances regulator_enable() in elan_probe() */
+}
+
+static void elan_remove_sysfs_groups(void *_data)
+{
+	struct elan_tp_data *data = _data;
+
+	sysfs_remove_groups(&data->client->dev.kobj, elan_sysfs_groups);	/* undoes sysfs_create_groups() in elan_probe() */
+}
+
+static int elan_probe(struct i2c_client *client,
+		      const struct i2c_device_id *dev_id)
+{
+	const struct elan_transport_ops *transport_ops;
+	struct device *dev = &client->dev;
+	struct elan_tp_data *data;
+	unsigned long irqflags;
+	int error;
+
+	if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_I2C) &&
+	    i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		transport_ops = &elan_i2c_ops;
+	} else if (IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_SMBUS) &&
+		   i2c_check_functionality(client->adapter,
+					   I2C_FUNC_SMBUS_BYTE_DATA |
+						I2C_FUNC_SMBUS_BLOCK_DATA |
+						I2C_FUNC_SMBUS_I2C_BLOCK)) {
+		transport_ops = &elan_smbus_ops;
+	} else {
+		dev_err(dev, "not a supported I2C/SMBus adapter\n");
+		return -EIO;
+	}
+
+	data = devm_kzalloc(&client->dev, sizeof(struct elan_tp_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, data);
+
+	data->ops = transport_ops;
+	data->client = client;
+	init_completion(&data->fw_completion);
+	mutex_init(&data->sysfs_mutex);
+
+	data->vcc = devm_regulator_get(&client->dev, "vcc");
+	if (IS_ERR(data->vcc)) {
+		error = PTR_ERR(data->vcc);
+		if (error != -EPROBE_DEFER)
+			dev_err(&client->dev,
+				"Failed to get 'vcc' regulator: %d\n",
+				error);
+		return error;
+	}
+
+	error = regulator_enable(data->vcc);
+	if (error) {
+		dev_err(&client->dev,
+			"Failed to enable regulator: %d\n", error);
+		return error;
+	}
+
+	error = devm_add_action(&client->dev,
+				elan_disable_regulator, data);
+	if (error) {
+		regulator_disable(data->vcc);
+		dev_err(&client->dev,
+			"Failed to add disable regulator action: %d\n",
+			error);
+		return error;
+	}
+
+	/* Initialize the touchpad. */
+	error = elan_initialize(data);
+	if (error)
+		return error;
+
+	error = elan_query_device_info(data);
+	if (error)
+		return error;
+
+	error = elan_query_device_parameters(data);
+	if (error)
+		return error;
+
+	dev_dbg(&client->dev,
+		"Elan Touchpad Information:\n"
+		"    Module product ID:  0x%04x\n"
+		"    Firmware Version:  0x%04x\n"
+		"    Sample Version:  0x%04x\n"
+		"    IAP Version:  0x%04x\n"
+		"    Max ABS X,Y:   %d,%d\n"
+		"    Width X,Y:   %d,%d\n"
+		"    Resolution X,Y:   %d,%d (dots/mm)\n",
+		data->product_id,
+		data->fw_version,
+		data->sm_version,
+		data->iap_version,
+		data->max_x, data->max_y,
+		data->width_x, data->width_y,
+		data->x_res, data->y_res);
+
+	/* Set up input device properties based on queried parameters. */
+	error = elan_setup_input_device(data);
+	if (error)
+		return error;
+
+	/*
+	 * Systems using device tree should set up interrupt via DTS,
+	 * the rest will use the default falling edge interrupts.
+	 */
+	irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING;
+
+	error = devm_request_threaded_irq(&client->dev, client->irq,
+					  NULL, elan_isr,
+					  irqflags | IRQF_ONESHOT,
+					  client->name, data);
+	if (error) {
+		dev_err(&client->dev, "cannot register irq=%d\n", client->irq);
+		return error;
+	}
+
+	error = sysfs_create_groups(&client->dev.kobj, elan_sysfs_groups);
+	if (error) {
+		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
+			error);
+		return error;
+	}
+
+	error = devm_add_action(&client->dev,
+				elan_remove_sysfs_groups, data);
+	if (error) {
+		elan_remove_sysfs_groups(data);
+		dev_err(&client->dev,
+			"Failed to add sysfs cleanup action: %d\n",
+			error);
+		return error;
+	}
+
+	error = input_register_device(data->input);
+	if (error) {
+		dev_err(&client->dev, "failed to register input device: %d\n",
+			error);
+		return error;
+	}
+
+	/*
+	 * Systems using device tree should set up wakeup via DTS,
+	 * the rest will configure device as wakeup source by default.
+	 */
+	if (!client->dev.of_node)
+		device_init_wakeup(&client->dev, true);
+
+	return 0;
+}
+
+static int __maybe_unused elan_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elan_tp_data *data = i2c_get_clientdata(client);
+	int ret;
+
+	/*
+	 * We are taking the mutex to make sure sysfs operations are
+	 * complete before we attempt to bring the device into low[er]
+	 * power mode.
+	 */
+	ret = mutex_lock_interruptible(&data->sysfs_mutex);
+	if (ret)
+		return ret;
+
+	disable_irq(client->irq);
+
+	if (device_may_wakeup(dev)) {
+		ret = elan_sleep(data);
+		/* Enable wake from IRQ */
+		data->irq_wake = (enable_irq_wake(client->irq) == 0);
+	} else {
+		ret = elan_disable_power(data);
+	}
+
+	mutex_unlock(&data->sysfs_mutex);
+	return ret;
+}
+
+static int __maybe_unused elan_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elan_tp_data *data = i2c_get_clientdata(client);
+	int error;
+
+	if (device_may_wakeup(dev) && data->irq_wake) {
+		disable_irq_wake(client->irq);
+		data->irq_wake = false;
+	}
+
+	error = elan_enable_power(data);
+	if (error)
+		dev_err(dev, "power up when resuming failed: %d\n", error);
+
+	error = elan_initialize(data);
+	if (error)
+		dev_err(dev, "initialize when resuming failed: %d\n", error);
+
+	enable_irq(data->client->irq);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(elan_pm_ops, elan_suspend, elan_resume);
+
+static const struct i2c_device_id elan_id[] = {
+	{ DRIVER_NAME, 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, elan_id);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id elan_acpi_id[] = {
+	{ "ELAN0000", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, elan_acpi_id);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id elan_of_match[] = {
+	{ .compatible = "elan,ekth3000" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, elan_of_match);
+#endif
+
+static struct i2c_driver elan_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+		.pm	= &elan_pm_ops,
+		.acpi_match_table = ACPI_PTR(elan_acpi_id),
+		.of_match_table = of_match_ptr(elan_of_match),
+	},
+	.probe		= elan_probe,
+	.id_table	= elan_id,
+};
+
+module_i2c_driver(elan_driver);
+
+MODULE_AUTHOR("Duson Lin <dusonlin@emc.com.tw>");
+MODULE_DESCRIPTION("Elan I2C/SMBus Touchpad driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(ELAN_DRIVER_VERSION);
diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
new file mode 100644
index 0000000..97d4937
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -0,0 +1,611 @@
+/*
+ * Elan I2C/SMBus Touchpad driver - I2C interface
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/unaligned.h>
+
+#include "elan_i2c.h"
+
+/* Elan i2c commands */
+#define ETP_I2C_RESET			0x0100
+#define ETP_I2C_WAKE_UP			0x0800
+#define ETP_I2C_SLEEP			0x0801
+#define ETP_I2C_DESC_CMD		0x0001
+#define ETP_I2C_REPORT_DESC_CMD		0x0002
+#define ETP_I2C_STAND_CMD		0x0005
+#define ETP_I2C_UNIQUEID_CMD		0x0101
+#define ETP_I2C_FW_VERSION_CMD		0x0102
+#define ETP_I2C_SM_VERSION_CMD		0x0103
+#define ETP_I2C_XY_TRACENUM_CMD		0x0105
+#define ETP_I2C_MAX_X_AXIS_CMD		0x0106
+#define ETP_I2C_MAX_Y_AXIS_CMD		0x0107
+#define ETP_I2C_RESOLUTION_CMD		0x0108
+#define ETP_I2C_IAP_VERSION_CMD		0x0110
+#define ETP_I2C_SET_CMD			0x0300
+#define ETP_I2C_POWER_CMD		0x0307
+#define ETP_I2C_FW_CHECKSUM_CMD		0x030F
+#define ETP_I2C_IAP_CTRL_CMD		0x0310
+#define ETP_I2C_IAP_CMD			0x0311
+#define ETP_I2C_IAP_RESET_CMD		0x0314
+#define ETP_I2C_IAP_CHECKSUM_CMD	0x0315
+#define ETP_I2C_CALIBRATE_CMD		0x0316
+#define ETP_I2C_MAX_BASELINE_CMD	0x0317
+#define ETP_I2C_MIN_BASELINE_CMD	0x0318
+
+#define ETP_I2C_REPORT_LEN		34
+#define ETP_I2C_DESC_LENGTH		30
+#define ETP_I2C_REPORT_DESC_LENGTH	158
+#define ETP_I2C_INF_LENGTH		2
+#define ETP_I2C_IAP_PASSWORD		0x1EA5
+#define ETP_I2C_IAP_RESET		0xF0F0
+#define ETP_I2C_MAIN_MODE_ON		(1 << 9)
+#define ETP_I2C_IAP_REG_L		0x01
+#define ETP_I2C_IAP_REG_H		0x06
+
+static int elan_i2c_read_block(struct i2c_client *client,
+			       u16 reg, u8 *val, u16 len)
+{
+	__le16 buf[] = {
+		cpu_to_le16(reg),
+	};
+	struct i2c_msg msgs[] = {
+		{
+			.addr = client->addr,
+			.flags = client->flags & I2C_M_TEN,
+			.len = sizeof(buf),
+			.buf = (u8 *)buf,
+		},
+		{
+			.addr = client->addr,
+			.flags = (client->flags & I2C_M_TEN) | I2C_M_RD,
+			.len = len,
+			.buf = val,
+		}
+	};
+	int ret;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	return ret == ARRAY_SIZE(msgs) ? 0 : (ret < 0 ? ret : -EIO);
+}
+
+static int elan_i2c_read_cmd(struct i2c_client *client, u16 reg, u8 *val)
+{
+	int retval;
+
+	retval = elan_i2c_read_block(client, reg, val, ETP_I2C_INF_LENGTH);
+	if (retval < 0) {
+		dev_err(&client->dev, "reading cmd (0x%04x) fail.\n", reg);
+		return retval;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_write_cmd(struct i2c_client *client, u16 reg, u16 cmd)
+{
+	__le16 buf[] = {
+		cpu_to_le16(reg),
+		cpu_to_le16(cmd),
+	};
+	struct i2c_msg msg = {
+		.addr = client->addr,
+		.flags = client->flags & I2C_M_TEN,
+		.len = sizeof(buf),
+		.buf = (u8 *)buf,
+	};
+	int ret;
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	return ret == 1 ? 0 : (ret < 0 ? ret : -EIO);
+}
+
+static int elan_i2c_initialize(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	int error;
+	u8 val[256];
+
+	error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
+	if (error) {
+		dev_err(dev, "device reset failed: %d\n", error);
+		return error;
+	}
+
+	/* Wait for the device to reset */
+	msleep(100);
+
+	/* get reset acknowledgement 0000 */
+	error = i2c_master_recv(client, val, ETP_I2C_INF_LENGTH);
+	if (error < 0) {
+		dev_err(dev, "failed to read reset response: %d\n", error);
+		return error;
+	}
+
+	error = elan_i2c_read_block(client, ETP_I2C_DESC_CMD,
+				    val, ETP_I2C_DESC_LENGTH);
+	if (error) {
+		dev_err(dev, "cannot get device descriptor: %d\n", error);
+		return error;
+	}
+
+	error = elan_i2c_read_block(client, ETP_I2C_REPORT_DESC_CMD,
+				    val, ETP_I2C_REPORT_DESC_LENGTH);
+	if (error) {
+		dev_err(dev, "fetching report descriptor failed.: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_sleep_control(struct i2c_client *client, bool sleep)
+{
+	return elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD,
+				  sleep ? ETP_I2C_SLEEP : ETP_I2C_WAKE_UP);
+}
+
+static int elan_i2c_power_control(struct i2c_client *client, bool enable)
+{
+	u8 val[2];
+	u16 reg;
+	int error;
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_POWER_CMD, val);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to read current power state: %d\n",
+			error);
+		return error;
+	}
+
+	reg = le16_to_cpup((__le16 *)val);
+	if (enable)
+		reg &= ~ETP_DISABLE_POWER;
+	else
+		reg |= ETP_DISABLE_POWER;
+
+	error = elan_i2c_write_cmd(client, ETP_I2C_POWER_CMD, reg);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to write current power state: %d\n",
+			error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_set_mode(struct i2c_client *client, u8 mode)
+{
+	return elan_i2c_write_cmd(client, ETP_I2C_SET_CMD, mode);
+}
+
+
+static int elan_i2c_calibrate(struct i2c_client *client)
+{
+	return elan_i2c_write_cmd(client, ETP_I2C_CALIBRATE_CMD, 1);
+}
+
+static int elan_i2c_calibrate_result(struct i2c_client *client, u8 *val)
+{
+	return elan_i2c_read_block(client, ETP_I2C_CALIBRATE_CMD, val, 1);
+}
+
+static int elan_i2c_get_baseline_data(struct i2c_client *client,
+				      bool max_baseline, u8 *value)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client,
+				  max_baseline ? ETP_I2C_MAX_BASELINE_CMD :
+						 ETP_I2C_MIN_BASELINE_CMD,
+				  val);
+	if (error)
+		return error;
+
+	*value = le16_to_cpup((__le16 *)val);
+
+	return 0;
+}
+
+static int elan_i2c_get_version(struct i2c_client *client,
+				bool iap, u8 *version)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client,
+				  iap ? ETP_I2C_IAP_VERSION_CMD :
+					ETP_I2C_FW_VERSION_CMD,
+				  val);
+	if (error) {
+		dev_err(&client->dev, "failed to get %s version: %d\n",
+			iap ? "IAP" : "FW", error);
+		return error;
+	}
+
+	*version = val[0];
+	return 0;
+}
+
+static int elan_i2c_get_sm_version(struct i2c_client *client, u8 *version)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_SM_VERSION_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get SM version: %d\n", error);
+		return error;
+	}
+
+	*version = val[0];
+	return 0;
+}
+
+static int elan_i2c_get_product_id(struct i2c_client *client, u8 *id)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_UNIQUEID_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get product ID: %d\n", error);
+		return error;
+	}
+
+	*id = val[0];
+	return 0;
+}
+
+static int elan_i2c_get_checksum(struct i2c_client *client,
+				 bool iap, u16 *csum)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client,
+				  iap ? ETP_I2C_IAP_CHECKSUM_CMD :
+					ETP_I2C_FW_CHECKSUM_CMD,
+				  val);
+	if (error) {
+		dev_err(&client->dev, "failed to get %s checksum: %d\n",
+			iap ? "IAP" : "FW", error);
+		return error;
+	}
+
+	*csum = le16_to_cpup((__le16 *)val);
+	return 0;
+}
+
+static int elan_i2c_get_max(struct i2c_client *client,
+			    unsigned int *max_x, unsigned int *max_y)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_MAX_X_AXIS_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get X dimension: %d\n", error);
+		return error;
+	}
+
+	*max_x = le16_to_cpup((__le16 *)val) & 0x0fff;
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_MAX_Y_AXIS_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get Y dimension: %d\n", error);
+		return error;
+	}
+
+	*max_y = le16_to_cpup((__le16 *)val) & 0x0fff;
+
+	return 0;
+}
+
+static int elan_i2c_get_resolution(struct i2c_client *client,
+				   u8 *hw_res_x, u8 *hw_res_y)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_RESOLUTION_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get resolution: %d\n", error);
+		return error;
+	}
+
+	*hw_res_x = val[0];
+	*hw_res_y = val[1];
+
+	return 0;
+}
+
+static int elan_i2c_get_num_traces(struct i2c_client *client,
+				   unsigned int *x_traces,
+				   unsigned int *y_traces)
+{
+	int error;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_XY_TRACENUM_CMD, val);
+	if (error) {
+		dev_err(&client->dev, "failed to get trace info: %d\n", error);
+		return error;
+	}
+
+	*x_traces = val[0] - 1;
+	*y_traces = val[1] - 1;
+
+	return 0;
+}
+
+static int elan_i2c_iap_get_mode(struct i2c_client *client, enum tp_mode *mode)
+{
+	int error;
+	u16 constant;
+	u8 val[3];
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to read iap control register: %d\n",
+			error);
+		return error;
+	}
+
+	constant = le16_to_cpup((__le16 *)val);
+	dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant);
+
+	*mode = (constant & ETP_I2C_MAIN_MODE_ON) ? MAIN_MODE : IAP_MODE;
+
+	return 0;
+}
+
+static int elan_i2c_iap_reset(struct i2c_client *client)
+{
+	int error;
+
+	error = elan_i2c_write_cmd(client, ETP_I2C_IAP_RESET_CMD,
+				   ETP_I2C_IAP_RESET);
+	if (error) {
+		dev_err(&client->dev, "cannot reset IC: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_set_flash_key(struct i2c_client *client)
+{
+	int error;
+
+	error = elan_i2c_write_cmd(client, ETP_I2C_IAP_CMD,
+				   ETP_I2C_IAP_PASSWORD);
+	if (error) {
+		dev_err(&client->dev, "cannot set flash key: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_prepare_fw_update(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	int error;
+	enum tp_mode mode;
+	u8 val[3];
+	u16 password;
+
+	/* Find out which mode the firmware is in (IAP_MODE/MAIN_MODE). */
+	error = elan_i2c_iap_get_mode(client, &mode);
+	if (error)
+		return error;
+
+	if (mode == IAP_MODE) {
+		/* Already in IAP mode: reset the IC before continuing. */
+		error = elan_i2c_iap_reset(client);
+		if (error)
+			return error;
+
+		msleep(30);
+	}
+
+	/* Set flash key (writes the IAP password to the IAP register). */
+	error = elan_i2c_set_flash_key(client);
+	if (error)
+		return error;
+
+	/* Wait for F/W IAP initialization (longer when in main mode). */
+	msleep(mode == MAIN_MODE ? 100 : 30);
+
+	/* Re-read the mode; still being in main mode is an error. */
+	error = elan_i2c_iap_get_mode(client, &mode);
+	if (error)
+		return error;
+
+	if (mode == MAIN_MODE) {
+		dev_err(dev, "wrong mode: %d\n", mode);
+		return -EIO;
+	}
+
+	/* Set flash key again */
+	error = elan_i2c_set_flash_key(client);
+	if (error)
+		return error;
+
+	/* Wait for F/W IAP initialization */
+	msleep(30);
+
+	/* Read the IAP register back; it must echo the password we wrote. */
+	error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CMD, val);
+	if (error) {
+		dev_err(dev, "cannot read iap password: %d\n",
+			error);
+		return error;
+	}
+
+	password = le16_to_cpup((__le16 *)val);
+	if (password != ETP_I2C_IAP_PASSWORD) {
+		dev_err(dev, "wrong iap password: 0x%X\n", password);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_write_fw_block(struct i2c_client *client,
+				   const u8 *page, u16 checksum, int idx)
+{
+	struct device *dev = &client->dev;
+	u8 page_store[ETP_FW_PAGE_SIZE + 4];
+	u8 val[3];
+	u16 result;
+	int ret, error;
+
+	page_store[0] = ETP_I2C_IAP_REG_L;
+	page_store[1] = ETP_I2C_IAP_REG_H;
+	memcpy(&page_store[2], page, ETP_FW_PAGE_SIZE);
+	/* Record the checksum in the last two bytes, little-endian. */
+	put_unaligned_le16(checksum, &page_store[ETP_FW_PAGE_SIZE + 2]);
+
+	ret = i2c_master_send(client, page_store, sizeof(page_store));
+	if (ret != sizeof(page_store)) {
+		error = ret < 0 ? ret : -EIO;
+		dev_err(dev, "Failed to write page %d: %d\n", idx, error);
+		return error;
+	}
+
+	/* Give the F/W time to write this page before polling the result. */
+	msleep(20);
+
+	error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
+	if (error) {
+		dev_err(dev, "Failed to read IAP write result: %d\n", error);
+		return error;
+	}
+
+	result = le16_to_cpup((__le16 *)val);
+	if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) {
+		dev_err(dev, "IAP reports failed write: %04hx\n",
+			result);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_finish_fw_update(struct i2c_client *client,
+				     struct completion *completion)
+{
+	struct device *dev = &client->dev;
+	long ret;
+	int error;
+	int len;
+	u8 buffer[ETP_I2C_INF_LENGTH];
+
+	reinit_completion(completion);
+	enable_irq(client->irq);
+
+	error = elan_i2c_write_cmd(client, ETP_I2C_STAND_CMD, ETP_I2C_RESET);
+	if (!error)
+		ret = wait_for_completion_interruptible_timeout(completion,
+							msecs_to_jiffies(300));
+	disable_irq(client->irq);
+
+	if (error) {
+		dev_err(dev, "device reset failed: %d\n", error);
+		return error;
+	} else if (ret == 0) {
+		dev_err(dev, "timeout waiting for device reset\n");
+		return -ETIMEDOUT;
+	} else if (ret < 0) {
+		error = ret;
+		dev_err(dev, "error waiting for device reset: %d\n", error);
+		return error;
+	}
+
+	len = i2c_master_recv(client, buffer, ETP_I2C_INF_LENGTH);
+	if (len != ETP_I2C_INF_LENGTH) {
+		error = len < 0 ? len : -EIO;
+		dev_err(dev, "failed to read INT signal: %d (%d)\n",
+			error, len);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_i2c_get_report(struct i2c_client *client, u8 *report)
+{
+	int len;
+
+	len = i2c_master_recv(client, report, ETP_I2C_REPORT_LEN);
+	if (len < 0) {
+		dev_err(&client->dev, "failed to read report data: %d\n", len);
+		return len;
+	}
+
+	if (len != ETP_I2C_REPORT_LEN) {
+		dev_err(&client->dev,
+			"wrong report length (%d vs %d expected)\n",
+			len, ETP_I2C_REPORT_LEN);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+const struct elan_transport_ops elan_i2c_ops = {
+	.initialize		= elan_i2c_initialize,
+	.sleep_control		= elan_i2c_sleep_control,
+	.power_control		= elan_i2c_power_control,
+	.set_mode		= elan_i2c_set_mode,
+
+	.calibrate		= elan_i2c_calibrate,
+	.calibrate_result	= elan_i2c_calibrate_result,
+
+	.get_baseline_data	= elan_i2c_get_baseline_data,
+
+	.get_version		= elan_i2c_get_version,
+	.get_sm_version		= elan_i2c_get_sm_version,
+	.get_product_id		= elan_i2c_get_product_id,
+	.get_checksum		= elan_i2c_get_checksum,
+
+	.get_max		= elan_i2c_get_max,
+	.get_resolution		= elan_i2c_get_resolution,
+	.get_num_traces		= elan_i2c_get_num_traces,
+
+	.iap_get_mode		= elan_i2c_iap_get_mode,
+	.iap_reset		= elan_i2c_iap_reset,
+
+	.prepare_fw_update	= elan_i2c_prepare_fw_update,
+	.write_fw_block		= elan_i2c_write_fw_block,
+	.finish_fw_update	= elan_i2c_finish_fw_update,
+
+	.get_report		= elan_i2c_get_report,
+};
diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c
new file mode 100644
index 0000000..359bf85
--- /dev/null
+++ b/drivers/input/mouse/elan_i2c_smbus.c
@@ -0,0 +1,514 @@
+/*
+ * Elan I2C/SMBus Touchpad driver - SMBus interface
+ *
+ * Copyright (c) 2013 ELAN Microelectronics Corp.
+ *
+ * Author: 林政維 (Duson Lin) <dusonlin@emc.com.tw>
+ * Version: 1.5.5
+ *
+ * Based on cyapa driver:
+ * copyright (c) 2011-2012 Cypress Semiconductor, Inc.
+ * copyright (c) 2011-2012 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Trademarks are the property of their respective owners.
+ */
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include "elan_i2c.h"
+
+/* Elan SMbus commands */
+#define ETP_SMBUS_IAP_CMD		0x00
+#define ETP_SMBUS_ENABLE_TP		0x20
+#define ETP_SMBUS_SLEEP_CMD		0x21
+#define ETP_SMBUS_IAP_PASSWORD_WRITE	0x29
+#define ETP_SMBUS_IAP_PASSWORD_READ	0x80
+#define ETP_SMBUS_WRITE_FW_BLOCK	0x2A
+#define ETP_SMBUS_IAP_RESET_CMD		0x2B
+#define ETP_SMBUS_RANGE_CMD		0xA0
+#define ETP_SMBUS_FW_VERSION_CMD	0xA1
+#define ETP_SMBUS_XY_TRACENUM_CMD	0xA2
+#define ETP_SMBUS_SM_VERSION_CMD	0xA3
+#define ETP_SMBUS_UNIQUEID_CMD		0xA3
+#define ETP_SMBUS_RESOLUTION_CMD	0xA4
+#define ETP_SMBUS_HELLOPACKET_CMD	0xA7
+#define ETP_SMBUS_PACKET_QUERY		0xA8
+#define ETP_SMBUS_IAP_VERSION_CMD	0xAC
+#define ETP_SMBUS_IAP_CTRL_CMD		0xAD
+#define ETP_SMBUS_IAP_CHECKSUM_CMD	0xAE
+#define ETP_SMBUS_FW_CHECKSUM_CMD	0xAF
+#define ETP_SMBUS_MAX_BASELINE_CMD	0xC3
+#define ETP_SMBUS_MIN_BASELINE_CMD	0xC4
+#define ETP_SMBUS_CALIBRATE_QUERY	0xC5
+
+#define ETP_SMBUS_REPORT_LEN		32
+#define ETP_SMBUS_REPORT_OFFSET		2
+#define ETP_SMBUS_HELLOPACKET_LEN	5
+#define ETP_SMBUS_IAP_PASSWORD		0x1234
+#define ETP_SMBUS_IAP_MODE_ON		(1 << 6)
+
+static int elan_smbus_initialize(struct i2c_client *client)
+{
+	u8 check[ETP_SMBUS_HELLOPACKET_LEN] = { 0x55, 0x55, 0x55, 0x55, 0x55 };
+	u8 values[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+	int len, error;
+
+	/* Read the hello packet; any other length (or an errno) is fatal. */
+	len = i2c_smbus_read_block_data(client,
+					ETP_SMBUS_HELLOPACKET_CMD, values);
+	if (len != ETP_SMBUS_HELLOPACKET_LEN) {
+		dev_err(&client->dev, "hello packet length fail: %d\n", len);
+		error = len < 0 ? len : -EIO;
+		return error;
+	}
+
+	/* The hello packet must match the expected 0x55 pattern exactly. */
+	if (memcmp(values, check, ETP_SMBUS_HELLOPACKET_LEN)) {
+		dev_err(&client->dev, "hello packet fail [%*ph]\n",
+			ETP_SMBUS_HELLOPACKET_LEN, values);
+		return -ENXIO;
+	}
+
+	/* Send the enable-touchpad command. */
+	error = i2c_smbus_write_byte(client, ETP_SMBUS_ENABLE_TP);
+	if (error) {
+		dev_err(&client->dev, "failed to enable touchpad: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_smbus_set_mode(struct i2c_client *client, u8 mode)
+{
+	u8 cmd[4] = { 0x00, 0x07, 0x00, mode };
+
+	return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD,
+					  sizeof(cmd), cmd);
+}
+
+static int elan_smbus_sleep_control(struct i2c_client *client, bool sleep)
+{
+	if (sleep)
+		return i2c_smbus_write_byte(client, ETP_SMBUS_SLEEP_CMD);
+	else
+		return 0; /* XXX should we send ETP_SMBUS_ENABLE_TP here? */
+}
+
+static int elan_smbus_power_control(struct i2c_client *client, bool enable)
+{
+	return 0; /* A no-op */
+}
+
+static int elan_smbus_calibrate(struct i2c_client *client)
+{
+	u8 cmd[4] = { 0x00, 0x08, 0x00, 0x01 };
+
+	return i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD,
+					  sizeof(cmd), cmd);
+}
+
+static int elan_smbus_calibrate_result(struct i2c_client *client, u8 *val)
+{
+	int error;
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_CALIBRATE_QUERY, val);
+	if (error < 0)
+		return error;
+
+	return 0;
+}
+
+static int elan_smbus_get_baseline_data(struct i2c_client *client,
+					bool max_baseline, u8 *value)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  max_baseline ?
+						ETP_SMBUS_MAX_BASELINE_CMD :
+						ETP_SMBUS_MIN_BASELINE_CMD,
+					  val);
+	if (error < 0)
+		return error;
+
+	*value = be16_to_cpup((__be16 *)val); /* big-endian word, kept as u8 */
+
+	return 0;
+}
+
+static int elan_smbus_get_version(struct i2c_client *client,
+				  bool iap, u8 *version)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  iap ? ETP_SMBUS_IAP_VERSION_CMD :
+						ETP_SMBUS_FW_VERSION_CMD,
+					  val);
+	if (error < 0) {
+		dev_err(&client->dev, "failed to get %s version: %d\n",
+			iap ? "IAP" : "FW", error);
+		return error;
+	}
+
+	*version = val[2]; /* third reply byte; zero if the reply was short */
+	return 0;
+}
+
+static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *version)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_SM_VERSION_CMD, val);
+	if (error < 0) {
+		dev_err(&client->dev, "failed to get SM version: %d\n", error);
+		return error;
+	}
+
+	*version = val[0]; /* XXX Why 0 and not 2 as in IAP/FW versions? */
+	return 0;
+}
+
+static int elan_smbus_get_product_id(struct i2c_client *client, u8 *id)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_UNIQUEID_CMD, val);
+	if (error < 0) {
+		dev_err(&client->dev, "failed to get product ID: %d\n", error);
+		return error;
+	}
+
+	*id = val[1]; /* second reply byte; zero if the reply was short */
+	return 0;
+}
+
+static int elan_smbus_get_checksum(struct i2c_client *client,
+				   bool iap, u16 *csum)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  iap ? ETP_SMBUS_FW_CHECKSUM_CMD :
+						ETP_SMBUS_IAP_CHECKSUM_CMD,
+					  val);
+	if (error < 0) {
+		dev_err(&client->dev, "failed to get %s checksum: %d\n",
+			iap ? "IAP" : "FW", error);
+		return error;
+	}
+
+	*csum = be16_to_cpup((__be16 *)val); /* first two bytes, big-endian */
+	return 0;
+}
+
+static int elan_smbus_get_max(struct i2c_client *client,
+			      unsigned int *max_x, unsigned int *max_y)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val);
+	if (error < 0) { /* a positive return is the byte count, not an error */
+		dev_err(&client->dev, "failed to get dimensions: %d\n", error);
+		return error;
+	}
+
+	*max_x = (0x0f & val[0]) << 8 | val[1]; /* low nibble: X bits 11..8 */
+	*max_y = (0xf0 & val[0]) << 4 | val[2]; /* high nibble: Y bits 11..8 */
+
+	return 0;
+}
+
+static int elan_smbus_get_resolution(struct i2c_client *client,
+				     u8 *hw_res_x, u8 *hw_res_y)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_RESOLUTION_CMD, val);
+	if (error < 0) { /* a positive return is the byte count, not an error */
+		dev_err(&client->dev, "failed to get resolution: %d\n", error);
+		return error;
+	}
+
+	*hw_res_x = val[1] & 0x0F; /* low nibble of the second reply byte */
+	*hw_res_y = (val[1] & 0xF0) >> 4; /* high nibble of the same byte */
+
+	return 0;
+}
+
+static int elan_smbus_get_num_traces(struct i2c_client *client,
+				     unsigned int *x_traces,
+				     unsigned int *y_traces)
+{
+	int error;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_XY_TRACENUM_CMD, val);
+	if (error < 0) { /* a positive return is the byte count, not an error */
+		dev_err(&client->dev, "failed to get trace info: %d\n", error);
+		return error;
+	}
+
+	*x_traces = val[1] - 1; /* reported counts are stored minus one */
+	*y_traces = val[2] - 1;
+
+	return 0;
+}
+
+static int elan_smbus_iap_get_mode(struct i2c_client *client,
+				   enum tp_mode *mode)
+{
+	int error;
+	u16 constant;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val);
+	if (error < 0) {
+		dev_err(&client->dev, "failed to read iap ctrol register: %d\n",
+			error);
+		return error;
+	}
+
+	constant = be16_to_cpup((__be16 *)val); /* first two bytes, big-endian */
+	dev_dbg(&client->dev, "iap control reg: 0x%04x.\n", constant);
+
+	*mode = (constant & ETP_SMBUS_IAP_MODE_ON) ? IAP_MODE : MAIN_MODE;
+
+	return 0;
+}
+
+static int elan_smbus_iap_reset(struct i2c_client *client)
+{
+	int error;
+
+	error = i2c_smbus_write_byte(client, ETP_SMBUS_IAP_RESET_CMD);
+	if (error) {
+		dev_err(&client->dev, "cannot reset IC: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_smbus_set_flash_key(struct i2c_client *client)
+{
+	int error;
+	u8 cmd[4] = { 0x00, 0x0B, 0x00, 0x5A };
+
+	error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD,
+					   sizeof(cmd), cmd);
+	if (error) {
+		dev_err(&client->dev, "cannot set flash key: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int elan_smbus_prepare_fw_update(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	int len;
+	int error;
+	enum tp_mode mode;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+	u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06};
+	u16 password;
+
+	/* Find out which mode the firmware is in (IAP_MODE/MAIN_MODE). */
+	error = elan_smbus_iap_get_mode(client, &mode);
+	if (error)
+		return error;
+
+	if (mode == MAIN_MODE) {
+		/* set flash key */
+		error = elan_smbus_set_flash_key(client);
+		if (error)
+			return error;
+
+		/* write iap password */
+		if (i2c_smbus_write_byte(client,
+					 ETP_SMBUS_IAP_PASSWORD_WRITE) < 0) {
+			dev_err(dev, "cannot write iap password\n");
+			return -EIO;
+		}
+
+		error = i2c_smbus_write_block_data(client, ETP_SMBUS_IAP_CMD,
+						   sizeof(cmd), cmd);
+		if (error) {
+			dev_err(dev, "failed to write iap password: %d\n",
+				error);
+			return error;
+		}
+
+		/*
+		 * Read back password to make sure we enabled flash
+		 * successfully.
+		 */
+		len = i2c_smbus_read_block_data(client,
+						ETP_SMBUS_IAP_PASSWORD_READ,
+						val);
+		/* Cast: a negative errno must not be compared as size_t. */
+		if (len < (int)sizeof(u16)) {
+			error = len < 0 ? len : -EIO;
+			dev_err(dev, "failed to read iap password: %d\n",
+				error);
+			return error;
+		}
+
+		password = be16_to_cpup((__be16 *)val);
+		if (password != ETP_SMBUS_IAP_PASSWORD) {
+			dev_err(dev, "wrong iap password = 0x%X\n", password);
+			return -EIO;
+		}
+
+		/* Wait 30ms for MAIN_MODE change to IAP_MODE */
+		msleep(30);
+	}
+
+	error = elan_smbus_set_flash_key(client);
+	if (error)
+		return error;
+
+	/* Reset IC */
+	error = elan_smbus_iap_reset(client);
+	if (error)
+		return error;
+
+	return 0;
+}
+
+
+/* Send one F/W page as two block writes, then check the IAP write result. */
+static int elan_smbus_write_fw_block(struct i2c_client *client,
+				     const u8 *page, u16 checksum, int idx)
+{
+	struct device *dev = &client->dev;
+	int error;
+	u16 result;
+	u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* sized for any block read */
+
+	/*
+	 * The SMBus protocol limits a transfer to 32 bytes, so the
+	 * page is sent as two half-page block writes. The checksum
+	 * argument is not used by this transport.
+	 */
+	error = i2c_smbus_write_block_data(client,
+					   ETP_SMBUS_WRITE_FW_BLOCK,
+					   ETP_FW_PAGE_SIZE / 2,
+					   page);
+	if (error) {
+		dev_err(dev, "Failed to write page %d (part %d): %d\n",
+			idx, 1, error);
+		return error;
+	}
+
+	error = i2c_smbus_write_block_data(client,
+					   ETP_SMBUS_WRITE_FW_BLOCK,
+					   ETP_FW_PAGE_SIZE / 2,
+					   page + ETP_FW_PAGE_SIZE / 2);
+	if (error) {
+		dev_err(dev, "Failed to write page %d (part %d): %d\n",
+			idx, 2, error);
+		return error;
+	}
+
+	/* Wait for F/W to update one page ROM data. */
+	usleep_range(8000, 10000);
+
+	error = i2c_smbus_read_block_data(client,
+					  ETP_SMBUS_IAP_CTRL_CMD, val);
+	if (error < 0) {
+		dev_err(dev, "Failed to read IAP write result: %d\n",
+			error);
+		return error;
+	}
+
+	result = be16_to_cpup((__be16 *)val);
+	if (result & (ETP_FW_IAP_PAGE_ERR | ETP_FW_IAP_INTF_ERR)) {
+		dev_err(dev, "IAP reports failed write: %04hx\n",
+			result);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int elan_smbus_get_report(struct i2c_client *client, u8 *report)
+{
+	int len;
+
+	len = i2c_smbus_read_block_data(client,
+					ETP_SMBUS_PACKET_QUERY,
+					&report[ETP_SMBUS_REPORT_OFFSET]);
+	if (len < 0) {
+		dev_err(&client->dev, "failed to read report data: %d\n", len);
+		return len;
+	}
+
+	if (len != ETP_SMBUS_REPORT_LEN) {
+		dev_err(&client->dev,
+			"wrong report length (%d vs %d expected)\n",
+			len, ETP_SMBUS_REPORT_LEN);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int elan_smbus_finish_fw_update(struct i2c_client *client,
+				       struct completion *fw_completion)
+{
+	/* No special handling unlike I2C transport */
+	return 0;
+}
+
+const struct elan_transport_ops elan_smbus_ops = {
+	.initialize		= elan_smbus_initialize,
+	.sleep_control		= elan_smbus_sleep_control,
+	.power_control		= elan_smbus_power_control,
+	.set_mode		= elan_smbus_set_mode,
+
+	.calibrate		= elan_smbus_calibrate,
+	.calibrate_result	= elan_smbus_calibrate_result,
+
+	.get_baseline_data	= elan_smbus_get_baseline_data,
+
+	.get_version		= elan_smbus_get_version,
+	.get_sm_version		= elan_smbus_get_sm_version,
+	.get_product_id		= elan_smbus_get_product_id,
+	.get_checksum		= elan_smbus_get_checksum,
+
+	.get_max		= elan_smbus_get_max,
+	.get_resolution		= elan_smbus_get_resolution,
+	.get_num_traces		= elan_smbus_get_num_traces,
+
+	.iap_get_mode		= elan_smbus_iap_get_mode,
+	.iap_reset		= elan_smbus_iap_reset,
+
+	.prepare_fw_update	= elan_smbus_prepare_fw_update,
+	.write_fw_block		= elan_smbus_write_fw_block,
+	.finish_fw_update	= elan_smbus_finish_fw_update,
+
+	.get_report		= elan_smbus_get_report,
+};
diff --git a/drivers/input/mouse/lifebook.h b/drivers/input/mouse/lifebook.h
index 4c4326c..0baf02a 100644
--- a/drivers/input/mouse/lifebook.h
+++ b/drivers/input/mouse/lifebook.h
@@ -16,14 +16,14 @@ void lifebook_module_init(void);
 int lifebook_detect(struct psmouse *psmouse, bool set_properties);
 int lifebook_init(struct psmouse *psmouse);
 #else
-inline void lifebook_module_init(void)
+static inline void lifebook_module_init(void)
 {
 }
-inline int lifebook_detect(struct psmouse *psmouse, bool set_properties)
+static inline int lifebook_detect(struct psmouse *psmouse, bool set_properties)
 {
 	return -ENOSYS;
 }
-inline int lifebook_init(struct psmouse *psmouse)
+static inline int lifebook_init(struct psmouse *psmouse)
 {
 	return -ENOSYS;
 }
diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c
index 2a0360f..d6e8f58 100644
--- a/drivers/input/mouse/navpoint.c
+++ b/drivers/input/mouse/navpoint.c
@@ -318,8 +318,7 @@ static int navpoint_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int navpoint_suspend(struct device *dev)
+static int __maybe_unused navpoint_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct navpoint *navpoint = platform_get_drvdata(pdev);
@@ -333,7 +332,7 @@ static int navpoint_suspend(struct device *dev)
 	return 0;
 }
 
-static int navpoint_resume(struct device *dev)
+static int __maybe_unused navpoint_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct navpoint *navpoint = platform_get_drvdata(pdev);
@@ -346,7 +345,6 @@ static int navpoint_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(navpoint_pm_ops, navpoint_suspend, navpoint_resume);
 
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index ad82260..878f184 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -614,8 +614,7 @@ static int synaptics_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int synaptics_i2c_suspend(struct device *dev)
+static int __maybe_unused synaptics_i2c_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct synaptics_i2c *touch = i2c_get_clientdata(client);
@@ -628,7 +627,7 @@ static int synaptics_i2c_suspend(struct device *dev)
 	return 0;
 }
 
-static int synaptics_i2c_resume(struct device *dev)
+static int __maybe_unused synaptics_i2c_resume(struct device *dev)
 {
 	int ret;
 	struct i2c_client *client = to_i2c_client(dev);
@@ -643,7 +642,6 @@ static int synaptics_i2c_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(synaptics_i2c_pm, synaptics_i2c_suspend,
 			 synaptics_i2c_resume);
diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c
index 8921c96..131d782 100644
--- a/drivers/input/serio/altera_ps2.c
+++ b/drivers/input/serio/altera_ps2.c
@@ -24,9 +24,7 @@
 
 struct ps2if {
 	struct serio *io;
-	struct resource *iomem_res;
 	void __iomem *base;
-	unsigned irq;
 };
 
 /*
@@ -83,16 +81,34 @@ static void altera_ps2_close(struct serio *io)
 static int altera_ps2_probe(struct platform_device *pdev)
 {
 	struct ps2if *ps2if;
+	struct resource *res;
 	struct serio *serio;
 	int error, irq;
 
-	ps2if = kzalloc(sizeof(struct ps2if), GFP_KERNEL);
-	serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
-	if (!ps2if || !serio) {
-		error = -ENOMEM;
-		goto err_free_mem;
+	ps2if = devm_kzalloc(&pdev->dev, sizeof(struct ps2if), GFP_KERNEL);
+	if (!ps2if)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ps2if->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ps2if->base))
+		return PTR_ERR(ps2if->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENXIO;
+
+	error = devm_request_irq(&pdev->dev, irq, altera_ps2_rxint, 0,
+				 pdev->name, ps2if);
+	if (error) {
+		dev_err(&pdev->dev, "could not request IRQ %d\n", irq);
+		return error;
 	}
 
+	serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
+	if (!serio)
+		return -ENOMEM;
+
 	serio->id.type		= SERIO_8042;
 	serio->write		= altera_ps2_write;
 	serio->open		= altera_ps2_open;
@@ -103,56 +119,12 @@ static int altera_ps2_probe(struct platform_device *pdev)
 	serio->dev.parent	= &pdev->dev;
 	ps2if->io		= serio;
 
-	ps2if->iomem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (ps2if->iomem_res == NULL) {
-		error = -ENOENT;
-		goto err_free_mem;
-	}
-
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		error = -ENXIO;
-		goto err_free_mem;
-	}
-	ps2if->irq = irq;
-
-	if (!request_mem_region(ps2if->iomem_res->start,
-				resource_size(ps2if->iomem_res), pdev->name)) {
-		error = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ps2if->base = ioremap(ps2if->iomem_res->start,
-			      resource_size(ps2if->iomem_res));
-	if (!ps2if->base) {
-		error = -ENOMEM;
-		goto err_free_res;
-	}
-
-	error = request_irq(ps2if->irq, altera_ps2_rxint, 0, pdev->name, ps2if);
-	if (error) {
-		dev_err(&pdev->dev, "could not allocate IRQ %d: %d\n",
-			ps2if->irq, error);
-		goto err_unmap;
-	}
-
-	dev_info(&pdev->dev, "base %p, irq %d\n", ps2if->base, ps2if->irq);
+	dev_info(&pdev->dev, "base %p, irq %d\n", ps2if->base, irq);
 
 	serio_register_port(ps2if->io);
 	platform_set_drvdata(pdev, ps2if);
 
 	return 0;
-
- err_unmap:
-	iounmap(ps2if->base);
- err_free_res:
-	release_mem_region(ps2if->iomem_res->start,
-			   resource_size(ps2if->iomem_res));
- err_free_mem:
-	kfree(ps2if);
-	kfree(serio);
-	return error;
 }
 
 /*
@@ -163,11 +135,6 @@ static int altera_ps2_remove(struct platform_device *pdev)
 	struct ps2if *ps2if = platform_get_drvdata(pdev);
 
 	serio_unregister_port(ps2if->io);
-	free_irq(ps2if->irq, ps2if);
-	iounmap(ps2if->base);
-	release_mem_region(ps2if->iomem_res->start,
-			   resource_size(ps2if->iomem_res));
-	kfree(ps2if);
 
 	return 0;
 }
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index faeeb13..c66d1b5 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -579,6 +579,16 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = {
 		},
 	},
 	{
+		/*
+		 * Intel NUC D54250WYK - does not have i8042 controller but
+		 * declares PS/2 devices in DSDT.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "D54250WYK"),
+			DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
+		},
+	},
+	{
 		/* MSI Wind U-100 */
 		.matches = {
 			DMI_MATCH(DMI_BOARD_NAME, "U-100"),
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index d399b8b..a05a517 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -514,7 +514,7 @@ static void serio_release_port(struct device *dev)
  */
 static void serio_init_port(struct serio *serio)
 {
-	static atomic_t serio_no = ATOMIC_INIT(0);
+	static atomic_t serio_no = ATOMIC_INIT(-1);
 
 	__module_get(THIS_MODULE);
 
@@ -525,7 +525,7 @@ static void serio_init_port(struct serio *serio)
 	mutex_init(&serio->drv_mutex);
 	device_initialize(&serio->dev);
 	dev_set_name(&serio->dev, "serio%lu",
-		     (unsigned long)atomic_inc_return(&serio_no) - 1);
+		     (unsigned long)atomic_inc_return(&serio_no));
 	serio->dev.bus = &serio_bus;
 	serio->dev.release = serio_release_port;
 	serio->dev.groups = serio_device_attr_groups;
diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c
index c9a02fe..71ef5d6 100644
--- a/drivers/input/serio/serio_raw.c
+++ b/drivers/input/serio/serio_raw.c
@@ -292,7 +292,7 @@ static irqreturn_t serio_raw_interrupt(struct serio *serio, unsigned char data,
 
 static int serio_raw_connect(struct serio *serio, struct serio_driver *drv)
 {
-	static atomic_t serio_raw_no = ATOMIC_INIT(0);
+	static atomic_t serio_raw_no = ATOMIC_INIT(-1);
 	struct serio_raw *serio_raw;
 	int err;
 
@@ -303,7 +303,7 @@ static int serio_raw_connect(struct serio *serio, struct serio_driver *drv)
 	}
 
 	snprintf(serio_raw->name, sizeof(serio_raw->name),
-		 "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no) - 1);
+		 "serio_raw%ld", (long)atomic_inc_return(&serio_raw_no));
 	kref_init(&serio_raw->kref);
 	INIT_LIST_HEAD(&serio_raw->client_list);
 	init_waitqueue_head(&serio_raw->wait);
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index e1d8003..5891752 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -295,6 +295,19 @@ config TOUCHSCREEN_FUJITSU
 	  To compile this driver as a module, choose M here: the
 	  module will be called fujitsu-ts.
 
+config TOUCHSCREEN_GOODIX
+	tristate "Goodix I2C touchscreen"
+	depends on I2C && ACPI
+	help
+	  Say Y here if you have the Goodix touchscreen (such as one
+	  installed in Onda v975w tablets) connected to your
+	  system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called goodix.
+
 config TOUCHSCREEN_ILI210X
 	tristate "Ilitek ILI210X based touchscreen"
 	depends on I2C
@@ -334,6 +347,18 @@ config TOUCHSCREEN_GUNZE
 	  To compile this driver as a module, choose M here: the
 	  module will be called gunze.
 
+config TOUCHSCREEN_ELAN
+	tristate "Elan eKTH I2C touchscreen"
+	depends on I2C
+	help
+	  Say Y here if you have an Elan eKTH I2C touchscreen
+	  connected to your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called elants_i2c.
+
 config TOUCHSCREEN_ELO
 	tristate "Elo serial touchscreens"
 	select SERIO
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 090e61c..0242fea 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -31,9 +31,11 @@ obj-$(CONFIG_TOUCHSCREEN_EDT_FT5X06)	+= edt-ft5x06.o
 obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE)	+= hampshire.o
 obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
 obj-$(CONFIG_TOUCHSCREEN_EETI)		+= eeti_ts.o
+obj-$(CONFIG_TOUCHSCREEN_ELAN)		+= elants_i2c.o
 obj-$(CONFIG_TOUCHSCREEN_ELO)		+= elo.o
 obj-$(CONFIG_TOUCHSCREEN_EGALAX)	+= egalax_ts.o
 obj-$(CONFIG_TOUCHSCREEN_FUJITSU)	+= fujitsu_ts.o
+obj-$(CONFIG_TOUCHSCREEN_GOODIX)	+= goodix.o
 obj-$(CONFIG_TOUCHSCREEN_ILI210X)	+= ili210x.o
 obj-$(CONFIG_TOUCHSCREEN_INEXIO)	+= inexio.o
 obj-$(CONFIG_TOUCHSCREEN_INTEL_MID)	+= intel-mid-touch.o
diff --git a/drivers/input/touchscreen/ad7877.c b/drivers/input/touchscreen/ad7877.c
index 523865d..da4e5bb 100644
--- a/drivers/input/touchscreen/ad7877.c
+++ b/drivers/input/touchscreen/ad7877.c
@@ -820,8 +820,7 @@ static int ad7877_remove(struct spi_device *spi)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ad7877_suspend(struct device *dev)
+static int __maybe_unused ad7877_suspend(struct device *dev)
 {
 	struct ad7877 *ts = dev_get_drvdata(dev);
 
@@ -830,7 +829,7 @@ static int ad7877_suspend(struct device *dev)
 	return 0;
 }
 
-static int ad7877_resume(struct device *dev)
+static int __maybe_unused ad7877_resume(struct device *dev)
 {
 	struct ad7877 *ts = dev_get_drvdata(dev);
 
@@ -838,7 +837,6 @@ static int ad7877_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ad7877_pm, ad7877_suspend, ad7877_resume);
 
diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index 1eb9d3c..fec66ad 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -284,8 +284,7 @@ static void ad7879_close(struct input_dev* input)
 		__ad7879_disable(ts);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ad7879_suspend(struct device *dev)
+static int __maybe_unused ad7879_suspend(struct device *dev)
 {
 	struct ad7879 *ts = dev_get_drvdata(dev);
 
@@ -301,7 +300,7 @@ static int ad7879_suspend(struct device *dev)
 	return 0;
 }
 
-static int ad7879_resume(struct device *dev)
+static int __maybe_unused ad7879_resume(struct device *dev)
 {
 	struct ad7879 *ts = dev_get_drvdata(dev);
 
@@ -316,7 +315,6 @@ static int ad7879_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 SIMPLE_DEV_PM_OPS(ad7879_pm_ops, ad7879_suspend, ad7879_resume);
 EXPORT_SYMBOL(ad7879_pm_ops);
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index e57ba52..e4eb8a6 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -883,8 +883,7 @@ static irqreturn_t ads7846_irq(int irq, void *handle)
 	return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ads7846_suspend(struct device *dev)
+static int __maybe_unused ads7846_suspend(struct device *dev)
 {
 	struct ads7846 *ts = dev_get_drvdata(dev);
 
@@ -906,7 +905,7 @@ static int ads7846_suspend(struct device *dev)
 	return 0;
 }
 
-static int ads7846_resume(struct device *dev)
+static int __maybe_unused ads7846_resume(struct device *dev)
 {
 	struct ads7846 *ts = dev_get_drvdata(dev);
 
@@ -927,7 +926,6 @@ static int ads7846_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ads7846_pm, ads7846_suspend, ads7846_resume);
 
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index aaacf8b..bb07020 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -2244,8 +2244,7 @@ static int mxt_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int mxt_suspend(struct device *dev)
+static int __maybe_unused mxt_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mxt_data *data = i2c_get_clientdata(client);
@@ -2261,7 +2260,7 @@ static int mxt_suspend(struct device *dev)
 	return 0;
 }
 
-static int mxt_resume(struct device *dev)
+static int __maybe_unused mxt_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mxt_data *data = i2c_get_clientdata(client);
@@ -2276,7 +2275,6 @@ static int mxt_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume);
 
diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
index 7f3c947..40e02dd 100644
--- a/drivers/input/touchscreen/auo-pixcir-ts.c
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c
@@ -417,8 +417,7 @@ static void auo_pixcir_input_close(struct input_dev *dev)
 	return;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int auo_pixcir_suspend(struct device *dev)
+static int __maybe_unused auo_pixcir_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
@@ -450,7 +449,7 @@ unlock:
 	return ret;
 }
 
-static int auo_pixcir_resume(struct device *dev)
+static int __maybe_unused auo_pixcir_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
@@ -479,7 +478,6 @@ unlock:
 
 	return ret;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(auo_pixcir_pm_ops,
 			 auo_pixcir_suspend, auo_pixcir_resume);
diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c
index 5bf1aee..f2119ee 100644
--- a/drivers/input/touchscreen/cy8ctmg110_ts.c
+++ b/drivers/input/touchscreen/cy8ctmg110_ts.c
@@ -291,8 +291,7 @@ err_free_mem:
 	return err;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int cy8ctmg110_suspend(struct device *dev)
+static int __maybe_unused cy8ctmg110_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
@@ -306,7 +305,7 @@ static int cy8ctmg110_suspend(struct device *dev)
 	return 0;
 }
 
-static int cy8ctmg110_resume(struct device *dev)
+static int __maybe_unused cy8ctmg110_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct cy8ctmg110 *ts = i2c_get_clientdata(client);
@@ -319,7 +318,6 @@ static int cy8ctmg110_resume(struct device *dev)
 	}
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(cy8ctmg110_pm, cy8ctmg110_suspend, cy8ctmg110_resume);
 
diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c
index eee656f..5b74e8b 100644
--- a/drivers/input/touchscreen/cyttsp_core.c
+++ b/drivers/input/touchscreen/cyttsp_core.c
@@ -472,8 +472,7 @@ static int cyttsp_disable(struct cyttsp *ts)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int cyttsp_suspend(struct device *dev)
+static int __maybe_unused cyttsp_suspend(struct device *dev)
 {
 	struct cyttsp *ts = dev_get_drvdata(dev);
 	int retval = 0;
@@ -491,7 +490,7 @@ static int cyttsp_suspend(struct device *dev)
 	return retval;
 }
 
-static int cyttsp_resume(struct device *dev)
+static int __maybe_unused cyttsp_resume(struct device *dev)
 {
 	struct cyttsp *ts = dev_get_drvdata(dev);
 
@@ -507,8 +506,6 @@ static int cyttsp_resume(struct device *dev)
 	return 0;
 }
 
-#endif
-
 SIMPLE_DEV_PM_OPS(cyttsp_pm_ops, cyttsp_suspend, cyttsp_resume);
 EXPORT_SYMBOL_GPL(cyttsp_pm_ops);
 
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index ee3434f..3793fcc 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -1092,8 +1092,7 @@ static int edt_ft5x06_ts_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int edt_ft5x06_ts_suspend(struct device *dev)
+static int __maybe_unused edt_ft5x06_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -1103,7 +1102,7 @@ static int edt_ft5x06_ts_suspend(struct device *dev)
 	return 0;
 }
 
-static int edt_ft5x06_ts_resume(struct device *dev)
+static int __maybe_unused edt_ft5x06_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -1112,7 +1111,6 @@ static int edt_ft5x06_ts_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(edt_ft5x06_ts_pm_ops,
 			 edt_ft5x06_ts_suspend, edt_ft5x06_ts_resume);
diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c
index b1884dd..09be6ce 100644
--- a/drivers/input/touchscreen/eeti_ts.c
+++ b/drivers/input/touchscreen/eeti_ts.c
@@ -264,8 +264,7 @@ static int eeti_ts_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int eeti_ts_suspend(struct device *dev)
+static int __maybe_unused eeti_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct eeti_ts_priv *priv = i2c_get_clientdata(client);
@@ -284,7 +283,7 @@ static int eeti_ts_suspend(struct device *dev)
 	return 0;
 }
 
-static int eeti_ts_resume(struct device *dev)
+static int __maybe_unused eeti_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct eeti_ts_priv *priv = i2c_get_clientdata(client);
@@ -302,7 +301,6 @@ static int eeti_ts_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(eeti_ts_pm, eeti_ts_suspend, eeti_ts_resume);
 
diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c
index c805784..4c56299 100644
--- a/drivers/input/touchscreen/egalax_ts.c
+++ b/drivers/input/touchscreen/egalax_ts.c
@@ -239,8 +239,7 @@ static const struct i2c_device_id egalax_ts_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, egalax_ts_id);
 
-#ifdef CONFIG_PM_SLEEP
-static int egalax_ts_suspend(struct device *dev)
+static int __maybe_unused egalax_ts_suspend(struct device *dev)
 {
 	static const u8 suspend_cmd[MAX_I2C_DATA_LEN] = {
 		0x3, 0x6, 0xa, 0x3, 0x36, 0x3f, 0x2, 0, 0, 0
@@ -252,13 +251,12 @@ static int egalax_ts_suspend(struct device *dev)
 	return ret > 0 ? 0 : ret;
 }
 
-static int egalax_ts_resume(struct device *dev)
+static int __maybe_unused egalax_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
 	return egalax_wake_up_device(client);
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(egalax_ts_pm_ops, egalax_ts_suspend, egalax_ts_resume);
 
diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c
new file mode 100644
index 0000000..a510f7e
--- /dev/null
+++ b/drivers/input/touchscreen/elants_i2c.c
@@ -0,0 +1,1271 @@
+/*
+ * Elan Microelectronics touch panels with I2C interface
+ *
+ * Copyright (C) 2014 Elan Microelectronics Corporation.
+ * Scott Liu <scott.liu@emc.com.tw>
+ *
+ * This code is partly based on hid-multitouch.c:
+ *
+ *  Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr>
+ *  Copyright (c) 2010-2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ *  Copyright (c) 2010-2012 Ecole Nationale de l'Aviation Civile, France
+ *
+ *
+ * This code is partly based on i2c-hid.c:
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ * Copyright (c) 2012 Red Hat, Inc
+ */
+
+/*
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ */
+
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/async.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/buffer_head.h>
+#include <linux/version.h>
+#include <linux/slab.h>
+#include <linux/firmware.h>
+#include <linux/version.h>
+#include <linux/input/mt.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <asm/unaligned.h>
+
+/* Device, Driver information */
+#define DEVICE_NAME	"elants_i2c"
+#define DRV_VERSION	"1.0.9"
+
+/* Convert from rows or columns into resolution */
+#define ELAN_TS_RESOLUTION(n, m)   (((n) - 1) * (m))
+
+/* FW header data */
+#define HEADER_SIZE		4
+#define FW_HDR_TYPE		0
+#define FW_HDR_COUNT		1
+#define FW_HDR_LENGTH		2
+
+/* Buffer mode Queue Header information */
+#define QUEUE_HEADER_SINGLE	0x62
+#define QUEUE_HEADER_NORMAL	0X63
+#define QUEUE_HEADER_WAIT	0x64
+
+/* Command header definition */
+#define CMD_HEADER_WRITE	0x54
+#define CMD_HEADER_READ		0x53
+#define CMD_HEADER_6B_READ	0x5B
+#define CMD_HEADER_RESP		0x52
+#define CMD_HEADER_6B_RESP	0x9B
+#define CMD_HEADER_HELLO	0x55
+#define CMD_HEADER_REK		0x66
+
+/* FW position data */
+#define PACKET_SIZE		55
+#define MAX_CONTACT_NUM		10
+#define FW_POS_HEADER		0
+#define FW_POS_STATE		1
+#define FW_POS_TOTAL		2
+#define FW_POS_XY		3
+#define FW_POS_CHECKSUM		34
+#define FW_POS_WIDTH		35
+#define FW_POS_PRESSURE		45
+
+#define HEADER_REPORT_10_FINGER	0x62
+
+/* Header (4 bytes) plus 3 full 10-finger packets */
+#define MAX_PACKET_SIZE		169
+
+#define BOOT_TIME_DELAY_MS	50
+
+/* FW read command, 0x53 0x?? 0x0, 0x01 */
+#define E_ELAN_INFO_FW_VER	0x00
+#define E_ELAN_INFO_BC_VER	0x10
+#define E_ELAN_INFO_TEST_VER	0xE0
+#define E_ELAN_INFO_FW_ID	0xF0
+#define E_INFO_OSR		0xD6
+#define E_INFO_PHY_SCAN		0xD7
+#define E_INFO_PHY_DRIVER	0xD8
+
+#define MAX_RETRIES		3
+#define MAX_FW_UPDATE_RETRIES	30
+
+#define ELAN_FW_PAGESIZE	132
+#define ELAN_FW_FILENAME	"elants_i2c.bin"
+
+/* calibration timeout definition */
+#define ELAN_CALI_TIMEOUT_MSEC	10000
+
+enum elants_state {
+	ELAN_STATE_NORMAL,
+	ELAN_WAIT_QUEUE_HEADER,
+	ELAN_WAIT_RECALIBRATION,
+};
+
+enum elants_iap_mode {
+	ELAN_IAP_OPERATIONAL,
+	ELAN_IAP_RECOVERY,
+};
+
+/* struct elants_data - represents state of Elan touchscreen device */
+struct elants_data {
+	struct i2c_client *client;
+	struct input_dev *input;
+
+	u16 fw_version;
+	u8 test_version;
+	u8 solution_version;
+	u8 bc_version;
+	u8 iap_version;
+	u16 hw_version;
+	unsigned int x_res;	/* resolution in units/mm */
+	unsigned int y_res;
+	unsigned int x_max;
+	unsigned int y_max;
+
+	enum elants_state state;
+	enum elants_iap_mode iap_mode;
+
+	/* Guards against concurrent access to the device via sysfs */
+	struct mutex sysfs_mutex;
+
+	u8 cmd_resp[HEADER_SIZE];
+	struct completion cmd_done;
+
+	u8 buf[MAX_PACKET_SIZE];
+
+	bool wake_irq_enabled;
+};
+
+static int elants_i2c_send(struct i2c_client *client,
+			   const void *data, size_t size)
+{
+	int ret;
+
+	ret = i2c_master_send(client, data, size);
+	if (ret == size)
+		return 0;
+
+	if (ret >= 0)
+		ret = -EIO;
+
+	dev_err(&client->dev, "%s failed (%*ph): %d\n",
+		__func__, (int)size, data, ret);
+
+	return ret;
+}
+
+static int elants_i2c_read(struct i2c_client *client, void *data, size_t size)
+{
+	int ret;
+
+	ret = i2c_master_recv(client, data, size);
+	if (ret == size)
+		return 0;
+
+	if (ret >= 0)
+		ret = -EIO;
+
+	dev_err(&client->dev, "%s failed: %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int elants_i2c_execute_command(struct i2c_client *client,
+				      const u8 *cmd, size_t cmd_size,
+				      u8 *resp, size_t resp_size)
+{
+	struct i2c_msg msgs[2];
+	int ret;
+	u8 expected_response;
+
+	switch (cmd[0]) {
+	case CMD_HEADER_READ:
+		expected_response = CMD_HEADER_RESP;
+		break;
+
+	case CMD_HEADER_6B_READ:
+		expected_response = CMD_HEADER_6B_RESP;
+		break;
+
+	default:
+		dev_err(&client->dev, "%s: invalid command %*ph\n",
+			__func__, (int)cmd_size, cmd);
+		return -EINVAL;
+	}
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags & I2C_M_TEN;
+	msgs[0].len = cmd_size;
+	msgs[0].buf = (u8 *)cmd;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = client->flags & I2C_M_TEN;
+	msgs[1].flags |= I2C_M_RD;
+	msgs[1].len = resp_size;
+	msgs[1].buf = resp;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		return ret;
+
+	if (ret != ARRAY_SIZE(msgs) || resp[FW_HDR_TYPE] != expected_response)
+		return -EIO;
+
+	return 0;
+}
+
+static int elants_i2c_calibrate(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int ret, error;
+	static const u8 w_flashkey[] = { 0x54, 0xC0, 0xE1, 0x5A };
+	static const u8 rek[] = { 0x54, 0x29, 0x00, 0x01 };
+	static const u8 rek_resp[] = { CMD_HEADER_REK, 0x66, 0x66, 0x66 };
+
+	disable_irq(client->irq);
+
+	ts->state = ELAN_WAIT_RECALIBRATION;
+	reinit_completion(&ts->cmd_done);
+
+	elants_i2c_send(client, w_flashkey, sizeof(w_flashkey));
+	elants_i2c_send(client, rek, sizeof(rek));
+
+	enable_irq(client->irq);
+
+	ret = wait_for_completion_interruptible_timeout(&ts->cmd_done,
+				msecs_to_jiffies(ELAN_CALI_TIMEOUT_MSEC));
+
+	ts->state = ELAN_STATE_NORMAL;
+
+	if (ret <= 0) {
+		error = ret < 0 ? ret : -ETIMEDOUT;
+		dev_err(&client->dev,
+			"error while waiting for calibration to complete: %d\n",
+			error);
+		return error;
+	}
+
+	if (memcmp(rek_resp, ts->cmd_resp, sizeof(rek_resp))) {
+		dev_err(&client->dev,
+			"unexpected calibration response: %*ph\n",
+			(int)sizeof(ts->cmd_resp), ts->cmd_resp);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int elants_i2c_sw_reset(struct i2c_client *client)
+{
+	const u8 soft_rst_cmd[] = { 0x77, 0x77, 0x77, 0x77 };
+	int error;
+
+	error = elants_i2c_send(client, soft_rst_cmd,
+				sizeof(soft_rst_cmd));
+	if (error) {
+		dev_err(&client->dev, "software reset failed: %d\n", error);
+		return error;
+	}
+
+	/*
+	 * We should wait at least 10 msec (but no more than 40) before
+	 * sending fastboot or IAP command to the device.
+	 */
+	msleep(30);
+
+	return 0;
+}
+
+static u16 elants_i2c_parse_version(u8 *buf)
+{
+	return get_unaligned_be32(buf) >> 4;
+}
+
+/* Read the firmware id into ts->hw_version, retrying up to MAX_RETRIES. */
+static int elants_i2c_query_fw_id(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int error, retry_cnt;
+	const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_ID, 0x00, 0x01 };
+	u8 resp[HEADER_SIZE] = { 0 };	/* logged on error: keep defined */
+
+	for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
+		error = elants_i2c_execute_command(client, cmd, sizeof(cmd),
+						   resp, sizeof(resp));
+		if (!error) {
+			ts->hw_version = elants_i2c_parse_version(resp);
+			if (ts->hw_version != 0xffff)
+				return 0;
+		}
+
+		dev_dbg(&client->dev, "read fw id error=%d, buf=%*phC\n",
+			error, (int)sizeof(resp), resp);
+	}
+
+	dev_err(&client->dev, "Failed to read fw id or fw id is invalid\n");
+
+	return -EINVAL;
+}
+
+/* Read the firmware version into ts->fw_version, up to MAX_RETRIES tries. */
+static int elants_i2c_query_fw_version(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int error, retry_cnt;
+	const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_FW_VER, 0x00, 0x01 };
+	u8 resp[HEADER_SIZE] = { 0 };	/* logged on error: keep defined */
+
+	for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
+		error = elants_i2c_execute_command(client, cmd, sizeof(cmd),
+						   resp, sizeof(resp));
+		if (!error) {
+			ts->fw_version = elants_i2c_parse_version(resp);
+			if (ts->fw_version != 0x0000 &&
+			    ts->fw_version != 0xffff)
+				return 0;
+		}
+
+		dev_dbg(&client->dev, "read fw version error=%d, buf=%*phC\n",
+			error, (int)sizeof(resp), resp);
+	}
+
+	dev_err(&client->dev,
+		"Failed to read fw version or fw version is invalid\n");
+	return -EINVAL;
+}
+
+/* Query test version: high byte -> test_version, low -> solution_version */
+static int elants_i2c_query_test_version(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int error, retry_cnt;
+	u16 version;
+	const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_TEST_VER, 0x00, 0x01 };
+	u8 resp[HEADER_SIZE] = { 0 };	/* logged on error: keep defined */
+
+	for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
+		error = elants_i2c_execute_command(client, cmd, sizeof(cmd),
+						   resp, sizeof(resp));
+		if (!error) {
+			version = elants_i2c_parse_version(resp);
+			ts->test_version = version >> 8;
+			ts->solution_version = version & 0xff;
+			return 0;
+		}
+
+		dev_dbg(&client->dev,
+			"read test version error rc=%d, buf=%*phC\n",
+			error, (int)sizeof(resp), resp);
+	}
+
+	dev_err(&client->dev, "Failed to read test version\n");
+
+	return -EINVAL;
+}
+
+/* Query BC version: high byte -> ts->bc_version, low -> ts->iap_version */
+static int elants_i2c_query_bc_version(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	const u8 cmd[] = { CMD_HEADER_READ, E_ELAN_INFO_BC_VER, 0x00, 0x01 };
+	u8 resp[HEADER_SIZE] = { 0 };	/* logged on error: keep defined */
+	u16 version;
+	int error;
+
+	error = elants_i2c_execute_command(client, cmd, sizeof(cmd),
+					   resp, sizeof(resp));
+	if (error) {
+		dev_err(&client->dev, "read BC version error=%d, buf=%*phC\n",
+			error, (int)sizeof(resp), resp);
+		return error;
+	}
+
+	version = elants_i2c_parse_version(resp);
+	ts->bc_version = version >> 8;
+	ts->iap_version = version & 0xff;
+
+	return 0;
+}
+
+/*
+ * Query trace counts, OSR and physical size; derive ts->{x,y}_max and
+ * ts->{x,y}_res.  Returns 0 (bad trace data only warns) or a command error.
+ */
+static int elants_i2c_query_ts_info(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int error;
+	u8 resp[17];
+	u16 phy_x, phy_y, rows, cols, osr;
+	const u8 get_resolution_cmd[] = {
+		CMD_HEADER_6B_READ, 0x00, 0x00, 0x00, 0x00, 0x00
+	};
+	const u8 get_osr_cmd[] = { CMD_HEADER_READ, E_INFO_OSR, 0x00, 0x01 };
+	const u8 get_physical_scan_cmd[] = {
+		CMD_HEADER_READ, E_INFO_PHY_SCAN, 0x00, 0x01
+	};
+	const u8 get_physical_drive_cmd[] = {
+		CMD_HEADER_READ, E_INFO_PHY_DRIVER, 0x00, 0x01
+	};
+
+	/* Get trace number */
+	error = elants_i2c_execute_command(client, get_resolution_cmd,
+					   sizeof(get_resolution_cmd),
+					   resp, sizeof(resp));
+	if (error) {
+		dev_err(&client->dev, "get resolution command failed: %d\n",
+			error);
+		return error;
+	}
+
+	rows = resp[2] + resp[6] + resp[10];
+	cols = resp[3] + resp[7] + resp[11];
+
+	/* Process mm_to_pixel information */
+	error = elants_i2c_execute_command(client,
+					   get_osr_cmd, sizeof(get_osr_cmd),
+					   resp, sizeof(resp));
+	if (error) {
+		dev_err(&client->dev, "get osr command failed: %d\n", error);
+		return error;
+	}
+
+	osr = resp[3];
+
+	error = elants_i2c_execute_command(client, get_physical_scan_cmd,
+					   sizeof(get_physical_scan_cmd),
+					   resp, sizeof(resp));
+	if (error) {
+		dev_err(&client->dev, "get physical scan command failed: %d\n",
+			error);
+		return error;
+	}
+
+	phy_x = get_unaligned_be16(&resp[2]);
+
+	error = elants_i2c_execute_command(client, get_physical_drive_cmd,
+					   sizeof(get_physical_drive_cmd),
+					   resp, sizeof(resp));
+	if (error) {
+		dev_err(&client->dev, "get physical drive command failed: %d\n",
+			error);
+		return error;
+	}
+
+	phy_y = get_unaligned_be16(&resp[2]);
+
+	dev_dbg(&client->dev, "phy_x=%d, phy_y=%d\n", phy_x, phy_y);
+
+	if (rows == 0 || cols == 0 || osr == 0) {
+		dev_warn(&client->dev,
+			 "invalid trace number data: %d, %d, %d\n",
+			 rows, cols, osr);
+	} else {
+		/* translate trace number to TS resolution */
+		ts->x_max = ELAN_TS_RESOLUTION(rows, osr);
+		ts->y_max = ELAN_TS_RESOLUTION(cols, osr);
+		if (phy_x)	/* device-supplied: never divide by zero */
+			ts->x_res = DIV_ROUND_CLOSEST(ts->x_max, phy_x);
+		if (phy_y)
+			ts->y_res = DIV_ROUND_CLOSEST(ts->y_max, phy_y);
+	}
+
+	return 0;
+}
+
+static int elants_i2c_fastboot(struct i2c_client *client)
+{
+	const u8 boot_cmd[] = { 0x4D, 0x61, 0x69, 0x6E };
+	int error;
+
+	error = elants_i2c_send(client, boot_cmd, sizeof(boot_cmd));
+	if (error) {
+		dev_err(&client->dev, "boot failed: %d\n", error);
+		return error;
+	}
+
+	dev_dbg(&client->dev, "boot success -- 0x%x\n", client->addr);
+	return 0;
+}
+
+static int elants_i2c_initialize(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	int error, retry_cnt;
+	const u8 hello_packet[] = { 0x55, 0x55, 0x55, 0x55 };
+	const u8 recov_packet[] = { 0x55, 0x55, 0x80, 0x80 };
+	u8 buf[HEADER_SIZE];
+
+	for (retry_cnt = 0; retry_cnt < MAX_RETRIES; retry_cnt++) {
+		error = elants_i2c_sw_reset(client);
+		if (error) {
+			/* Retry from the reset unless this was the last try */
+			if (retry_cnt < MAX_RETRIES - 1)
+				continue;
+		}
+
+		error = elants_i2c_fastboot(client);
+		if (error) {
+			/* Retry from the reset unless this was the last try */
+			if (retry_cnt < MAX_RETRIES - 1)
+				continue;
+		}
+
+		/* Wait for Hello packet */
+		msleep(BOOT_TIME_DELAY_MS);
+
+		error = elants_i2c_read(client, buf, sizeof(buf));
+		if (error) {
+			dev_err(&client->dev,
+				"failed to read 'hello' packet: %d\n", error);
+		} else if (!memcmp(buf, hello_packet, sizeof(hello_packet))) {
+			ts->iap_mode = ELAN_IAP_OPERATIONAL;
+			break;
+		} else if (!memcmp(buf, recov_packet, sizeof(recov_packet))) {
+			/*
+			 * Setting error code will mark device
+			 * in recovery mode below.
+			 */
+			error = -EIO;
+			break;
+		} else {
+			error = -EINVAL;
+			dev_err(&client->dev,
+				"invalid 'hello' packet: %*ph\n",
+				(int)sizeof(buf), buf);
+		}
+	}
+
+	if (!error)
+		error = elants_i2c_query_fw_id(ts);
+	if (!error)
+		error = elants_i2c_query_fw_version(ts);
+
+	if (error) {
+		ts->iap_mode = ELAN_IAP_RECOVERY;
+	} else {
+		elants_i2c_query_test_version(ts);
+		elants_i2c_query_bc_version(ts);
+		elants_i2c_query_ts_info(ts);
+	}
+
+	return 0; /* failure is reported through ts->iap_mode, not here */
+}
+
+/*
+ * Firmware update interface.
+ */
+
+/* Write one firmware page; retried up to MAX_FW_UPDATE_RETRIES times. */
+static int elants_i2c_fw_write_page(struct i2c_client *client,
+				    const void *page)
+{
+	const u8 ack_ok[] = { 0xaa, 0xaa };
+	int error, retry = 0;
+	u8 ack[2];
+
+	while (retry++ < MAX_FW_UPDATE_RETRIES) {
+		error = elants_i2c_send(client, page, ELAN_FW_PAGESIZE);
+		if (error) {
+			dev_err(&client->dev,
+				"IAP Write Page failed: %d\n", error);
+			continue;
+		}
+
+		error = elants_i2c_read(client, ack, 2);
+		if (error) {
+			/* a failed ack read ends the attempt, no retry */
+			dev_err(&client->dev,
+				"IAP Ack read failed: %d\n", error);
+			break;
+		}
+
+		if (memcmp(ack, ack_ok, sizeof(ack_ok)) == 0)
+			break;
+
+		dev_err(&client->dev, "IAP Get Ack Error [%02x:%02x]\n",
+			ack[0], ack[1]);
+		error = -EIO;
+	}
+
+	return error;
+}
+
+/*
+ * Enter IAP (recovery entry if @force) and write @fw page by page; returns 0
+ * or an error. send_id is a u8 so the byte sent is endianness-independent.
+ */
+static int elants_i2c_do_update_firmware(struct i2c_client *client,
+					 const struct firmware *fw,
+					 bool force)
+{
+	const u8 enter_iap[] = { 0x45, 0x49, 0x41, 0x50 };
+	const u8 enter_iap2[] = { 0x54, 0x00, 0x12, 0x34 };
+	const u8 iap_ack[] = { 0x55, 0xaa, 0x33, 0xcc };
+	u8 buf[HEADER_SIZE], send_id;
+	int page, n_fw_pages, error;
+
+	/* Recovery mode detection! */
+	if (force) {
+		dev_dbg(&client->dev, "Recovery mode procedure\n");
+		error = elants_i2c_send(client, enter_iap2, sizeof(enter_iap2));
+	} else {
+		/* Start IAP Procedure */
+		dev_dbg(&client->dev, "Normal IAP procedure\n");
+		elants_i2c_sw_reset(client);
+
+		error = elants_i2c_send(client, enter_iap, sizeof(enter_iap));
+	}
+
+	if (error) {
+		dev_err(&client->dev, "failed to enter IAP mode: %d\n", error);
+		return error;
+	}
+
+	msleep(20);
+
+	/* check IAP state */
+	error = elants_i2c_read(client, buf, 4);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to read IAP acknowledgement: %d\n", error);
+		return error;
+	}
+
+	if (memcmp(buf, iap_ack, sizeof(iap_ack))) {
+		dev_err(&client->dev,
+			"failed to enter IAP: %*ph (expected %*ph)\n",
+			(int)sizeof(buf), buf, (int)sizeof(iap_ack), iap_ack);
+		return -EIO;
+	}
+
+	dev_info(&client->dev, "successfully entered IAP mode\n");
+
+	send_id = client->addr;
+	error = elants_i2c_send(client, &send_id, 1);
+	if (error) {
+		dev_err(&client->dev, "sending dummy byte failed: %d\n", error);
+		return error;
+	}
+
+	/* Clear the last page of Master */
+	error = elants_i2c_send(client, fw->data, ELAN_FW_PAGESIZE);
+	if (error) {
+		dev_err(&client->dev, "clearing of the last page failed: %d\n",
+			error);
+		return error;
+	}
+
+	error = elants_i2c_read(client, buf, 2);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to read ACK for clearing the last page: %d\n",
+			error);
+		return error;
+	}
+
+	n_fw_pages = fw->size / ELAN_FW_PAGESIZE;
+	dev_dbg(&client->dev, "IAP Pages = %d\n", n_fw_pages);
+
+	for (page = 0; page < n_fw_pages; page++) {
+		error = elants_i2c_fw_write_page(client,
+					fw->data + page * ELAN_FW_PAGESIZE);
+		if (error) {
+			dev_err(&client->dev,
+				"failed to write FW page %d: %d\n",
+				page, error);
+			return error;
+		}
+	}
+
+	/* Old iap needs to wait 200ms for WDT and rest is for hello packets */
+	msleep(300);
+
+	dev_info(&client->dev, "firmware update completed\n");
+	return 0;
+}
+
+static int elants_i2c_fw_update(struct elants_data *ts)
+{
+	struct i2c_client *client = ts->client;
+	const struct firmware *fw;
+	int error;
+
+	error = request_firmware(&fw, ELAN_FW_FILENAME, &client->dev);
+	if (error) {
+		dev_err(&client->dev, "failed to request firmware %s: %d\n",
+			ELAN_FW_FILENAME, error);
+		return error;
+	}
+
+	if (fw->size % ELAN_FW_PAGESIZE) {
+		dev_err(&client->dev, "invalid firmware length: %zu\n",
+			fw->size);
+		error = -EINVAL;
+		goto out;
+	}
+
+	disable_irq(client->irq);
+
+	error = elants_i2c_do_update_firmware(client, fw,
+					ts->iap_mode == ELAN_IAP_RECOVERY);
+	if (error) {
+		dev_err(&client->dev, "firmware update failed: %d\n", error);
+		ts->iap_mode = ELAN_IAP_RECOVERY;
+		goto out_enable_irq;
+	}
+
+	error = elants_i2c_initialize(ts);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to initialize device after firmware update: %d\n",
+			error);
+		ts->iap_mode = ELAN_IAP_RECOVERY;
+		goto out_enable_irq;
+	}
+
+	ts->iap_mode = ELAN_IAP_OPERATIONAL;
+
+out_enable_irq:
+	ts->state = ELAN_STATE_NORMAL;
+	enable_irq(client->irq);
+	msleep(100);
+
+	if (!error)
+		elants_i2c_calibrate(ts);
+out:
+	release_firmware(fw);
+	return error;
+}
+
+/*
+ * Event reporting.
+ */
+
+/* Decode a multi-touch report and emit one MT slot per active contact. */
+static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf)
+{
+	struct input_dev *input = ts->input;
+	unsigned int n_fingers = buf[FW_POS_STATE + 1] & 0x0f;
+	u16 finger_state;
+	int i;
+
+	/* 10 state bits: 8 from the first byte, 2 from the second */
+	finger_state = ((buf[FW_POS_STATE + 1] & 0x30) << 4) |
+			buf[FW_POS_STATE];
+
+	dev_dbg(&ts->client->dev,
+		"n_fingers: %u, state: %04x\n",  n_fingers, finger_state);
+
+	for (i = 0; i < MAX_CONTACT_NUM && n_fingers; i++, finger_state >>= 1) {
+		unsigned int x, y, p, w;
+		u8 *pos;
+
+		if (!(finger_state & 1))
+			continue;
+
+		pos = &buf[FW_POS_XY + i * 3];
+		x = (((u16)pos[0] & 0xf0) << 4) | pos[1];
+		y = (((u16)pos[0] & 0x0f) << 8) | pos[2];
+		p = buf[FW_POS_PRESSURE + i];
+		w = buf[FW_POS_WIDTH + i];
+
+		dev_dbg(&ts->client->dev, "i=%d x=%d y=%d p=%d w=%d\n",
+			i, x, y, p, w);
+
+		input_mt_slot(input, i);
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+		input_event(input, EV_ABS, ABS_MT_POSITION_X, x);
+		input_event(input, EV_ABS, ABS_MT_POSITION_Y, y);
+		input_event(input, EV_ABS, ABS_MT_PRESSURE, p);
+		input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, w);
+
+		n_fingers--;
+	}
+
+	input_mt_sync_frame(input);
+	input_sync(input);
+}
+
+/* Sum the bytes that precede the checksum position; wraps as a u8. */
+static u8 elants_i2c_calculate_checksum(u8 *buf)
+{
+	u8 sum = 0, pos = 0;
+
+	while (pos < FW_POS_CHECKSUM)
+		sum += buf[pos++];
+
+	return sum;
+}
+
+/* Check one report's checksum and type, then pass it to the MT decoder. */
+static void elants_i2c_event(struct elants_data *ts, u8 *buf)
+{
+	u8 sum = elants_i2c_calculate_checksum(buf);
+
+	if (unlikely(sum != buf[FW_POS_CHECKSUM]))
+		dev_warn(&ts->client->dev,
+			 "%s: invalid checksum for packet %02x: %02x vs. %02x\n",
+			 __func__, buf[FW_POS_HEADER],
+			 sum, buf[FW_POS_CHECKSUM]);
+	else if (likely(buf[FW_POS_HEADER] == HEADER_REPORT_10_FINGER))
+		elants_i2c_mt_event(ts, buf);
+	else
+		dev_warn(&ts->client->dev, "%s: unknown packet type: %02x\n",
+			 __func__, buf[FW_POS_HEADER]);
+}
+
+/* Interrupt handler: read one packet and dispatch it by state and type. */
+static irqreturn_t elants_i2c_irq(int irq, void *_dev)
+{
+	const u8 wait_packet[] = { 0x64, 0x64, 0x64, 0x64 };
+	struct elants_data *ts = _dev;
+	struct i2c_client *client = ts->client;
+	int report_count, report_len;
+	int i, len;
+
+	len = i2c_master_recv(client, ts->buf, sizeof(ts->buf));
+	if (len < 0) {
+		dev_err(&client->dev, "%s: failed to read data: %d\n",
+			__func__, len);
+		goto out;
+	}
+
+	dev_dbg(&client->dev, "%s: packet %*ph\n",
+		__func__, HEADER_SIZE, ts->buf);
+
+	switch (ts->state) {
+	case ELAN_WAIT_RECALIBRATION:
+		if (ts->buf[FW_HDR_TYPE] == CMD_HEADER_REK) {
+			memcpy(ts->cmd_resp, ts->buf, sizeof(ts->cmd_resp));
+			complete(&ts->cmd_done);
+			ts->state = ELAN_STATE_NORMAL;
+		}
+		break;
+
+	case ELAN_WAIT_QUEUE_HEADER:
+		if (ts->buf[FW_HDR_TYPE] != QUEUE_HEADER_NORMAL)
+			break;
+
+		ts->state = ELAN_STATE_NORMAL;
+		/* fall through */
+
+	case ELAN_STATE_NORMAL:
+		switch (ts->buf[FW_HDR_TYPE]) {
+		case CMD_HEADER_HELLO:
+		case CMD_HEADER_RESP:
+		case CMD_HEADER_REK:
+			break;
+
+		case QUEUE_HEADER_WAIT:
+			if (memcmp(ts->buf, wait_packet, sizeof(wait_packet))) {
+				dev_err(&client->dev,
+					"invalid wait packet %*ph\n",
+					HEADER_SIZE, ts->buf);
+			} else {
+				ts->state = ELAN_WAIT_QUEUE_HEADER;
+				udelay(30);
+			}
+			break;
+
+		case QUEUE_HEADER_SINGLE:
+			elants_i2c_event(ts, &ts->buf[HEADER_SIZE]);
+			break;
+
+		case QUEUE_HEADER_NORMAL:
+			report_count = ts->buf[FW_HDR_COUNT];
+			/* report_count is the divisor below: reject 0 too */
+			if (report_count == 0 || report_count > 3) {
+				dev_err(&client->dev,
+					"bad report count: %*ph\n",
+					HEADER_SIZE, ts->buf);
+				break;
+			}
+
+			report_len = ts->buf[FW_HDR_LENGTH] / report_count;
+			if (report_len != PACKET_SIZE) {
+				dev_err(&client->dev,
+					"mismatching report length: %*ph\n",
+					HEADER_SIZE, ts->buf);
+				break;
+			}
+
+			for (i = 0; i < report_count; i++) {
+				u8 *buf = ts->buf + HEADER_SIZE +
+							i * PACKET_SIZE;
+				elants_i2c_event(ts, buf);
+			}
+			break;
+
+		default:
+			dev_err(&client->dev, "unknown packet %*ph\n",
+				HEADER_SIZE, ts->buf);
+			break;
+		}
+		break;
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+
+/*
+ * sysfs interface
+ */
+/* sysfs "calibrate" store: run a calibration while holding sysfs_mutex. */
+static ssize_t calibrate_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct elants_data *ts = i2c_get_clientdata(to_i2c_client(dev));
+	int ret = mutex_lock_interruptible(&ts->sysfs_mutex);
+
+	if (ret)
+		return ret;
+
+	ret = elants_i2c_calibrate(ts);
+	mutex_unlock(&ts->sysfs_mutex);
+	if (ret)
+		return ret;
+	return count;
+}
+
+/* sysfs "update_fw" store: flash firmware while holding sysfs_mutex. */
+static ssize_t write_update_fw(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct elants_data *ts = i2c_get_clientdata(to_i2c_client(dev));
+	int ret = mutex_lock_interruptible(&ts->sysfs_mutex);
+
+	if (ret)
+		return ret;
+
+	ret = elants_i2c_fw_update(ts);
+	dev_dbg(dev, "firmware update result: %d\n", ret);
+
+	mutex_unlock(&ts->sysfs_mutex);
+
+	return ret ? ret : count;
+}
+
+/* sysfs "iap_mode" show: "Normal" when operational, otherwise "Recovery". */
+static ssize_t show_iap_mode(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct elants_data *ts = i2c_get_clientdata(to_i2c_client(dev));
+	bool normal = ts->iap_mode == ELAN_IAP_OPERATIONAL;
+
+	return sprintf(buf, "%s\n",
+		       normal ? "Normal" : "Recovery");
+}
+
+static DEVICE_ATTR(calibrate, S_IWUSR, NULL, calibrate_store);
+static DEVICE_ATTR(iap_mode, S_IRUGO, show_iap_mode, NULL);
+static DEVICE_ATTR(update_fw, S_IWUSR, NULL, write_update_fw);
+
+struct elants_version_attribute {
+	struct device_attribute dattr;
+	size_t field_offset;
+	size_t field_size;
+};
+
+#define __ELANTS_FIELD_SIZE(_field)					\
+	sizeof(((struct elants_data *)NULL)->_field)
+#define __ELANTS_VERIFY_SIZE(_field)					\
+	(BUILD_BUG_ON_ZERO(__ELANTS_FIELD_SIZE(_field) > 2) +		\
+	 __ELANTS_FIELD_SIZE(_field))
+#define ELANTS_VERSION_ATTR(_field)					\
+	struct elants_version_attribute elants_ver_attr_##_field = {	\
+		.dattr = __ATTR(_field, S_IRUGO,			\
+				elants_version_attribute_show, NULL),	\
+		.field_offset = offsetof(struct elants_data, _field),	\
+		.field_size = __ELANTS_VERIFY_SIZE(_field),		\
+	}
+
+/* Show a 1- or 2-byte version field of elants_data as zero-padded hex. */
+static ssize_t elants_version_attribute_show(struct device *dev,
+					     struct device_attribute *dattr,
+					     char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elants_data *ts = i2c_get_clientdata(client);
+	struct elants_version_attribute *attr =
+		container_of(dattr, struct elants_version_attribute, dattr);
+	u8 *field = (u8 *)ts + attr->field_offset;
+	unsigned int digits, val;
+
+	if (attr->field_size != 1) {
+		val = *(u16 *)field;	/* two-byte field */
+		digits = 4;
+	} else {
+		val = *field;
+		digits = 2;
+	}
+
+	return sprintf(buf, "%0*x\n", digits, val);
+}
+
+static ELANTS_VERSION_ATTR(fw_version);
+static ELANTS_VERSION_ATTR(hw_version);
+static ELANTS_VERSION_ATTR(test_version);
+static ELANTS_VERSION_ATTR(solution_version);
+static ELANTS_VERSION_ATTR(bc_version);
+static ELANTS_VERSION_ATTR(iap_version);
+
+static struct attribute *elants_attributes[] = {
+	&dev_attr_calibrate.attr,
+	&dev_attr_update_fw.attr,
+	&dev_attr_iap_mode.attr,
+
+	&elants_ver_attr_fw_version.dattr.attr,
+	&elants_ver_attr_hw_version.dattr.attr,
+	&elants_ver_attr_test_version.dattr.attr,
+	&elants_ver_attr_solution_version.dattr.attr,
+	&elants_ver_attr_bc_version.dattr.attr,
+	&elants_ver_attr_iap_version.dattr.attr,
+	NULL
+};
+
+static struct attribute_group elants_attribute_group = {
+	.attrs = elants_attributes,
+};
+
+static void elants_i2c_remove_sysfs_group(void *_data)
+{
+	struct elants_data *ts = _data;
+
+	sysfs_remove_group(&ts->client->dev.kobj, &elants_attribute_group);
+}
+
+static int elants_i2c_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	union i2c_smbus_data dummy;
+	struct elants_data *ts;
+	unsigned long irqflags;
+	int error;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev,
+			"%s: i2c check functionality error\n", DEVICE_NAME);
+		return -ENXIO;
+	}
+
+	/* Make sure there is something at this address */
+	if (i2c_smbus_xfer(client->adapter, client->addr, 0,
+			I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &dummy) < 0) {
+		dev_err(&client->dev, "nothing at this address\n");
+		return -ENXIO;
+	}
+
+	ts = devm_kzalloc(&client->dev, sizeof(struct elants_data), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	mutex_init(&ts->sysfs_mutex);
+	init_completion(&ts->cmd_done);
+
+	ts->client = client;
+	i2c_set_clientdata(client, ts);
+
+	error = elants_i2c_initialize(ts);
+	if (error) {
+		dev_err(&client->dev, "failed to initialize: %d\n", error);
+		return error;
+	}
+
+	ts->input = devm_input_allocate_device(&client->dev);
+	if (!ts->input) {
+		dev_err(&client->dev, "Failed to allocate input device\n");
+		return -ENOMEM;
+	}
+
+	ts->input->name = "Elan Touchscreen";
+	ts->input->id.bustype = BUS_I2C;
+
+	__set_bit(BTN_TOUCH, ts->input->keybit);
+	__set_bit(EV_ABS, ts->input->evbit);
+	__set_bit(EV_KEY, ts->input->evbit);
+
+	/* Single touch input params setup */
+	input_set_abs_params(ts->input, ABS_X, 0, ts->x_max, 0, 0);
+	input_set_abs_params(ts->input, ABS_Y, 0, ts->y_max, 0, 0);
+	input_set_abs_params(ts->input, ABS_PRESSURE, 0, 255, 0, 0);
+	input_abs_set_res(ts->input, ABS_X, ts->x_res);
+	input_abs_set_res(ts->input, ABS_Y, ts->y_res);
+
+	/* Multitouch input params setup */
+	error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM,
+				    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+	if (error) {
+		dev_err(&client->dev,
+			"failed to initialize MT slots: %d\n", error);
+		return error;
+	}
+
+	input_set_abs_params(ts->input, ABS_MT_POSITION_X, 0, ts->x_max, 0, 0);
+	input_set_abs_params(ts->input, ABS_MT_POSITION_Y, 0, ts->y_max, 0, 0);
+	input_set_abs_params(ts->input, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+	input_set_abs_params(ts->input, ABS_MT_PRESSURE, 0, 255, 0, 0);
+	input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res);
+	input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res);
+
+	input_set_drvdata(ts->input, ts);
+
+	error = input_register_device(ts->input);
+	if (error) {
+		dev_err(&client->dev,
+			"unable to register input device: %d\n", error);
+		return error;
+	}
+
+	/*
+	 * Systems using device tree should set up interrupt via DTS,
+	 * the rest will use the default falling edge interrupts.
+	 */
+	irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING;
+
+	error = devm_request_threaded_irq(&client->dev, client->irq,
+					  NULL, elants_i2c_irq,
+					  irqflags | IRQF_ONESHOT,
+					  client->name, ts);
+	if (error) {
+		dev_err(&client->dev, "Failed to register interrupt\n");
+		return error;
+	}
+
+	/*
+	 * Systems using device tree should set up wakeup via DTS,
+	 * the rest will configure device as wakeup source by default.
+	 */
+	if (!client->dev.of_node)
+		device_init_wakeup(&client->dev, true);
+
+	error = sysfs_create_group(&client->dev.kobj, &elants_attribute_group);
+	if (error) {
+		dev_err(&client->dev, "failed to create sysfs attributes: %d\n",
+			error);
+		return error;
+	}
+
+	error = devm_add_action(&client->dev,
+				elants_i2c_remove_sysfs_group, ts);
+	if (error) {
+		elants_i2c_remove_sysfs_group(ts);
+		dev_err(&client->dev,
+			"Failed to add sysfs cleanup action: %d\n",
+			error);
+		return error;
+	}
+
+	return 0;
+}
+
+/* Send the sleep command (best effort) and arm the wake IRQ if allowed. */
+static int __maybe_unused elants_i2c_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elants_data *ts = i2c_get_clientdata(client);
+	const u8 set_sleep_cmd[] = { 0x54, 0x50, 0x00, 0x01 };
+	int error, tries = 0;
+
+	/* The sleep command is not supported in IAP recovery mode */
+	if (ts->iap_mode != ELAN_IAP_OPERATIONAL)
+		return -EBUSY;
+
+	disable_irq(client->irq);
+
+	while (tries++ < MAX_RETRIES) {
+		error = elants_i2c_send(client, set_sleep_cmd,
+					sizeof(set_sleep_cmd));
+		if (!error)
+			break;
+
+		dev_err(&client->dev, "suspend command failed: %d\n", error);
+	}
+
+	if (device_may_wakeup(dev))
+		ts->wake_irq_enabled = !enable_irq_wake(client->irq);
+
+	return 0;
+}
+
+/* Send the wake-up command (best effort) and re-enable the interrupt. */
+static int __maybe_unused elants_i2c_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct elants_data *ts = i2c_get_clientdata(client);
+	const u8 set_active_cmd[] = { 0x54, 0x58, 0x00, 0x01 };
+	int error, tries = 0;
+
+	if (device_may_wakeup(dev) && ts->wake_irq_enabled)
+		disable_irq_wake(client->irq);
+
+	while (tries++ < MAX_RETRIES) {
+		error = elants_i2c_send(client, set_active_cmd,
+					sizeof(set_active_cmd));
+		if (!error)
+			break;
+
+		dev_err(&client->dev, "resume command failed: %d\n", error);
+	}
+
+	ts->state = ELAN_STATE_NORMAL;
+	enable_irq(client->irq);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(elants_i2c_pm_ops,
+			 elants_i2c_suspend, elants_i2c_resume);
+
+static const struct i2c_device_id elants_i2c_id[] = {
+	{ DEVICE_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, elants_i2c_id);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id elants_acpi_id[] = {
+	{ "ELAN0001", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, elants_acpi_id);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id elants_of_match[] = {
+	{ .compatible = "elan,ekth3500" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, elants_of_match);
+#endif
+
+static struct i2c_driver elants_i2c_driver = {
+	.probe = elants_i2c_probe,
+	.id_table = elants_i2c_id,
+	.driver = {
+		.name = DEVICE_NAME,
+		.owner = THIS_MODULE,
+		.pm = &elants_i2c_pm_ops,
+		.acpi_match_table = ACPI_PTR(elants_acpi_id),
+		.of_match_table = of_match_ptr(elants_of_match),
+	},
+};
+module_i2c_driver(elants_i2c_driver);
+
+MODULE_AUTHOR("Scott Liu <scott.liu@emc.com.tw>");
+MODULE_DESCRIPTION("Elan I2c Touchscreen driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
new file mode 100644
index 0000000..ca19668
--- /dev/null
+++ b/drivers/input/touchscreen/goodix.c
@@ -0,0 +1,395 @@
+/*
+ *  Driver for Goodix Touchscreens
+ *
+ *  Copyright (c) 2014 Red Hat Inc.
+ *
+ *  This code is based on gt9xx.c authored by andrew@goodix.com:
+ *
+ *  2010 - 2012 Goodix Technology.
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+struct goodix_ts_data {
+	struct i2c_client *client;
+	struct input_dev *input_dev;
+	int abs_x_max;
+	int abs_y_max;
+	unsigned int max_touch_num;
+	unsigned int int_trigger_type;
+};
+
+#define GOODIX_MAX_HEIGHT		4096
+#define GOODIX_MAX_WIDTH		4096
+#define GOODIX_INT_TRIGGER		1
+#define GOODIX_CONTACT_SIZE		8
+#define GOODIX_MAX_CONTACTS		10
+
+#define GOODIX_CONFIG_MAX_LENGTH	240
+
+/* Register defines */
+#define GOODIX_READ_COOR_ADDR		0x814E
+#define GOODIX_REG_CONFIG_DATA		0x8047
+#define GOODIX_REG_VERSION		0x8140
+
+#define RESOLUTION_LOC		1
+#define TRIGGER_LOC		6
+
+static const unsigned long goodix_irq_flags[] = {
+	IRQ_TYPE_EDGE_RISING,
+	IRQ_TYPE_EDGE_FALLING,
+	IRQ_TYPE_LEVEL_LOW,
+	IRQ_TYPE_LEVEL_HIGH,
+};
+
+/**
+ * goodix_i2c_read - read data from a register of the i2c slave device.
+ *
+ * @client: i2c device.
+ * @reg: the register to read from.
+ * @buf: buffer that receives the data read.
+ * @len: number of bytes to read into @buf.
+ */
+static int goodix_i2c_read(struct i2c_client *client,
+				u16 reg, u8 *buf, int len)
+{
+	struct i2c_msg msgs[2];
+	u16 wbuf = cpu_to_be16(reg);
+	int ret;
+
+	msgs[0].flags = 0;
+	msgs[0].addr  = client->addr;
+	msgs[0].len   = 2;
+	msgs[0].buf   = (u8 *) &wbuf;
+
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].addr  = client->addr;
+	msgs[1].len   = len;
+	msgs[1].buf   = buf;
+
+	ret = i2c_transfer(client->adapter, msgs, 2);
+	return ret < 0 ? ret : (ret != ARRAY_SIZE(msgs) ? -EIO : 0);
+}
+
+static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
+{
+	int touch_num;
+	int error;
+
+	error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, data,
+				GOODIX_CONTACT_SIZE + 1);
+	if (error) {
+		dev_err(&ts->client->dev, "I2C transfer error: %d\n", error);
+		return error;
+	}
+
+	touch_num = data[0] & 0x0f;
+	if (touch_num > GOODIX_MAX_CONTACTS)
+		return -EPROTO;
+
+	if (touch_num > 1) {
+		data += 1 + GOODIX_CONTACT_SIZE;
+		error = goodix_i2c_read(ts->client,
+					GOODIX_READ_COOR_ADDR +
+						1 + GOODIX_CONTACT_SIZE,
+					data,
+					GOODIX_CONTACT_SIZE * (touch_num - 1));
+		if (error)
+			return error;
+	}
+
+	return touch_num;
+}
+
+static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
+{
+	int id = coor_data[0] & 0x0F;
+	int input_x = get_unaligned_le16(&coor_data[1]);
+	int input_y = get_unaligned_le16(&coor_data[3]);
+	int input_w = get_unaligned_le16(&coor_data[5]);
+
+	input_mt_slot(ts->input_dev, id);
+	input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true);
+	input_report_abs(ts->input_dev, ABS_MT_POSITION_X, input_x);
+	input_report_abs(ts->input_dev, ABS_MT_POSITION_Y, input_y);
+	input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w);
+	input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
+}
+
+/**
+ * goodix_process_events - Process incoming events
+ *
+ * @ts: our goodix_ts_data pointer
+ *
+ * Called when the IRQ is triggered. Read the current device state, and push
+ * the input events to the user space.
+ */
+static void goodix_process_events(struct goodix_ts_data *ts)
+{
+	u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
+	u8 *contact = &point_data[1];	/* contact records follow byte 0 */
+	int touch_num = goodix_ts_read_input_report(ts, point_data);
+
+	if (touch_num < 0)
+		return;
+
+	while (touch_num-- > 0) {
+		goodix_ts_report_touch(ts, contact);
+		contact += GOODIX_CONTACT_SIZE;
+	}
+
+	input_mt_sync_frame(ts->input_dev);
+	input_sync(ts->input_dev);
+}
+
+/**
+ * goodix_ts_irq_handler - The IRQ handler
+ *
+ * @irq: interrupt number.
+ * @dev_id: private data pointer.
+ */
+static irqreturn_t goodix_ts_irq_handler(int irq, void *dev_id)
+{
+	static const u8 end_cmd[] = {
+		GOODIX_READ_COOR_ADDR >> 8,
+		GOODIX_READ_COOR_ADDR & 0xff,
+		0
+	};
+	struct goodix_ts_data *ts = dev_id;
+
+	goodix_process_events(ts);
+
+	if (i2c_master_send(ts->client, end_cmd, sizeof(end_cmd)) < 0)
+		dev_err(&ts->client->dev, "I2C write end_cmd error\n");
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * goodix_read_config - Read the embedded configuration of the panel
+ *
+ * @ts: our goodix_ts_data pointer
+ *
+ * Must be called during probe
+ */
+static void goodix_read_config(struct goodix_ts_data *ts)
+{
+	u8 config[GOODIX_CONFIG_MAX_LENGTH];
+	int error;
+
+	error = goodix_i2c_read(ts->client, GOODIX_REG_CONFIG_DATA,
+			      config,
+			   GOODIX_CONFIG_MAX_LENGTH);
+	if (error) {
+		dev_warn(&ts->client->dev,
+			 "Error reading config (%d), using defaults\n",
+			 error);
+		ts->abs_x_max = GOODIX_MAX_WIDTH;
+		ts->abs_y_max = GOODIX_MAX_HEIGHT;
+		ts->int_trigger_type = GOODIX_INT_TRIGGER;
+		return;
+	}
+
+	ts->abs_x_max = get_unaligned_le16(&config[RESOLUTION_LOC]);
+	ts->abs_y_max = get_unaligned_le16(&config[RESOLUTION_LOC + 2]);
+	ts->int_trigger_type = (config[TRIGGER_LOC]) & 0x03;
+	if (!ts->abs_x_max || !ts->abs_y_max) {
+		dev_err(&ts->client->dev,
+			"Invalid config, using defaults\n");
+		ts->abs_x_max = GOODIX_MAX_WIDTH;
+		ts->abs_y_max = GOODIX_MAX_HEIGHT;
+	}
+}
+
+
+/**
+ * goodix_read_version - Read goodix touchscreen version
+ *
+ * @client: the i2c client
+ * @version: output buffer containing the version on success
+ */
+static int goodix_read_version(struct i2c_client *client, u16 *version)
+{
+	int error;
+	u8 buf[6];
+
+	error = goodix_i2c_read(client, GOODIX_REG_VERSION, buf, sizeof(buf));
+	if (error) {
+		dev_err(&client->dev, "read version failed: %d\n", error);
+		return error;
+	}
+
+	if (version)
+		*version = get_unaligned_le16(&buf[4]);
+
+	dev_info(&client->dev, "IC VERSION: %6ph\n", buf);
+
+	return 0;
+}
+
+/**
+ * goodix_i2c_test - I2C test function to check if the device answers.
+ *
+ * @client: the i2c client
+ * Returns 0 once a read succeeds, else the error of the last attempt.
+ */
+static int goodix_i2c_test(struct i2c_client *client)
+{
+	int attempt, error;
+	u8 test;
+
+	for (attempt = 1; attempt <= 2; attempt++) {
+		error = goodix_i2c_read(client, GOODIX_REG_CONFIG_DATA,
+					&test, 1);
+		if (!error)
+			break;
+
+		dev_err(&client->dev, "i2c test failed attempt %d: %d\n",
+			attempt, error);
+		msleep(20);
+	}
+
+	return error;
+}
+
+/**
+ * goodix_request_input_dev - Allocate, populate and register the input device
+ *
+ * @ts: our goodix_ts_data pointer
+ *
+ * Must be called during probe. Returns 0 or a negative error code.
+ */
+static int goodix_request_input_dev(struct goodix_ts_data *ts)
+{
+	int error;
+
+	ts->input_dev = devm_input_allocate_device(&ts->client->dev);
+	if (!ts->input_dev) {
+		dev_err(&ts->client->dev, "Failed to allocate input device.\n");
+		return -ENOMEM;
+	}
+
+	ts->input_dev->evbit[0] = BIT_MASK(EV_SYN) |
+				  BIT_MASK(EV_KEY) |
+				  BIT_MASK(EV_ABS);
+
+	input_set_abs_params(ts->input_dev, ABS_MT_POSITION_X, 0,
+				ts->abs_x_max, 0, 0);
+	input_set_abs_params(ts->input_dev, ABS_MT_POSITION_Y, 0,
+				ts->abs_y_max, 0, 0);
+	input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0);
+	input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+
+	error = input_mt_init_slots(ts->input_dev, GOODIX_MAX_CONTACTS,
+				    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+	if (error)
+		return error;
+
+	ts->input_dev->name = "Goodix Capacitive TouchScreen";
+	ts->input_dev->phys = "input/ts";
+	ts->input_dev->id.bustype = BUS_I2C;
+	ts->input_dev->id.vendor = 0x0416;
+	ts->input_dev->id.product = 0x1001;
+	ts->input_dev->id.version = 10427;
+
+	error = input_register_device(ts->input_dev);
+	if (error)
+		dev_err(&ts->client->dev,
+			"Failed to register input device: %d\n", error);
+
+	return error;
+}
+
+static int goodix_ts_probe(struct i2c_client *client,
+			   const struct i2c_device_id *id)
+{
+	struct goodix_ts_data *ts;
+	unsigned long irq_flags;
+	int error;
+	u16 version_info;
+
+	dev_dbg(&client->dev, "I2C Address: 0x%02x\n", client->addr);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "I2C check functionality failed.\n");
+		return -ENXIO;
+	}
+
+	ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ts->client = client;
+	i2c_set_clientdata(client, ts);
+
+	error = goodix_i2c_test(client);
+	if (error) {
+		dev_err(&client->dev, "I2C communication failure: %d\n", error);
+		return error;
+	}
+
+	error = goodix_read_version(client, &version_info);
+	if (error) {
+		dev_err(&client->dev, "Read version failed.\n");
+		return error;
+	}
+
+	goodix_read_config(ts);
+
+	error = goodix_request_input_dev(ts);
+	if (error)
+		return error;
+
+	irq_flags = goodix_irq_flags[ts->int_trigger_type] | IRQF_ONESHOT;
+	error = devm_request_threaded_irq(&ts->client->dev, client->irq,
+					  NULL, goodix_ts_irq_handler,
+					  irq_flags, client->name, ts);
+	if (error) {
+		dev_err(&client->dev, "request IRQ failed: %d\n", error);
+		return error;
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id goodix_ts_id[] = {
+	{ "GDIX1001:00", 0 },
+	{ }
+};
+
+static const struct acpi_device_id goodix_acpi_match[] = {
+	{ "GDIX1001", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
+
+static struct i2c_driver goodix_ts_driver = {
+	.probe = goodix_ts_probe,
+	.id_table = goodix_ts_id,
+	.driver = {
+		.name = "Goodix-TS",
+		.owner = THIS_MODULE,
+		.acpi_match_table = goodix_acpi_match,
+	},
+};
+module_i2c_driver(goodix_ts_driver);
+
+MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
+MODULE_AUTHOR("Bastien Nocera <hadess@hadess.net>");
+MODULE_DESCRIPTION("Goodix touchscreen driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 2a50891..da6dc81 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -311,8 +311,7 @@ static int ili210x_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ili210x_i2c_suspend(struct device *dev)
+static int __maybe_unused ili210x_i2c_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -322,7 +321,7 @@ static int ili210x_i2c_suspend(struct device *dev)
 	return 0;
 }
 
-static int ili210x_i2c_resume(struct device *dev)
+static int __maybe_unused ili210x_i2c_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -331,7 +330,6 @@ static int ili210x_i2c_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ili210x_i2c_pm,
 			 ili210x_i2c_suspend, ili210x_i2c_resume);
diff --git a/drivers/input/touchscreen/ipaq-micro-ts.c b/drivers/input/touchscreen/ipaq-micro-ts.c
index 62c8976..33c1348 100644
--- a/drivers/input/touchscreen/ipaq-micro-ts.c
+++ b/drivers/input/touchscreen/ipaq-micro-ts.c
@@ -122,8 +122,7 @@ static int micro_ts_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int micro_ts_suspend(struct device *dev)
+static int __maybe_unused micro_ts_suspend(struct device *dev)
 {
 	struct touchscreen_data *ts = dev_get_drvdata(dev);
 
@@ -132,7 +131,7 @@ static int micro_ts_suspend(struct device *dev)
 	return 0;
 }
 
-static int micro_ts_resume(struct device *dev)
+static int __maybe_unused micro_ts_resume(struct device *dev)
 {
 	struct touchscreen_data *ts = dev_get_drvdata(dev);
 	struct input_dev *input = ts->input;
@@ -146,7 +145,6 @@ static int micro_ts_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops micro_ts_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(micro_ts_suspend, micro_ts_resume)
diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c
index 372bbf7..67c0d31 100644
--- a/drivers/input/touchscreen/mms114.c
+++ b/drivers/input/touchscreen/mms114.c
@@ -515,8 +515,7 @@ static int mms114_probe(struct i2c_client *client,
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int mms114_suspend(struct device *dev)
+static int __maybe_unused mms114_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mms114_data *data = i2c_get_clientdata(client);
@@ -540,7 +539,7 @@ static int mms114_suspend(struct device *dev)
 	return 0;
 }
 
-static int mms114_resume(struct device *dev)
+static int __maybe_unused mms114_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct mms114_data *data = i2c_get_clientdata(client);
@@ -559,7 +558,6 @@ static int mms114_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(mms114_pm_ops, mms114_suspend, mms114_resume);
 
diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index fc49c753..4fb5537 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -347,8 +347,7 @@ static void pixcir_input_close(struct input_dev *dev)
 	pixcir_stop(ts);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int pixcir_i2c_ts_suspend(struct device *dev)
+static int __maybe_unused pixcir_i2c_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client);
@@ -377,7 +376,7 @@ unlock:
 	return ret;
 }
 
-static int pixcir_i2c_ts_resume(struct device *dev)
+static int __maybe_unused pixcir_i2c_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct pixcir_i2c_ts_data *ts = i2c_get_clientdata(client);
@@ -405,7 +404,6 @@ unlock:
 
 	return ret;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(pixcir_dev_pm_ops,
 			 pixcir_i2c_ts_suspend, pixcir_i2c_ts_resume);
diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
index 3c0f57e..697e26e 100644
--- a/drivers/input/touchscreen/st1232.c
+++ b/drivers/input/touchscreen/st1232.c
@@ -243,8 +243,7 @@ static int st1232_ts_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int st1232_ts_suspend(struct device *dev)
+static int __maybe_unused st1232_ts_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct st1232_ts_data *ts = i2c_get_clientdata(client);
@@ -259,7 +258,7 @@ static int st1232_ts_suspend(struct device *dev)
 	return 0;
 }
 
-static int st1232_ts_resume(struct device *dev)
+static int __maybe_unused st1232_ts_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct st1232_ts_data *ts = i2c_get_clientdata(client);
@@ -274,8 +273,6 @@ static int st1232_ts_resume(struct device *dev)
 	return 0;
 }
 
-#endif
-
 static SIMPLE_DEV_PM_OPS(st1232_ts_pm_ops,
 			 st1232_ts_suspend, st1232_ts_resume);
 
diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index 52380b6..72657c5 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c
@@ -773,8 +773,7 @@ static int tsc2005_remove(struct spi_device *spi)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int tsc2005_suspend(struct device *dev)
+static int __maybe_unused tsc2005_suspend(struct device *dev)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct tsc2005 *ts = spi_get_drvdata(spi);
@@ -791,7 +790,7 @@ static int tsc2005_suspend(struct device *dev)
 	return 0;
 }
 
-static int tsc2005_resume(struct device *dev)
+static int __maybe_unused tsc2005_resume(struct device *dev)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	struct tsc2005 *ts = spi_get_drvdata(spi);
@@ -807,7 +806,6 @@ static int tsc2005_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(tsc2005_pm_ops, tsc2005_suspend, tsc2005_resume);
 
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 0eca00d..c1e23cf 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -406,8 +406,7 @@ static int ucb1400_ts_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int ucb1400_ts_suspend(struct device *dev)
+static int __maybe_unused ucb1400_ts_suspend(struct device *dev)
 {
 	struct ucb1400_ts *ucb = dev_get_platdata(dev);
 	struct input_dev *idev = ucb->ts_idev;
@@ -421,7 +420,7 @@ static int ucb1400_ts_suspend(struct device *dev)
 	return 0;
 }
 
-static int ucb1400_ts_resume(struct device *dev)
+static int __maybe_unused ucb1400_ts_resume(struct device *dev)
 {
 	struct ucb1400_ts *ucb = dev_get_platdata(dev);
 	struct input_dev *idev = ucb->ts_idev;
@@ -434,7 +433,6 @@ static int ucb1400_ts_resume(struct device *dev)
 	mutex_unlock(&idev->mutex);
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(ucb1400_ts_pm_ops,
 			 ucb1400_ts_suspend, ucb1400_ts_resume);
diff --git a/drivers/input/touchscreen/wacom_i2c.c b/drivers/input/touchscreen/wacom_i2c.c
index 7ccaa1b..32f8ac0 100644
--- a/drivers/input/touchscreen/wacom_i2c.c
+++ b/drivers/input/touchscreen/wacom_i2c.c
@@ -242,8 +242,7 @@ static int wacom_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int wacom_i2c_suspend(struct device *dev)
+static int __maybe_unused wacom_i2c_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -252,7 +251,7 @@ static int wacom_i2c_suspend(struct device *dev)
 	return 0;
 }
 
-static int wacom_i2c_resume(struct device *dev)
+static int __maybe_unused wacom_i2c_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 
@@ -260,7 +259,6 @@ static int wacom_i2c_resume(struct device *dev)
 
 	return 0;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(wacom_i2c_pm, wacom_i2c_suspend, wacom_i2c_resume);
 
diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c
index 8ba48f5..19880c7 100644
--- a/drivers/input/touchscreen/zforce_ts.c
+++ b/drivers/input/touchscreen/zforce_ts.c
@@ -602,8 +602,7 @@ static void zforce_input_close(struct input_dev *dev)
 	return;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int zforce_suspend(struct device *dev)
+static int __maybe_unused zforce_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct zforce_ts *ts = i2c_get_clientdata(client);
@@ -648,7 +647,7 @@ unlock:
 	return ret;
 }
 
-static int zforce_resume(struct device *dev)
+static int __maybe_unused zforce_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct zforce_ts *ts = i2c_get_clientdata(client);
@@ -685,7 +684,6 @@ unlock:
 
 	return ret;
 }
-#endif
 
 static SIMPLE_DEV_PM_OPS(zforce_pm_ops, zforce_suspend, zforce_resume);
 
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b205f76..9802485 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4071,7 +4071,7 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
 	int devid;
 	int ret;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4134,7 +4134,7 @@ static int set_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (!config_enabled(CONFIG_SMP))
 		return -1;
 
-	cfg       = data->chip_data;
+	cfg       = irqd_cfg(data);
 	irq       = data->irq;
 	irte_info = &cfg->irq_2_irte;
 
@@ -4172,7 +4172,7 @@ static int free_irq(int irq)
 	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4191,7 +4191,7 @@ static void compose_msi_msg(struct pci_dev *pdev,
 	struct irq_cfg *cfg;
 	union irte irte;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return;
 
@@ -4220,7 +4220,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec)
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4240,7 +4240,7 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4263,7 +4263,7 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int id)
 	struct irq_cfg *cfg;
 	int index, devid;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 27541d4..a55b207 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -54,7 +54,7 @@ static int __init parse_ioapics_under_ir(void);
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	return cfg ? &cfg->irq_2_iommu : NULL;
 }
 
@@ -85,7 +85,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 {
 	struct ir_table *table = iommu->ir_table;
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned int mask = 0;
 	unsigned long flags;
 	int index;
@@ -153,7 +153,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	if (!irq_iommu)
@@ -1050,7 +1050,7 @@ static int
 intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			  bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest, irq = data->irq;
 	struct irte irte;
 	int err;
@@ -1105,7 +1105,7 @@ static void intel_compose_msi_msg(struct pci_dev *pdev,
 	u16 sub_handle = 0;
 	int ir_index;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 
 	ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 	BUG_ON(ir_index == -1);
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2c3f5ad..89c4846 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -298,7 +298,7 @@ static int set_remapped_irq_affinity(struct irq_data *data,
 
 void free_remapped_irq(int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!remap_ops || !remap_ops->free_irq)
 		return;
@@ -311,7 +311,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev,
 			      unsigned int irq, unsigned int dest,
 			      struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!irq_remapped(cfg))
 		native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -364,7 +364,7 @@ static void ir_ack_apic_edge(struct irq_data *data)
 static void ir_ack_apic_level(struct irq_data *data)
 {
 	ack_APIC_irq();
-	eoi_ioapic_irq(data->irq, data->chip_data);
+	eoi_ioapic_irq(data->irq, irqd_cfg(data));
 }
 
 static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 8a8ba11..7ea1ea42 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -203,7 +203,7 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
 		fwnode_property_read_string(child, "linux,default-trigger",
 					    &led.default_trigger);
 
-		if (!fwnode_property_read_string(child, "linux,default_state",
+		if (!fwnode_property_read_string(child, "default-state",
 						 &state)) {
 			if (!strcmp(state, "keep"))
 				led.default_state = LEDS_GPIO_DEFSTATE_KEEP;
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 3c89fcb..49cd308 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -160,7 +160,6 @@ source "drivers/media/usb/Kconfig"
 source "drivers/media/pci/Kconfig"
 source "drivers/media/platform/Kconfig"
 source "drivers/media/mmc/Kconfig"
-source "drivers/media/parport/Kconfig"
 source "drivers/media/radio/Kconfig"
 
 comment "Supported FireWire (IEEE 1394) Adapters"
diff --git a/drivers/media/Makefile b/drivers/media/Makefile
index 620f275..e608bbc 100644
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile
@@ -28,6 +28,6 @@ obj-y += rc/
 # Finally, merge the drivers that require the core
 #
 
-obj-y += common/ platform/ pci/ usb/ mmc/ firewire/ parport/
+obj-y += common/ platform/ pci/ usb/ mmc/ firewire/
 obj-$(CONFIG_VIDEO_DEV) += radio/
 
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index f40b4cf..205d713 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -284,15 +284,6 @@ config VIDEO_SAA711X
 	  To compile this driver as a module, choose M here: the
 	  module will be called saa7115.
 
-config VIDEO_SAA7191
-	tristate "Philips SAA7191 video decoder"
-	depends on VIDEO_V4L2 && I2C
-	---help---
-	  Support for the Philips SAA7191 video decoder.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called saa7191.
-
 config VIDEO_TVP514X
 	tristate "Texas Instruments TVP514x video decoder"
 	depends on VIDEO_V4L2 && I2C
diff --git a/drivers/media/i2c/Makefile b/drivers/media/i2c/Makefile
index 01ae932..98589001 100644
--- a/drivers/media/i2c/Makefile
+++ b/drivers/media/i2c/Makefile
@@ -18,7 +18,6 @@ obj-$(CONFIG_VIDEO_SAA711X) += saa7115.o
 obj-$(CONFIG_VIDEO_SAA717X) += saa717x.o
 obj-$(CONFIG_VIDEO_SAA7127) += saa7127.o
 obj-$(CONFIG_VIDEO_SAA7185) += saa7185.o
-obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
 obj-$(CONFIG_VIDEO_SAA6752HS) += saa6752hs.o
 obj-$(CONFIG_VIDEO_ADV7170) += adv7170.o
 obj-$(CONFIG_VIDEO_ADV7175) += adv7175.o
diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c
index 4160ca4..d3c79d9 100644
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c
@@ -647,6 +647,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
 	dev->ts_packet_size  = 188 * 4;
 	dev->ts_packet_count  = 32;
 	sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -662,14 +663,11 @@ static void buffer_finish(struct vb2_buffer *vb)
 {
 	struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c
index c344bfd..5780e2f 100644
--- a/drivers/media/pci/cx88/cx88-dvb.c
+++ b/drivers/media/pci/cx88/cx88-dvb.c
@@ -92,6 +92,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
 	dev->ts_packet_size  = 188 * 4;
 	dev->ts_packet_count = dvb_buf_tscnt;
 	sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	*num_buffers = dvb_buf_tscnt;
 	return 0;
 }
@@ -108,14 +109,11 @@ static void buffer_finish(struct vb2_buffer *vb)
 {
 	struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
index f181a3a..1c1f69e 100644
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c
@@ -235,10 +235,6 @@ int cx8802_buf_prepare(struct vb2_queue *q, struct cx8802_dev *dev,
 		return -EINVAL;
 	vb2_set_plane_payload(&buf->vb, 0, size);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	rc = cx88_risc_databuffer(dev->pci, risc, sgt->sgl,
 			     dev->ts_packet_size, dev->ts_packet_count, 0);
 	if (rc) {
@@ -733,6 +729,11 @@ static int cx8802_probe(struct pci_dev *pci_dev,
 	if (NULL == dev)
 		goto fail_core;
 	dev->pci = pci_dev;
+	dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+	if (IS_ERR(dev->alloc_ctx)) {
+		err = PTR_ERR(dev->alloc_ctx);
+		goto fail_free;	/* dev is already allocated: fail_core alone would leak it */
+	}
 	dev->core = core;
 
 	/* Maintain a reference so cx88-video can query the 8802 device. */
@@ -752,6 +753,7 @@ static int cx8802_probe(struct pci_dev *pci_dev,
 	return 0;
 
  fail_free:
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
  fail_core:
 	core->dvbdev = NULL;
@@ -798,6 +800,7 @@ static void cx8802_remove(struct pci_dev *pci_dev)
 	/* common */
 	cx8802_fini_common(dev);
 	cx88_core_put(dev->core,dev->pci);
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
 }
 
diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c
index 6ab6e27..32eb7fd 100644
--- a/drivers/media/pci/cx88/cx88-vbi.c
+++ b/drivers/media/pci/cx88/cx88-vbi.c
@@ -120,6 +120,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
 		sizes[0] = VBI_LINE_NTSC_COUNT * VBI_LINE_LENGTH * 2;
 	else
 		sizes[0] = VBI_LINE_PAL_COUNT * VBI_LINE_LENGTH * 2;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -131,7 +132,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
 	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	unsigned int lines;
 	unsigned int size;
-	int rc;
 
 	if (dev->core->tvnorm & V4L2_STD_525_60)
 		lines = VBI_LINE_NTSC_COUNT;
@@ -142,10 +142,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
 		return -EINVAL;
 	vb2_set_plane_payload(vb, 0, size);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl,
 			 0, VBI_LINE_LENGTH * lines,
 			 VBI_LINE_LENGTH, 0,
@@ -157,14 +153,11 @@ static void buffer_finish(struct vb2_buffer *vb)
 {
 	struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index a64ae31..860c98fc 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -440,6 +440,7 @@ static int queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
 
 	*num_planes = 1;
 	sizes[0] = (dev->fmt->depth * core->width * core->height) >> 3;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -449,7 +450,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
 	struct cx88_core *core = dev->core;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
 	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
-	int rc;
 
 	buf->bpl = core->width * dev->fmt->depth >> 3;
 
@@ -457,10 +457,6 @@ static int buffer_prepare(struct vb2_buffer *vb)
 		return -EINVAL;
 	vb2_set_plane_payload(vb, 0, core->height * buf->bpl);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	switch (core->field) {
 	case V4L2_FIELD_TOP:
 		cx88_risc_buffer(dev->pci, &buf->risc,
@@ -505,14 +501,11 @@ static void buffer_finish(struct vb2_buffer *vb)
 {
 	struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)
@@ -530,7 +523,6 @@ static void buffer_queue(struct vb2_buffer *vb)
 
 	if (list_empty(&q->active)) {
 		list_add_tail(&buf->list, &q->active);
-		start_video_dma(dev, q, buf);
 		buf->count    = q->count++;
 		dprintk(2,"[%p/%d] buffer_queue - first active\n",
 			buf, buf->vb.v4l2_buf.index);
@@ -1345,6 +1337,12 @@ static int cx8800_initdev(struct pci_dev *pci_dev,
 		err = -EIO;
 		goto fail_core;
 	}
+	dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+	if (IS_ERR(dev->alloc_ctx)) {
+		err = PTR_ERR(dev->alloc_ctx);
+		goto fail_core;
+	}
+
 
 	/* initialize driver struct */
 	spin_lock_init(&dev->slock);
@@ -1549,6 +1547,7 @@ fail_unreg:
 	free_irq(pci_dev->irq, dev);
 	mutex_unlock(&core->lock);
 fail_core:
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	core->v4ldev = NULL;
 	cx88_core_put(core,dev->pci);
 fail_free:
@@ -1582,6 +1581,7 @@ static void cx8800_finidev(struct pci_dev *pci_dev)
 
 	/* free memory */
 	cx88_core_put(core,dev->pci);
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
 }
 
diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h
index 3b0ae75..7748ca9 100644
--- a/drivers/media/pci/cx88/cx88.h
+++ b/drivers/media/pci/cx88/cx88.h
@@ -485,6 +485,7 @@ struct cx8800_dev {
 	/* pci i/o */
 	struct pci_dev             *pci;
 	unsigned char              pci_rev,pci_lat;
+	void			   *alloc_ctx;
 
 	const struct cx8800_fmt    *fmt;
 
@@ -548,6 +549,7 @@ struct cx8802_dev {
 	/* pci i/o */
 	struct pci_dev             *pci;
 	unsigned char              pci_rev,pci_lat;
+	void			   *alloc_ctx;
 
 	/* dma queues */
 	struct cx88_dmaqueue       mpegq;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 0c61155..765bffb 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -65,14 +65,6 @@ config VIDEO_TIMBERDALE
 	---help---
 	  Add support for the Video In peripherial of the timberdale FPGA.
 
-config VIDEO_VINO
-	tristate "SGI Vino Video For Linux"
-	depends on I2C && SGI_IP22 && VIDEO_V4L2
-	select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
-	help
-	  Say Y here to build in support for the Vino video input system found
-	  on SGI Indy machines.
-
 config VIDEO_M32R_AR
 	tristate "AR devices"
 	depends on VIDEO_V4L2
@@ -112,7 +104,7 @@ config VIDEO_OMAP3_DEBUG
 config VIDEO_S3C_CAMIF
 	tristate "Samsung S3C24XX/S3C64XX SoC Camera Interface driver"
 	depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API
-	depends on PM_RUNTIME
+	depends on PM
 	depends on ARCH_S3C64XX || PLAT_S3C24XX || COMPILE_TEST
 	depends on HAS_DMA
 	select VIDEOBUF2_DMA_CONTIG
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index b818afb..a49936b 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -2,9 +2,6 @@
 # Makefile for the video capture/playback device drivers.
 #
 
-obj-$(CONFIG_VIDEO_VINO) += indycam.o
-obj-$(CONFIG_VIDEO_VINO) += vino.o
-
 obj-$(CONFIG_VIDEO_TIMBERDALE)	+= timblogiw.o
 obj-$(CONFIG_VIDEO_M32R_AR_M64278) += arv.o
 
diff --git a/drivers/media/platform/s5p-tv/Kconfig b/drivers/media/platform/s5p-tv/Kconfig
index beb180e..5a1835d 100644
--- a/drivers/media/platform/s5p-tv/Kconfig
+++ b/drivers/media/platform/s5p-tv/Kconfig
@@ -8,7 +8,7 @@
 
 config VIDEO_SAMSUNG_S5P_TV
 	bool "Samsung TV driver for S5P platform"
-	depends on PM_RUNTIME
+	depends on PM
 	depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
 	default n
 	---help---
diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c
index 126ac7c..0c1f556 100644
--- a/drivers/media/platform/soc_camera/rcar_vin.c
+++ b/drivers/media/platform/soc_camera/rcar_vin.c
@@ -64,6 +64,30 @@
 #define VNDMR_REG	0x58	/* Video n Data Mode Register */
 #define VNDMR2_REG	0x5C	/* Video n Data Mode Register 2 */
 #define VNUVAOF_REG	0x60	/* Video n UV Address Offset Register */
+#define VNC1A_REG	0x80	/* Video n Coefficient Set C1A Register */
+#define VNC1B_REG	0x84	/* Video n Coefficient Set C1B Register */
+#define VNC1C_REG	0x88	/* Video n Coefficient Set C1C Register */
+#define VNC2A_REG	0x90	/* Video n Coefficient Set C2A Register */
+#define VNC2B_REG	0x94	/* Video n Coefficient Set C2B Register */
+#define VNC2C_REG	0x98	/* Video n Coefficient Set C2C Register */
+#define VNC3A_REG	0xA0	/* Video n Coefficient Set C3A Register */
+#define VNC3B_REG	0xA4	/* Video n Coefficient Set C3B Register */
+#define VNC3C_REG	0xA8	/* Video n Coefficient Set C3C Register */
+#define VNC4A_REG	0xB0	/* Video n Coefficient Set C4A Register */
+#define VNC4B_REG	0xB4	/* Video n Coefficient Set C4B Register */
+#define VNC4C_REG	0xB8	/* Video n Coefficient Set C4C Register */
+#define VNC5A_REG	0xC0	/* Video n Coefficient Set C5A Register */
+#define VNC5B_REG	0xC4	/* Video n Coefficient Set C5B Register */
+#define VNC5C_REG	0xC8	/* Video n Coefficient Set C5C Register */
+#define VNC6A_REG	0xD0	/* Video n Coefficient Set C6A Register */
+#define VNC6B_REG	0xD4	/* Video n Coefficient Set C6B Register */
+#define VNC6C_REG	0xD8	/* Video n Coefficient Set C6C Register */
+#define VNC7A_REG	0xE0	/* Video n Coefficient Set C7A Register */
+#define VNC7B_REG	0xE4	/* Video n Coefficient Set C7B Register */
+#define VNC7C_REG	0xE8	/* Video n Coefficient Set C7C Register */
+#define VNC8A_REG	0xF0	/* Video n Coefficient Set C8A Register */
+#define VNC8B_REG	0xF4	/* Video n Coefficient Set C8B Register */
+#define VNC8C_REG	0xF8	/* Video n Coefficient Set C8C Register */
 
 /* Register bit fields for R-Car VIN */
 /* Video n Main Control Register bits */
@@ -106,6 +130,7 @@
 #define VNDMR2_VPS		(1 << 30)
 #define VNDMR2_HPS		(1 << 29)
 #define VNDMR2_FTEV		(1 << 17)
+#define VNDMR2_VLV(n)		(((n) & 0xf) << 12)	/* 4-bit value at bits 15:12 */
 
 #define VIN_MAX_WIDTH		2048
 #define VIN_MAX_HEIGHT		2048
@@ -117,6 +142,324 @@ enum chip_id {
 	RCAR_E1,
 };
 
+struct vin_coeff {
+	unsigned short xs_value;	/* XS value set_coeff() matches against */
+	u32 coeff_set[24];		/* written to VNC1A..VNC8C, in order */
+};
+
+static const struct vin_coeff vin_coeff_set[] = {
+	{ 0x0000, {
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000 },
+	},
+	{ 0x1000, {
+		0x000fa400,		0x000fa400,		0x09625902,
+		0x000003f8,		0x00000403,		0x3de0d9f0,
+		0x001fffed,		0x00000804,		0x3cc1f9c3,
+		0x001003de,		0x00000c01,		0x3cb34d7f,
+		0x002003d2,		0x00000c00,		0x3d24a92d,
+		0x00200bca,		0x00000bff,		0x3df600d2,
+		0x002013cc,		0x000007ff,		0x3ed70c7e,
+		0x00100fde,		0x00000000,		0x3f87c036 },
+	},
+	{ 0x1200, {
+		0x002ffff1,		0x002ffff1,		0x02a0a9c8,
+		0x002003e7,		0x001ffffa,		0x000185bc,
+		0x002007dc,		0x000003ff,		0x3e52859c,
+		0x00200bd4,		0x00000002,		0x3d53996b,
+		0x00100fd0,		0x00000403,		0x3d04ad2d,
+		0x00000bd5,		0x00000403,		0x3d35ace7,
+		0x3ff003e4,		0x00000801,		0x3dc674a1,
+		0x3fffe800,		0x00000800,		0x3e76f461 },
+	},
+	{ 0x1400, {
+		0x00100be3,		0x00100be3,		0x04d1359a,
+		0x00000fdb,		0x002003ed,		0x0211fd93,
+		0x00000fd6,		0x002003f4,		0x0002d97b,
+		0x000007d6,		0x002ffffb,		0x3e93b956,
+		0x3ff003da,		0x001003ff,		0x3db49926,
+		0x3fffefe9,		0x00100001,		0x3d655cee,
+		0x3fffd400,		0x00000003,		0x3d65f4b6,
+		0x000fb421,		0x00000402,		0x3dc6547e },
+	},
+	{ 0x1600, {
+		0x00000bdd,		0x00000bdd,		0x06519578,
+		0x3ff007da,		0x00000be3,		0x03c24973,
+		0x3ff003d9,		0x00000be9,		0x01b30d5f,
+		0x3ffff7df,		0x001003f1,		0x0003c542,
+		0x000fdfec,		0x001003f7,		0x3ec4711d,
+		0x000fc400,		0x002ffffd,		0x3df504f1,
+		0x001fa81a,		0x002ffc00,		0x3d957cc2,
+		0x002f8c3c,		0x00100000,		0x3db5c891 },
+	},
+	{ 0x1800, {
+		0x3ff003dc,		0x3ff003dc,		0x0791e558,
+		0x000ff7dd,		0x3ff007de,		0x05328554,
+		0x000fe7e3,		0x3ff00be2,		0x03232546,
+		0x000fd7ee,		0x000007e9,		0x0143bd30,
+		0x001fb800,		0x000007ee,		0x00044511,
+		0x002fa015,		0x000007f4,		0x3ef4bcee,
+		0x002f8832,		0x001003f9,		0x3e4514c7,
+		0x001f7853,		0x001003fd,		0x3de54c9f },
+	},
+	{ 0x1a00, {
+		0x000fefe0,		0x000fefe0,		0x08721d3c,
+		0x001fdbe7,		0x000ffbde,		0x0652a139,
+		0x001fcbf0,		0x000003df,		0x0463292e,
+		0x002fb3ff,		0x3ff007e3,		0x0293a91d,
+		0x002f9c12,		0x3ff00be7,		0x01241905,
+		0x001f8c29,		0x000007ed,		0x3fe470eb,
+		0x000f7c46,		0x000007f2,		0x3f04b8ca,
+		0x3fef7865,		0x000007f6,		0x3e74e4a8 },
+	},
+	{ 0x1c00, {
+		0x001fd3e9,		0x001fd3e9,		0x08f23d26,
+		0x002fbff3,		0x001fe3e4,		0x0712ad23,
+		0x002fa800,		0x000ff3e0,		0x05631d1b,
+		0x001f9810,		0x000ffbe1,		0x03b3890d,
+		0x000f8c23,		0x000003e3,		0x0233e8fa,
+		0x3fef843b,		0x000003e7,		0x00f430e4,
+		0x3fbf8456,		0x3ff00bea,		0x00046cc8,
+		0x3f8f8c72,		0x3ff00bef,		0x3f3490ac },
+	},
+	{ 0x1e00, {
+		0x001fbbf4,		0x001fbbf4,		0x09425112,
+		0x001fa800,		0x002fc7ed,		0x0792b110,
+		0x000f980e,		0x001fdbe6,		0x0613110a,
+		0x3fff8c20,		0x001fe7e3,		0x04a368fd,
+		0x3fcf8c33,		0x000ff7e2,		0x0343b8ed,
+		0x3f9f8c4a,		0x000fffe3,		0x0203f8da,
+		0x3f5f9c61,		0x000003e6,		0x00e428c5,
+		0x3f1fb07b,		0x000003eb,		0x3fe440af },
+	},
+	{ 0x2000, {
+		0x000fa400,		0x000fa400,		0x09625902,
+		0x3fff980c,		0x001fb7f5,		0x0812b0ff,
+		0x3fdf901c,		0x001fc7ed,		0x06b2fcfa,
+		0x3faf902d,		0x001fd3e8,		0x055348f1,
+		0x3f7f983f,		0x001fe3e5,		0x04038ce3,
+		0x3f3fa454,		0x001fefe3,		0x02e3c8d1,
+		0x3f0fb86a,		0x001ff7e4,		0x01c3e8c0,
+		0x3ecfd880,		0x000fffe6,		0x00c404ac },
+	},
+	{ 0x2200, {
+		0x3fdf9c0b,		0x3fdf9c0b,		0x09725cf4,
+		0x3fbf9818,		0x3fffa400,		0x0842a8f1,
+		0x3f8f9827,		0x000fb3f7,		0x0702f0ec,
+		0x3f5fa037,		0x000fc3ef,		0x05d330e4,
+		0x3f2fac49,		0x001fcfea,		0x04a364d9,
+		0x3effc05c,		0x001fdbe7,		0x038394ca,
+		0x3ecfdc6f,		0x001fe7e6,		0x0273b0bb,
+		0x3ea00083,		0x001fefe6,		0x0183c0a9 },
+	},
+	{ 0x2400, {
+		0x3f9fa014,		0x3f9fa014,		0x098260e6,
+		0x3f7f9c23,		0x3fcf9c0a,		0x08629ce5,
+		0x3f4fa431,		0x3fefa400,		0x0742d8e1,
+		0x3f1fb440,		0x3fffb3f8,		0x062310d9,
+		0x3eefc850,		0x000fbbf2,		0x050340d0,
+		0x3ecfe062,		0x000fcbec,		0x041364c2,
+		0x3ea00073,		0x001fd3ea,		0x03037cb5,
+		0x3e902086,		0x001fdfe8,		0x022388a5 },
+	},
+	{ 0x2600, {
+		0x3f5fa81e,		0x3f5fa81e,		0x096258da,
+		0x3f3fac2b,		0x3f8fa412,		0x088290d8,
+		0x3f0fbc38,		0x3fafa408,		0x0772c8d5,
+		0x3eefcc47,		0x3fcfa800,		0x0672f4ce,
+		0x3ecfe456,		0x3fefaffa,		0x05531cc6,
+		0x3eb00066,		0x3fffbbf3,		0x047334bb,
+		0x3ea01c77,		0x000fc7ee,		0x039348ae,
+		0x3ea04486,		0x000fd3eb,		0x02b350a1 },
+	},
+	{ 0x2800, {
+		0x3f2fb426,		0x3f2fb426,		0x094250ce,
+		0x3f0fc032,		0x3f4fac1b,		0x086284cd,
+		0x3eefd040,		0x3f7fa811,		0x0782acc9,
+		0x3ecfe84c,		0x3f9fa807,		0x06a2d8c4,
+		0x3eb0005b,		0x3fbfac00,		0x05b2f4bc,
+		0x3eb0186a,		0x3fdfb3fa,		0x04c308b4,
+		0x3eb04077,		0x3fefbbf4,		0x03f31ca8,
+		0x3ec06884,		0x000fbff2,		0x03031c9e },
+	},
+	{ 0x2a00, {
+		0x3f0fc42d,		0x3f0fc42d,		0x090240c4,
+		0x3eefd439,		0x3f2fb822,		0x08526cc2,
+		0x3edfe845,		0x3f4fb018,		0x078294bf,
+		0x3ec00051,		0x3f6fac0f,		0x06b2b4bb,
+		0x3ec0185f,		0x3f8fac07,		0x05e2ccb4,
+		0x3ec0386b,		0x3fafac00,		0x0502e8ac,
+		0x3ed05c77,		0x3fcfb3fb,		0x0432f0a3,
+		0x3ef08482,		0x3fdfbbf6,		0x0372f898 },
+	},
+	{ 0x2c00, {
+		0x3eefdc31,		0x3eefdc31,		0x08e238b8,
+		0x3edfec3d,		0x3f0fc828,		0x082258b9,
+		0x3ed00049,		0x3f1fc01e,		0x077278b6,
+		0x3ed01455,		0x3f3fb815,		0x06c294b2,
+		0x3ed03460,		0x3f5fb40d,		0x0602acac,
+		0x3ef0506c,		0x3f7fb006,		0x0542c0a4,
+		0x3f107476,		0x3f9fb400,		0x0472c89d,
+		0x3f309c80,		0x3fbfb7fc,		0x03b2cc94 },
+	},
+	{ 0x2e00, {
+		0x3eefec37,		0x3eefec37,		0x088220b0,
+		0x3ee00041,		0x3effdc2d,		0x07f244ae,
+		0x3ee0144c,		0x3f0fd023,		0x07625cad,
+		0x3ef02c57,		0x3f1fc81a,		0x06c274a9,
+		0x3f004861,		0x3f3fbc13,		0x060288a6,
+		0x3f20686b,		0x3f5fb80c,		0x05529c9e,
+		0x3f408c74,		0x3f6fb805,		0x04b2ac96,
+		0x3f80ac7e,		0x3f8fb800,		0x0402ac8e },
+	},
+	{ 0x3000, {
+		0x3ef0003a,		0x3ef0003a,		0x084210a6,
+		0x3ef01045,		0x3effec32,		0x07b228a7,
+		0x3f00284e,		0x3f0fdc29,		0x073244a4,
+		0x3f104058,		0x3f0fd420,		0x06a258a2,
+		0x3f305c62,		0x3f2fc818,		0x0612689d,
+		0x3f508069,		0x3f3fc011,		0x05728496,
+		0x3f80a072,		0x3f4fc00a,		0x04d28c90,
+		0x3fc0c07b,		0x3f6fbc04,		0x04429088 },
+	},
+	{ 0x3200, {
+		0x3f00103e,		0x3f00103e,		0x07f1fc9e,
+		0x3f102447,		0x3f000035,		0x0782149d,
+		0x3f203c4f,		0x3f0ff02c,		0x07122c9c,
+		0x3f405458,		0x3f0fe424,		0x06924099,
+		0x3f607061,		0x3f1fd41d,		0x06024c97,
+		0x3f909068,		0x3f2fcc16,		0x05726490,
+		0x3fc0b070,		0x3f3fc80f,		0x04f26c8a,
+		0x0000d077,		0x3f4fc409,		0x04627484 },
+	},
+	{ 0x3400, {
+		0x3f202040,		0x3f202040,		0x07a1e898,
+		0x3f303449,		0x3f100c38,		0x0741fc98,
+		0x3f504c50,		0x3f10002f,		0x06e21495,
+		0x3f706459,		0x3f1ff028,		0x06722492,
+		0x3fa08060,		0x3f1fe421,		0x05f2348f,
+		0x3fd09c67,		0x3f1fdc19,		0x05824c89,
+		0x0000bc6e,		0x3f2fd014,		0x04f25086,
+		0x0040dc74,		0x3f3fcc0d,		0x04825c7f },
+	},
+	{ 0x3600, {
+		0x3f403042,		0x3f403042,		0x0761d890,
+		0x3f504848,		0x3f301c3b,		0x0701f090,
+		0x3f805c50,		0x3f200c33,		0x06a2008f,
+		0x3fa07458,		0x3f10002b,		0x06520c8d,
+		0x3fd0905e,		0x3f1ff424,		0x05e22089,
+		0x0000ac65,		0x3f1fe81d,		0x05823483,
+		0x0030cc6a,		0x3f2fdc18,		0x04f23c81,
+		0x0080e871,		0x3f2fd412,		0x0482407c },
+	},
+	{ 0x3800, {
+		0x3f604043,		0x3f604043,		0x0721c88a,
+		0x3f80544a,		0x3f502c3c,		0x06d1d88a,
+		0x3fb06851,		0x3f301c35,		0x0681e889,
+		0x3fd08456,		0x3f30082f,		0x0611fc88,
+		0x00009c5d,		0x3f200027,		0x05d20884,
+		0x0030b863,		0x3f2ff421,		0x05621880,
+		0x0070d468,		0x3f2fe81b,		0x0502247c,
+		0x00c0ec6f,		0x3f2fe015,		0x04a22877 },
+	},
+	{ 0x3a00, {
+		0x3f904c44,		0x3f904c44,		0x06e1b884,
+		0x3fb0604a,		0x3f70383e,		0x0691c885,
+		0x3fe07451,		0x3f502c36,		0x0661d483,
+		0x00009055,		0x3f401831,		0x0601ec81,
+		0x0030a85b,		0x3f300c2a,		0x05b1f480,
+		0x0070c061,		0x3f300024,		0x0562047a,
+		0x00b0d867,		0x3f3ff41e,		0x05020c77,
+		0x00f0f46b,		0x3f2fec19,		0x04a21474 },
+	},
+	{ 0x3c00, {	/* NOTE(review): 0x00b0fc6a looks off-trend, verify */
+		0x3fb05c43,		0x3fb05c43,		0x06c1b07e,
+		0x3fe06c4b,		0x3f902c3f,		0x0681c081,
+		0x0000844f,		0x3f703838,		0x0631cc7d,
+		0x00309855,		0x3f602433,		0x05d1d47e,
+		0x0060b459,		0x3f50142e,		0x0581e47b,
+		0x00a0c85f,		0x3f400828,		0x0531f078,
+		0x00e0e064,		0x3f300021,		0x0501fc73,
+		0x00b0fc6a,		0x3f3ff41d,		0x04a20873 },
+	},
+	{ 0x3e00, {
+		0x3fe06444,		0x3fe06444,		0x0681a07a,
+		0x00007849,		0x3fc0503f,		0x0641b07a,
+		0x0020904d,		0x3fa0403a,		0x05f1c07a,
+		0x0060a453,		0x3f803034,		0x05c1c878,
+		0x0090b858,		0x3f70202f,		0x0571d477,
+		0x00d0d05d,		0x3f501829,		0x0531e073,
+		0x0110e462,		0x3f500825,		0x04e1e471,
+		0x01510065,		0x3f40001f,		0x04a1f06d },
+	},
+	{ 0x4000, {
+		0x00007044,		0x00007044,		0x06519476,
+		0x00208448,		0x3fe05c3f,		0x0621a476,
+		0x0050984d,		0x3fc04c3a,		0x05e1b075,
+		0x0080ac52,		0x3fa03c35,		0x05a1b875,
+		0x00c0c056,		0x3f803030,		0x0561c473,
+		0x0100d45b,		0x3f70202b,		0x0521d46f,
+		0x0140e860,		0x3f601427,		0x04d1d46e,
+		0x01810064,		0x3f500822,		0x0491dc6b },
+	},
+	{ 0x5000, {
+		0x0110a442,		0x0110a442,		0x0551545e,
+		0x0140b045,		0x00e0983f,		0x0531585f,
+		0x0160c047,		0x00c08c3c,		0x0511645e,
+		0x0190cc4a,		0x00908039,		0x04f1685f,
+		0x01c0dc4c,		0x00707436,		0x04d1705e,
+		0x0200e850,		0x00506833,		0x04b1785b,
+		0x0230f453,		0x00305c30,		0x0491805a,
+		0x02710056,		0x0010542d,		0x04718059 },
+	},
+	{ 0x6000, {
+		0x01c0bc40,		0x01c0bc40,		0x04c13052,
+		0x01e0c841,		0x01a0b43d,		0x04c13851,
+		0x0210cc44,		0x0180a83c,		0x04a13453,
+		0x0230d845,		0x0160a03a,		0x04913c52,
+		0x0260e047,		0x01409838,		0x04714052,
+		0x0280ec49,		0x01208c37,		0x04514c50,
+		0x02b0f44b,		0x01008435,		0x04414c50,
+		0x02d1004c,		0x00e07c33,		0x0431544f },
+	},
+	{ 0x7000, {
+		0x0230c83e,		0x0230c83e,		0x04711c4c,
+		0x0250d03f,		0x0210c43c,		0x0471204b,
+		0x0270d840,		0x0200b83c,		0x0451244b,
+		0x0290dc42,		0x01e0b43a,		0x0441244c,
+		0x02b0e443,		0x01c0b038,		0x0441284b,
+		0x02d0ec44,		0x01b0a438,		0x0421304a,
+		0x02f0f445,		0x0190a036,		0x04213449,
+		0x0310f847,		0x01709c34,		0x04213848 },
+	},
+	{ 0x8000, {
+		0x0280d03d,		0x0280d03d,		0x04310c48,
+		0x02a0d43e,		0x0270c83c,		0x04311047,
+		0x02b0dc3e,		0x0250c83a,		0x04311447,
+		0x02d0e040,		0x0240c03a,		0x04211446,
+		0x02e0e840,		0x0220bc39,		0x04111847,
+		0x0300e842,		0x0210b438,		0x04012445,
+		0x0310f043,		0x0200b037,		0x04012045,
+		0x0330f444,		0x01e0ac36,		0x03f12445 },
+	},
+	{ 0xefff, {
+		0x0340dc3a,		0x0340dc3a,		0x03b0ec40,
+		0x0340e03a,		0x0330e039,		0x03c0f03e,
+		0x0350e03b,		0x0330dc39,		0x03c0ec3e,
+		0x0350e43a,		0x0320dc38,		0x03c0f43e,
+		0x0360e43b,		0x0320d839,		0x03b0f03e,
+		0x0360e83b,		0x0310d838,		0x03c0fc3b,
+		0x0370e83b,		0x0310d439,		0x03a0f83d,
+		0x0370e83c,		0x0300d438,		0x03b0fc3c },
+	}
+};
+
 enum rcar_vin_state {
 	STOPPED = 0,
 	RUNNING,
@@ -161,6 +504,9 @@ struct rcar_vin_cam {
 	/* Client output, as seen by the VIN */
 	unsigned int			width;
 	unsigned int			height;
+	/* User window from S_FMT */
+	unsigned int out_width;
+	unsigned int out_height;
 	/*
 	 * User window from S_CROP / G_CROP, produced by client cropping and
 	 * scaling, VIN scaling and VIN cropping, mapped back onto the client
@@ -332,7 +678,7 @@ static int rcar_vin_setup(struct rcar_vin_priv *priv)
 		vnmc |= VNMC_BPS;
 
 	/* progressive or interlaced mode */
-	interrupts = progressive ? VNIE_FIE | VNIE_EFE : VNIE_EFE;
+	interrupts = progressive ? VNIE_FIE : VNIE_EFE;
 
 	/* ack interrupts */
 	iowrite32(interrupts, priv->base + VNINTS_REG);
@@ -667,6 +1013,60 @@ static void rcar_vin_clock_stop(struct soc_camera_host *ici)
 	/* VIN does not have "mclk" */
 }
 
+static void set_coeff(struct rcar_vin_priv *priv, unsigned short xs)
+{
+	int i;
+	const struct vin_coeff *p_prev_set = NULL;	/* entry before p_set */
+	const struct vin_coeff *p_set = NULL;		/* entry to program */
+
+	/* Stop at the first table entry whose XS value exceeds xs */
+	for (i = 0; i < ARRAY_SIZE(vin_coeff_set); i++) {
+		p_prev_set = p_set;
+		p_set = &vin_coeff_set[i];
+
+		if (xs < p_set->xs_value)
+			break;	/* no break at all leaves p_set on the last entry */
+	}
+
+	/* Use previous value if its XS value is closer */
+	if (p_prev_set && p_set &&
+	    xs - p_prev_set->xs_value < p_set->xs_value - xs)
+		p_set = p_prev_set;
+
+	/* Write coeff_set[0..23] to VNC1A..VNC8C, three words per set */
+	iowrite32(p_set->coeff_set[0], priv->base + VNC1A_REG);
+	iowrite32(p_set->coeff_set[1], priv->base + VNC1B_REG);
+	iowrite32(p_set->coeff_set[2], priv->base + VNC1C_REG);
+
+	iowrite32(p_set->coeff_set[3], priv->base + VNC2A_REG);
+	iowrite32(p_set->coeff_set[4], priv->base + VNC2B_REG);
+	iowrite32(p_set->coeff_set[5], priv->base + VNC2C_REG);
+
+	iowrite32(p_set->coeff_set[6], priv->base + VNC3A_REG);
+	iowrite32(p_set->coeff_set[7], priv->base + VNC3B_REG);
+	iowrite32(p_set->coeff_set[8], priv->base + VNC3C_REG);
+
+	iowrite32(p_set->coeff_set[9], priv->base + VNC4A_REG);
+	iowrite32(p_set->coeff_set[10], priv->base + VNC4B_REG);
+	iowrite32(p_set->coeff_set[11], priv->base + VNC4C_REG);
+
+	iowrite32(p_set->coeff_set[12], priv->base + VNC5A_REG);
+	iowrite32(p_set->coeff_set[13], priv->base + VNC5B_REG);
+	iowrite32(p_set->coeff_set[14], priv->base + VNC5C_REG);
+
+	iowrite32(p_set->coeff_set[15], priv->base + VNC6A_REG);
+	iowrite32(p_set->coeff_set[16], priv->base + VNC6B_REG);
+	iowrite32(p_set->coeff_set[17], priv->base + VNC6C_REG);
+
+	iowrite32(p_set->coeff_set[18], priv->base + VNC7A_REG);
+	iowrite32(p_set->coeff_set[19], priv->base + VNC7B_REG);
+	iowrite32(p_set->coeff_set[20], priv->base + VNC7C_REG);
+
+	iowrite32(p_set->coeff_set[21], priv->base + VNC8A_REG);
+	iowrite32(p_set->coeff_set[22], priv->base + VNC8B_REG);
+	iowrite32(p_set->coeff_set[23], priv->base + VNC8C_REG);
+}
+
 /* rect is guaranteed to not exceed the scaled camera rectangle */
 static int rcar_vin_set_rect(struct soc_camera_device *icd)
 {
@@ -676,6 +1076,7 @@ static int rcar_vin_set_rect(struct soc_camera_device *icd)
 	unsigned int left_offset, top_offset;
 	unsigned char dsize = 0;
 	struct v4l2_rect *cam_subrect = &cam->subrect;
+	u32 value;
 
 	dev_dbg(icd->parent, "Crop %ux%u@%u:%u\n",
 		icd->user_width, icd->user_height, cam->vin_left, cam->vin_top);
@@ -695,40 +1096,64 @@ static int rcar_vin_set_rect(struct soc_camera_device *icd)
 
 	/* Set Start/End Pixel/Line Pre-Clip */
 	iowrite32(left_offset << dsize, priv->base + VNSPPRC_REG);
-	iowrite32((left_offset + cam->width - 1) << dsize,
+	iowrite32((left_offset + cam_subrect->width - 1) << dsize,
 		  priv->base + VNEPPRC_REG);
 	switch (priv->field) {
 	case V4L2_FIELD_INTERLACED:
 	case V4L2_FIELD_INTERLACED_TB:
 	case V4L2_FIELD_INTERLACED_BT:
 		iowrite32(top_offset / 2, priv->base + VNSLPRC_REG);
-		iowrite32((top_offset + cam->height) / 2 - 1,
+		iowrite32((top_offset + cam_subrect->height) / 2 - 1,
 			  priv->base + VNELPRC_REG);
 		break;
 	default:
 		iowrite32(top_offset, priv->base + VNSLPRC_REG);
-		iowrite32(top_offset + cam->height - 1,
+		iowrite32(top_offset + cam_subrect->height - 1,
 			  priv->base + VNELPRC_REG);
 		break;
 	}
 
+	/* Set scaling coefficient */
+	value = 0;
+	if (cam_subrect->height != cam->out_height)
+		value = (4096 * cam_subrect->height) / cam->out_height;
+	dev_dbg(icd->parent, "YS Value: %x\n", value);
+	iowrite32(value, priv->base + VNYS_REG);
+
+	value = 0;
+	if (cam_subrect->width != cam->out_width)
+		value = (4096 * cam_subrect->width) / cam->out_width;
+
+	/* Horizontal upscaling is up to double size */
+	if (0 < value && value < 2048)
+		value = 2048;
+
+	dev_dbg(icd->parent, "XS Value: %x\n", value);
+	iowrite32(value, priv->base + VNXS_REG);
+
+	/* Horizontal upscaling is carried out by scaling down from double size */
+	if (value < 4096)
+		value *= 2;
+
+	set_coeff(priv, value);
+
 	/* Set Start/End Pixel/Line Post-Clip */
 	iowrite32(0, priv->base + VNSPPOC_REG);
 	iowrite32(0, priv->base + VNSLPOC_REG);
-	iowrite32((cam_subrect->width - 1) << dsize, priv->base + VNEPPOC_REG);
+	iowrite32((cam->out_width - 1) << dsize, priv->base + VNEPPOC_REG);
 	switch (priv->field) {
 	case V4L2_FIELD_INTERLACED:
 	case V4L2_FIELD_INTERLACED_TB:
 	case V4L2_FIELD_INTERLACED_BT:
-		iowrite32(cam_subrect->height / 2 - 1,
+		iowrite32(cam->out_height / 2 - 1,
 			  priv->base + VNELPOC_REG);
 		break;
 	default:
-		iowrite32(cam_subrect->height - 1, priv->base + VNELPOC_REG);
+		iowrite32(cam->out_height - 1, priv->base + VNELPOC_REG);
 		break;
 	}
 
-	iowrite32(ALIGN(cam->width, 0x10), priv->base + VNIS_REG);
+	iowrite32(ALIGN(cam->out_width, 0x10), priv->base + VNIS_REG);
 
 	return 0;
 }
@@ -819,7 +1244,7 @@ static int rcar_vin_set_bus_param(struct soc_camera_device *icd)
 	if (ret < 0 && ret != -ENOIOCTLCMD)
 		return ret;
 
-	val = priv->field == V4L2_FIELD_NONE ? VNDMR2_FTEV : 0;
+	val = VNDMR2_FTEV | VNDMR2_VLV(1);
 	if (!(common_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW))
 		val |= VNDMR2_VPS;
 	if (!(common_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW))
@@ -880,6 +1305,14 @@ static const struct soc_mbus_pixelfmt rcar_vin_formats[] = {
 		.layout			= SOC_MBUS_LAYOUT_PLANAR_Y_C,
 	},
 	{
+		.fourcc			= V4L2_PIX_FMT_YUYV,
+		.name			= "YUYV",
+		.bits_per_sample	= 16,
+		.packing		= SOC_MBUS_PACKING_NONE,
+		.order			= SOC_MBUS_ORDER_LE,
+		.layout			= SOC_MBUS_LAYOUT_PACKED,
+	},
+	{
 		.fourcc			= V4L2_PIX_FMT_UYVY,
 		.name			= "UYVY",
 		.bits_per_sample	= 16,
@@ -999,6 +1432,8 @@ static int rcar_vin_get_formats(struct soc_camera_device *icd, unsigned int idx,
 		cam->subrect = rect;
 		cam->width = mf.width;
 		cam->height = mf.height;
+		cam->out_width	= mf.width;
+		cam->out_height	= mf.height;
 
 		icd->host_priv = cam;
 	} else {
@@ -1259,6 +1694,9 @@ static int rcar_vin_set_fmt(struct soc_camera_device *icd,
 	dev_dbg(dev, "W: %u : %u, H: %u : %u\n",
 		vin_sub_width, pix->width, vin_sub_height, pix->height);
 
+	cam->out_width = pix->width;
+	cam->out_height = pix->height;
+
 	icd->current_fmt = xlate;
 
 	priv->field = field;
@@ -1310,8 +1748,12 @@ static int rcar_vin_try_fmt(struct soc_camera_device *icd,
 	if (ret < 0)
 		return ret;
 
-	pix->width = mf.width;
-	pix->height = mf.height;
+	/* Adjust only if VIN cannot scale */
+	if (pix->width > mf.width * 2)
+		pix->width = mf.width * 2;
+	if (pix->height > mf.height * 3)
+		pix->height = mf.height * 3;
+
 	pix->field = mf.field;
 	pix->colorspace = mf.colorspace;
 
@@ -1395,6 +1837,8 @@ static struct soc_camera_host_ops rcar_vin_host_ops = {
 
 #ifdef CONFIG_OF
 static struct of_device_id rcar_vin_of_table[] = {
+	{ .compatible = "renesas,vin-r8a7794", .data = (void *)RCAR_GEN2 },
+	{ .compatible = "renesas,vin-r8a7793", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7791", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7790", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7779", .data = (void *)RCAR_H1 },
diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c
index ee5c399..39ff79f 100644
--- a/drivers/media/platform/vivid/vivid-vid-out.c
+++ b/drivers/media/platform/vivid/vivid-vid-out.c
@@ -625,7 +625,7 @@ int vivid_vid_out_g_selection(struct file *file, void *priv,
 		sel->r = dev->fmt_out_rect;
 		break;
 	case V4L2_SEL_TGT_CROP_BOUNDS:
-		if (!dev->has_compose_out)
+		if (!dev->has_crop_out)
 			return -EINVAL;
 		sel->r = vivid_max_rect;
 		break;
diff --git a/drivers/media/usb/Kconfig b/drivers/media/usb/Kconfig
index 056181f..7496f33 100644
--- a/drivers/media/usb/Kconfig
+++ b/drivers/media/usb/Kconfig
@@ -24,7 +24,6 @@ if MEDIA_ANALOG_TV_SUPPORT
 	comment "Analog TV USB devices"
 source "drivers/media/usb/pvrusb2/Kconfig"
 source "drivers/media/usb/hdpvr/Kconfig"
-source "drivers/media/usb/tlg2300/Kconfig"
 source "drivers/media/usb/usbvision/Kconfig"
 source "drivers/media/usb/stk1160/Kconfig"
 source "drivers/media/usb/go7007/Kconfig"
diff --git a/drivers/media/usb/Makefile b/drivers/media/usb/Makefile
index 6f2eb7c..8874ba7 100644
--- a/drivers/media/usb/Makefile
+++ b/drivers/media/usb/Makefile
@@ -16,7 +16,6 @@ obj-$(CONFIG_VIDEO_CPIA2) += cpia2/
 obj-$(CONFIG_VIDEO_AU0828) += au0828/
 obj-$(CONFIG_VIDEO_HDPVR)	+= hdpvr/
 obj-$(CONFIG_VIDEO_PVRUSB2) += pvrusb2/
-obj-$(CONFIG_VIDEO_TLG2300) += tlg2300/
 obj-$(CONFIG_VIDEO_USBVISION) += usbvision/
 obj-$(CONFIG_VIDEO_STK1160) += stk1160/
 obj-$(CONFIG_VIDEO_CX231XX) += cx231xx/
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 7565871..faac2f4 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1017,6 +1017,12 @@ static int v4l_querycap(const struct v4l2_ioctl_ops *ops,
 	ret = ops->vidioc_querycap(file, fh, cap);
 
 	cap->capabilities |= V4L2_CAP_EXT_PIX_FORMAT;
+	/*
+	 * Drivers MUST fill in device_caps, so check for this and
+	 * warn if it was forgotten.
+	 */
+	WARN_ON(!(cap->capabilities & V4L2_CAP_DEVICE_CAPS) ||
+		!cap->device_caps);
 	cap->device_caps |= V4L2_CAP_EXT_PIX_FORMAT;
 
 	return ret;
diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c
index 3d5d792..410c397 100644
--- a/drivers/memory/fsl_ifc.c
+++ b/drivers/memory/fsl_ifc.c
@@ -61,7 +61,7 @@ int fsl_ifc_find(phys_addr_t addr_base)
 	if (!fsl_ifc_ctrl_dev || !fsl_ifc_ctrl_dev->regs)
 		return -ENODEV;
 
-	for (i = 0; i < ARRAY_SIZE(fsl_ifc_ctrl_dev->regs->cspr_cs); i++) {
+	for (i = 0; i < fsl_ifc_ctrl_dev->banks; i++) {
 		u32 cspr = in_be32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr);
 		if (cspr & CSPR_V && (cspr & CSPR_BA) ==
 				convert_ifc_address(addr_base))
@@ -213,7 +213,7 @@ static irqreturn_t fsl_ifc_ctrl_irq(int irqno, void *data)
 static int fsl_ifc_ctrl_probe(struct platform_device *dev)
 {
 	int ret = 0;
-
+	int version, banks;
 
 	dev_info(&dev->dev, "Freescale Integrated Flash Controller\n");
 
@@ -231,6 +231,15 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
 		goto err;
 	}
 
+	version = ioread32be(&fsl_ifc_ctrl_dev->regs->ifc_rev) &
+			FSL_IFC_VERSION_MASK;
+	banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8;
+	dev_info(&dev->dev, "IFC version %d.%d, %d banks\n",
+		version >> 24, (version >> 16) & 0xf, banks);
+
+	fsl_ifc_ctrl_dev->version = version;
+	fsl_ifc_ctrl_dev->banks = banks;
+
 	/* get the Controller level irq */
 	fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0);
 	if (fsl_ifc_ctrl_dev->irq == NO_IRQ) {
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca4721..51fd6b5 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -34,7 +34,8 @@ struct cxl_context *cxl_context_alloc(void)
 /*
  * Initialises a CXL context.
  */
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping)
 {
 	int i;
 
@@ -42,6 +43,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	ctx->afu = afu;
 	ctx->master = master;
 	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = mapping;
 
 	/*
 	 * Allocate the segment table before we put it in the IDR so that we
@@ -82,12 +85,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	 * Allocating IDR! We better make sure everything's setup that
 	 * dereferences from it.
 	 */
+	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&afu->contexts_lock);
 	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
 		      ctx->afu->num_procs, GFP_NOWAIT);
-	spin_unlock(&afu->contexts_lock);
 	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
 	if (i < 0)
 		return i;
 
@@ -147,6 +150,12 @@ static void __detach_context(struct cxl_context *ctx)
 	afu_release_irqs(ctx);
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
 	wake_up_all(&ctx->wq);
+
+	/* Release Problem State Area mapping */
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping, 0, 0, 1);
+	mutex_unlock(&ctx->mapping_lock);
 }
 
 /*
@@ -168,21 +177,22 @@ void cxl_context_detach_all(struct cxl_afu *afu)
 	struct cxl_context *ctx;
 	int tmp;
 
-	rcu_read_lock();
-	idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
 		/*
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
-	rcu_read_unlock();
+	}
+	mutex_unlock(&afu->contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-	spin_lock(&ctx->afu->contexts_lock);
+	mutex_lock(&ctx->afu->contexts_lock);
 	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	spin_unlock(&ctx->afu->contexts_lock);
+	mutex_unlock(&ctx->afu->contexts_lock);
 	synchronize_rcu();
 
 	free_page((u64)ctx->sstp);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda..28078f8 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -351,7 +351,7 @@ struct cxl_afu {
 	struct device *chardev_s, *chardev_m, *chardev_d;
 	struct idr contexts_idr;
 	struct dentry *debugfs;
-	spinlock_t contexts_lock;
+	struct mutex contexts_lock;
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
@@ -398,6 +398,10 @@ struct cxl_context {
 	phys_addr_t psn_phys;
 	u64 psn_size;
 
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+
 	spinlock_t sste_lock; /* Protects segment table entries */
 	struct cxl_sste *sstp;
 	u64 sstp0, sstp1;
@@ -599,7 +603,8 @@ int cxl_alloc_sst(struct cxl_context *ctx);
 void init_cxl_native(void);
 
 struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
 
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 378b099..e9f2f10 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -77,7 +77,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
 		goto err_put_afu;
 	}
 
-	if ((rc = cxl_context_init(ctx, afu, master)))
+	if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
 		goto err_put_afu;
 
 	pr_devel("afu_open pe: %i\n", ctx->pe);
@@ -113,6 +113,10 @@ static int afu_release(struct inode *inode, struct file *file)
 		 __func__, ctx->pe);
 	cxl_context_detach(ctx);
 
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
 	put_device(&ctx->afu->dev);
 
 	/*
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442..f2b37b4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -277,6 +277,7 @@ static int do_process_element_cmd(struct cxl_context *ctx,
 				  u64 cmd, u64 pe_state)
 {
 	u64 state;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
 	WARN_ON(!ctx->afu->enabled);
 
@@ -286,6 +287,10 @@ static int do_process_element_cmd(struct cxl_context *ctx,
 	smp_mb();
 	cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
 	while (1) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+			return -EBUSY;
+		}
 		state = be64_to_cpup(ctx->afu->sw_command_status);
 		if (state == ~0ULL) {
 			pr_err("cxl: Error adding process element to AFU\n");
@@ -610,13 +615,6 @@ static inline int detach_process_native_dedicated(struct cxl_context *ctx)
 	return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) .  Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
 	if (!ctx->pe_inserted)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab..0f2cc9f8 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
 	afu->dev.release = cxl_release_afu;
 	afu->slice = slice;
 	idr_init(&afu->contexts_idr);
-	spin_lock_init(&afu->contexts_lock);
+	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
 	mutex_init(&afu->spa_mutex);
 
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06..461bdbd 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct device *device,
 	int rc;
 
 	/* Not safe to reset if it is currently in use */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr)) {
 		rc = -EBUSY;
 		goto err;
@@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct device *device,
 
 	rc = count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
@@ -247,7 +247,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 	int rc = -EBUSY;
 
 	/* can't change this if we have a user */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr))
 		goto err;
 
@@ -271,7 +271,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 	afu->current_mode = 0;
 	afu->num_procs = 0;
 
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 
 	if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
 		return rc;
@@ -280,7 +280,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 
 	return count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
index 028ba5d6..687e9aa 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -326,21 +326,27 @@ static int mic_vdev_info_show(struct seq_file *s, void *unused)
 			}
 			avail = vrh->vring.avail;
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx & (num - 1));
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx) & (num - 1));
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx);
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "avail ring[%d] %d\n",
 					   j, avail->ring[j]);
 			used = vrh->vring.used;
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx & (num - 1));
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx);
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "used ring[%d] id %d len %d\n",
-					   j, used->ring[j].id,
-					   used->ring[j].len);
+					   j, vringh32_to_cpu(vrh,
+							      used->ring[j].id),
+					   vringh32_to_cpu(vrh,
+							   used->ring[j].len));
 		}
 	}
 	mutex_unlock(&mdev->mic_mutex);
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 62aba9a..03d7c75 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -2561,7 +2561,7 @@ static int atmci_runtime_resume(struct device *dev)
 static const struct dev_pm_ops atmci_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
 				pm_runtime_force_resume)
-	SET_PM_RUNTIME_PM_OPS(atmci_runtime_suspend, atmci_runtime_resume, NULL)
+	SET_RUNTIME_PM_OPS(atmci_runtime_suspend, atmci_runtime_resume, NULL)
 };
 
 static struct platform_driver atmci_driver = {
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 94b8210..71fea89 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -133,7 +133,7 @@ config MTD_OF_PARTS
 	help
 	  This provides a partition parsing function which derives
 	  the partition map from the children of the flash node,
-	  as described in Documentation/devicetree/booting-without-of.txt.
+	  as described in Documentation/devicetree/bindings/mtd/partition.txt.
 
 config MTD_AR7_PARTS
 	tristate "TI AR7 partitioning support"
diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index 8057f52..cc13ea5 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c
@@ -15,8 +15,12 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
-/* 10 parts were found on sflash on Netgear WNDR4500 */
-#define BCM47XXPART_MAX_PARTS		12
+/*
+ * NAND flash on Netgear R6250 was verified to contain 15 partitions.
+ * This will result in allocating too big an array for some old devices, but
+ * the memory will be freed soon anyway (see mtd_device_parse_register).
+ */
+#define BCM47XXPART_MAX_PARTS		20
 
 /*
  * Amount of bytes we read when analyzing each block of flash memory.
@@ -168,18 +172,26 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 				i++;
 			}
 
-			bcm47xxpart_add_part(&parts[curr_part++], "linux",
-					     offset + trx->offset[i], 0);
-			i++;
+			if (trx->offset[i]) {
+				bcm47xxpart_add_part(&parts[curr_part++],
+						     "linux",
+						     offset + trx->offset[i],
+						     0);
+				i++;
+			}
 
 			/*
 			 * Pure rootfs size is known and can be calculated as:
 			 * trx->length - trx->offset[i]. We don't fill it as
 			 * we want to have jffs2 (overlay) in the same mtd.
 			 */
-			bcm47xxpart_add_part(&parts[curr_part++], "rootfs",
-					     offset + trx->offset[i], 0);
-			i++;
+			if (trx->offset[i]) {
+				bcm47xxpart_add_part(&parts[curr_part++],
+						     "rootfs",
+						     offset + trx->offset[i],
+						     0);
+				i++;
+			}
 
 			last_trx_part = curr_part - 1;
 
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 3096f3d..286b97a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -2654,8 +2654,7 @@ static void cfi_intelext_destroy(struct mtd_info *mtd)
 	kfree(cfi);
 	for (i = 0; i < mtd->numeraseregions; i++) {
 		region = &mtd->eraseregions[i];
-		if (region->lockmap)
-			kfree(region->lockmap);
+		kfree(region->lockmap);
 	}
 	kfree(mtd->eraseregions);
 }
diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index 7234604..448ce42 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -1655,22 +1656,21 @@ static int dbg_flashctrl_show(struct seq_file *s, void *p)
 {
 	struct docg3 *docg3 = (struct docg3 *)s->private;
 
-	int pos = 0;
 	u8 fctrl;
 
 	mutex_lock(&docg3->cascade->lock);
 	fctrl = doc_register_readb(docg3, DOC_FLASHCONTROL);
 	mutex_unlock(&docg3->cascade->lock);
 
-	pos += seq_printf(s,
-		 "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n",
-		 fctrl,
-		 fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-",
-		 fctrl & DOC_CTRL_CE ? "active" : "inactive",
-		 fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-",
-		 fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-",
-		 fctrl & DOC_CTRL_FLASHREADY ? "ready" : "not ready");
-	return pos;
+	seq_printf(s, "FlashControl : 0x%02x (%s,CE# %s,%s,%s,flash %s)\n",
+		   fctrl,
+		   fctrl & DOC_CTRL_VIOLATION ? "protocol violation" : "-",
+		   fctrl & DOC_CTRL_CE ? "active" : "inactive",
+		   fctrl & DOC_CTRL_PROTECTION_ERROR ? "protection error" : "-",
+		   fctrl & DOC_CTRL_SEQUENCE_ERROR ? "sequence error" : "-",
+		   fctrl & DOC_CTRL_FLASHREADY ? "ready" : "not ready");
+
+	return 0;
 }
 DEBUGFS_RO_ATTR(flashcontrol, dbg_flashctrl_show);
 
@@ -1678,58 +1678,56 @@ static int dbg_asicmode_show(struct seq_file *s, void *p)
 {
 	struct docg3 *docg3 = (struct docg3 *)s->private;
 
-	int pos = 0, pctrl, mode;
+	int pctrl, mode;
 
 	mutex_lock(&docg3->cascade->lock);
 	pctrl = doc_register_readb(docg3, DOC_ASICMODE);
 	mode = pctrl & 0x03;
 	mutex_unlock(&docg3->cascade->lock);
 
-	pos += seq_printf(s,
-			 "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (",
-			 pctrl,
-			 pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0,
-			 pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0,
-			 pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0,
-			 pctrl & DOC_ASICMODE_MDWREN ? 1 : 0,
-			 pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0,
-			 mode >> 1, mode & 0x1);
+	seq_printf(s,
+		   "%04x : RAM_WE=%d,RSTIN_RESET=%d,BDETCT_RESET=%d,WRITE_ENABLE=%d,POWERDOWN=%d,MODE=%d%d (",
+		   pctrl,
+		   pctrl & DOC_ASICMODE_RAM_WE ? 1 : 0,
+		   pctrl & DOC_ASICMODE_RSTIN_RESET ? 1 : 0,
+		   pctrl & DOC_ASICMODE_BDETCT_RESET ? 1 : 0,
+		   pctrl & DOC_ASICMODE_MDWREN ? 1 : 0,
+		   pctrl & DOC_ASICMODE_POWERDOWN ? 1 : 0,
+		   mode >> 1, mode & 0x1);
 
 	switch (mode) {
 	case DOC_ASICMODE_RESET:
-		pos += seq_puts(s, "reset");
+		seq_puts(s, "reset");
 		break;
 	case DOC_ASICMODE_NORMAL:
-		pos += seq_puts(s, "normal");
+		seq_puts(s, "normal");
 		break;
 	case DOC_ASICMODE_POWERDOWN:
-		pos += seq_puts(s, "powerdown");
+		seq_puts(s, "powerdown");
 		break;
 	}
-	pos += seq_puts(s, ")\n");
-	return pos;
+	seq_puts(s, ")\n");
+	return 0;
 }
 DEBUGFS_RO_ATTR(asic_mode, dbg_asicmode_show);
 
 static int dbg_device_id_show(struct seq_file *s, void *p)
 {
 	struct docg3 *docg3 = (struct docg3 *)s->private;
-	int pos = 0;
 	int id;
 
 	mutex_lock(&docg3->cascade->lock);
 	id = doc_register_readb(docg3, DOC_DEVICESELECT);
 	mutex_unlock(&docg3->cascade->lock);
 
-	pos += seq_printf(s, "DeviceId = %d\n", id);
-	return pos;
+	seq_printf(s, "DeviceId = %d\n", id);
+	return 0;
 }
 DEBUGFS_RO_ATTR(device_id, dbg_device_id_show);
 
 static int dbg_protection_show(struct seq_file *s, void *p)
 {
 	struct docg3 *docg3 = (struct docg3 *)s->private;
-	int pos = 0;
 	int protect, dps0, dps0_low, dps0_high, dps1, dps1_low, dps1_high;
 
 	mutex_lock(&docg3->cascade->lock);
@@ -1742,45 +1740,40 @@ static int dbg_protection_show(struct seq_file *s, void *p)
 	dps1_high = doc_register_readw(docg3, DOC_DPS1_ADDRHIGH);
 	mutex_unlock(&docg3->cascade->lock);
 
-	pos += seq_printf(s, "Protection = 0x%02x (",
-			 protect);
+	seq_printf(s, "Protection = 0x%02x (", protect);
 	if (protect & DOC_PROTECT_FOUNDRY_OTP_LOCK)
-		pos += seq_puts(s, "FOUNDRY_OTP_LOCK,");
+		seq_puts(s, "FOUNDRY_OTP_LOCK,");
 	if (protect & DOC_PROTECT_CUSTOMER_OTP_LOCK)
-		pos += seq_puts(s, "CUSTOMER_OTP_LOCK,");
+		seq_puts(s, "CUSTOMER_OTP_LOCK,");
 	if (protect & DOC_PROTECT_LOCK_INPUT)
-		pos += seq_puts(s, "LOCK_INPUT,");
+		seq_puts(s, "LOCK_INPUT,");
 	if (protect & DOC_PROTECT_STICKY_LOCK)
-		pos += seq_puts(s, "STICKY_LOCK,");
+		seq_puts(s, "STICKY_LOCK,");
 	if (protect & DOC_PROTECT_PROTECTION_ENABLED)
-		pos += seq_puts(s, "PROTECTION ON,");
+		seq_puts(s, "PROTECTION ON,");
 	if (protect & DOC_PROTECT_IPL_DOWNLOAD_LOCK)
-		pos += seq_puts(s, "IPL_DOWNLOAD_LOCK,");
+		seq_puts(s, "IPL_DOWNLOAD_LOCK,");
 	if (protect & DOC_PROTECT_PROTECTION_ERROR)
-		pos += seq_puts(s, "PROTECT_ERR,");
+		seq_puts(s, "PROTECT_ERR,");
 	else
-		pos += seq_puts(s, "NO_PROTECT_ERR");
-	pos += seq_puts(s, ")\n");
-
-	pos += seq_printf(s, "DPS0 = 0x%02x : "
-			 "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, "
-			 "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n",
-			 dps0, dps0_low, dps0_high,
-			 !!(dps0 & DOC_DPS_OTP_PROTECTED),
-			 !!(dps0 & DOC_DPS_READ_PROTECTED),
-			 !!(dps0 & DOC_DPS_WRITE_PROTECTED),
-			 !!(dps0 & DOC_DPS_HW_LOCK_ENABLED),
-			 !!(dps0 & DOC_DPS_KEY_OK));
-	pos += seq_printf(s, "DPS1 = 0x%02x : "
-			 "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, "
-			 "WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n",
-			 dps1, dps1_low, dps1_high,
-			 !!(dps1 & DOC_DPS_OTP_PROTECTED),
-			 !!(dps1 & DOC_DPS_READ_PROTECTED),
-			 !!(dps1 & DOC_DPS_WRITE_PROTECTED),
-			 !!(dps1 & DOC_DPS_HW_LOCK_ENABLED),
-			 !!(dps1 & DOC_DPS_KEY_OK));
-	return pos;
+		seq_puts(s, "NO_PROTECT_ERR");
+	seq_puts(s, ")\n");
+
+	seq_printf(s, "DPS0 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n",
+		   dps0, dps0_low, dps0_high,
+		   !!(dps0 & DOC_DPS_OTP_PROTECTED),
+		   !!(dps0 & DOC_DPS_READ_PROTECTED),
+		   !!(dps0 & DOC_DPS_WRITE_PROTECTED),
+		   !!(dps0 & DOC_DPS_HW_LOCK_ENABLED),
+		   !!(dps0 & DOC_DPS_KEY_OK));
+	seq_printf(s, "DPS1 = 0x%02x : Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, WRITE=%d, HW_LOCK=%d, KEY_OK=%d\n",
+		   dps1, dps1_low, dps1_high,
+		   !!(dps1 & DOC_DPS_OTP_PROTECTED),
+		   !!(dps1 & DOC_DPS_READ_PROTECTED),
+		   !!(dps1 & DOC_DPS_WRITE_PROTECTED),
+		   !!(dps1 & DOC_DPS_HW_LOCK_ENABLED),
+		   !!(dps1 & DOC_DPS_KEY_OK));
+	return 0;
 }
 DEBUGFS_RO_ATTR(protection, dbg_protection_show);
 
@@ -2126,9 +2119,18 @@ static int __exit docg3_release(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static struct of_device_id docg3_dt_ids[] = {
+	{ .compatible = "m-systems,diskonchip-g3" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, docg3_dt_ids);
+#endif
+
 static struct platform_driver g3_driver = {
 	.driver		= {
 		.name	= "docg3",
+		.of_match_table = of_match_ptr(docg3_dt_ids),
 	},
 	.suspend	= docg3_suspend,
 	.resume		= docg3_resume,
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index ed827cf..85e35467 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -128,13 +128,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct spi_device *spi = flash->spi;
 	struct spi_transfer t[2];
 	struct spi_message m;
-	int dummy = nor->read_dummy;
-	int ret;
+	unsigned int dummy = nor->read_dummy;
 
-	/* Wait till previous write/erase is done. */
-	ret = nor->wait_till_ready(nor);
-	if (ret)
-		return ret;
+	/* convert the dummy cycles to the number of bytes */
+	dummy /= 8;
 
 	spi_message_init(&m);
 	memset(t, 0, (sizeof t));
@@ -160,21 +157,10 @@ static int m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 static int m25p80_erase(struct spi_nor *nor, loff_t offset)
 {
 	struct m25p *flash = nor->priv;
-	int ret;
 
 	dev_dbg(nor->dev, "%dKiB at 0x%08x\n",
 		flash->mtd.erasesize / 1024, (u32)offset);
 
-	/* Wait until finished previous write command. */
-	ret = nor->wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
-	if (ret)
-		return ret;
-
 	/* Set up command buffer. */
 	flash->command[0] = nor->erase_opcode;
 	m25p_addr2cmd(nor, offset, flash->command);
@@ -260,7 +246,6 @@ static int m25p_remove(struct spi_device *spi)
 	return mtd_device_unregister(&flash->mtd);
 }
 
-
 /*
  * XXX This needs to be kept in sync with spi_nor_ids.  We can't share
  * it with spi-nor, because if this is built as a module then modpost
@@ -287,7 +272,7 @@ static const struct spi_device_id m25p_ids[] = {
 	{"s25fl512s"},	{"s70fl01gs"},	{"s25sl12800"},	{"s25sl12801"},
 	{"s25fl129p0"},	{"s25fl129p1"},	{"s25sl004a"},	{"s25sl008a"},
 	{"s25sl016a"},	{"s25sl032a"},	{"s25sl064a"},	{"s25fl008k"},
-	{"s25fl016k"},	{"s25fl064k"},
+	{"s25fl016k"},	{"s25fl064k"},	{"s25fl132k"},
 	{"sst25vf040b"},{"sst25vf080b"},{"sst25vf016b"},{"sst25vf032b"},
 	{"sst25vf064c"},{"sst25wf512"},	{"sst25wf010"},	{"sst25wf020"},
 	{"sst25wf040"},
@@ -300,17 +285,16 @@ static const struct spi_device_id m25p_ids[] = {
 	{"m45pe10"},	{"m45pe80"},	{"m45pe16"},
 	{"m25pe20"},	{"m25pe80"},	{"m25pe16"},
 	{"m25px16"},	{"m25px32"},	{"m25px32-s0"},	{"m25px32-s1"},
-	{"m25px64"},
+	{"m25px64"},	{"m25px80"},
 	{"w25x10"},	{"w25x20"},	{"w25x40"},	{"w25x80"},
 	{"w25x16"},	{"w25x32"},	{"w25q32"},	{"w25q32dw"},
-	{"w25x64"},	{"w25q64"},	{"w25q128"},	{"w25q80"},
-	{"w25q80bl"},	{"w25q128"},	{"w25q256"},	{"cat25c11"},
+	{"w25x64"},	{"w25q64"},	{"w25q80"},	{"w25q80bl"},
+	{"w25q128"},	{"w25q256"},	{"cat25c11"},
 	{"cat25c03"},	{"cat25c09"},	{"cat25c17"},	{"cat25128"},
 	{ },
 };
 MODULE_DEVICE_TABLE(spi, m25p_ids);
 
-
 static struct spi_driver m25p80_driver = {
 	.driver = {
 		.name	= "m25p80",
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index dd22ce2..0099aba 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -149,7 +149,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
 	struct dataflash	*priv = mtd->priv;
 	struct spi_device	*spi = priv->spi;
-	struct spi_transfer	x = { .tx_dma = 0, };
+	struct spi_transfer	x = { };
 	struct spi_message	msg;
 	unsigned		blocksize = priv->page_size << 3;
 	uint8_t			*command;
@@ -235,7 +235,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 			       size_t *retlen, u_char *buf)
 {
 	struct dataflash	*priv = mtd->priv;
-	struct spi_transfer	x[2] = { { .tx_dma = 0, }, };
+	struct spi_transfer	x[2] = { };
 	struct spi_message	msg;
 	unsigned int		addr;
 	uint8_t			*command;
@@ -301,7 +301,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 {
 	struct dataflash	*priv = mtd->priv;
 	struct spi_device	*spi = priv->spi;
-	struct spi_transfer	x[2] = { { .tx_dma = 0, }, };
+	struct spi_transfer	x[2] = { };
 	struct spi_message	msg;
 	unsigned int		pageaddr, addr, offset, writelen;
 	size_t			remaining = len;
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index effd9a4..8b66e52 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -17,7 +17,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <asm/io.h>
+#include <linux/io.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index f02603e..708b7e8 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -812,8 +812,7 @@ static int __init init_pmc551(void)
 	}
 
 	/* Exited early, reference left over */
-	if (PCI_Device)
-		pci_dev_put(PCI_Device);
+	pci_dev_put(PCI_Device);
 
 	if (!pmc551list) {
 		printk(KERN_NOTICE "pmc551: not detected\n");
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 487e64f..1388c8d 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -518,7 +518,7 @@ void INFTL_dumpVUchains(struct INFTLrecord *s)
 	pr_debug("INFTL Virtual Unit Chains:\n");
 	for (logical = 0; logical < s->nb_blocks; logical++) {
 		block = s->VUtable[logical];
-		if (block > s->nb_blocks)
+		if (block >= s->nb_blocks)
 			continue;
 		pr_debug("  LOGICAL %d --> %d ", logical, block);
 		for (i = 0; i < s->nb_blocks; i++) {
diff --git a/drivers/mtd/maps/bfin-async-flash.c b/drivers/mtd/maps/bfin-async-flash.c
index 6ea51e5..41730fe 100644
--- a/drivers/mtd/maps/bfin-async-flash.c
+++ b/drivers/mtd/maps/bfin-async-flash.c
@@ -126,7 +126,6 @@ static const char * const part_probe_types[] = {
 
 static int bfin_flash_probe(struct platform_device *pdev)
 {
-	int ret;
 	struct physmap_flash_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *memory = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct resource *flash_ambctl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 991d0cb..f35cd20 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -47,14 +47,12 @@ static int of_flash_remove(struct platform_device *dev)
 		return 0;
 	dev_set_drvdata(&dev->dev, NULL);
 
-	if (info->cmtd != info->list[0].mtd) {
+	if (info->cmtd) {
 		mtd_device_unregister(info->cmtd);
-		mtd_concat_destroy(info->cmtd);
+		if (info->cmtd != info->list[0].mtd)
+			mtd_concat_destroy(info->cmtd);
 	}
 
-	if (info->cmtd)
-		mtd_device_unregister(info->cmtd);
-
 	for (i = 0; i < info->list_size; i++) {
 		if (info->list[i].mtd)
 			map_destroy(info->list[i].mtd);
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index dd10646..7d0150d 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -75,10 +75,12 @@ config MTD_NAND_DENALI_SCRATCH_REG_ADDR
           boards, the scratch register is at 0xFF108018.
 
 config MTD_NAND_GPIO
-	tristate "GPIO NAND Flash driver"
+	tristate "GPIO assisted NAND Flash driver"
 	depends on GPIOLIB
 	help
-	  This enables a GPIO based NAND flash driver.
+	  This enables a NAND flash driver where control signals are
+	  connected to GPIO pins, and commands and data are communicated
+	  via a memory mapped interface.
 
 config MTD_NAND_AMS_DELTA
 	tristate "NAND Flash device on Amstrad E3"
@@ -516,4 +518,10 @@ config MTD_NAND_XWAY
 	  Enables support for NAND Flash chips on Lantiq XWAY SoCs. NAND is attached
 	  to the External Bus Unit (EBU).
 
+config MTD_NAND_SUNXI
+	tristate "Support for NAND on Allwinner SoCs"
+	depends on ARCH_SUNXI
+	help
+	  Enables support for NAND Flash chips on Allwinner SoCs.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 9c847e4..bd38f21 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -50,5 +50,6 @@ obj-$(CONFIG_MTD_NAND_JZ4740)		+= jz4740_nand.o
 obj-$(CONFIG_MTD_NAND_GPMI_NAND)	+= gpmi-nand/
 obj-$(CONFIG_MTD_NAND_XWAY)		+= xway_nand.o
 obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
+obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 84c38f3..a345e7b 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -92,7 +92,7 @@ static struct nand_ecclayout atmel_oobinfo_small = {
 struct atmel_nfc {
 	void __iomem		*base_cmd_regs;
 	void __iomem		*hsmc_regs;
-	void __iomem		*sram_bank0;
+	void			*sram_bank0;
 	dma_addr_t		sram_bank0_phys;
 	bool			use_nfc_sram;
 	bool			write_by_sram;
@@ -105,7 +105,7 @@ struct atmel_nfc {
 	struct completion	comp_xfer_done;
 
 	/* Point to the sram bank which include readed data via NFC */
-	void __iomem		*data_in_sram;
+	void			*data_in_sram;
 	bool			will_write_sram;
 };
 static struct atmel_nfc	nand_nfc;
@@ -127,6 +127,7 @@ struct atmel_nand_host {
 	bool			has_pmecc;
 	u8			pmecc_corr_cap;
 	u16			pmecc_sector_size;
+	bool			has_no_lookup_table;
 	u32			pmecc_lookup_table_offset;
 	u32			pmecc_lookup_table_offset_512;
 	u32			pmecc_lookup_table_offset_1024;
@@ -256,26 +257,6 @@ static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd)
 	return res;
 }
 
-static void memcpy32_fromio(void *trg, const void __iomem  *src, size_t size)
-{
-	int i;
-	u32 *t = trg;
-	const __iomem u32 *s = src;
-
-	for (i = 0; i < (size >> 2); i++)
-		*t++ = readl_relaxed(s++);
-}
-
-static void memcpy32_toio(void __iomem *trg, const void *src, int size)
-{
-	int i;
-	u32 __iomem *t = trg;
-	const u32 *s = src;
-
-	for (i = 0; i < (size >> 2); i++)
-		writel_relaxed(*s++, t++);
-}
-
 /*
  * Minimal-overhead PIO for data access.
  */
@@ -285,7 +266,7 @@ static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
 	struct atmel_nand_host *host = nand_chip->priv;
 
 	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy32_fromio(buf, host->nfc->data_in_sram, len);
+		memcpy(buf, host->nfc->data_in_sram, len);
 		host->nfc->data_in_sram += len;
 	} else {
 		__raw_readsb(nand_chip->IO_ADDR_R, buf, len);
@@ -298,7 +279,7 @@ static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len)
 	struct atmel_nand_host *host = nand_chip->priv;
 
 	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
-		memcpy32_fromio(buf, host->nfc->data_in_sram, len);
+		memcpy(buf, host->nfc->data_in_sram, len);
 		host->nfc->data_in_sram += len;
 	} else {
 		__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
@@ -1112,12 +1093,66 @@ static int pmecc_choose_ecc(struct atmel_nand_host *host,
 	return 0;
 }
 
+static inline int deg(unsigned int poly)
+{
+	/* polynomial degree is the most-significant bit index */
+	return fls(poly) - 1;
+}
+
+static int build_gf_tables(int mm, unsigned int poly,
+		int16_t *index_of, int16_t *alpha_to)
+{
+	unsigned int i, x = 1;
+	const unsigned int k = 1 << deg(poly);
+	unsigned int nn = (1 << mm) - 1;
+
+	/* primitive polynomial must be of degree m */
+	if (k != (1u << mm))
+		return -EINVAL;
+
+	for (i = 0; i < nn; i++) {
+		alpha_to[i] = x;
+		index_of[x] = i;
+		if (i && (x == 1))
+			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
+			return -EINVAL;
+		x <<= 1;
+		if (x & k)
+			x ^= poly;
+	}
+	alpha_to[nn] = 1;
+	index_of[0] = 0;
+
+	return 0;
+}
+
+static uint16_t *create_lookup_table(struct device *dev, int sector_size)
+{
+	int degree = (sector_size == 512) ?
+			PMECC_GF_DIMENSION_13 :
+			PMECC_GF_DIMENSION_14;
+	unsigned int poly = (sector_size == 512) ?
+			PMECC_GF_13_PRIMITIVE_POLY :
+			PMECC_GF_14_PRIMITIVE_POLY;
+	int table_size = (sector_size == 512) ?
+			PMECC_LOOKUP_TABLE_SIZE_512 :
+			PMECC_LOOKUP_TABLE_SIZE_1024;
+
+	int16_t *addr = devm_kzalloc(dev, 2 * table_size * sizeof(uint16_t),
+			GFP_KERNEL);
+	if (addr && build_gf_tables(degree, poly, addr, addr + table_size))
+		return NULL;
+
+	return addr;
+}
+
 static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
 					 struct atmel_nand_host *host)
 {
 	struct mtd_info *mtd = &host->mtd;
 	struct nand_chip *nand_chip = &host->nand_chip;
 	struct resource *regs, *regs_pmerr, *regs_rom;
+	uint16_t *galois_table;
 	int cap, sector_size, err_no;
 
 	err_no = pmecc_choose_ecc(host, &cap, &sector_size);
@@ -1163,8 +1198,24 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
 	regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
 	host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom);
 	if (IS_ERR(host->pmecc_rom_base)) {
-		err_no = PTR_ERR(host->pmecc_rom_base);
-		goto err;
+		if (!host->has_no_lookup_table)
+			/* Don't display the information again */
+			dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n");
+
+		host->has_no_lookup_table = true;
+	}
+
+	if (host->has_no_lookup_table) {
+		/* Build the look-up table in runtime */
+		galois_table = create_lookup_table(host->dev, sector_size);
+		if (!galois_table) {
+			dev_err(host->dev, "Failed to build a lookup table in runtime!\n");
+			err_no = -EINVAL;
+			goto err;
+		}
+
+		host->pmecc_rom_base = (void __iomem *)galois_table;
+		host->pmecc_lookup_table_offset = 0;
 	}
 
 	nand_chip->ecc.size = sector_size;
@@ -1501,8 +1552,10 @@ static int atmel_of_init_port(struct atmel_nand_host *host,
 
 	if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset",
 			offset, 2) != 0) {
-		dev_err(host->dev, "Cannot get PMECC lookup table offset\n");
-		return -EINVAL;
+		dev_err(host->dev, "Cannot get PMECC lookup table offset, will build a lookup table in runtime.\n");
+		host->has_no_lookup_table = true;
+		/* Will build a lookup table and initialize the offset later */
+		return 0;
 	}
 	if (!offset[0] && !offset[1]) {
 		dev_err(host->dev, "Invalid PMECC lookup table offset\n");
@@ -1899,7 +1952,7 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	int cfg, len;
 	int status = 0;
 	struct atmel_nand_host *host = chip->priv;
-	void __iomem *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host);
+	void *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host);
 
 	/* Subpage write is not supported */
 	if (offset || (data_len < mtd->writesize))
@@ -1910,14 +1963,14 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (use_dma) {
 		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0)
 			/* Fall back to use cpu copy */
-			memcpy32_toio(sram, buf, len);
+			memcpy(sram, buf, len);
 	} else {
-		memcpy32_toio(sram, buf, len);
+		memcpy(sram, buf, len);
 	}
 
 	cfg = nfc_readl(host->nfc->hsmc_regs, CFG);
 	if (unlikely(raw) && oob_required) {
-		memcpy32_toio(sram + len, chip->oob_poi, mtd->oobsize);
+		memcpy(sram + len, chip->oob_poi, mtd->oobsize);
 		len += mtd->oobsize;
 		nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE);
 	} else {
@@ -2260,7 +2313,8 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev)
 
 	nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2);
 	if (nfc_sram) {
-		nfc->sram_bank0 = devm_ioremap_resource(&pdev->dev, nfc_sram);
+		nfc->sram_bank0 = (void * __force)
+				devm_ioremap_resource(&pdev->dev, nfc_sram);
 		if (IS_ERR(nfc->sram_bank0)) {
 			dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. So disable NFC sram.\n",
 					PTR_ERR(nfc->sram_bank0));
diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h
index 8a1e9a6..d4035e3 100644
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ b/drivers/mtd/nand/atmel_nand_ecc.h
@@ -142,6 +142,10 @@
 #define PMECC_GF_DIMENSION_13			13
 #define PMECC_GF_DIMENSION_14			14
 
+/* Primitive Polynomial used by PMECC */
+#define PMECC_GF_13_PRIMITIVE_POLY		0x201b
+#define PMECC_GF_14_PRIMITIVE_POLY		0x4443
+
 #define PMECC_LOOKUP_TABLE_SIZE_512		0x2000
 #define PMECC_LOOKUP_TABLE_SIZE_1024		0x4000
 
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 4e66726..9a0f45f 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -529,50 +529,6 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd,
 	return 0;
 }
 
-static int cafe_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, int data_len, const uint8_t *buf,
-			int oob_required, int page, int cached, int raw)
-{
-	int status;
-
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
-
-	if (unlikely(raw))
-		status = chip->ecc.write_page_raw(mtd, chip, buf, oob_required);
-	else
-		status = chip->ecc.write_page(mtd, chip, buf, oob_required);
-
-	if (status < 0)
-		return status;
-
-	/*
-	 * Cached progamming disabled for now, Not sure if its worth the
-	 * trouble. The speed gain is not very impressive. (2.3->2.6Mib/s)
-	 */
-	cached = 0;
-
-	if (!cached || !(chip->options & NAND_CACHEPRG)) {
-
-		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
-		/*
-		 * See if operation failed and additional status checks are
-		 * available
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_WRITING, status,
-					       page);
-
-		if (status & NAND_STATUS_FAIL)
-			return -EIO;
-	} else {
-		chip->cmdfunc(mtd, NAND_CMD_CACHEDPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
-	}
-
-	return 0;
-}
-
 static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
 {
 	return 0;
@@ -800,7 +756,6 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.ecc.hwctl  = (void *)cafe_nand_bug;
 	cafe->nand.ecc.calculate = (void *)cafe_nand_bug;
 	cafe->nand.ecc.correct  = (void *)cafe_nand_bug;
-	cafe->nand.write_page = cafe_nand_write_page;
 	cafe->nand.ecc.write_page = cafe_nand_write_page_lowlevel;
 	cafe->nand.ecc.write_oob = cafe_nand_write_oob;
 	cafe->nand.ecc.read_page = cafe_nand_read_page;
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index b9ef7a6..4c05f4f 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -31,7 +31,6 @@
 #include <linux/mtd/nand_ecc.h>
 #include <linux/fsl_ifc.h>
 
-#define FSL_IFC_V1_1_0	0x01010000
 #define ERR_BYTE		0xFF /* Value returned for read
 					bytes when read failed	*/
 #define IFC_TIMEOUT_MSECS	500  /* Maximum number of mSecs to wait
@@ -877,7 +876,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	struct fsl_ifc_regs __iomem *ifc = ctrl->regs;
 	struct nand_chip *chip = &priv->chip;
 	struct nand_ecclayout *layout;
-	u32 csor, ver;
+	u32 csor;
 
 	/* Fill in fsl_ifc_mtd structure */
 	priv->mtd.priv = chip;
@@ -984,8 +983,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		chip->ecc.mode = NAND_ECC_SOFT;
 	}
 
-	ver = ioread32be(&ifc->ifc_rev);
-	if (ver == FSL_IFC_V1_1_0)
+	if (ctrl->version == FSL_IFC_VERSION_1_1_0)
 		fsl_ifc_sram_init(priv);
 
 	return 0;
@@ -1045,12 +1043,12 @@ static int fsl_ifc_nand_probe(struct platform_device *dev)
 	}
 
 	/* find which chip select it is connected to */
-	for (bank = 0; bank < FSL_IFC_BANK_COUNT; bank++) {
+	for (bank = 0; bank < fsl_ifc_ctrl_dev->banks; bank++) {
 		if (match_bank(ifc, bank, res.start))
 			break;
 	}
 
-	if (bank >= FSL_IFC_BANK_COUNT) {
+	if (bank >= fsl_ifc_ctrl_dev->banks) {
 		dev_err(&dev->dev, "%s: address did not match any chip selects\n",
 			__func__);
 		return -ENODEV;
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 9182839..73c4048 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -8,7 +8,9 @@
  *
  * © 2004 Simtec Electronics
  *
- * Device driver for NAND connected via GPIO
+ * Device driver for NAND flash that uses a memory mapped interface to
+ * read/write the NAND commands and data, and GPIO pins for control signals
+ * (the DT binding refers to this as "GPIO assisted NAND flash")
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 87e658c..27f272e 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -1353,3 +1353,156 @@ int gpmi_read_page(struct gpmi_nand_data *this,
 	set_dma_type(this, DMA_FOR_READ_ECC_PAGE);
 	return start_dma_with_bch_irq(this, desc);
 }
+
+/**
+ * gpmi_copy_bits - copy bits from one memory region to another
+ * @dst: destination buffer
+ * @dst_bit_off: bit offset we're starting to write at
+ * @src: source buffer
+ * @src_bit_off: bit offset we're starting to read from
+ * @nbits: number of bits to copy
+ *
+ * This function copies bits from one memory region to another, and is used by
+ * the GPMI driver to copy ECC sections which are not guaranteed to be byte
+ * aligned.
+ *
+ * src and dst should not overlap.
+ *
+ */
+void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
+		    const u8 *src, size_t src_bit_off,
+		    size_t nbits)
+{
+	size_t i;
+	size_t nbytes;
+	u32 src_buffer = 0;
+	size_t bits_in_src_buffer = 0;
+
+	if (!nbits)
+		return;
+
+	/*
+	 * Move src and dst pointers to the closest byte pointer and store bit
+	 * offsets within a byte.
+	 */
+	src += src_bit_off / 8;
+	src_bit_off %= 8;
+
+	dst += dst_bit_off / 8;
+	dst_bit_off %= 8;
+
+	/*
+	 * Initialize the src_buffer value with bits available in the first
+	 * byte of data so that we end up with a byte aligned src pointer.
+	 */
+	if (src_bit_off) {
+		src_buffer = src[0] >> src_bit_off;
+		if (nbits >= (8 - src_bit_off)) {
+			bits_in_src_buffer += 8 - src_bit_off;
+		} else {
+			src_buffer &= GENMASK(nbits - 1, 0);
+			bits_in_src_buffer += nbits;
+		}
+		nbits -= bits_in_src_buffer;
+		src++;
+	}
+
+	/* Calculate the number of bytes that can be copied from src to dst. */
+	nbytes = nbits / 8;
+
+	/* Try to align dst to a byte boundary. */
+	if (dst_bit_off) {
+		if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) {
+			src_buffer |= src[0] << bits_in_src_buffer;
+			bits_in_src_buffer += 8;
+			src++;
+			nbytes--;
+		}
+
+		if (bits_in_src_buffer >= (8 - dst_bit_off)) {
+			dst[0] &= GENMASK(dst_bit_off - 1, 0);
+			dst[0] |= src_buffer << dst_bit_off;
+			src_buffer >>= (8 - dst_bit_off);
+			bits_in_src_buffer -= (8 - dst_bit_off);
+			dst_bit_off = 0;
+			dst++;
+			if (bits_in_src_buffer > 7) {
+				bits_in_src_buffer -= 8;
+				dst[0] = src_buffer;
+				dst++;
+				src_buffer >>= 8;
+			}
+		}
+	}
+
+	if (!bits_in_src_buffer && !dst_bit_off) {
+		/*
+		 * Both src and dst pointers are byte aligned, thus we can
+		 * just use the optimized memcpy function.
+		 */
+		if (nbytes)
+			memcpy(dst, src, nbytes);
+	} else {
+		/*
+		 * src buffer is not byte aligned, hence we have to copy each
+		 * src byte to the src_buffer variable before extracting a byte
+		 * to store in dst.
+		 */
+		for (i = 0; i < nbytes; i++) {
+			src_buffer |= src[i] << bits_in_src_buffer;
+			dst[i] = src_buffer;
+			src_buffer >>= 8;
+		}
+	}
+	/* Update dst and src pointers */
+	dst += nbytes;
+	src += nbytes;
+
+	/*
+	 * nbits is the number of remaining bits. It is always less than 8 as
+	 * we've already copied as many whole bytes as possible.
+	 */
+	nbits %= 8;
+
+	/*
+	 * If there's no more bits to copy to the destination and src buffer
+	 * was already byte aligned, then we're done.
+	 */
+	if (!nbits && !bits_in_src_buffer)
+		return;
+
+	/* Copy the remaining bits to src_buffer */
+	if (nbits)
+		src_buffer |= (*src & GENMASK(nbits - 1, 0)) <<
+			      bits_in_src_buffer;
+	bits_in_src_buffer += nbits;
+
+	/*
+	 * In case there were not enough bits to get a byte aligned dst buffer
+	 * prepare the src_buffer variable to match the dst organization (shift
+	 * src_buffer by dst_bit_off and retrieve the least significant bits
+	 * from dst).
+	 */
+	if (dst_bit_off)
+		src_buffer = (src_buffer << dst_bit_off) |
+			     (*dst & GENMASK(dst_bit_off - 1, 0));
+	bits_in_src_buffer += dst_bit_off;
+
+	/*
+	 * Keep most significant bits from dst if we end up with an unaligned
+	 * number of bits.
+	 */
+	nbytes = bits_in_src_buffer / 8;
+	if (bits_in_src_buffer % 8) {
+		src_buffer |= (dst[nbytes] &
+			       GENMASK(7, bits_in_src_buffer % 8)) <<
+			      (nbytes * 8);
+		nbytes++;
+	}
+
+	/* Copy the remaining bytes to dst */
+	for (i = 0; i < nbytes; i++) {
+		dst[i] = src_buffer;
+		src_buffer >>= 8;
+	}
+}
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 959cb9b..4f3851a 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -791,6 +791,7 @@ static void gpmi_free_dma_buffer(struct gpmi_nand_data *this)
 					this->page_buffer_phys);
 	kfree(this->cmd_buffer);
 	kfree(this->data_buffer_dma);
+	kfree(this->raw_buffer);
 
 	this->cmd_buffer	= NULL;
 	this->data_buffer_dma	= NULL;
@@ -837,6 +838,9 @@ static int gpmi_alloc_dma_buffer(struct gpmi_nand_data *this)
 	if (!this->page_buffer_virt)
 		goto error_alloc;
 
+	this->raw_buffer = kzalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL);
+	if (!this->raw_buffer)
+		goto error_alloc;
 
 	/* Slice up the page buffer. */
 	this->payload_virt = this->page_buffer_virt;
@@ -1347,6 +1351,199 @@ gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page)
 	return status & NAND_STATUS_FAIL ? -EIO : 0;
 }
 
+/*
+ * This function reads a NAND page without involving the ECC engine (no HW
+ * ECC correction).
+ * The tricky part in the GPMI/BCH controller is that it stores ECC bits
+ * inline (interleaved with payload DATA), and does not align data chunks on
+ * byte boundaries.
+ * We thus need to take care moving the payload data and ECC bits stored in the
+ * page into the provided buffers, which is why we're using gpmi_copy_bits.
+ *
+ * See the inline comments in set_geometry_by_ecc_info for a full description
+ * of the layout used by the GPMI controller.
+ */
+static int gpmi_ecc_read_page_raw(struct mtd_info *mtd,
+				  struct nand_chip *chip, uint8_t *buf,
+				  int oob_required, int page)
+{
+	struct gpmi_nand_data *this = chip->priv;
+	struct bch_geometry *nfc_geo = &this->bch_geometry;
+	int eccsize = nfc_geo->ecc_chunk_size;
+	int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len;
+	u8 *tmp_buf = this->raw_buffer;
+	size_t src_bit_off;
+	size_t oob_bit_off;
+	size_t oob_byte_off;
+	uint8_t *oob = chip->oob_poi;
+	int step;
+
+	chip->read_buf(mtd, tmp_buf,
+		       mtd->writesize + mtd->oobsize);
+
+	/*
+	 * If required, swap the bad block marker and the data stored in the
+	 * metadata section, so that we don't wrongly consider a block as bad.
+	 *
+	 * See the layout description for a detailed explanation on why this
+	 * is needed.
+	 */
+	if (this->swap_block_mark) {
+		u8 swap = tmp_buf[0];
+
+		tmp_buf[0] = tmp_buf[mtd->writesize];
+		tmp_buf[mtd->writesize] = swap;
+	}
+
+	/*
+	 * Copy the metadata section into the oob buffer (this section is
+	 * guaranteed to be aligned on a byte boundary).
+	 */
+	if (oob_required)
+		memcpy(oob, tmp_buf, nfc_geo->metadata_size);
+
+	oob_bit_off = nfc_geo->metadata_size * 8;
+	src_bit_off = oob_bit_off;
+
+	/* Extract interleaved payload data and ECC bits */
+	for (step = 0; step < nfc_geo->ecc_chunk_count; step++) {
+		if (buf)
+			gpmi_copy_bits(buf, step * eccsize * 8,
+				       tmp_buf, src_bit_off,
+				       eccsize * 8);
+		src_bit_off += eccsize * 8;
+
+		/* Pad the last ECC block so that it ends on a byte boundary */
+		if (step == nfc_geo->ecc_chunk_count - 1 &&
+		    (oob_bit_off + eccbits) % 8)
+			eccbits += 8 - ((oob_bit_off + eccbits) % 8);
+
+		if (oob_required)
+			gpmi_copy_bits(oob, oob_bit_off,
+				       tmp_buf, src_bit_off,
+				       eccbits);
+
+		src_bit_off += eccbits;
+		oob_bit_off += eccbits;
+	}
+
+	if (oob_required) {
+		oob_byte_off = oob_bit_off / 8;
+
+		if (oob_byte_off < mtd->oobsize)
+			memcpy(oob + oob_byte_off,
+			       tmp_buf + mtd->writesize + oob_byte_off,
+			       mtd->oobsize - oob_byte_off);
+	}
+
+	return 0;
+}
+
+/*
+ * This function writes a NAND page without involving the ECC engine (no HW
+ * ECC generation).
+ * The tricky part in the GPMI/BCH controller is that it stores ECC bits
+ * inline (interleaved with payload DATA), and does not align data chunks on
+ * byte boundaries.
+ * We thus need to take care moving the OOB area at the right place in the
+ * final page, which is why we're using gpmi_copy_bits.
+ *
+ * See the inline comments in set_geometry_by_ecc_info for a full description
+ * of the layout used by the GPMI controller.
+ */
+static int gpmi_ecc_write_page_raw(struct mtd_info *mtd,
+				   struct nand_chip *chip,
+				   const uint8_t *buf,
+				   int oob_required)
+{
+	struct gpmi_nand_data *this = chip->priv;
+	struct bch_geometry *nfc_geo = &this->bch_geometry;
+	int eccsize = nfc_geo->ecc_chunk_size;
+	int eccbits = nfc_geo->ecc_strength * nfc_geo->gf_len;
+	u8 *tmp_buf = this->raw_buffer;
+	uint8_t *oob = chip->oob_poi;
+	size_t dst_bit_off;
+	size_t oob_bit_off;
+	size_t oob_byte_off;
+	int step;
+
+	/*
+	 * Initialize all bits to 1 in case we don't have a buffer for the
+	 * payload or oob data in order to leave unspecified bits of data
+	 * to their initial state.
+	 */
+	if (!buf || !oob_required)
+		memset(tmp_buf, 0xff, mtd->writesize + mtd->oobsize);
+
+	/*
+	 * First copy the metadata section (stored in oob buffer) at the
+	 * beginning of the page, as imposed by the GPMI layout.
+	 */
+	memcpy(tmp_buf, oob, nfc_geo->metadata_size);
+	oob_bit_off = nfc_geo->metadata_size * 8;
+	dst_bit_off = oob_bit_off;
+
+	/* Interleave payload data and ECC bits */
+	for (step = 0; step < nfc_geo->ecc_chunk_count; step++) {
+		if (buf)
+			gpmi_copy_bits(tmp_buf, dst_bit_off,
+				       buf, step * eccsize * 8, eccsize * 8);
+		dst_bit_off += eccsize * 8;
+
+		/* Pad the last ECC block so that it ends on a byte boundary */
+		if (step == nfc_geo->ecc_chunk_count - 1 &&
+		    (oob_bit_off + eccbits) % 8)
+			eccbits += 8 - ((oob_bit_off + eccbits) % 8);
+
+		if (oob_required)
+			gpmi_copy_bits(tmp_buf, dst_bit_off,
+				       oob, oob_bit_off, eccbits);
+
+		dst_bit_off += eccbits;
+		oob_bit_off += eccbits;
+	}
+
+	oob_byte_off = oob_bit_off / 8;
+
+	if (oob_required && oob_byte_off < mtd->oobsize)
+		memcpy(tmp_buf + mtd->writesize + oob_byte_off,
+		       oob + oob_byte_off, mtd->oobsize - oob_byte_off);
+
+	/*
+	 * If required, swap the bad block marker and the first byte of the
+	 * metadata section, so that we don't modify the bad block marker.
+	 *
+	 * See the layout description for a detailed explanation on why this
+	 * is needed.
+	 */
+	if (this->swap_block_mark) {
+		u8 swap = tmp_buf[0];
+
+		tmp_buf[0] = tmp_buf[mtd->writesize];
+		tmp_buf[mtd->writesize] = swap;
+	}
+
+	chip->write_buf(mtd, tmp_buf, mtd->writesize + mtd->oobsize);
+
+	return 0;
+}
+
+static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				 int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page);
+
+	return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page);
+}
+
+static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				 int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page);
+
+	return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1);
+}
+
 static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct nand_chip *chip = mtd->priv;
@@ -1664,6 +1861,10 @@ static int gpmi_init_last(struct gpmi_nand_data *this)
 	ecc->write_page	= gpmi_ecc_write_page;
 	ecc->read_oob	= gpmi_ecc_read_oob;
 	ecc->write_oob	= gpmi_ecc_write_oob;
+	ecc->read_page_raw = gpmi_ecc_read_page_raw;
+	ecc->write_page_raw = gpmi_ecc_write_page_raw;
+	ecc->read_oob_raw = gpmi_ecc_read_oob_raw;
+	ecc->write_oob_raw = gpmi_ecc_write_oob_raw;
 	ecc->mode	= NAND_ECC_HW;
 	ecc->size	= bch_geo->ecc_chunk_size;
 	ecc->strength	= bch_geo->ecc_strength;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 32c6ba4..544062f 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -189,6 +189,8 @@ struct gpmi_nand_data {
 	void			*auxiliary_virt;
 	dma_addr_t		auxiliary_phys;
 
+	void			*raw_buffer;
+
 	/* DMA channels */
 #define DMA_CHANS		8
 	struct dma_chan		*dma_chans[DMA_CHANS];
@@ -290,6 +292,10 @@ extern int gpmi_send_page(struct gpmi_nand_data *,
 extern int gpmi_read_page(struct gpmi_nand_data *,
 			dma_addr_t payload, dma_addr_t auxiliary);
 
+void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
+		    const u8 *src, size_t src_bit_off,
+		    size_t nbits);
+
 /* BCH : Status Block Completion Codes */
 #define STATUS_GOOD		0x00
 #define STATUS_ERASED		0xff
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index e1d56be..a8f550f 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -280,14 +280,10 @@ static void memcpy32_fromio(void *trg, const void __iomem  *src, size_t size)
 		*t++ = __raw_readl(s++);
 }
 
-static void memcpy32_toio(void __iomem *trg, const void *src, int size)
+static inline void memcpy32_toio(void __iomem *trg, const void *src, int size)
 {
-	int i;
-	u32 __iomem *t = trg;
-	const u32 *s = src;
-
-	for (i = 0; i < (size >> 2); i++)
-		__raw_writel(*s++, t++);
+	/* __iowrite32_copy() takes a count of 32-bit words, so divide by 4 */
+	__iowrite32_copy(trg, src, size / 4);
 }
 
 static int check_int_v3(struct mxc_nand_host *host)
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 5b5c627..41585df 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -485,11 +485,11 @@ static int nand_check_wp(struct mtd_info *mtd)
 }
 
 /**
- * nand_block_checkbad - [GENERIC] Check if a block is marked bad
+ * nand_block_isreserved - [GENERIC] Check if a block is marked reserved.
  * @mtd: MTD device structure
  * @ofs: offset from device start
  *
- * Check if the block is mark as reserved.
+ * Check if the block is marked as reserved.
  */
 static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 {
@@ -720,7 +720,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 
 	/*
 	 * Program and erase have their own busy handlers status, sequential
-	 * in, and deplete1 need no delay.
+	 * in and status need no delay.
 	 */
 	switch (command) {
 
@@ -3765,9 +3765,9 @@ ident_done:
 		pr_info("%s %s\n", nand_manuf_ids[maf_idx].name,
 				type->name);
 
-	pr_info("%dMiB, %s, page size: %d, OOB size: %d\n",
+	pr_info("%d MiB, %s, erase size: %d KiB, page size: %d, OOB size: %d\n",
 		(int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC",
-		mtd->writesize, mtd->oobsize);
+		mtd->erasesize >> 10, mtd->writesize, mtd->oobsize);
 	return type;
 }
 
@@ -4035,7 +4035,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 		 */
 		if (!ecc->size && (mtd->oobsize >= 64)) {
 			ecc->size = 512;
-			ecc->bytes = 7;
+			ecc->bytes = DIV_ROUND_UP(13 * ecc->strength, 8);
 		}
 		ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes,
 					       &ecc->layout);
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index fbde8910..dd620c1 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -178,6 +178,7 @@ struct nand_manufacturers nand_manuf_ids[] = {
 	{NAND_MFR_EON, "Eon"},
 	{NAND_MFR_SANDISK, "SanDisk"},
 	{NAND_MFR_INTEL, "Intel"},
+	{NAND_MFR_ATO, "ATO"},
 	{0x0, "Unknown"}
 };
 
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 7dc1dd2..ab5bbf5 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -87,10 +87,6 @@
 #define CONFIG_NANDSIM_MAX_PARTS  32
 #endif
 
-static uint first_id_byte  = CONFIG_NANDSIM_FIRST_ID_BYTE;
-static uint second_id_byte = CONFIG_NANDSIM_SECOND_ID_BYTE;
-static uint third_id_byte  = CONFIG_NANDSIM_THIRD_ID_BYTE;
-static uint fourth_id_byte = CONFIG_NANDSIM_FOURTH_ID_BYTE;
 static uint access_delay   = CONFIG_NANDSIM_ACCESS_DELAY;
 static uint programm_delay = CONFIG_NANDSIM_PROGRAMM_DELAY;
 static uint erase_delay    = CONFIG_NANDSIM_ERASE_DELAY;
@@ -111,11 +107,19 @@ static unsigned int overridesize = 0;
 static char *cache_file = NULL;
 static unsigned int bbt;
 static unsigned int bch;
+static u_char id_bytes[8] = {
+	[0] = CONFIG_NANDSIM_FIRST_ID_BYTE,
+	[1] = CONFIG_NANDSIM_SECOND_ID_BYTE,
+	[2] = CONFIG_NANDSIM_THIRD_ID_BYTE,
+	[3] = CONFIG_NANDSIM_FOURTH_ID_BYTE,
+	[4 ... 7] = 0xFF,
+};
 
-module_param(first_id_byte,  uint, 0400);
-module_param(second_id_byte, uint, 0400);
-module_param(third_id_byte,  uint, 0400);
-module_param(fourth_id_byte, uint, 0400);
+module_param_array(id_bytes, byte, NULL, 0400);
+module_param_named(first_id_byte, id_bytes[0], byte, 0400);
+module_param_named(second_id_byte, id_bytes[1], byte, 0400);
+module_param_named(third_id_byte, id_bytes[2], byte, 0400);
+module_param_named(fourth_id_byte, id_bytes[3], byte, 0400);
 module_param(access_delay,   uint, 0400);
 module_param(programm_delay, uint, 0400);
 module_param(erase_delay,    uint, 0400);
@@ -136,10 +140,11 @@ module_param(cache_file,     charp, 0400);
 module_param(bbt,	     uint, 0400);
 module_param(bch,	     uint, 0400);
 
-MODULE_PARM_DESC(first_id_byte,  "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)");
-MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)");
-MODULE_PARM_DESC(third_id_byte,  "The third byte returned by NAND Flash 'read ID' command");
-MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command");
+MODULE_PARM_DESC(id_bytes,       "The ID bytes returned by NAND Flash 'read ID' command");
+MODULE_PARM_DESC(first_id_byte,  "The first byte returned by NAND Flash 'read ID' command (manufacturer ID) (obsolete)");
+MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID) (obsolete)");
+MODULE_PARM_DESC(third_id_byte,  "The third byte returned by NAND Flash 'read ID' command (obsolete)");
+MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command (obsolete)");
 MODULE_PARM_DESC(access_delay,   "Initial page access delay (microseconds)");
 MODULE_PARM_DESC(programm_delay, "Page programm delay (microseconds");
 MODULE_PARM_DESC(erase_delay,    "Sector erase delay (milliseconds)");
@@ -304,7 +309,7 @@ struct nandsim {
 	unsigned int nbparts;
 
 	uint busw;              /* flash chip bus width (8 or 16) */
-	u_char ids[4];          /* chip's ID bytes */
+	u_char ids[8];          /* chip's ID bytes */
 	uint32_t options;       /* chip's characteristic bits */
 	uint32_t state;         /* current chip state */
 	uint32_t nxstate;       /* next expected state */
@@ -2279,17 +2284,18 @@ static int __init ns_init_module(void)
 	 * Perform minimum nandsim structure initialization to handle
 	 * the initial ID read command correctly
 	 */
-	if (third_id_byte != 0xFF || fourth_id_byte != 0xFF)
+	if (id_bytes[6] != 0xFF || id_bytes[7] != 0xFF)
+		nand->geom.idbytes = 8;
+	else if (id_bytes[4] != 0xFF || id_bytes[5] != 0xFF)
+		nand->geom.idbytes = 6;
+	else if (id_bytes[2] != 0xFF || id_bytes[3] != 0xFF)
 		nand->geom.idbytes = 4;
 	else
 		nand->geom.idbytes = 2;
 	nand->regs.status = NS_STATUS_OK(nand);
 	nand->nxstate = STATE_UNKNOWN;
 	nand->options |= OPT_PAGE512; /* temporary value */
-	nand->ids[0] = first_id_byte;
-	nand->ids[1] = second_id_byte;
-	nand->ids[2] = third_id_byte;
-	nand->ids[3] = fourth_id_byte;
+	memcpy(nand->ids, id_bytes, sizeof(nand->ids));
 	if (bus_width == 16) {
 		nand->busw = 16;
 		chip->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 6d74b56..63f858e 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -144,11 +144,13 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
 	0xac, 0x6b, 0xff, 0x99, 0x7b};
 static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
 
-/* oob info generated runtime depending on ecc algorithm and layout selected */
-static struct nand_ecclayout omap_oobinfo;
+/* Shared among all NAND instances to synchronize access to the ECC Engine */
+static struct nand_hw_control omap_gpmc_controller = {
+	.lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock),
+	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq),
+};
 
 struct omap_nand_info {
-	struct nand_hw_control		controller;
 	struct omap_nand_platform_data	*pdata;
 	struct mtd_info			mtd;
 	struct nand_chip		nand;
@@ -168,6 +170,8 @@ struct omap_nand_info {
 	u_char				*buf;
 	int					buf_len;
 	struct gpmc_nand_regs		reg;
+	/* generated at runtime depending on ECC algorithm and layout selected */
+	struct nand_ecclayout		oobinfo;
 	/* fields specific for BCHx_HW ECC scheme */
 	struct device			*elm_dev;
 	struct device_node		*of_node;
@@ -1686,9 +1690,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, info);
 
-	spin_lock_init(&info->controller.lock);
-	init_waitqueue_head(&info->controller.wq);
-
 	info->pdev		= pdev;
 	info->gpmc_cs		= pdata->cs;
 	info->reg		= pdata->reg;
@@ -1708,7 +1709,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 
 	info->phys_base = res->start;
 
-	nand_chip->controller = &info->controller;
+	nand_chip->controller = &omap_gpmc_controller;
 
 	nand_chip->IO_ADDR_W = nand_chip->IO_ADDR_R;
 	nand_chip->cmd_ctrl  = omap_hwcontrol;
@@ -1741,13 +1742,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto return_error;
 	}
 
-	/* check for small page devices */
-	if ((mtd->oobsize < 64) && (pdata->ecc_opt != OMAP_ECC_HAM1_CODE_HW)) {
-		dev_err(&info->pdev->dev, "small page devices are not supported\n");
-		err = -EINVAL;
-		goto return_error;
-	}
-
 	/* re-populate low-level callbacks based on xfer modes */
 	switch (pdata->xfer_type) {
 	case NAND_OMAP_PREFETCH_POLLED:
@@ -1840,7 +1834,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	}
 
 	/* populate MTD interface based on ECC scheme */
-	ecclayout		= &omap_oobinfo;
+	ecclayout		= &info->oobinfo;
 	switch (info->ecc_opt) {
 	case OMAP_ECC_HAM1_CODE_SW:
 		nand_chip->ecc.mode = NAND_ECC_SOFT;
diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index c53e369..c3c6d30 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -19,7 +19,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/sizes.h>
 #include <linux/platform_data/mtd-orion_nand.h>
 
@@ -85,33 +85,24 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	int ret = 0;
 	u32 val = 0;
 
-	nc = kzalloc(sizeof(struct nand_chip) + sizeof(struct mtd_info), GFP_KERNEL);
-	if (!nc) {
-		ret = -ENOMEM;
-		goto no_res;
-	}
+	nc = devm_kzalloc(&pdev->dev,
+			sizeof(struct nand_chip) + sizeof(struct mtd_info),
+			GFP_KERNEL);
+	if (!nc)
+		return -ENOMEM;
 	mtd = (struct mtd_info *)(nc + 1);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		ret = -ENODEV;
-		goto no_res;
-	}
+	io_base = devm_ioremap_resource(&pdev->dev, res);
 
-	io_base = ioremap(res->start, resource_size(res));
-	if (!io_base) {
-		dev_err(&pdev->dev, "ioremap failed\n");
-		ret = -EIO;
-		goto no_res;
-	}
+	if (IS_ERR(io_base))
+		return PTR_ERR(io_base);
 
 	if (pdev->dev.of_node) {
 		board = devm_kzalloc(&pdev->dev, sizeof(struct orion_nand_data),
 					GFP_KERNEL);
-		if (!board) {
-			ret = -ENOMEM;
-			goto no_res;
-		}
+		if (!board)
+			return -ENOMEM;
 		if (!of_property_read_u32(pdev->dev.of_node, "cle", &val))
 			board->cle = (u8)val;
 		else
@@ -185,9 +176,6 @@ no_dev:
 		clk_disable_unprepare(clk);
 		clk_put(clk);
 	}
-	iounmap(io_base);
-no_res:
-	kfree(nc);
 
 	return ret;
 }
@@ -195,15 +183,10 @@ no_res:
 static int orion_nand_remove(struct platform_device *pdev)
 {
 	struct mtd_info *mtd = platform_get_drvdata(pdev);
-	struct nand_chip *nc = mtd->priv;
 	struct clk *clk;
 
 	nand_release(mtd);
 
-	iounmap(nc->IO_ADDR_W);
-
-	kfree(nc);
-
 	clk = clk_get(&pdev->dev, NULL);
 	if (!IS_ERR(clk)) {
 		clk_disable_unprepare(clk);
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
new file mode 100644
index 0000000..ccaa8e2
--- /dev/null
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -0,0 +1,1432 @@
+/*
+ * Copyright (C) 2013 Boris BREZILLON <b.brezillon.dev@gmail.com>
+ *
+ * Derived from:
+ *	https://github.com/yuq/sunxi-nfc-mtd
+ *	Copyright (C) 2013 Qiang Yu <yuq825@gmail.com>
+ *
+ *	https://github.com/hno/Allwinner-Info
+ *	Copyright (C) 2013 Henrik Nordström <Henrik Nordström>
+ *
+ *	Copyright (C) 2013 Dmitriy B. <rzk333@gmail.com>
+ *	Copyright (C) 2013 Sergey Lapin <slapin@ossfans.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/of_mtd.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+#define NFC_REG_CTL		0x0000
+#define NFC_REG_ST		0x0004
+#define NFC_REG_INT		0x0008
+#define NFC_REG_TIMING_CTL	0x000C
+#define NFC_REG_TIMING_CFG	0x0010
+#define NFC_REG_ADDR_LOW	0x0014
+#define NFC_REG_ADDR_HIGH	0x0018
+#define NFC_REG_SECTOR_NUM	0x001C
+#define NFC_REG_CNT		0x0020
+#define NFC_REG_CMD		0x0024
+#define NFC_REG_RCMD_SET	0x0028
+#define NFC_REG_WCMD_SET	0x002C
+#define NFC_REG_IO_DATA		0x0030
+#define NFC_REG_ECC_CTL		0x0034
+#define NFC_REG_ECC_ST		0x0038
+#define NFC_REG_DEBUG		0x003C
+#define NFC_REG_ECC_CNT0	0x0040
+#define NFC_REG_ECC_CNT1	0x0044
+#define NFC_REG_ECC_CNT2	0x0048
+#define NFC_REG_ECC_CNT3	0x004c
+#define NFC_REG_USER_DATA_BASE	0x0050
+#define NFC_REG_SPARE_AREA	0x00A0
+#define NFC_RAM0_BASE		0x0400
+#define NFC_RAM1_BASE		0x0800
+
+/* define bits used in NFC_CTL */
+#define NFC_EN			BIT(0)
+#define NFC_RESET		BIT(1)
+#define NFC_BUS_WIDYH		BIT(2)
+#define NFC_RB_SEL		BIT(3)
+#define NFC_CE_SEL		GENMASK(26, 24)
+#define NFC_CE_CTL		BIT(6)
+#define NFC_CE_CTL1		BIT(7)
+#define NFC_PAGE_SIZE		GENMASK(11, 8)
+#define NFC_SAM			BIT(12)
+#define NFC_RAM_METHOD		BIT(14)
+#define NFC_DEBUG_CTL		BIT(31)
+
+/* define bits used in NFC_ST */
+#define NFC_RB_B2R		BIT(0)
+#define NFC_CMD_INT_FLAG	BIT(1)
+#define NFC_DMA_INT_FLAG	BIT(2)
+#define NFC_CMD_FIFO_STATUS	BIT(3)
+#define NFC_STA			BIT(4)
+#define NFC_NATCH_INT_FLAG	BIT(5)
+#define NFC_RB_STATE0		BIT(8)
+#define NFC_RB_STATE1		BIT(9)
+#define NFC_RB_STATE2		BIT(10)
+#define NFC_RB_STATE3		BIT(11)
+
+/* define bits used in NFC_INT */
+#define NFC_B2R_INT_ENABLE	BIT(0)
+#define NFC_CMD_INT_ENABLE	BIT(1)
+#define NFC_DMA_INT_ENABLE	BIT(2)
+#define NFC_INT_MASK		(NFC_B2R_INT_ENABLE | \
+				 NFC_CMD_INT_ENABLE | \
+				 NFC_DMA_INT_ENABLE)
+
+/* define bits used in NFC_CMD */
+#define NFC_CMD_LOW_BYTE	GENMASK(7, 0)
+#define NFC_CMD_HIGH_BYTE	GENMASK(15, 8)
+#define NFC_ADR_NUM		GENMASK(18, 16)
+#define NFC_SEND_ADR		BIT(19)
+#define NFC_ACCESS_DIR		BIT(20)
+#define NFC_DATA_TRANS		BIT(21)
+#define NFC_SEND_CMD1		BIT(22)
+#define NFC_WAIT_FLAG		BIT(23)
+#define NFC_SEND_CMD2		BIT(24)
+#define NFC_SEQ			BIT(25)
+#define NFC_DATA_SWAP_METHOD	BIT(26)
+#define NFC_ROW_AUTO_INC	BIT(27)
+#define NFC_SEND_CMD3		BIT(28)
+#define NFC_SEND_CMD4		BIT(29)
+#define NFC_CMD_TYPE		GENMASK(31, 30)
+
+/* define bits used in NFC_RCMD_SET */
+#define NFC_READ_CMD		GENMASK(7, 0)
+#define NFC_RANDOM_READ_CMD0	GENMASK(15, 8)
+#define NFC_RANDOM_READ_CMD1	GENMASK(23, 16)
+
+/* define bits used in NFC_WCMD_SET */
+#define NFC_PROGRAM_CMD		GENMASK(7, 0)
+#define NFC_RANDOM_WRITE_CMD	GENMASK(15, 8)
+#define NFC_READ_CMD0		GENMASK(23, 16)
+#define NFC_READ_CMD1		GENMASK(31, 24)
+
+/* define bits used in NFC_ECC_CTL */
+#define NFC_ECC_EN		BIT(0)
+#define NFC_ECC_PIPELINE	BIT(3)
+#define NFC_ECC_EXCEPTION	BIT(4)
+#define NFC_ECC_BLOCK_SIZE	BIT(5)
+#define NFC_RANDOM_EN		BIT(9)
+#define NFC_RANDOM_DIRECTION	BIT(10)
+#define NFC_ECC_MODE_SHIFT	12
+#define NFC_ECC_MODE		GENMASK(15, 12)
+#define NFC_RANDOM_SEED		GENMASK(30, 16)
+
+#define NFC_DEFAULT_TIMEOUT_MS	1000
+
+#define NFC_SRAM_SIZE		1024
+
+#define NFC_MAX_CS		7
+
+/*
+ * Ready/Busy detection type: describes the Ready/Busy detection modes
+ *
+ * @RB_NONE:	no external detection available, rely on STATUS command
+ *		and software timeouts
+ * @RB_NATIVE:	use sunxi NAND controller Ready/Busy support. The Ready/Busy
+ *		pin of the NAND flash chip must be connected to one of the
+ *		native NAND R/B pins (those which can be muxed to the NAND
+ *		Controller)
+ * @RB_GPIO:	use a simple GPIO to handle Ready/Busy status. The Ready/Busy
+ *		pin of the NAND flash chip must be connected to a GPIO capable
+ *		pin.
+ */
+enum sunxi_nand_rb_type {
+	RB_NONE,
+	RB_NATIVE,
+	RB_GPIO,
+};
+
+/*
+ * Ready/Busy structure: stores information related to Ready/Busy detection
+ *
+ * @type:	the Ready/Busy detection mode
+ * @info:	information related to the R/B detection mode. Either a gpio
+ *		id or a native R/B id (those supported by the NAND controller).
+ */
+struct sunxi_nand_rb {
+	enum sunxi_nand_rb_type type;
+	union {
+		int gpio;
+		int nativeid;
+	} info;
+};
+
+/*
+ * Chip Select structure: stores information related to NAND Chip Select
+ *
+ * @cs:		the NAND CS id used to communicate with a NAND Chip
+ * @rb:		the Ready/Busy description
+ */
+struct sunxi_nand_chip_sel {
+	u8 cs;
+	struct sunxi_nand_rb rb;
+};
+
+/*
+ * sunxi HW ECC info: stores information related to HW ECC support
+ *
+ * @mode:	the sunxi ECC mode field deduced from ECC requirements
+ * @layout:	the OOB layout depending on the ECC requirements and the
+ *		selected ECC mode
+ */
+struct sunxi_nand_hw_ecc {
+	int mode;
+	struct nand_ecclayout layout;
+};
+
+/*
+ * NAND chip structure: stores NAND chip device related information
+ *
+ * @node:		used to store NAND chips into a list
+ * @nand:		base NAND chip structure
+ * @mtd:		base MTD structure
+ * @clk_rate:		clk_rate required for this NAND chip
+ * @selected:		current active CS
+ * @nsels:		number of CS lines required by the NAND chip
+ * @sels:		array of CS lines descriptions
+ */
+struct sunxi_nand_chip {
+	struct list_head node;
+	struct nand_chip nand;
+	struct mtd_info mtd;
+	unsigned long clk_rate;
+	int selected;
+	int nsels;
+	struct sunxi_nand_chip_sel sels[0];
+};
+
+static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand)
+{
+	return container_of(nand, struct sunxi_nand_chip, nand);
+}
+
+/*
+ * NAND Controller structure: stores sunxi NAND controller information
+ *
+ * @controller:		base controller structure
+ * @dev:		parent device (used to print error messages)
+ * @regs:		NAND controller registers
+ * @ahb_clk:		NAND Controller AHB clock
+ * @mod_clk:		NAND Controller mod clock
+ * @assigned_cs:	bitmask describing already assigned CS lines
+ * @clk_rate:		NAND controller current clock rate
+ * @chips:		a list containing all the NAND chips attached to
+ *			this NAND controller
+ * @complete:		a completion object used to wait for NAND
+ *			controller events
+ */
+struct sunxi_nfc {
+	struct nand_hw_control controller;
+	struct device *dev;
+	void __iomem *regs;
+	struct clk *ahb_clk;
+	struct clk *mod_clk;
+	unsigned long assigned_cs;
+	unsigned long clk_rate;
+	struct list_head chips;
+	struct completion complete;
+};
+
+static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl)
+{
+	return container_of(ctrl, struct sunxi_nfc, controller);
+}
+
+static irqreturn_t sunxi_nfc_interrupt(int irq, void *dev_id)
+{
+	struct sunxi_nfc *nfc = dev_id;
+	u32 st = readl(nfc->regs + NFC_REG_ST);
+	u32 ien = readl(nfc->regs + NFC_REG_INT);
+
+	if (!(ien & st))
+		return IRQ_NONE;
+
+	if ((ien & st) == ien)
+		complete(&nfc->complete);
+
+	writel(st & NFC_INT_MASK, nfc->regs + NFC_REG_ST);
+	writel(~st & ien & NFC_INT_MASK, nfc->regs + NFC_REG_INT);
+
+	return IRQ_HANDLED;
+}
+
+static int sunxi_nfc_wait_int(struct sunxi_nfc *nfc, u32 flags,
+			      unsigned int timeout_ms)
+{
+	init_completion(&nfc->complete);
+
+	writel(flags, nfc->regs + NFC_REG_INT);
+
+	if (!timeout_ms)
+		timeout_ms = NFC_DEFAULT_TIMEOUT_MS;
+
+	if (!wait_for_completion_timeout(&nfc->complete,
+					 msecs_to_jiffies(timeout_ms))) {
+		dev_err(nfc->dev, "wait interrupt timedout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int sunxi_nfc_wait_cmd_fifo_empty(struct sunxi_nfc *nfc)
+{
+	unsigned long timeout = jiffies +
+				msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS);
+
+	do {
+		if (!(readl(nfc->regs + NFC_REG_ST) & NFC_CMD_FIFO_STATUS))
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	dev_err(nfc->dev, "wait for empty cmd FIFO timedout\n");
+	return -ETIMEDOUT;
+}
+
+static int sunxi_nfc_rst(struct sunxi_nfc *nfc)
+{
+	unsigned long timeout = jiffies +
+				msecs_to_jiffies(NFC_DEFAULT_TIMEOUT_MS);
+
+	writel(0, nfc->regs + NFC_REG_ECC_CTL);
+	writel(NFC_RESET, nfc->regs + NFC_REG_CTL);
+
+	do {
+		if (!(readl(nfc->regs + NFC_REG_CTL) & NFC_RESET))
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	dev_err(nfc->dev, "wait for NAND controller reset timedout\n");
+	return -ETIMEDOUT;
+}
+
+static int sunxi_nfc_dev_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	struct sunxi_nand_rb *rb;
+	unsigned long timeo = (sunxi_nand->nand.state == FL_ERASING ? 400 : 20);
+	int ret;
+
+	if (sunxi_nand->selected < 0)
+		return 0;
+
+	rb = &sunxi_nand->sels[sunxi_nand->selected].rb;
+
+	switch (rb->type) {
+	case RB_NATIVE:
+		ret = !!(readl(nfc->regs + NFC_REG_ST) &
+			 (NFC_RB_STATE0 << rb->info.nativeid));
+		if (ret)
+			break;
+
+		sunxi_nfc_wait_int(nfc, NFC_RB_B2R, timeo);
+		ret = !!(readl(nfc->regs + NFC_REG_ST) &
+			 (NFC_RB_STATE0 << rb->info.nativeid));
+		break;
+	case RB_GPIO:
+		ret = gpio_get_value(rb->info.gpio);
+		break;
+	case RB_NONE:
+	default:
+		ret = 0;
+		dev_err(nfc->dev, "cannot check R/B NAND status!\n");
+		break;
+	}
+
+	return ret;
+}
+
+static void sunxi_nfc_select_chip(struct mtd_info *mtd, int chip)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	struct sunxi_nand_chip_sel *sel;
+	u32 ctl;
+
+	if (chip > 0 && chip >= sunxi_nand->nsels)
+		return;
+
+	if (chip == sunxi_nand->selected)
+		return;
+
+	ctl = readl(nfc->regs + NFC_REG_CTL) &
+	      ~(NFC_CE_SEL | NFC_RB_SEL | NFC_EN);
+
+	if (chip >= 0) {
+		sel = &sunxi_nand->sels[chip];
+
+		ctl |= (sel->cs << 24) | NFC_EN |
+		       (((nand->page_shift - 10) & 0xf) << 8);
+		if (sel->rb.type == RB_NONE) {
+			nand->dev_ready = NULL;
+		} else {
+			nand->dev_ready = sunxi_nfc_dev_ready;
+			if (sel->rb.type == RB_NATIVE)
+				ctl |= (sel->rb.info.nativeid << 3);
+		}
+
+		writel(mtd->writesize, nfc->regs + NFC_REG_SPARE_AREA);
+
+		if (nfc->clk_rate != sunxi_nand->clk_rate) {
+			clk_set_rate(nfc->mod_clk, sunxi_nand->clk_rate);
+			nfc->clk_rate = sunxi_nand->clk_rate;
+		}
+	}
+
+	writel(ctl, nfc->regs + NFC_REG_CTL);
+
+	sunxi_nand->selected = chip;
+}
+
+static void sunxi_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	int ret;
+	int cnt;
+	int offs = 0;
+	u32 tmp;
+
+	while (len > offs) {
+		cnt = min(len - offs, NFC_SRAM_SIZE);
+
+		ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+		if (ret)
+			break;
+
+		writel(cnt, nfc->regs + NFC_REG_CNT);
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD;
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			break;
+
+		if (buf)
+			memcpy_fromio(buf + offs, nfc->regs + NFC_RAM0_BASE,
+				      cnt);
+		offs += cnt;
+	}
+}
+
+static void sunxi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+				int len)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	int ret;
+	int cnt;
+	int offs = 0;
+	u32 tmp;
+
+	while (len > offs) {
+		cnt = min(len - offs, NFC_SRAM_SIZE);
+
+		ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+		if (ret)
+			break;
+
+		writel(cnt, nfc->regs + NFC_REG_CNT);
+		memcpy_toio(nfc->regs + NFC_RAM0_BASE, buf + offs, cnt);
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD |
+		      NFC_ACCESS_DIR;
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			break;
+
+		offs += cnt;
+	}
+}
+
+static uint8_t sunxi_nfc_read_byte(struct mtd_info *mtd)
+{
+	uint8_t ret = 0;	/* read_buf() may bail out before filling it */
+
+	sunxi_nfc_read_buf(mtd, &ret, 1);
+
+	return ret;
+}
+
+static void sunxi_nfc_cmd_ctrl(struct mtd_info *mtd, int dat,
+			       unsigned int ctrl)
+{
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	int ret;
+	u32 tmp;
+
+	ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+	if (ret)
+		return;
+
+	if (ctrl & NAND_CTRL_CHANGE) {
+		tmp = readl(nfc->regs + NFC_REG_CTL);
+		if (ctrl & NAND_NCE)
+			tmp |= NFC_CE_CTL;
+		else
+			tmp &= ~NFC_CE_CTL;
+		writel(tmp, nfc->regs + NFC_REG_CTL);
+	}
+
+	if (dat == NAND_CMD_NONE)
+		return;
+
+	if (ctrl & NAND_CLE) {
+		writel(NFC_SEND_CMD1 | dat, nfc->regs + NFC_REG_CMD);
+	} else {
+		writel(dat, nfc->regs + NFC_REG_ADDR_LOW);
+		writel(NFC_SEND_ADR, nfc->regs + NFC_REG_CMD);
+	}
+
+	sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+}
+
+static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd,
+				      struct nand_chip *chip, uint8_t *buf,
+				      int oob_required, int page)
+{
+	struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct nand_ecclayout *layout = ecc->layout;
+	struct sunxi_nand_hw_ecc *data = ecc->priv;
+	unsigned int max_bitflips = 0;
+	int offset;
+	int ret;
+	u32 tmp;
+	int i;
+	int cnt;
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE);
+	tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) |
+	       NFC_ECC_EXCEPTION;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	for (i = 0; i < ecc->steps; i++) {
+		if (i)
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT, i * ecc->size, -1);
+
+		offset = mtd->writesize + layout->eccpos[i * ecc->bytes] - 4;
+
+		chip->read_buf(mtd, NULL, ecc->size);
+
+		chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1);
+
+		ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+		if (ret)
+			return ret;
+
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30);
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			return ret;
+
+		memcpy_fromio(buf + (i * ecc->size),
+			      nfc->regs + NFC_RAM0_BASE, ecc->size);
+
+		if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) {
+			mtd->ecc_stats.failed++;
+		} else {
+			tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff;
+			mtd->ecc_stats.corrected += tmp;
+			max_bitflips = max_t(unsigned int, max_bitflips, tmp);
+		}
+
+		if (oob_required) {
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1);
+
+			ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+			if (ret)
+				return ret;
+
+			offset -= mtd->writesize;
+			chip->read_buf(mtd, chip->oob_poi + offset,
+				      ecc->bytes + 4);
+		}
+	}
+
+	if (oob_required) {
+		cnt = ecc->layout->oobfree[ecc->steps].length;
+		if (cnt > 0) {
+			offset = mtd->writesize +
+				 ecc->layout->oobfree[ecc->steps].offset;
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1);
+			offset -= mtd->writesize;
+			chip->read_buf(mtd, chip->oob_poi + offset, cnt);
+		}
+	}
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~NFC_ECC_EN;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	return max_bitflips;
+}
+
+static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd,
+				       struct nand_chip *chip,
+				       const uint8_t *buf, int oob_required)
+{
+	struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct nand_ecclayout *layout = ecc->layout;
+	struct sunxi_nand_hw_ecc *data = ecc->priv;
+	int offset;
+	int ret;
+	u32 tmp;
+	int i;
+	int cnt;
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE);
+	tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) |
+	       NFC_ECC_EXCEPTION;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	for (i = 0; i < ecc->steps; i++) {
+		if (i)
+			chip->cmdfunc(mtd, NAND_CMD_RNDIN, i * ecc->size, -1);
+
+		chip->write_buf(mtd, buf + (i * ecc->size), ecc->size);
+
+		offset = layout->eccpos[i * ecc->bytes] - 4 + mtd->writesize;
+
+		/* User data: caller's OOB if requested, else 0xff filler */
+		if (oob_required) {
+			memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE,
+				    chip->oob_poi + offset - mtd->writesize,
+				    4);
+		} else {
+			tmp = 0xffffffff;
+			memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp,
+				    4);
+		}
+
+		chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1);
+
+		ret = sunxi_nfc_wait_cmd_fifo_empty(nfc);
+		if (ret)
+			return ret;
+
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR |
+		      (1 << 30);
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			return ret;
+	}
+
+	if (oob_required) {
+		cnt = ecc->layout->oobfree[i].length;
+		if (cnt > 0) {
+			offset = mtd->writesize +
+				 ecc->layout->oobfree[i].offset;
+			chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1);
+			offset -= mtd->writesize;
+			chip->write_buf(mtd, chip->oob_poi + offset, cnt);
+		}
+	}
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~NFC_ECC_EN;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	return 0;
+}
+
+static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd,
+					       struct nand_chip *chip,
+					       uint8_t *buf, int oob_required,
+					       int page)
+{
+	struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct sunxi_nand_hw_ecc *data = ecc->priv;
+	unsigned int max_bitflips = 0;
+	uint8_t *oob = chip->oob_poi;
+	int offset = 0;
+	int ret;
+	int cnt;
+	u32 tmp;
+	int i;
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE);
+	tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) |
+	       NFC_ECC_EXCEPTION;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	for (i = 0; i < ecc->steps; i++) {
+		chip->read_buf(mtd, NULL, ecc->size);
+
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | (1 << 30);
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			return ret;
+
+		memcpy_fromio(buf, nfc->regs + NFC_RAM0_BASE, ecc->size);
+		buf += ecc->size;
+		offset += ecc->size;
+
+		if (readl(nfc->regs + NFC_REG_ECC_ST) & 0x1) {
+			mtd->ecc_stats.failed++;
+		} else {
+			tmp = readl(nfc->regs + NFC_REG_ECC_CNT0) & 0xff;
+			mtd->ecc_stats.corrected += tmp;
+			max_bitflips = max_t(unsigned int, max_bitflips, tmp);
+		}
+
+		if (oob_required) {
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1);
+			chip->read_buf(mtd, oob, ecc->bytes + ecc->prepad);
+			oob += ecc->bytes + ecc->prepad;
+		}
+
+		offset += ecc->bytes + ecc->prepad;
+	}
+
+	if (oob_required) {
+		cnt = mtd->oobsize - (oob - chip->oob_poi);
+		if (cnt > 0) {
+			chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1);
+			chip->read_buf(mtd, oob, cnt);
+		}
+	}
+
+	writel(readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_ECC_EN,
+	       nfc->regs + NFC_REG_ECC_CTL);
+
+	return max_bitflips;
+}
+
+static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd,
+						struct nand_chip *chip,
+						const uint8_t *buf,
+						int oob_required)
+{
+	struct sunxi_nfc *nfc = to_sunxi_nfc(chip->controller);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct sunxi_nand_hw_ecc *data = ecc->priv;
+	uint8_t *oob = chip->oob_poi;
+	int offset = 0;
+	int ret;
+	int cnt;
+	u32 tmp;
+	int i;
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~(NFC_ECC_MODE | NFC_ECC_PIPELINE | NFC_ECC_BLOCK_SIZE);
+	tmp |= NFC_ECC_EN | (data->mode << NFC_ECC_MODE_SHIFT) |
+	       NFC_ECC_EXCEPTION;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	for (i = 0; i < ecc->steps; i++) {
+		chip->write_buf(mtd, buf + (i * ecc->size), ecc->size);
+		offset += ecc->size;
+
+		/* User data: caller's OOB if requested, else 0xff filler */
+		if (oob_required) {
+			memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, oob,
+				    4);
+		} else {
+			tmp = 0xffffffff;
+			memcpy_toio(nfc->regs + NFC_REG_USER_DATA_BASE, &tmp,
+				    4);
+		}
+
+		tmp = NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ACCESS_DIR |
+		      (1 << 30);
+		writel(tmp, nfc->regs + NFC_REG_CMD);
+
+		ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+		if (ret)
+			return ret;
+
+		offset += ecc->bytes + ecc->prepad;
+		oob += ecc->bytes + ecc->prepad;
+	}
+
+	if (oob_required) {
+		cnt = mtd->oobsize - (oob - chip->oob_poi);
+		if (cnt > 0) {
+			chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset, -1);
+			chip->write_buf(mtd, oob, cnt);
+		}
+	}
+
+	tmp = readl(nfc->regs + NFC_REG_ECC_CTL);
+	tmp &= ~NFC_ECC_EN;
+
+	writel(tmp, nfc->regs + NFC_REG_ECC_CTL);
+
+	return 0;
+}
+
+static int sunxi_nand_chip_set_timings(struct sunxi_nand_chip *chip,
+				       const struct nand_sdr_timings *timings)
+{
+	u32 min_clk_period = 0;
+
+	/* T1 <=> tCLS */
+	if (timings->tCLS_min > min_clk_period)
+		min_clk_period = timings->tCLS_min;
+
+	/* T2 <=> tCLH */
+	if (timings->tCLH_min > min_clk_period)
+		min_clk_period = timings->tCLH_min;
+
+	/* T3 <=> tCS */
+	if (timings->tCS_min > min_clk_period)
+		min_clk_period = timings->tCS_min;
+
+	/* T4 <=> tCH */
+	if (timings->tCH_min > min_clk_period)
+		min_clk_period = timings->tCH_min;
+
+	/* T5 <=> tWP */
+	if (timings->tWP_min > min_clk_period)
+		min_clk_period = timings->tWP_min;
+
+	/* T6 <=> tWH */
+	if (timings->tWH_min > min_clk_period)
+		min_clk_period = timings->tWH_min;
+
+	/* T7 <=> tALS */
+	if (timings->tALS_min > min_clk_period)
+		min_clk_period = timings->tALS_min;
+
+	/* T8 <=> tDS */
+	if (timings->tDS_min > min_clk_period)
+		min_clk_period = timings->tDS_min;
+
+	/* T9 <=> tDH */
+	if (timings->tDH_min > min_clk_period)
+		min_clk_period = timings->tDH_min;
+
+	/* T10 <=> tRR */
+	if (timings->tRR_min > (min_clk_period * 3))
+		min_clk_period = DIV_ROUND_UP(timings->tRR_min, 3);
+
+	/* T11 <=> tALH */
+	if (timings->tALH_min > min_clk_period)
+		min_clk_period = timings->tALH_min;
+
+	/* T12 <=> tRP */
+	if (timings->tRP_min > min_clk_period)
+		min_clk_period = timings->tRP_min;
+
+	/* T13 <=> tREH */
+	if (timings->tREH_min > min_clk_period)
+		min_clk_period = timings->tREH_min;
+
+	/* T14 <=> tRC */
+	if (timings->tRC_min > (min_clk_period * 2))
+		min_clk_period = DIV_ROUND_UP(timings->tRC_min, 2);
+
+	/* T15 <=> tWC */
+	if (timings->tWC_min > (min_clk_period * 2))
+		min_clk_period = DIV_ROUND_UP(timings->tWC_min, 2);
+
+
+	/* Convert min_clk_period from picoseconds to nanoseconds */
+	min_clk_period = DIV_ROUND_UP(min_clk_period, 1000);
+
+	/*
+	 * Convert min_clk_period into a clk frequency, then get the
+	 * appropriate rate for the NAND controller IP given this formula
+	 * (specified in the datasheet):
+	 * nand clk_rate = 2 * min_clk_rate
+	 */
+	chip->clk_rate = (2 * NSEC_PER_SEC) / min_clk_period;
+
+	/* TODO: configure T16-T19 */
+
+	return 0;
+}
+
+static int sunxi_nand_chip_init_timings(struct sunxi_nand_chip *chip,
+					struct device_node *np)
+{
+	const struct nand_sdr_timings *timings;
+	int ret;
+	int mode;
+
+	mode = onfi_get_async_timing_mode(&chip->nand);
+	if (mode == ONFI_TIMING_MODE_UNKNOWN) {
+		mode = chip->nand.onfi_timing_mode_default;
+	} else {
+		uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {};
+
+		mode = fls(mode) - 1;
+		if (mode < 0)
+			mode = 0;
+
+		feature[0] = mode;
+		ret = chip->nand.onfi_set_features(&chip->mtd, &chip->nand,
+						ONFI_FEATURE_ADDR_TIMING_MODE,
+						feature);
+		if (ret)
+			return ret;
+	}
+
+	timings = onfi_async_timing_mode_to_sdr_timings(mode);
+	if (IS_ERR(timings))
+		return PTR_ERR(timings);
+
+	return sunxi_nand_chip_set_timings(chip, timings);
+}
+
+static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd,
+					      struct nand_ecc_ctrl *ecc,
+					      struct device_node *np)
+{
+	static const u8 strengths[] = { 16, 24, 28, 32, 40, 48, 56, 60, 64 };
+	struct nand_chip *nand = mtd->priv;
+	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
+	struct sunxi_nand_hw_ecc *data;
+	struct nand_ecclayout *layout;
+	int nsectors;
+	int ret;
+	int i;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Add ECC info retrieval from DT */
+	for (i = 0; i < ARRAY_SIZE(strengths); i++) {
+		if (ecc->strength <= strengths[i])
+			break;
+	}
+
+	if (i >= ARRAY_SIZE(strengths)) {
+		dev_err(nfc->dev, "unsupported strength\n");
+		ret = -ENOTSUPP;
+		goto err;
+	}
+
+	data->mode = i;
+
+	/* HW ECC always request ECC bytes for 1024 bytes blocks */
+	ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * 1024), 8);
+
+	/* HW ECC always work with even numbers of ECC bytes */
+	ecc->bytes = ALIGN(ecc->bytes, 2);
+
+	layout = &data->layout;
+	nsectors = mtd->writesize / ecc->size;
+
+	if (mtd->oobsize < ((ecc->bytes + 4) * nsectors)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	layout->eccbytes = (ecc->bytes * nsectors);
+
+	ecc->layout = layout;
+	ecc->priv = data;
+
+	return 0;
+
+err:
+	kfree(data);
+
+	return ret;
+}
+
+static void sunxi_nand_hw_common_ecc_ctrl_cleanup(struct nand_ecc_ctrl *ecc)
+{
+	kfree(ecc->priv);
+}
+
+static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd,
+				       struct nand_ecc_ctrl *ecc,
+				       struct device_node *np)
+{
+	struct nand_ecclayout *layout;
+	int nsectors;
+	int i, j;
+	int ret;
+
+	ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np);
+	if (ret)
+		return ret;
+
+	ecc->read_page = sunxi_nfc_hw_ecc_read_page;
+	ecc->write_page = sunxi_nfc_hw_ecc_write_page;
+	layout = ecc->layout;
+	nsectors = mtd->writesize / ecc->size;
+
+	for (i = 0; i < nsectors; i++) {
+		if (i) {
+			layout->oobfree[i].offset =
+				layout->oobfree[i - 1].offset +
+				layout->oobfree[i - 1].length +
+				ecc->bytes;
+			layout->oobfree[i].length = 4;
+		} else {
+			/*
+			 * The first 2 bytes are used for BB markers, hence we
+			 * only have 2 bytes available in the first user data
+			 * section.
+			 */
+			layout->oobfree[i].length = 2;
+			layout->oobfree[i].offset = 2;
+		}
+
+		for (j = 0; j < ecc->bytes; j++)
+			layout->eccpos[(ecc->bytes * i) + j] =
+					layout->oobfree[i].offset +
+					layout->oobfree[i].length + j;
+	}
+
+	if (mtd->oobsize > (ecc->bytes + 4) * nsectors) {
+		layout->oobfree[nsectors].offset =
+				layout->oobfree[nsectors - 1].offset +
+				layout->oobfree[nsectors - 1].length +
+				ecc->bytes;
+		layout->oobfree[nsectors].length = mtd->oobsize -
+				((ecc->bytes + 4) * nsectors);
+	}
+
+	return 0;
+}
+
+static int sunxi_nand_hw_syndrome_ecc_ctrl_init(struct mtd_info *mtd,
+						struct nand_ecc_ctrl *ecc,
+						struct device_node *np)
+{
+	struct nand_ecclayout *layout;
+	int nsectors;
+	int i;
+	int ret;
+
+	ret = sunxi_nand_hw_common_ecc_ctrl_init(mtd, ecc, np);
+	if (ret)
+		return ret;
+
+	ecc->prepad = 4;
+	ecc->read_page = sunxi_nfc_hw_syndrome_ecc_read_page;
+	ecc->write_page = sunxi_nfc_hw_syndrome_ecc_write_page;
+
+	layout = ecc->layout;
+	nsectors = mtd->writesize / ecc->size;
+
+	for (i = 0; i < (ecc->bytes * nsectors); i++)
+		layout->eccpos[i] = i;
+
+	layout->oobfree[0].length = mtd->oobsize - i;
+	layout->oobfree[0].offset = i;
+
+	return 0;
+}
+
+static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc)
+{
+	switch (ecc->mode) {
+	case NAND_ECC_HW:
+	case NAND_ECC_HW_SYNDROME:
+		sunxi_nand_hw_common_ecc_ctrl_cleanup(ecc);
+		break;
+	case NAND_ECC_NONE:
+		kfree(ecc->layout);
+	default:
+		break;
+	}
+}
+
+static int sunxi_nand_ecc_init(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc,
+			       struct device_node *np)
+{
+	struct nand_chip *nand = mtd->priv;
+	int strength;
+	int blk_size;
+	int ret;
+
+	blk_size = of_get_nand_ecc_step_size(np);
+	strength = of_get_nand_ecc_strength(np);
+	if (blk_size > 0 && strength > 0) {
+		ecc->size = blk_size;
+		ecc->strength = strength;
+	} else {
+		ecc->size = nand->ecc_step_ds;
+		ecc->strength = nand->ecc_strength_ds;
+	}
+
+	if (!ecc->size || !ecc->strength)
+		return -EINVAL;
+
+	ecc->mode = NAND_ECC_HW;
+
+	ret = of_get_nand_ecc_mode(np);
+	if (ret >= 0)
+		ecc->mode = ret;
+
+	switch (ecc->mode) {
+	case NAND_ECC_SOFT_BCH:
+		ecc->bytes = DIV_ROUND_UP(ecc->strength * fls(8 * ecc->size),
+					  8);
+		break;
+	case NAND_ECC_HW:
+		ret = sunxi_nand_hw_ecc_ctrl_init(mtd, ecc, np);
+		if (ret)
+			return ret;
+		break;
+	case NAND_ECC_HW_SYNDROME:
+		ret = sunxi_nand_hw_syndrome_ecc_ctrl_init(mtd, ecc, np);
+		if (ret)
+			return ret;
+		break;
+	case NAND_ECC_NONE:
+		ecc->layout = kzalloc(sizeof(*ecc->layout), GFP_KERNEL);
+		if (!ecc->layout)
+			return -ENOMEM;
+		ecc->layout->oobfree[0].length = mtd->oobsize;
+	case NAND_ECC_SOFT:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
+				struct device_node *np)
+{
+	const struct nand_sdr_timings *timings;
+	struct sunxi_nand_chip *chip;
+	struct mtd_part_parser_data ppdata;
+	struct mtd_info *mtd;
+	struct nand_chip *nand;
+	int nsels;
+	int ret;
+	int i;
+	u32 tmp;
+
+	if (!of_get_property(np, "reg", &nsels))
+		return -EINVAL;
+
+	nsels /= sizeof(u32);
+	if (!nsels) {
+		dev_err(dev, "invalid reg property size\n");
+		return -EINVAL;
+	}
+
+	chip = devm_kzalloc(dev,
+			    sizeof(*chip) +
+			    (nsels * sizeof(struct sunxi_nand_chip_sel)),
+			    GFP_KERNEL);
+	if (!chip) {
+		dev_err(dev, "could not allocate chip\n");
+		return -ENOMEM;
+	}
+
+	chip->nsels = nsels;
+	chip->selected = -1;
+
+	for (i = 0; i < nsels; i++) {
+		ret = of_property_read_u32_index(np, "reg", i, &tmp);
+		if (ret) {
+			dev_err(dev, "could not retrieve reg property: %d\n",
+				ret);
+			return ret;
+		}
+
+		if (tmp > NFC_MAX_CS) {
+			dev_err(dev,
+				"invalid reg value: %u (max CS = 7)\n",
+				tmp);
+			return -EINVAL;
+		}
+
+		if (test_and_set_bit(tmp, &nfc->assigned_cs)) {
+			dev_err(dev, "CS %d already assigned\n", tmp);
+			return -EINVAL;
+		}
+
+		chip->sels[i].cs = tmp;
+
+		if (!of_property_read_u32_index(np, "allwinner,rb", i, &tmp) &&
+		    tmp < 2) {
+			chip->sels[i].rb.type = RB_NATIVE;
+			chip->sels[i].rb.info.nativeid = tmp;
+		} else {
+			ret = of_get_named_gpio(np, "rb-gpios", i);
+			if (ret >= 0) {
+				tmp = ret;
+				chip->sels[i].rb.type = RB_GPIO;
+				chip->sels[i].rb.info.gpio = tmp;
+				ret = devm_gpio_request(dev, tmp, "nand-rb");
+				if (ret)
+					return ret;
+
+				ret = gpio_direction_input(tmp);
+				if (ret)
+					return ret;
+			} else {
+				chip->sels[i].rb.type = RB_NONE;
+			}
+		}
+	}
+
+	timings = onfi_async_timing_mode_to_sdr_timings(0);
+	if (IS_ERR(timings)) {
+		ret = PTR_ERR(timings);
+		dev_err(dev,
+			"could not retrieve timings for ONFI mode 0: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = sunxi_nand_chip_set_timings(chip, timings);
+	if (ret) {
+		dev_err(dev, "could not configure chip timings: %d\n", ret);
+		return ret;
+	}
+
+	nand = &chip->nand;
+	/* Default tR value specified in the ONFI spec (chapter 4.15.1) */
+	nand->chip_delay = 200;
+	nand->controller = &nfc->controller;
+	nand->select_chip = sunxi_nfc_select_chip;
+	nand->cmd_ctrl = sunxi_nfc_cmd_ctrl;
+	nand->read_buf = sunxi_nfc_read_buf;
+	nand->write_buf = sunxi_nfc_write_buf;
+	nand->read_byte = sunxi_nfc_read_byte;
+
+	if (of_get_nand_on_flash_bbt(np))
+		nand->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB;
+
+	mtd = &chip->mtd;
+	mtd->dev.parent = dev;
+	mtd->priv = nand;
+	mtd->owner = THIS_MODULE;
+
+	ret = nand_scan_ident(mtd, nsels, NULL);
+	if (ret)
+		return ret;
+
+	ret = sunxi_nand_chip_init_timings(chip, np);
+	if (ret) {
+		dev_err(dev, "could not configure chip timings: %d\n", ret);
+		return ret;
+	}
+
+	ret = sunxi_nand_ecc_init(mtd, &nand->ecc, np);
+	if (ret) {
+		dev_err(dev, "ECC init failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = nand_scan_tail(mtd);
+	if (ret) {
+		dev_err(dev, "nand_scan_tail failed: %d\n", ret);
+		return ret;
+	}
+
+	ppdata.of_node = np;
+	ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
+	if (ret) {
+		dev_err(dev, "failed to register mtd device: %d\n", ret);
+		nand_release(mtd);
+		return ret;
+	}
+
+	list_add_tail(&chip->node, &nfc->chips);
+
+	return 0;
+}
+
+static int sunxi_nand_chips_init(struct device *dev, struct sunxi_nfc *nfc)
+{
+	struct device_node *np = dev->of_node;
+	struct device_node *nand_np;
+	int nchips = of_get_child_count(np);
+	int ret = 0;
+
+	if (nchips > 8) {
+		dev_err(dev, "too many NAND chips: %d (max = 8)\n", nchips);
+		return -EINVAL;
+	}
+
+	for_each_child_of_node(np, nand_np) {
+		ret = sunxi_nand_chip_init(dev, nfc, nand_np);
+		if (ret)
+			break;
+	}
+	of_node_put(nand_np);	/* non-NULL (ref held) only after a break */
+	return ret;
+}
+
+static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc)
+{
+	struct sunxi_nand_chip *chip, *next;
+
+	/* Unlink each chip once released, otherwise the walk never ends */
+	list_for_each_entry_safe(chip, next, &nfc->chips, node) {
+		nand_release(&chip->mtd);
+		sunxi_nand_ecc_cleanup(&chip->nand.ecc);
+		list_del(&chip->node);
+	}
+}
+
+static int sunxi_nfc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *r;
+	struct sunxi_nfc *nfc;
+	int irq;
+	int ret;
+
+	nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL);
+	if (!nfc)
+		return -ENOMEM;
+
+	nfc->dev = dev;
+	spin_lock_init(&nfc->controller.lock);
+	init_waitqueue_head(&nfc->controller.wq);
+	INIT_LIST_HEAD(&nfc->chips);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nfc->regs = devm_ioremap_resource(dev, r);
+	if (IS_ERR(nfc->regs))
+		return PTR_ERR(nfc->regs);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "failed to retrieve irq\n");
+		return irq;
+	}
+
+	nfc->ahb_clk = devm_clk_get(dev, "ahb");
+	if (IS_ERR(nfc->ahb_clk)) {
+		dev_err(dev, "failed to retrieve ahb clk\n");
+		return PTR_ERR(nfc->ahb_clk);
+	}
+
+	ret = clk_prepare_enable(nfc->ahb_clk);
+	if (ret)
+		return ret;
+
+	nfc->mod_clk = devm_clk_get(dev, "mod");
+	if (IS_ERR(nfc->mod_clk)) {
+		dev_err(dev, "failed to retrieve mod clk\n");
+		ret = PTR_ERR(nfc->mod_clk);
+		goto out_ahb_clk_unprepare;
+	}
+
+	ret = clk_prepare_enable(nfc->mod_clk);
+	if (ret)
+		goto out_ahb_clk_unprepare;
+
+	ret = sunxi_nfc_rst(nfc);
+	if (ret)
+		goto out_mod_clk_unprepare;
+
+	writel(0, nfc->regs + NFC_REG_INT);
+	ret = devm_request_irq(dev, irq, sunxi_nfc_interrupt,
+			       0, "sunxi-nand", nfc);
+	if (ret)
+		goto out_mod_clk_unprepare;
+
+	platform_set_drvdata(pdev, nfc);
+
+	/*
+	 * TODO: replace these magic values with proper flags as soon as we
+	 * know what they are encoding.
+	 */
+	writel(0x100, nfc->regs + NFC_REG_TIMING_CTL);
+	writel(0x7ff, nfc->regs + NFC_REG_TIMING_CFG);
+
+	ret = sunxi_nand_chips_init(dev, nfc);
+	if (ret) {
+		dev_err(dev, "failed to init nand chips\n");
+		goto out_mod_clk_unprepare;
+	}
+
+	return 0;
+
+out_mod_clk_unprepare:
+	clk_disable_unprepare(nfc->mod_clk);
+out_ahb_clk_unprepare:
+	clk_disable_unprepare(nfc->ahb_clk);
+
+	return ret;
+}
+
+static int sunxi_nfc_remove(struct platform_device *pdev)
+{
+	struct sunxi_nfc *nfc = platform_get_drvdata(pdev);
+
+	sunxi_nand_chips_cleanup(nfc);
+
+	return 0;
+}
+
+static const struct of_device_id sunxi_nfc_ids[] = {
+	{ .compatible = "allwinner,sun4i-a10-nand" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sunxi_nfc_ids);
+
+static struct platform_driver sunxi_nfc_driver = {
+	.driver = {
+		.name = "sunxi_nand",
+		.of_match_table = sunxi_nfc_ids,
+	},
+	.probe = sunxi_nfc_probe,
+	.remove = sunxi_nfc_remove,
+};
+module_platform_driver(sunxi_nfc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Boris BREZILLON");
+MODULE_DESCRIPTION("Allwinner NAND Flash Controller driver");
+MODULE_ALIAS("platform:sunxi_nand");
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 2fb07ec..39763b9 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -719,16 +719,10 @@ static int fsl_qspi_read(struct spi_nor *nor, loff_t from,
 {
 	struct fsl_qspi *q = nor->priv;
 	u8 cmd = nor->read_opcode;
-	int ret;
 
 	dev_dbg(q->dev, "cmd [%x],read from (0x%p, 0x%.8x, 0x%.8x),len:%d\n",
 		cmd, q->ahb_base, q->chip_base_addr, (unsigned int)from, len);
 
-	/* Wait until the previous command is finished. */
-	ret = nor->wait_till_ready(nor);
-	if (ret)
-		return ret;
-
 	/* Read out the data directly from the AHB buffer.*/
 	memcpy(buf, q->ahb_base + q->chip_base_addr + from, len);
 
@@ -744,16 +738,6 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs)
 	dev_dbg(nor->dev, "%dKiB at 0x%08x:0x%08x\n",
 		nor->mtd->erasesize / 1024, q->chip_base_addr, (u32)offs);
 
-	/* Wait until finished previous write command. */
-	ret = nor->wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
-	if (ret)
-		return ret;
-
 	ret = fsl_qspi_runcmd(q, nor->erase_opcode, offs, 0);
 	if (ret)
 		return ret;
@@ -849,9 +833,8 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 
 	ret = clk_prepare_enable(q->clk);
 	if (ret) {
-		clk_disable_unprepare(q->clk_en);
 		dev_err(dev, "can not enable the qspi clock\n");
-		goto map_failed;
+		goto clk_failed;
 	}
 
 	/* find the irq */
@@ -905,7 +888,8 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		nor->prepare = fsl_qspi_prep;
 		nor->unprepare = fsl_qspi_unprep;
 
-		if (of_modalias_node(np, modalias, sizeof(modalias)) < 0)
+		ret = of_modalias_node(np, modalias, sizeof(modalias));
+		if (ret < 0)
 			goto map_failed;
 
 		ret = of_property_read_u32(np, "spi-max-frequency",
@@ -964,6 +948,7 @@ last_init_failed:
 
 irq_failed:
 	clk_disable_unprepare(q->clk);
+clk_failed:
 	clk_disable_unprepare(q->clk_en);
 map_failed:
 	dev_err(dev, "Freescale QuadSPI probe failed\n");
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index c51ee52..0f8ec3c 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -26,7 +26,38 @@
 /* Define max times to check status register before we give up. */
 #define	MAX_READY_WAIT_JIFFIES	(40 * HZ) /* M25P16 specs 40s max chip erase */
 
-#define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
+#define SPI_NOR_MAX_ID_LEN	6
+
+struct flash_info {
+	/*
+	 * This array stores the ID bytes.
+	 * The first three bytes are the JEDEC ID.
+	 * JEDEC ID zero means "no ID" (mostly older chips).
+	 */
+	u8		id[SPI_NOR_MAX_ID_LEN];
+	u8		id_len;
+
+	/* The size listed here is what works with SPINOR_OP_SE, which isn't
+	 * necessarily called a "sector" by the vendor.
+	 */
+	unsigned	sector_size;
+	u16		n_sectors;
+
+	u16		page_size;
+	u16		addr_width;
+
+	u16		flags;
+#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
+#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
+#define	SST_WRITE		0x04	/* use SST byte programming */
+#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
+#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
+#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
+#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
+#define	USE_FSR			0x80	/* use flag status register */
+};
+
+#define JEDEC_MFR(info)	((info)->id[0])
 
 static const struct spi_device_id *spi_nor_match_id(const char *name);
 
@@ -98,7 +129,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
 	case SPI_NOR_FAST:
 	case SPI_NOR_DUAL:
 	case SPI_NOR_QUAD:
-		return 1;
+		return 8;
 	case SPI_NOR_NORMAL:
 		return 0;
 	}
@@ -138,13 +169,14 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
 }
 
 /* Enable/disable 4-byte addressing mode. */
-static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
+static inline int set_4byte(struct spi_nor *nor, struct flash_info *info,
+			    int enable)
 {
 	int status;
 	bool need_wren = false;
 	u8 cmd;
 
-	switch (JEDEC_MFR(jedec_id)) {
+	switch (JEDEC_MFR(info)) {
 	case CFI_MFR_ST: /* Micron, actually */
 		/* Some Micron need WREN command; all will accept it */
 		need_wren = true;
@@ -165,81 +197,74 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
 		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0);
 	}
 }
-
-static int spi_nor_wait_till_ready(struct spi_nor *nor)
+static inline int spi_nor_sr_ready(struct spi_nor *nor)
 {
-	unsigned long deadline;
-	int sr;
-
-	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
-
-	do {
-		cond_resched();
+	int sr = read_sr(nor);
+	if (sr < 0)
+		return sr;
+	else
+		return !(sr & SR_WIP);
+}
 
-		sr = read_sr(nor);
-		if (sr < 0)
-			break;
-		else if (!(sr & SR_WIP))
-			return 0;
-	} while (!time_after_eq(jiffies, deadline));
+static inline int spi_nor_fsr_ready(struct spi_nor *nor)
+{
+	int fsr = read_fsr(nor);
+	if (fsr < 0)
+		return fsr;
+	else
+		return fsr & FSR_READY;
+}
 
-	return -ETIMEDOUT;
+static int spi_nor_ready(struct spi_nor *nor)
+{
+	int sr, fsr;
+	sr = spi_nor_sr_ready(nor);
+	if (sr < 0)
+		return sr;
+	fsr = nor->flags & SNOR_F_USE_FSR ? spi_nor_fsr_ready(nor) : 1;
+	if (fsr < 0)
+		return fsr;
+	return sr && fsr;
 }
 
-static int spi_nor_wait_till_fsr_ready(struct spi_nor *nor)
+/*
+ * Poll the status register (and flag status register, when in use) until the
+ * flash is ready. Returns 0 when ready, or a negative errno on error/timeout.
+ */
+static int spi_nor_wait_till_ready(struct spi_nor *nor)
 {
 	unsigned long deadline;
-	int sr;
-	int fsr;
+	int timeout = 0, ret;
 
 	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
 
-	do {
+	while (!timeout) {
+		if (time_after_eq(jiffies, deadline))
+			timeout = 1;
+
+		ret = spi_nor_ready(nor);
+		if (ret < 0)
+			return ret;
+		if (ret)
+			return 0;
+
 		cond_resched();
+	}
 
-		sr = read_sr(nor);
-		if (sr < 0) {
-			break;
-		} else if (!(sr & SR_WIP)) {
-			fsr = read_fsr(nor);
-			if (fsr < 0)
-				break;
-			if (fsr & FSR_READY)
-				return 0;
-		}
-	} while (!time_after_eq(jiffies, deadline));
+	dev_err(nor->dev, "flash operation timed out\n");
 
 	return -ETIMEDOUT;
 }
 
 /*
- * Service routine to read status register until ready, or timeout occurs.
- * Returns non-zero if error.
- */
-static int wait_till_ready(struct spi_nor *nor)
-{
-	return nor->wait_till_ready(nor);
-}
-
-/*
  * Erase the whole flash memory
  *
  * Returns 0 if successful, non-zero otherwise.
  */
 static int erase_chip(struct spi_nor *nor)
 {
-	int ret;
-
 	dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10));
 
-	/* Wait until finished previous write command. */
-	ret = wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* Send write enable, then erase commands. */
-	write_enable(nor);
-
 	return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0);
 }
 
@@ -294,11 +319,17 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 	/* whole-chip erase? */
 	if (len == mtd->size) {
+		write_enable(nor);
+
 		if (erase_chip(nor)) {
 			ret = -EIO;
 			goto erase_err;
 		}
 
+		ret = spi_nor_wait_till_ready(nor);
+		if (ret)
+			goto erase_err;
+
 	/* REVISIT in some cases we could speed up erasing large regions
 	 * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K.  We may have set up
 	 * to use "small sector erase", but that's not always optimal.
@@ -307,6 +338,8 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* "sector"-at-a-time erase */
 	} else {
 		while (len) {
+			write_enable(nor);
+
 			if (nor->erase(nor, addr)) {
 				ret = -EIO;
 				goto erase_err;
@@ -314,9 +347,15 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 			addr += mtd->erasesize;
 			len -= mtd->erasesize;
+
+			ret = spi_nor_wait_till_ready(nor);
+			if (ret)
+				goto erase_err;
 		}
 	}
 
+	write_disable(nor);
+
 	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
 
 	instr->state = MTD_ERASE_DONE;
@@ -341,11 +380,6 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	if (ret)
 		return ret;
 
-	/* Wait until finished previous command */
-	ret = wait_till_ready(nor);
-	if (ret)
-		goto err;
-
 	status_old = read_sr(nor);
 
 	if (offset < mtd->size - (mtd->size / 2))
@@ -388,11 +422,6 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	if (ret)
 		return ret;
 
-	/* Wait until finished previous command */
-	ret = wait_till_ready(nor);
-	if (ret)
-		goto err;
-
 	status_old = read_sr(nor);
 
 	if (offset+len > mtd->size - (mtd->size / 64))
@@ -424,38 +453,34 @@ err:
 	return ret;
 }
 
-struct flash_info {
-	/* JEDEC id zero means "no ID" (most older chips); otherwise it has
-	 * a high byte of zero plus three data bytes: the manufacturer id,
-	 * then a two byte device id.
-	 */
-	u32		jedec_id;
-	u16             ext_id;
-
-	/* The size listed here is what works with SPINOR_OP_SE, which isn't
-	 * necessarily called a "sector" by the vendor.
-	 */
-	unsigned	sector_size;
-	u16		n_sectors;
-
-	u16		page_size;
-	u16		addr_width;
-
-	u16		flags;
-#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
-#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
-#define	SST_WRITE		0x04	/* use SST byte programming */
-#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
-#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
-#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
-#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
-#define	USE_FSR			0x80	/* use flag status register */
-};
-
+/* Used when the "_ext_id" is two bytes at most */
 #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
 	((kernel_ulong_t)&(struct flash_info) {				\
-		.jedec_id = (_jedec_id),				\
-		.ext_id = (_ext_id),					\
+		.id = {							\
+			((_jedec_id) >> 16) & 0xff,			\
+			((_jedec_id) >> 8) & 0xff,			\
+			(_jedec_id) & 0xff,				\
+			((_ext_id) >> 8) & 0xff,			\
+			(_ext_id) & 0xff,				\
+			},						\
+		.id_len = (!(_jedec_id) ? 0 : (3 + ((_ext_id) ? 2 : 0))),	\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = 256,					\
+		.flags = (_flags),					\
+	})
+
+#define INFO6(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.id = {							\
+			((_jedec_id) >> 16) & 0xff,			\
+			((_jedec_id) >> 8) & 0xff,			\
+			(_jedec_id) & 0xff,				\
+			((_ext_id) >> 16) & 0xff,			\
+			((_ext_id) >> 8) & 0xff,			\
+			(_ext_id) & 0xff,				\
+			},						\
+		.id_len = 6,						\
 		.sector_size = (_sector_size),				\
 		.n_sectors = (_n_sectors),				\
 		.page_size = 256,					\
@@ -507,6 +532,9 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
 	{ "mr25h10",  CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
 
+	/* Fujitsu */
+	{ "mb85rs1mt", INFO(0x047f27, 0, 128 * 1024, 1, SPI_NOR_NO_ERASE) },
+
 	/* GigaDevice */
 	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
@@ -532,6 +560,7 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
 
 	/* Micron */
+	{ "n25q032",	 INFO(0x20ba16, 0, 64 * 1024,   64, 0) },
 	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, 0) },
 	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, 0) },
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
@@ -556,6 +585,7 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
 	{ "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
 	{ "s25sl12801", INFO(0x012018, 0x0301,  64 * 1024, 256, 0) },
+	{ "s25fl128s",	INFO6(0x012018, 0x4d0180, 64 * 1024, 256, SPI_NOR_QUAD_READ) },
 	{ "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024,  64, 0) },
 	{ "s25fl129p1", INFO(0x012018, 0x4d01,  64 * 1024, 256, 0) },
 	{ "s25sl004a",  INFO(0x010212,      0,  64 * 1024,   8, 0) },
@@ -566,6 +596,7 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K) },
 	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K) },
 	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+	{ "s25fl132k",  INFO(0x014016,      0,  64 * 1024,  64, 0) },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
 	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
@@ -577,6 +608,7 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "sst25wf010",  INFO(0xbf2502, 0, 64 * 1024,  2, SECT_4K | SST_WRITE) },
 	{ "sst25wf020",  INFO(0xbf2503, 0, 64 * 1024,  4, SECT_4K | SST_WRITE) },
 	{ "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+	{ "sst25wf080",  INFO(0xbf2505, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
 
 	/* ST Microelectronics -- newer production may have feature updates */
 	{ "m25p05",  INFO(0x202010,  0,  32 * 1024,   2, 0) },
@@ -588,7 +620,6 @@ static const struct spi_device_id spi_nor_ids[] = {
 	{ "m25p32",  INFO(0x202016,  0,  64 * 1024,  64, 0) },
 	{ "m25p64",  INFO(0x202017,  0,  64 * 1024, 128, 0) },
 	{ "m25p128", INFO(0x202018,  0, 256 * 1024,  64, 0) },
-	{ "n25q032", INFO(0x20ba16,  0,  64 * 1024,  64, 0) },
 
 	{ "m25p05-nonjedec",  INFO(0, 0,  32 * 1024,   2, 0) },
 	{ "m25p10-nonjedec",  INFO(0, 0,  32 * 1024,   4, 0) },
@@ -643,32 +674,24 @@ static const struct spi_device_id spi_nor_ids[] = {
 static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
 {
 	int			tmp;
-	u8			id[5];
-	u32			jedec;
-	u16                     ext_jedec;
+	u8			id[SPI_NOR_MAX_ID_LEN];
 	struct flash_info	*info;
 
-	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5);
+	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, SPI_NOR_MAX_ID_LEN);
 	if (tmp < 0) {
 		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp);
 		return ERR_PTR(tmp);
 	}
-	jedec = id[0];
-	jedec = jedec << 8;
-	jedec |= id[1];
-	jedec = jedec << 8;
-	jedec |= id[2];
-
-	ext_jedec = id[3] << 8 | id[4];
 
 	for (tmp = 0; tmp < ARRAY_SIZE(spi_nor_ids) - 1; tmp++) {
 		info = (void *)spi_nor_ids[tmp].driver_data;
-		if (info->jedec_id == jedec) {
-			if (info->ext_id == 0 || info->ext_id == ext_jedec)
+		if (info->id_len) {
+			if (!memcmp(info->id, id, info->id_len))
 				return &spi_nor_ids[tmp];
 		}
 	}
-	dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec);
+	dev_err(nor->dev, "unrecognized JEDEC id bytes: %02x, %2x, %2x\n",
+		id[0], id[1], id[2]);
 	return ERR_PTR(-ENODEV);
 }
 
@@ -703,11 +726,6 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	if (ret)
 		return ret;
 
-	/* Wait until finished previous write command. */
-	ret = wait_till_ready(nor);
-	if (ret)
-		goto time_out;
-
 	write_enable(nor);
 
 	nor->sst_write_second = false;
@@ -719,7 +737,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 		/* write one byte. */
 		nor->write(nor, to, 1, retlen, buf);
-		ret = wait_till_ready(nor);
+		ret = spi_nor_wait_till_ready(nor);
 		if (ret)
 			goto time_out;
 	}
@@ -731,7 +749,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 		/* write two bytes. */
 		nor->write(nor, to, 2, retlen, buf + actual);
-		ret = wait_till_ready(nor);
+		ret = spi_nor_wait_till_ready(nor);
 		if (ret)
 			goto time_out;
 		to += 2;
@@ -740,7 +758,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	nor->sst_write_second = false;
 
 	write_disable(nor);
-	ret = wait_till_ready(nor);
+	ret = spi_nor_wait_till_ready(nor);
 	if (ret)
 		goto time_out;
 
@@ -751,7 +769,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 		nor->program_opcode = SPINOR_OP_BP;
 		nor->write(nor, to, 1, retlen, buf + actual);
 
-		ret = wait_till_ready(nor);
+		ret = spi_nor_wait_till_ready(nor);
 		if (ret)
 			goto time_out;
 		write_disable(nor);
@@ -779,11 +797,6 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 	if (ret)
 		return ret;
 
-	/* Wait until finished previous write command. */
-	ret = wait_till_ready(nor);
-	if (ret)
-		goto write_err;
-
 	write_enable(nor);
 
 	page_offset = to & (nor->page_size - 1);
@@ -802,16 +815,20 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 			if (page_size > nor->page_size)
 				page_size = nor->page_size;
 
-			wait_till_ready(nor);
+			ret = spi_nor_wait_till_ready(nor);
+			if (ret)
+				goto write_err;
+
 			write_enable(nor);
 
 			nor->write(nor, to + i, page_size, retlen, buf + i);
 		}
 	}
 
+	ret = spi_nor_wait_till_ready(nor);
 write_err:
 	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
-	return 0;
+	return ret;
 }
 
 static int macronix_quad_enable(struct spi_nor *nor)
@@ -824,7 +841,7 @@ static int macronix_quad_enable(struct spi_nor *nor)
 	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
 	nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
 
-	if (wait_till_ready(nor))
+	if (spi_nor_wait_till_ready(nor))
 		return 1;
 
 	ret = read_sr(nor);
@@ -874,11 +891,11 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int set_quad_mode(struct spi_nor *nor, u32 jedec_id)
+static int set_quad_mode(struct spi_nor *nor, struct flash_info *info)
 {
 	int status;
 
-	switch (JEDEC_MFR(jedec_id)) {
+	switch (JEDEC_MFR(info)) {
 	case CFI_MFR_MACRONIX:
 		status = macronix_quad_enable(nor);
 		if (status) {
@@ -904,11 +921,6 @@ static int spi_nor_check(struct spi_nor *nor)
 		return -EINVAL;
 	}
 
-	if (!nor->read_id)
-		nor->read_id = spi_nor_read_id;
-	if (!nor->wait_till_ready)
-		nor->wait_till_ready = spi_nor_wait_till_ready;
-
 	return 0;
 }
 
@@ -926,16 +938,24 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (ret)
 		return ret;
 
-	id = spi_nor_match_id(name);
-	if (!id)
+	/* Try to auto-detect if chip name wasn't specified */
+	if (!name)
+		id = spi_nor_read_id(nor);
+	else
+		id = spi_nor_match_id(name);
+	if (IS_ERR_OR_NULL(id))
 		return -ENOENT;
 
 	info = (void *)id->driver_data;
 
-	if (info->jedec_id) {
+	/*
+	 * If caller has specified name of flash model that can normally be
+	 * detected using JEDEC, let's verify it.
+	 */
+	if (name && info->id_len) {
 		const struct spi_device_id *jid;
 
-		jid = nor->read_id(nor);
+		jid = spi_nor_read_id(nor);
 		if (IS_ERR(jid)) {
 			return PTR_ERR(jid);
 		} else if (jid != id) {
@@ -960,9 +980,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	 * up with the software protection bits set
 	 */
 
-	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL ||
-	    JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL ||
-	    JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) {
+	if (JEDEC_MFR(info) == CFI_MFR_ATMEL ||
+	    JEDEC_MFR(info) == CFI_MFR_INTEL ||
+	    JEDEC_MFR(info) == CFI_MFR_SST) {
 		write_enable(nor);
 		write_sr(nor, 0);
 	}
@@ -977,7 +997,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->_read = spi_nor_read;
 
 	/* nor protection support for STmicro chips */
-	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
+	if (JEDEC_MFR(info) == CFI_MFR_ST) {
 		mtd->_lock = spi_nor_lock;
 		mtd->_unlock = spi_nor_unlock;
 	}
@@ -988,9 +1008,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	else
 		mtd->_write = spi_nor_write;
 
-	if ((info->flags & USE_FSR) &&
-	    nor->wait_till_ready == spi_nor_wait_till_ready)
-		nor->wait_till_ready = spi_nor_wait_till_fsr_ready;
+	if (info->flags & USE_FSR)
+		nor->flags |= SNOR_F_USE_FSR;
 
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */
@@ -1031,7 +1050,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 
 	/* Quad/Dual-read mode takes precedence over fast/normal */
 	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
-		ret = set_quad_mode(nor, info->jedec_id);
+		ret = set_quad_mode(nor, info);
 		if (ret) {
 			dev_err(dev, "quad mode not supported\n");
 			return ret;
@@ -1067,7 +1086,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	else if (mtd->size > 0x1000000) {
 		/* enable 4-byte addressing if the device exceeds 16MiB */
 		nor->addr_width = 4;
-		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
+		if (JEDEC_MFR(info) == CFI_MFR_AMD) {
 			/* Dedicated 4-byte command set */
 			switch (nor->flash_read) {
 			case SPI_NOR_QUAD:
@@ -1088,7 +1107,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 			nor->erase_opcode = SPINOR_OP_SE_4B;
 			mtd->erasesize = info->sector_size;
 		} else
-			set_4byte(nor, info->jedec_id, 1);
+			set_4byte(nor, info, 1);
 	} else {
 		nor->addr_width = 3;
 	}
diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c
index dc4f960..5e06118 100644
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c
@@ -34,8 +34,11 @@
 #include "mtd_test.h"
 
 static int dev = -EINVAL;
+static int bitflip_limit;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
+module_param(bitflip_limit, int, S_IRUGO);
+MODULE_PARM_DESC(bitflip_limit, "Max. allowed bitflips per page");
 
 static struct mtd_info *mtd;
 static unsigned char *readbuf;
@@ -115,12 +118,36 @@ static int write_whole_device(void)
 	return 0;
 }
 
+/*
+ * Log every byte that differs between cs and ct (address, byte offset,
+ * both values and their XOR) and return the total number of differing bits.
+ * addr is printed as a 64-bit value so large offsets are not truncated.
+ */
+static size_t memcmpshow(loff_t addr, const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1, *su2;
+	int res;
+	size_t i = 0, bitflips = 0;
+
+	for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--, i++) {
+		res = *su1 ^ *su2;
+		if (res) {
+			pr_info("error @addr[0x%llx:0x%zx] 0x%x -> 0x%x diff 0x%x\n",
+				(unsigned long long)addr, i, *su1, *su2, res);
+			bitflips += hweight8(res);
+		}
+	}
+
+	return bitflips;
+}
+
 static int verify_eraseblock(int ebnum)
 {
 	int i;
 	struct mtd_oob_ops ops;
 	int err = 0;
 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
+	size_t bitflips;
 
 	prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt);
 	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
@@ -139,8 +166,11 @@ static int verify_eraseblock(int ebnum)
 			errcnt += 1;
 			return err ? err : -1;
 		}
-		if (memcmp(readbuf, writebuf + (use_len_max * i) + use_offset,
-			   use_len)) {
+
+		bitflips = memcmpshow(addr, readbuf,
+				      writebuf + (use_len_max * i) + use_offset,
+				      use_len);
+		if (bitflips > bitflip_limit) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
@@ -148,7 +178,10 @@ static int verify_eraseblock(int ebnum)
 				pr_err("error: too many errors\n");
 				return -1;
 			}
+		} else if (bitflips) {
+			pr_info("ignoring error as within bitflip_limit\n");
 		}
+
 		if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) {
 			int k;
 
@@ -167,9 +200,10 @@ static int verify_eraseblock(int ebnum)
 				errcnt += 1;
 				return err ? err : -1;
 			}
-			if (memcmp(readbuf + use_offset,
-				   writebuf + (use_len_max * i) + use_offset,
-				   use_len)) {
+			bitflips = memcmpshow(addr, readbuf + use_offset,
+					      writebuf + (use_len_max * i) + use_offset,
+					      use_len);
+			if (bitflips > bitflip_limit) {
 				pr_err("error: verify failed at %#llx\n",
 						(long long)addr);
 				errcnt += 1;
@@ -177,7 +211,10 @@ static int verify_eraseblock(int ebnum)
 					pr_err("error: too many errors\n");
 					return -1;
 				}
+			} else if (bitflips) {
+				pr_info("ignoring error as within bitflip_limit\n");
 			}
+
 			for (k = 0; k < use_offset; ++k)
 				if (readbuf[k] != 0xff) {
 					pr_err("error: verify 0xff "
@@ -216,6 +253,9 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	int err = 0;
 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
 	size_t len = mtd->ecclayout->oobavail * pgcnt;
+	size_t oobavail = mtd->ecclayout->oobavail;
+	size_t bitflips;
+	int i;
 
 	prandom_bytes_state(&rnd_state, writebuf, len);
 	ops.mode      = MTD_OPS_AUTO_OOB;
@@ -226,6 +266,8 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	ops.ooboffs   = 0;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
+
+	/* read entire block's OOB at one go */
 	err = mtd_read_oob(mtd, addr, &ops);
 	if (err || ops.oobretlen != len) {
 		pr_err("error: readoob failed at %#llx\n",
@@ -233,13 +275,21 @@ static int verify_eraseblock_in_one_go(int ebnum)
 		errcnt += 1;
 		return err ? err : -1;
 	}
-	if (memcmp(readbuf, writebuf, len)) {
-		pr_err("error: verify failed at %#llx\n",
-		       (long long)addr);
-		errcnt += 1;
-		if (errcnt > 1000) {
-			pr_err("error: too many errors\n");
-			return -1;
+
+	/* verify one page OOB at a time for bitflip per page limit check */
+	for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
+		bitflips = memcmpshow(addr, readbuf + (i * oobavail),
+				      writebuf + (i * oobavail), oobavail);
+		if (bitflips > bitflip_limit) {
+			pr_err("error: verify failed at %#llx\n",
+			       (long long)addr);
+			errcnt += 1;
+			if (errcnt > 1000) {
+				pr_err("error: too many errors\n");
+				return -1;
+			}
+		} else if (bitflips) {
+			pr_info("ignoring error as within bitflip_limit\n");
 		}
 	}
 
@@ -610,7 +660,8 @@ static int __init mtd_oobtest_init(void)
 		err = mtd_read_oob(mtd, addr, &ops);
 		if (err)
 			goto out;
-		if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) {
+		if (memcmpshow(addr, readbuf, writebuf,
+			       mtd->ecclayout->oobavail * 2)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
diff --git a/drivers/mtd/tests/torturetest.c b/drivers/mtd/tests/torturetest.c
index eeab969..b55bc52 100644
--- a/drivers/mtd/tests/torturetest.c
+++ b/drivers/mtd/tests/torturetest.c
@@ -264,7 +264,9 @@ static int __init tort_init(void)
 		int i;
 		void *patt;
 
-		mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt);
+		err = mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt);
+		if (err)
+			goto out;
 
 		/* Check if the eraseblocks contain only 0xFF bytes */
 		if (check) {
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 7cf8f4a..48e62a3 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -59,7 +59,7 @@ config NET_DSA_BCM_SF2
 	depends on HAS_IOMEM
 	select NET_DSA
 	select NET_DSA_TAG_BRCM
-	select FIXED_PHY if NET_DSA_BCM_SF2=y
+	select FIXED_PHY
 	select BCM7XXX_PHY
 	select MDIO_BCM_UNIMAC
 	---help---
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 888247a..41a3c98 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -64,7 +64,7 @@ config BCMGENET
 	tristate "Broadcom GENET internal MAC support"
 	select MII
 	select PHYLIB
-	select FIXED_PHY if BCMGENET=y
+	select FIXED_PHY
 	select BCM7XXX_PHY
 	help
 	  This driver supports the built-in Ethernet MACs found in the
@@ -155,7 +155,7 @@ config SYSTEMPORT
 	depends on OF
 	select MII
 	select PHYLIB
-	select FIXED_PHY if SYSTEMPORT=y
+	select FIXED_PHY
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset using an internal
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 691f0bf..9f5e387 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -13256,7 +13256,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 		return -EFAULT;
 	}
 
-	DP(BNX2X_MSG_PTP, "Configrued val = %d, period = %d\n", best_val,
+	DP(BNX2X_MSG_PTP, "Configured val = %d, period = %d\n", best_val,
 	   best_period);
 
 	return 0;
@@ -14784,7 +14784,7 @@ static int bnx2x_hwtstamp_ioctl(struct bnx2x *bp, struct ifreq *ifr)
 		-EFAULT : 0;
 }
 
-/* Configrues HW for PTP */
+/* Configures HW for PTP */
 static int bnx2x_configure_ptp(struct bnx2x *bp)
 {
 	int rc, port = BP_PORT(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index b0779d7..6fe547c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -7549,7 +7549,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define IGU_REG_SISR_MDPC_WOMASK_UPPER		0x05a6
 
 #define IGU_REG_RESERVED_UPPER				0x05ff
-/* Fields of IGU PF CONFIGRATION REGISTER */
+/* Fields of IGU PF CONFIGURATION REGISTER */
 #define IGU_PF_CONF_FUNC_EN	  (0x1<<0)  /* function enable	      */
 #define IGU_PF_CONF_MSI_MSIX_EN   (0x1<<1)  /* MSI/MSIX enable	      */
 #define IGU_PF_CONF_INT_LINE_EN   (0x1<<2)  /* INT enable	      */
@@ -7557,7 +7557,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define IGU_PF_CONF_SINGLE_ISR_EN (0x1<<4)  /* single ISR mode enable */
 #define IGU_PF_CONF_SIMD_MODE	  (0x1<<5)  /* simd all ones mode     */
 
-/* Fields of IGU VF CONFIGRATION REGISTER */
+/* Fields of IGU VF CONFIGURATION REGISTER */
 #define IGU_VF_CONF_FUNC_EN	   (0x1<<0)  /* function enable        */
 #define IGU_VF_CONF_MSI_MSIX_EN    (0x1<<1)  /* MSI/MSIX enable        */
 #define IGU_VF_CONF_PARENT_MASK    (0x3<<2)  /* Parent PF	       */
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 06dea3d..3767271 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2160,7 +2160,7 @@ static int __init macb_probe(struct platform_device *pdev)
 	int err = -ENXIO;
 	const char *mac;
 	void __iomem *mem;
-	unsigned int hw_q, queue_mask, q, num_queues, q_irq = 0;
+	unsigned int hw_q, queue_mask, q, num_queues;
 	struct clk *pclk, *hclk, *tx_clk;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2235,11 +2235,11 @@ static int __init macb_probe(struct platform_device *pdev)
 	 * register mapping but we don't want to test the queue index then
 	 * compute the corresponding register offset at run time.
 	 */
-	for (hw_q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
+	for (hw_q = 0, q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
 		if (!(queue_mask & (1 << hw_q)))
 			continue;
 
-		queue = &bp->queues[q_irq];
+		queue = &bp->queues[q];
 		queue->bp = bp;
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
@@ -2261,18 +2261,18 @@ static int __init macb_probe(struct platform_device *pdev)
 		 * must remove the optional gaps that could exist in the
 		 * hardware queue mask.
 		 */
-		queue->irq = platform_get_irq(pdev, q_irq);
+		queue->irq = platform_get_irq(pdev, q);
 		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
 				       0, dev->name, queue);
 		if (err) {
 			dev_err(&pdev->dev,
 				"Unable to request IRQ %d (error %d)\n",
 				queue->irq, err);
-			goto err_out_free_irq;
+			goto err_out_free_netdev;
 		}
 
 		INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
-		q_irq++;
+		q++;
 	}
 	dev->irq = bp->queues[0].irq;
 
@@ -2350,7 +2350,7 @@ static int __init macb_probe(struct platform_device *pdev)
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_free_irq;
+		goto err_out_free_netdev;
 	}
 
 	err = macb_mii_init(bp);
@@ -2373,9 +2373,7 @@ static int __init macb_probe(struct platform_device *pdev)
 
 err_out_unregister_netdev:
 	unregister_netdev(dev);
-err_out_free_irq:
-	for (q = 0, queue = bp->queues; q < q_irq; ++q, ++queue)
-		devm_free_irq(&pdev->dev, queue->irq, queue);
+err_out_free_netdev:
 	free_netdev(dev);
 err_out_disable_clocks:
 	if (!IS_ERR(tx_clk))
@@ -2392,8 +2390,6 @@ static int __exit macb_remove(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct macb *bp;
-	struct macb_queue *queue;
-	unsigned int q;
 
 	dev = platform_get_drvdata(pdev);
 
@@ -2405,14 +2401,11 @@ static int __exit macb_remove(struct platform_device *pdev)
 		kfree(bp->mii_bus->irq);
 		mdiobus_free(bp->mii_bus);
 		unregister_netdev(dev);
-		queue = bp->queues;
-		for (q = 0; q < bp->num_queues; ++q, ++queue)
-			devm_free_irq(&pdev->dev, queue->irq, queue);
-		free_netdev(dev);
 		if (!IS_ERR(bp->tx_clk))
 			clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
+		free_netdev(dev);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 28d0415..c132d90 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2376,7 +2376,7 @@ const char *t4_get_port_type_description(enum fw_port_type port_type)
 		"KR/KX",
 		"KR/KX/KX4",
 		"R QSFP_10G",
-		"",
+		"R QSA",
 		"R QSFP",
 		"R BP40_BA",
 	};
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 291b6f2..7c0aec8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2470,8 +2470,8 @@ enum fw_port_type {
 	FW_PORT_TYPE_BP_AP,
 	FW_PORT_TYPE_BP4_AP,
 	FW_PORT_TYPE_QSFP_10G,
-	FW_PORT_TYPE_QSFP,
 	FW_PORT_TYPE_QSA,
+	FW_PORT_TYPE_QSFP,
 	FW_PORT_TYPE_BP40_BA,
 
 	FW_PORT_TYPE_NONE = FW_PORT_CMD_PTYPE_M
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index b242792..d1c025f 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -60,6 +60,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/in.h>
+#include <linux/jiffies.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
@@ -238,13 +239,13 @@ writereg(struct net_device *dev, u16 regno, u16 value)
 static int __init
 wait_eeprom_ready(struct net_device *dev)
 {
-	int timeout = jiffies;
+	unsigned long timeout = jiffies;
 	/* check to see if the EEPROM is ready,
 	 * a timeout is used just in case EEPROM is ready when
 	 * SI_BUSY in the PP_SelfST is clear
 	 */
 	while (readreg(dev, PP_SelfST) & SI_BUSY)
-		if (jiffies - timeout >= 40)
+		if (time_after_eq(jiffies, timeout + 40))
 			return -1;
 	return 0;
 }
@@ -485,7 +486,7 @@ control_dc_dc(struct net_device *dev, int on_not_off)
 {
 	struct net_local *lp = netdev_priv(dev);
 	unsigned int selfcontrol;
-	int timenow = jiffies;
+	unsigned long timenow = jiffies;
 	/* control the DC to DC convertor in the SelfControl register.
 	 * Note: This is hooked up to a general purpose pin, might not
 	 * always be a DC to DC convertor.
@@ -499,7 +500,7 @@ control_dc_dc(struct net_device *dev, int on_not_off)
 	writereg(dev, PP_SelfCTL, selfcontrol);
 
 	/* Wait for the DC/DC converter to power up - 500ms */
-	while (jiffies - timenow < HZ)
+	while (time_before(jiffies, timenow + HZ))
 		;
 }
 
@@ -514,7 +515,7 @@ send_test_pkt(struct net_device *dev)
 		0, 0,		/* DSAP=0 & SSAP=0 fields */
 		0xf3, 0		/* Control (Test Req + P bit set) */
 	};
-	long timenow = jiffies;
+	unsigned long timenow = jiffies;
 
 	writereg(dev, PP_LineCTL, readreg(dev, PP_LineCTL) | SERIAL_TX_ON);
 
@@ -525,10 +526,10 @@ send_test_pkt(struct net_device *dev)
 	iowrite16(ETH_ZLEN, lp->virt_addr + TX_LEN_PORT);
 
 	/* Test to see if the chip has allocated memory for the packet */
-	while (jiffies - timenow < 5)
+	while (time_before(jiffies, timenow + 5))
 		if (readreg(dev, PP_BusST) & READY_FOR_TX_NOW)
 			break;
-	if (jiffies - timenow >= 5)
+	if (time_after_eq(jiffies, timenow + 5))
 		return 0;	/* this shouldn't happen */
 
 	/* Write the contents of the packet */
@@ -536,7 +537,7 @@ send_test_pkt(struct net_device *dev)
 
 	cs89_dbg(1, debug, "Sending test packet ");
 	/* wait a couple of jiffies for packet to be received */
-	for (timenow = jiffies; jiffies - timenow < 3;)
+	for (timenow = jiffies; time_before(jiffies, timenow + 3);)
 		;
 	if ((readreg(dev, PP_TxEvent) & TX_SEND_OK_BITS) == TX_OK) {
 		cs89_dbg(1, cont, "succeeded\n");
@@ -556,7 +557,7 @@ static int
 detect_tp(struct net_device *dev)
 {
 	struct net_local *lp = netdev_priv(dev);
-	int timenow = jiffies;
+	unsigned long timenow = jiffies;
 	int fdx;
 
 	cs89_dbg(1, debug, "%s: Attempting TP\n", dev->name);
@@ -574,7 +575,7 @@ detect_tp(struct net_device *dev)
 	/* Delay for the hardware to work out if the TP cable is present
 	 * - 150ms
 	 */
-	for (timenow = jiffies; jiffies - timenow < 15;)
+	for (timenow = jiffies; time_before(jiffies, timenow + 15);)
 		;
 	if ((readreg(dev, PP_LineST) & LINK_OK) == 0)
 		return DETECTED_NONE;
@@ -618,7 +619,7 @@ detect_tp(struct net_device *dev)
 		if ((lp->auto_neg_cnf & AUTO_NEG_BITS) == AUTO_NEG_ENABLE) {
 			pr_info("%s: negotiating duplex...\n", dev->name);
 			while (readreg(dev, PP_AutoNegST) & AUTO_NEG_BUSY) {
-				if (jiffies - timenow > 4000) {
+				if (time_after(jiffies, timenow + 4000)) {
 					pr_err("**** Full / half duplex auto-negotiation timed out ****\n");
 					break;
 				}
@@ -1271,7 +1272,7 @@ static void __init reset_chip(struct net_device *dev)
 {
 #if !defined(CONFIG_MACH_MX31ADS)
 	struct net_local *lp = netdev_priv(dev);
-	int reset_start_time;
+	unsigned long reset_start_time;
 
 	writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
 
@@ -1294,7 +1295,7 @@ static void __init reset_chip(struct net_device *dev)
 	/* Wait until the chip is reset */
 	reset_start_time = jiffies;
 	while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 &&
-	       jiffies - reset_start_time < 2)
+	       time_before(jiffies, reset_start_time + 2))
 		;
 #endif /* !CONFIG_MACH_MX31ADS */
 }
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2aacd47..1960731 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3138,6 +3138,7 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter)
 
 	netdev->hw_enc_features = 0;
 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
+	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
 }
 #endif
 
@@ -4429,6 +4430,7 @@ static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
 				   NETIF_F_TSO | NETIF_F_TSO6 |
 				   NETIF_F_GSO_UDP_TUNNEL;
 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
 
 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
 		 be16_to_cpu(port));
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ebf76c4..5ebdf8d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1558,20 +1558,21 @@ fec_enet_interrupt(int irq, void *dev_id)
 {
 	struct net_device *ndev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	const unsigned napi_mask = FEC_ENET_RXF | FEC_ENET_TXF;
 	uint int_events;
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
-	writel(int_events & ~napi_mask, fep->hwp + FEC_IEVENT);
+	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
-	if (int_events & napi_mask) {
+	if (fep->work_tx || fep->work_rx) {
 		ret = IRQ_HANDLED;
 
-		/* Disable the NAPI interrupts */
-		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
-		napi_schedule(&fep->napi);
+		if (napi_schedule_prep(&fep->napi)) {
+			/* Disable the NAPI interrupts */
+			writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+			__napi_schedule(&fep->napi);
+		}
 	}
 
 	if (int_events & FEC_ENET_MII) {
@@ -1591,12 +1592,6 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int pkts;
 
-	/*
-	 * Clear any pending transmit or receive interrupts before
-	 * processing the rings to avoid racing with the hardware.
-	 */
-	writel(FEC_ENET_RXF | FEC_ENET_TXF, fep->hwp + FEC_IEVENT);
-
 	pkts = fec_enet_rx(ndev, budget);
 
 	fec_enet_tx(ndev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0a7ea4c..a5f2660 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7549,6 +7549,11 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
 		return -EOPNOTSUPP;
 
+	if (vid) {
+		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		return -EINVAL;
+	}
+
 	/* Hardware does not support aging addresses so if a
 	 * ndm_state is given only allow permanent addresses
 	 */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 6ff214d..190cbd9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1569,8 +1569,15 @@ int mlx4_en_start_port(struct net_device *dev)
 			mlx4_en_free_affinity_hint(priv, i);
 			goto cq_err;
 		}
-		for (j = 0; j < cq->size; j++)
-			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+
+		for (j = 0; j < cq->size; j++) {
+			struct mlx4_cqe *cqe = NULL;
+
+			cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
+			      priv->cqe_factor;
+			cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+		}
+
 		err = mlx4_en_set_cq_moder(priv, cq);
 		if (err) {
 			en_err(priv, "Failed setting cq moderation parameters\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index ef3b95b..982861d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -787,11 +787,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
 			field = 3;
 		dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
-		mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
-			 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
 	} else {
 		dev_cap->bf_reg_size = 0;
-		mlx4_dbg(dev, "BlueFlame not available\n");
 	}
 
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
@@ -902,9 +899,6 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			goto out;
 	}
 
-	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
-		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
-
 	/*
 	 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
 	 * we can't use any EQs whose doorbell falls on that page,
@@ -916,6 +910,21 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	else
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
 
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+	if (dev_cap->bf_reg_size > 0)
+		mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
+			 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
+	else
+		mlx4_dbg(dev, "BlueFlame not available\n");
+
+	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
 	mlx4_dbg(dev, "Max ICM size %lld MB\n",
 		 (unsigned long long) dev_cap->max_icm_sz >> 20);
 	mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
@@ -949,13 +958,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 dev_cap->dmfs_high_rate_qpn_base);
 	mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
 		 dev_cap->dmfs_high_rate_qpn_range);
-
 	dump_dev_cap_flags(dev, dev_cap->flags);
 	dump_dev_cap_flags2(dev, dev_cap->flags2);
-
-out:
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
 }
 
 int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap)
@@ -1848,8 +1852,8 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
 	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
 	if (byte_field) {
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
 		param->cqe_size = 1 << ((byte_field &
 					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
 		param->eqe_size = 1 << (((byte_field &
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 794e282..62562b6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -224,6 +224,7 @@ struct mlx4_set_ib_param {
 	u32 cap_mask;
 };
 
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
 int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap);
 int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e25436b..943cbd4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -171,9 +171,9 @@ int mlx4_check_port_params(struct mlx4_dev *dev,
 {
 	int i;
 
-	for (i = 0; i < dev->caps.num_ports - 1; i++) {
-		if (port_type[i] != port_type[i + 1]) {
-			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+		for (i = 0; i < dev->caps.num_ports - 1; i++) {
+			if (port_type[i] != port_type[i + 1]) {
 				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
 				return -EINVAL;
 			}
@@ -305,6 +305,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 		return err;
 	}
+	mlx4_dev_cap_dump(dev, dev_cap);
 
 	if (dev_cap->min_page_sz > PAGE_SIZE) {
 		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
@@ -2488,41 +2489,42 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
 			     u8 total_vfs, int existing_vfs)
 {
 	u64 dev_flags = dev->flags;
+	int err = 0;
 
-	dev->dev_vfs = kzalloc(
-			total_vfs * sizeof(*dev->dev_vfs),
-			GFP_KERNEL);
+	atomic_inc(&pf_loading);
+	if (dev->flags &  MLX4_FLAG_SRIOV) {
+		if (existing_vfs != total_vfs) {
+			mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
+				 existing_vfs, total_vfs);
+			total_vfs = existing_vfs;
+		}
+	}
+
+	dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
 	if (NULL == dev->dev_vfs) {
 		mlx4_err(dev, "Failed to allocate memory for VFs\n");
 		goto disable_sriov;
-	} else if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
-		int err = 0;
-
-		atomic_inc(&pf_loading);
-		if (existing_vfs) {
-			if (existing_vfs != total_vfs)
-				mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
-					 existing_vfs, total_vfs);
-		} else {
-			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
-			err = pci_enable_sriov(pdev, total_vfs);
-		}
-		if (err) {
-			mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
-				 err);
-			atomic_dec(&pf_loading);
-			goto disable_sriov;
-		} else {
-			mlx4_warn(dev, "Running in master mode\n");
-			dev_flags |= MLX4_FLAG_SRIOV |
-				MLX4_FLAG_MASTER;
-			dev_flags &= ~MLX4_FLAG_SLAVE;
-			dev->num_vfs = total_vfs;
-		}
+	}
+
+	if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
+		mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
+		err = pci_enable_sriov(pdev, total_vfs);
+	}
+	if (err) {
+		mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
+			 err);
+		goto disable_sriov;
+	} else {
+		mlx4_warn(dev, "Running in master mode\n");
+		dev_flags |= MLX4_FLAG_SRIOV |
+			MLX4_FLAG_MASTER;
+		dev_flags &= ~MLX4_FLAG_SLAVE;
+		dev->num_vfs = total_vfs;
 	}
 	return dev_flags;
 
 disable_sriov:
+	atomic_dec(&pf_loading);
 	dev->num_vfs = 0;
 	kfree(dev->dev_vfs);
 	return dev_flags & ~MLX4_FLAG_MASTER;
@@ -2606,8 +2608,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 
 		if (total_vfs) {
-			existing_vfs = pci_num_vf(pdev);
 			dev->flags = MLX4_FLAG_MASTER;
+			existing_vfs = pci_num_vf(pdev);
+			if (existing_vfs)
+				dev->flags |= MLX4_FLAG_SRIOV;
 			dev->num_vfs = total_vfs;
 		}
 	}
@@ -2643,6 +2647,7 @@ slave_start:
 	}
 
 	if (mlx4_is_master(dev)) {
+		/* when we hit the goto slave_start below, dev_cap already initialized */
 		if (!dev_cap) {
 			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
 
@@ -2849,6 +2854,7 @@ slave_start:
 	if (mlx4_is_master(dev) && dev->num_vfs)
 		atomic_dec(&pf_loading);
 
+	kfree(dev_cap);
 	return 0;
 
 err_port:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ab68446..da82991 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -157,6 +157,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_CMD";
 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+	case MLX5_EVENT_TYPE_PAGE_FAULT:
+		return "MLX5_EVENT_TYPE_PAGE_FAULT";
 	default:
 		return "Unrecognized event";
 	}
@@ -279,6 +281,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 			}
 			break;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+		case MLX5_EVENT_TYPE_PAGE_FAULT:
+			mlx5_eq_pagefault(dev, eqe);
+			break;
+#endif
 
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
@@ -446,8 +453,12 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
 int mlx5_start_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
 	int err;
 
+	if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
@@ -459,7 +470,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	mlx5_cmd_use_events(dev);
 
 	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+				 MLX5_NUM_ASYNC_EQE, async_event_mask,
 				 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 087c4c7..06f9036 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -69,6 +69,46 @@ int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev, struct mlx5_caps *caps)
 	return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR);
 }
 
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps)
+{
+	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *out;
+	int err;
+
+	if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+		return -ENOTSUPP;
+
+	memset(in, 0, sizeof(in));
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	if (err)
+		goto out;
+
+	err = mlx5_cmd_status_to_err_v2(out);
+	if (err) {
+		mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err);
+		goto out;
+	}
+
+	memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct),
+	       sizeof(*caps));
+
+	mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n",
+		be32_to_cpu(caps->per_transport_caps.rc_odp_caps),
+		be32_to_cpu(caps->per_transport_caps.uc_odp_caps),
+		be32_to_cpu(caps->per_transport_caps.ud_odp_caps));
+
+out:
+	kfree(out);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_query_odp_caps);
+
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_init_hca_mbox_in in;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 5261a2b..575d853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -88,6 +88,95 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 	mlx5_core_put_rsc(common);
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
+	int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
+	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
+	struct mlx5_core_qp *qp =
+		container_of(common, struct mlx5_core_qp, common);
+	struct mlx5_pagefault pfault;
+
+	if (!qp) {
+		mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
+			       qpn);
+		return;
+	}
+
+	pfault.event_subtype = eqe->sub_type;
+	pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
+		(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
+	pfault.bytes_committed = be32_to_cpu(
+		pf_eqe->bytes_committed);
+
+	mlx5_core_dbg(dev,
+		      "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
+		      eqe->sub_type, pfault.flags);
+
+	switch (eqe->sub_type) {
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		/* RDMA based event */
+		pfault.rdma.r_key =
+			be32_to_cpu(pf_eqe->rdma.r_key);
+		pfault.rdma.packet_size =
+			be16_to_cpu(pf_eqe->rdma.packet_length);
+		pfault.rdma.rdma_op_len =
+			be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+		pfault.rdma.rdma_va =
+			be64_to_cpu(pf_eqe->rdma.rdma_va);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
+			      qpn, pfault.rdma.r_key);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: rdma_op_len: 0x%08x,\n",
+			      pfault.rdma.rdma_op_len);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: rdma_va: 0x%016llx,\n",
+			      pfault.rdma.rdma_va);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
+			      pfault.bytes_committed);
+		break;
+
+	case MLX5_PFAULT_SUBTYPE_WQE:
+		/* WQE based event */
+		pfault.wqe.wqe_index =
+			be16_to_cpu(pf_eqe->wqe.wqe_index);
+		pfault.wqe.packet_size =
+			be16_to_cpu(pf_eqe->wqe.packet_length);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
+			      qpn, pfault.wqe.wqe_index);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
+			      pfault.bytes_committed);
+		break;
+
+	default:
+		mlx5_core_warn(dev,
+			       "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
+			       eqe->sub_type, qpn);
+		/* Unsupported page faults should still be resolved by the
+		 * page fault handler
+		 */
+	}
+
+	if (qp->pfault_handler) {
+		qp->pfault_handler(qp, &pfault);
+	} else {
+		mlx5_core_err(dev,
+			      "ODP event for QP %08x, without a fault handler in QP\n",
+			      qpn);
+		/* Page fault will remain unresolved. QP will hang until it is
+		 * destroyed
+		 */
+	}
+
+	mlx5_core_put_rsc(common);
+}
+#endif
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -322,3 +411,33 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 flags, int error)
+{
+	struct mlx5_page_fault_resume_mbox_in in;
+	struct mlx5_page_fault_resume_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
+	in.hdr.opmod = 0;
+	flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
+		  MLX5_PAGE_FAULT_RESUME_WRITE	   |
+		  MLX5_PAGE_FAULT_RESUME_RDMA);
+	flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
+	in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
+				   (flags << MLX5_QPN_BITS));
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 6279268..9468e64 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -39,7 +39,7 @@ config SMC91X
 	select CRC32
 	select MII
 	depends on (ARM || M32R || SUPERH || MIPS || BLACKFIN || \
-		    MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2)
+		    MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2) && (!OF || GPIOLIB)
 	---help---
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 0e13775..056b358 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -309,16 +309,16 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 
 	if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
 		const char *rs;
-		dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
 
 		err = of_property_read_string(np, "st,tx-retime-src", &rs);
-		if (err < 0)
+		if (err < 0) {
 			dev_warn(dev, "Use internal clock source\n");
-
-		if (!strcasecmp(rs, "clk_125"))
+			dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+		} else if (!strcasecmp(rs, "clk_125")) {
 			dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
-		else if (!strcasecmp(rs, "txclk"))
+		} else if (!strcasecmp(rs, "txclk")) {
 			dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+		}
 
 		dwmac->speed = SPEED_1000;
 	}
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 60f7ee5..7df2217 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -46,16 +46,18 @@ struct macvtap_queue {
 	struct list_head next;
 };
 
-#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_VNET_LE | IFF_MULTI_QUEUE)
+#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
+
+#define MACVTAP_VNET_LE 0x80000000
 
 static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
 {
-	return __virtio16_to_cpu(q->flags & IFF_VNET_LE, val);
+	return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val);
 }
 
 static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
 {
-	return __cpu_to_virtio16(q->flags & IFF_VNET_LE, val);
+	return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val);
 }
 
 static struct proto macvtap_proto = {
@@ -999,7 +1001,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 	void __user *argp = (void __user *)arg;
 	struct ifreq __user *ifr = argp;
 	unsigned int __user *up = argp;
-	unsigned int u;
+	unsigned short u;
 	int __user *sp = argp;
 	int s;
 	int ret;
@@ -1014,7 +1016,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 		if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP))
 			ret = -EINVAL;
 		else
-			q->flags = u;
+			q->flags = (q->flags & ~MACVTAP_FEATURES) | u;
 
 		return ret;
 
@@ -1027,8 +1029,9 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 		}
 
 		ret = 0;
+		u = q->flags;
 		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
-		    put_user(q->flags, &ifr->ifr_flags))
+		    put_user(u, &ifr->ifr_flags))
 			ret = -EFAULT;
 		macvtap_put_vlan(vlan);
 		rtnl_unlock();
@@ -1069,6 +1072,21 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 		q->vnet_hdr_sz = s;
 		return 0;
 
+	case TUNGETVNETLE:
+		s = !!(q->flags & MACVTAP_VNET_LE);
+		if (put_user(s, sp))
+			return -EFAULT;
+		return 0;
+
+	case TUNSETVNETLE:
+		if (get_user(s, sp))
+			return -EFAULT;
+		if (s)
+			q->flags |= MACVTAP_VNET_LE;
+		else
+			q->flags &= ~MACVTAP_VNET_LE;
+		return 0;
+
 	case TUNSETOFFLOAD:
 		/* let the user check for future flags */
 		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index b4b0f80..a3c251b 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -119,8 +119,8 @@ config MICREL_PHY
 	  Supports the KSZ9021, VSC8201, KS8001 PHYs.
 
 config FIXED_PHY
-	bool "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
-	depends on PHYLIB=y
+	tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
+	depends on PHYLIB
 	---help---
 	  Adds the platform "fixed" MDIO Bus to cover the boards that use
 	  PHYs that are not connected to the real MDIO bus.
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index eb3b18b..501ea769 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_BCM87XX_PHY)	+= bcm87xx.o
 obj-$(CONFIG_ICPLUS_PHY)	+= icplus.o
 obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
 obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
-obj-$(CONFIG_FIXED_PHY)		+= fixed.o
+obj-$(CONFIG_FIXED_PHY)		+= fixed_phy.o
 obj-$(CONFIG_MDIO_BITBANG)	+= mdio-bitbang.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o
diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed_phy.c
index 3ad0e6e..3ad0e6e 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed_phy.c
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a5cbf67..8c8dc16 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -110,9 +110,11 @@ do {								\
  * overload it to mean fasync when stored there.
  */
 #define TUN_FASYNC	IFF_ATTACH_QUEUE
+/* High bits in flags field are unused. */
+#define TUN_VNET_LE     0x80000000
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-		      IFF_VNET_LE | IFF_MULTI_QUEUE)
+		      IFF_MULTI_QUEUE)
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -208,12 +210,12 @@ struct tun_struct {
 
 static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
 {
-	return __virtio16_to_cpu(tun->flags & IFF_VNET_LE, val);
+	return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val);
 }
 
 static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
 {
-	return __cpu_to_virtio16(tun->flags & IFF_VNET_LE, val);
+	return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val);
 }
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -1843,6 +1845,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	int sndbuf;
 	int vnet_hdr_sz;
 	unsigned int ifindex;
+	int le;
 	int ret;
 
 	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
@@ -2042,6 +2045,23 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		tun->vnet_hdr_sz = vnet_hdr_sz;
 		break;
 
+	case TUNGETVNETLE:
+		le = !!(tun->flags & TUN_VNET_LE);
+		if (put_user(le, (int __user *)argp))
+			ret = -EFAULT;
+		break;
+
+	case TUNSETVNETLE:
+		if (get_user(le, (int __user *)argp)) {
+			ret = -EFAULT;
+			break;
+		}
+		if (le)
+			tun->flags |= TUN_VNET_LE;
+		else
+			tun->flags &= ~TUN_VNET_LE;
+		break;
+
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index a104d7a..eb8584a 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -316,7 +316,7 @@ static const u16 xmtfifo_sz[][NFIFO] = {
 static const char * const fifo_names[] = {
 	"AC_BK", "AC_BE", "AC_VI", "AC_VO", "BCMC", "ATIM" };
 #else
-static const char fifo_names[6][0];
+static const char fifo_names[6][1];
 #endif
 
 #ifdef DEBUG
diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index b6ec519..50033aa 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c
@@ -381,18 +381,15 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 
 	res = pcmcia_read_config_byte(hw_priv->link, CISREG_COR, &old_cor);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 1 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 1 (%d)\n", __func__, res);
 		return;
 	}
-	printk(KERN_DEBUG "prism2_pccard_genesis_sreset: original COR %02x\n",
-		old_cor);
+	printk(KERN_DEBUG "%s: original COR %02x\n", __func__, old_cor);
 
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
 				old_cor | COR_SOFT_RESET);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 2 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 2 (%d)\n", __func__, res);
 		return;
 	}
 
@@ -401,8 +398,7 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 	/* Setup Genesis mode */
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_CCSR, hcr);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 3 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 3 (%d)\n", __func__, res);
 		return;
 	}
 	mdelay(10);
@@ -410,8 +406,7 @@ static void prism2_pccard_genesis_reset(local_info_t *local, int hcr)
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
 				old_cor & ~COR_SOFT_RESET);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 4 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 4 (%d)\n", __func__, res);
 		return;
 	}
 
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
index d2ec516..5c646d5 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
@@ -955,6 +955,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
 	local_save_flags(flags);
 	local_irq_enable();
 
+	rtlhal->fw_ready = false;
 	rtlpriv->intf_ops->disable_aspm(hw);
 	rtstatus = _rtl92ce_init_mac(hw);
 	if (!rtstatus) {
@@ -971,6 +972,7 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
 		goto exit;
 	}
 
+	rtlhal->fw_ready = true;
 	rtlhal->last_hmeboxnum = 0;
 	rtl92c_phy_mac_config(hw);
 	/* because last function modify RCR, so we update
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
index 873363a..5513217 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
@@ -1592,7 +1592,7 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val)
 	}
 }
 
-bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
+static bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
   /* Currently nothing happens here.
    * Traffic stops after some seconds in WPA2 802.11n mode.
diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
index 9be1061..ba30b0d 100644
--- a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
@@ -2078,8 +2078,7 @@ void rtl8821ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
 	if (rtldm->tx_rate != 0xFF)
 		tx_rate = rtl8821ae_hw_rate_to_mrate(hw, rtldm->tx_rate);
 
-	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "===>%s\n", __func__);
 
 	if (tx_rate != 0xFF) { /* Mimic Modify High Rate BBSwing Limit.*/
 		/*CCK*/
@@ -2128,7 +2127,7 @@ void rtl8821ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
 
 	if (method == BBSWING) {
 		RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-			 "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+			 "===>%s\n", __func__);
 		if (rf_path == RF90_PATH_A) {
 			final_swing_idx[RF90_PATH_A] =
 				(rtldm->ofdm_index[RF90_PATH_A] >
@@ -2260,7 +2259,8 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
 	rtldm->txpower_trackinginit = true;
 
 	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "===>rtl8812ae_dm_txpower_tracking_callback_thermalmeter,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+		 "===>%s,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+		 __func__,
 		 rtldm->swing_idx_cck_base,
 		 rtldm->swing_idx_ofdm_base[RF90_PATH_A],
 		 rtldm->default_ofdm_index);
@@ -2539,8 +2539,7 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
 		}
 	}
 
-	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "<===rtl8812ae_dm_txpower_tracking_callback_thermalmeter\n");
+	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "<===%s\n", __func__);
 }
 
 void rtl8821ae_dm_check_txpower_tracking_thermalmeter(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c
index 73a49b8..07b94ed 100644
--- a/drivers/net/wireless/zd1211rw/zd_chip.c
+++ b/drivers/net/wireless/zd1211rw/zd_chip.c
@@ -129,7 +129,7 @@ int zd_ioread32v_locked(struct zd_chip *chip, u32 *values, const zd_addr_t *addr
 	r = zd_ioread16v_locked(chip, v16, a16, count16);
 	if (r) {
 		dev_dbg_f(zd_chip_dev(chip),
-			  "error: zd_ioread16v_locked. Error number %d\n", r);
+			  "error: zd_ioread16v_locked. Error number %d\n", r);
 		return r;
 	}
 
@@ -256,8 +256,8 @@ int zd_iowrite32a_locked(struct zd_chip *chip,
 		if (r) {
 			zd_usb_iowrite16v_async_end(&chip->usb, 0);
 			dev_dbg_f(zd_chip_dev(chip),
-				"error _zd_iowrite32v_locked."
-				" Error number %d\n", r);
+				"error _zd_iowrite32v_locked. Error number %d\n",
+				r);
 			return r;
 		}
 	}
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 083ecc9..5f1fda4 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -230,6 +230,8 @@ struct xenvif {
 	 */
 	bool disabled;
 	unsigned long status;
+	unsigned long drain_timeout;
+	unsigned long stall_timeout;
 
 	/* Queues */
 	struct xenvif_queue *queues;
@@ -328,7 +330,7 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id);
 extern bool separate_tx_rx_irq;
 
 extern unsigned int rx_drain_timeout_msecs;
-extern unsigned int rx_drain_timeout_jiffies;
+extern unsigned int rx_stall_timeout_msecs;
 extern unsigned int xenvif_max_queues;
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index a6a32d3..9259a73 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -166,7 +166,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 
 	cb = XENVIF_RX_CB(skb);
-	cb->expires = jiffies + rx_drain_timeout_jiffies;
+	cb->expires = jiffies + vif->drain_timeout;
 
 	xenvif_rx_queue_tail(queue, skb);
 	xenvif_kick_thread(queue);
@@ -414,6 +414,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->ip_csum = 1;
 	vif->dev = dev;
 	vif->disabled = false;
+	vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
+	vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
 
 	/* Start out with no queues. */
 	vif->queues = NULL;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4a509f7..908e65e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -60,14 +60,12 @@ module_param(separate_tx_rx_irq, bool, 0644);
  */
 unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
-unsigned int rx_drain_timeout_jiffies;
 
 /* The length of time before the frontend is considered unresponsive
  * because it isn't providing Rx slots.
  */
-static unsigned int rx_stall_timeout_msecs = 60000;
+unsigned int rx_stall_timeout_msecs = 60000;
 module_param(rx_stall_timeout_msecs, uint, 0444);
-static unsigned int rx_stall_timeout_jiffies;
 
 unsigned int xenvif_max_queues;
 module_param_named(max_queues, xenvif_max_queues, uint, 0644);
@@ -2020,7 +2018,7 @@ static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
 	return !queue->stalled
 		&& prod - cons < XEN_NETBK_RX_SLOTS_MAX
 		&& time_after(jiffies,
-			      queue->last_rx_time + rx_stall_timeout_jiffies);
+			      queue->last_rx_time + queue->vif->stall_timeout);
 }
 
 static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
@@ -2038,8 +2036,9 @@ static bool xenvif_have_rx_work(struct xenvif_queue *queue)
 {
 	return (!skb_queue_empty(&queue->rx_queue)
 		&& xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
-		|| xenvif_rx_queue_stalled(queue)
-		|| xenvif_rx_queue_ready(queue)
+		|| (queue->vif->stall_timeout &&
+		    (xenvif_rx_queue_stalled(queue)
+		     || xenvif_rx_queue_ready(queue)))
 		|| kthread_should_stop()
 		|| queue->vif->disabled;
 }
@@ -2092,6 +2091,9 @@ int xenvif_kthread_guest_rx(void *data)
 	struct xenvif_queue *queue = data;
 	struct xenvif *vif = queue->vif;
 
+	if (!vif->stall_timeout)
+		xenvif_queue_carrier_on(queue);
+
 	for (;;) {
 		xenvif_wait_for_rx_work(queue);
 
@@ -2118,10 +2120,12 @@ int xenvif_kthread_guest_rx(void *data)
 		 * while it's probably not responsive, drop the
 		 * carrier so packets are dropped earlier.
 		 */
-		if (xenvif_rx_queue_stalled(queue))
-			xenvif_queue_carrier_off(queue);
-		else if (xenvif_rx_queue_ready(queue))
-			xenvif_queue_carrier_on(queue);
+		if (vif->stall_timeout) {
+			if (xenvif_rx_queue_stalled(queue))
+				xenvif_queue_carrier_off(queue);
+			else if (xenvif_rx_queue_ready(queue))
+				xenvif_queue_carrier_on(queue);
+		}
 
 		/* Queued packets may have foreign pages from other
 		 * domains.  These cannot be queued indefinitely as
@@ -2192,9 +2196,6 @@ static int __init netback_init(void)
 	if (rc)
 		goto failed_init;
 
-	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
-	rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
-
 #ifdef CONFIG_DEBUG_FS
 	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
 	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d44cd19..efbaf2a 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -887,9 +887,15 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 		return -EOPNOTSUPP;
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend,
-			 "feature-rx-notify", "%d", &val) < 0 || val == 0) {
-		xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
-		return -EINVAL;
+			 "feature-rx-notify", "%d", &val) < 0)
+		val = 0;
+	if (!val) {
+		/* - Reduce drain timeout to poll more frequently for
+		 *   Rx requests.
+		 * - Disable Rx stall detection.
+		 */
+		be->vif->drain_timeout = msecs_to_jiffies(30);
+		be->vif->stall_timeout = 0;
 	}
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 2f0a9ce..22bcb4e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -977,7 +977,6 @@ static int xennet_poll(struct napi_struct *napi, int budget)
 	struct sk_buff_head rxq;
 	struct sk_buff_head errq;
 	struct sk_buff_head tmpq;
-	unsigned long flags;
 	int err;
 
 	spin_lock(&queue->rx_lock);
@@ -1050,15 +1049,11 @@ err:
 	if (work_done < budget) {
 		int more_to_do = 0;
 
-		napi_gro_flush(napi, false);
-
-		local_irq_save(flags);
+		napi_complete(napi);
 
 		RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
-		if (!more_to_do)
-			__napi_complete(napi);
-
-		local_irq_restore(flags);
+		if (more_to_do)
+			napi_schedule(napi);
 	}
 
 	spin_unlock(&queue->rx_lock);
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index d2ccd28..aa6a333 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -2154,7 +2154,7 @@ static int trf7970a_resume(struct device *dev)
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int trf7970a_pm_runtime_suspend(struct device *dev)
 {
 	struct spi_device *spi = container_of(dev, struct spi_device, dev);
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index cced842..7a8f1c5 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -67,7 +67,7 @@ config XEN_PCIDEV_FRONTEND
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
-	depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
+	depends on PCI && X86_LOCAL_APIC
 	help
 	   This allows native hypertransport devices to use interrupts.
 
@@ -110,13 +110,6 @@ config PCI_PASID
 
 	  If unsure, say N.
 
-config PCI_IOAPIC
-	bool "PCI IO-APIC hotplug support" if X86
-	depends on PCI
-	depends on ACPI
-	depends on X86_IO_APIC
-	default !X86
-
 config PCI_LABEL
 	def_bool y if (DMI || ACPI)
 	select NLS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index e04fe2d..73e4af4 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -13,8 +13,6 @@ obj-$(CONFIG_PCI_QUIRKS) += quirks.o
 # Build PCI Express stuff if needed
 obj-$(CONFIG_PCIEPORTBUS) += pcie/
 
-obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
-
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
 ifdef CONFIG_HOTPLUG_PCI
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 3efaf4c..96c5c72 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -36,6 +36,7 @@
 #include <linux/wait.h>
 #include "../pci.h"
 #include <asm/pci_x86.h>		/* for struct irq_routing_table */
+#include <asm/io_apic.h>
 #include "ibmphp.h"
 
 #define attn_on(sl)  ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
@@ -155,13 +156,10 @@ int ibmphp_init_devno(struct slot **cur_slot)
 	for (loop = 0; loop < len; loop++) {
 		if ((*cur_slot)->number == rtable->slots[loop].slot &&
 		    (*cur_slot)->bus == rtable->slots[loop].bus) {
-			struct io_apic_irq_attr irq_attr;
-
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i,
-						&irq_attr);
+						(int) (*cur_slot)->device, i);
 
 			debug("(*cur_slot)->irq[0] = %x\n",
 					(*cur_slot)->irq[0]);
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
deleted file mode 100644
index f6219d3..0000000
--- a/drivers/pci/ioapic.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * IOAPIC/IOxAPIC/IOSAPIC driver
- *
- * Copyright (C) 2009 Fujitsu Limited.
- * (c) Copyright 2009 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This driver manages PCI I/O APICs added by hotplug after boot.  We try to
- * claim all I/O APIC PCI devices, but those present at boot were registered
- * when we parsed the ACPI MADT, so we'll fail when we try to re-register
- * them.
- */
-
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/acpi.h>
-#include <linux/slab.h>
-
-struct ioapic {
-	acpi_handle	handle;
-	u32		gsi_base;
-};
-
-static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
-{
-	acpi_handle handle;
-	acpi_status status;
-	unsigned long long gsb;
-	struct ioapic *ioapic;
-	int ret;
-	char *type;
-	struct resource *res;
-
-	handle = ACPI_HANDLE(&dev->dev);
-	if (!handle)
-		return -EINVAL;
-
-	status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	/*
-	 * The previous code in acpiphp evaluated _MAT if _GSB failed, but
-	 * ACPI spec 4.0 sec 6.2.2 requires _GSB for hot-pluggable I/O APICs.
-	 */
-
-	ioapic = kzalloc(sizeof(*ioapic), GFP_KERNEL);
-	if (!ioapic)
-		return -ENOMEM;
-
-	ioapic->handle = handle;
-	ioapic->gsi_base = (u32) gsb;
-
-	if (dev->class == PCI_CLASS_SYSTEM_PIC_IOAPIC)
-		type = "IOAPIC";
-	else
-		type = "IOxAPIC";
-
-	ret = pci_enable_device(dev);
-	if (ret < 0)
-		goto exit_free;
-
-	pci_set_master(dev);
-
-	if (pci_request_region(dev, 0, type))
-		goto exit_disable;
-
-	res = &dev->resource[0];
-	if (acpi_register_ioapic(ioapic->handle, res->start, ioapic->gsi_base))
-		goto exit_release;
-
-	pci_set_drvdata(dev, ioapic);
-	dev_info(&dev->dev, "%s at %pR, GSI %u\n", type, res, ioapic->gsi_base);
-	return 0;
-
-exit_release:
-	pci_release_region(dev, 0);
-exit_disable:
-	pci_disable_device(dev);
-exit_free:
-	kfree(ioapic);
-	return -ENODEV;
-}
-
-static void ioapic_remove(struct pci_dev *dev)
-{
-	struct ioapic *ioapic = pci_get_drvdata(dev);
-
-	acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base);
-	pci_release_region(dev, 0);
-	pci_disable_device(dev);
-	kfree(ioapic);
-}
-
-
-static const struct pci_device_id ioapic_devices[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOAPIC, ~0) },
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOXAPIC, ~0) },
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, ioapic_devices);
-
-static struct pci_driver ioapic_driver = {
-	.name		= "ioapic",
-	.id_table	= ioapic_devices,
-	.probe		= ioapic_probe,
-	.remove		= ioapic_remove,
-};
-
-static int __init ioapic_init(void)
-{
-	return pci_register_driver(&ioapic_driver);
-}
-module_init(ioapic_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c
index 4e489a8..6f4aef3 100644
--- a/drivers/phy/phy-omap-usb2.c
+++ b/drivers/phy/phy-omap-usb2.c
@@ -318,7 +318,7 @@ static int omap_usb2_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int omap_usb2_runtime_suspend(struct device *dev)
 {
diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c
index c297b7a..1387b4d 100644
--- a/drivers/phy/phy-ti-pipe3.c
+++ b/drivers/phy/phy-ti-pipe3.c
@@ -423,7 +423,7 @@ static int ti_pipe3_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int ti_pipe3_runtime_suspend(struct device *dev)
 {
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index a2eabe6..638e7970 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -38,7 +38,8 @@ config ACER_WMI
 
 config ACERHDF
 	tristate "Acer Aspire One temperature and fan driver"
-	depends on THERMAL && ACPI
+	depends on ACPI && THERMAL
+	select THERMAL_GOV_BANG_BANG
 	---help---
 	  This is a driver for Acer Aspire One netbooks. It allows to access
 	  the temperature sensor and to control the fan.
@@ -128,10 +129,10 @@ config DELL_WMI_AIO
 	  be called dell-wmi-aio.
 
 config DELL_SMO8800
-	tristate "Dell Latitude freefall driver (ACPI SMO8800/SMO8810)"
+	tristate "Dell Latitude freefall driver (ACPI SMO88XX)"
 	depends on ACPI
 	---help---
-	  Say Y here if you want to support SMO8800/SMO8810 freefall device
+	  Say Y here if you want to support SMO88XX freefall devices
 	  on Dell Latitude laptops.
 
 	  To compile this driver as a module, choose M here: the module will
diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index aaf37c5..594c918 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c
@@ -50,7 +50,7 @@
  */
 #undef START_IN_KERNEL_MODE
 
-#define DRV_VER "0.5.26"
+#define DRV_VER "0.7.0"
 
 /*
  * According to the Atom N270 datasheet,
@@ -119,116 +119,152 @@ struct fancmd {
 	u8 cmd_auto;
 };
 
+struct manualcmd {
+	u8 mreg;
+	u8 moff;
+};
+
+/* default register and command to disable fan in manual mode */
+static const struct manualcmd mcmd = {
+	.mreg = 0x94,
+	.moff = 0xff,
+};
+
 /* BIOS settings */
-struct bios_settings_t {
+struct bios_settings {
 	const char *vendor;
 	const char *product;
 	const char *version;
-	unsigned char fanreg;
-	unsigned char tempreg;
+	u8 fanreg;
+	u8 tempreg;
 	struct fancmd cmd;
+	int mcmd_enable;
 };
 
 /* Register addresses and values for different BIOS versions */
-static const struct bios_settings_t bios_tbl[] = {
+static const struct bios_settings bios_tbl[] = {
 	/* AOA110 */
-	{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00}, 0},
 	/* AOA150 */
-	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* LT1005u */
-	{"Acer", "LT-10Q", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "LT-10Q", "v0.3310", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer 1410 */
-	{"Acer", "Aspire 1410", "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1410", "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
 	/* Acer 1810xx */
-	{"Acer", "Aspire 1810TZ", "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1810TZ", "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
+	/* Acer 5755G */
+	{"Acer", "Aspire 5755G",  "V1.20",   0xab, 0xb4, {0x00, 0x08}, 0},
+	{"Acer", "Aspire 5755G",  "V1.21",   0xab, 0xb3, {0x00, 0x08}, 0},
+	/* Acer 521 */
+	{"Acer", "AO521", "V1.11", 0x55, 0x58, {0x1f, 0x00}, 0},
 	/* Acer 531 */
-	{"Acer", "AO531h", "v0.3104", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AO531h", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AO531h", "v0.3104", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AO531h", "v0.3304", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer 751 */
-	{"Acer", "AO751h", "V0.3212", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AO751h", "V0.3206", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AO751h", "V0.3212", 0x55, 0x58, {0x21, 0x00}, 0},
+	/* Acer 753 */
+	{"Acer", "Aspire One 753", "V1.24", 0x93, 0xac, {0x14, 0x04}, 1},
 	/* Acer 1825 */
-	{"Acer", "Aspire 1825PTZ", "V1.3118", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1825PTZ", "V1.3127", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1825PTZ", "V1.3118", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1825PTZ", "V1.3127", 0x55, 0x58, {0x9e, 0x00}, 0},
+	/* Acer Extensa 5420 */
+	{"Acer", "Extensa 5420", "V1.17", 0x93, 0xac, {0x14, 0x04}, 1},
+	/* Acer Aspire 5315 */
+	{"Acer", "Aspire 5315", "V1.19", 0x93, 0xac, {0x14, 0x04}, 1},
+	/* Acer Aspire 5739 */
+	{"Acer", "Aspire 5739G", "V1.3311", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer TravelMate 7730 */
-	{"Acer", "TravelMate 7730G", "v0.3509", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "TravelMate 7730G", "v0.3509", 0x55, 0x58, {0xaf, 0x00}, 0},
+	/* Acer TravelMate TM8573T */
+	{"Acer", "TM8573T", "V1.13", 0x93, 0xa8, {0x14, 0x04}, 1},
 	/* Gateway */
-	{"Gateway", "AOA110", "v0.3103",  0x55, 0x58, {0x21, 0x00} },
-	{"Gateway", "AOA150", "v0.3103",  0x55, 0x58, {0x20, 0x00} },
-	{"Gateway", "LT31",   "v1.3103",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3201",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3302",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00} },
+	{"Gateway", "AOA110", "v0.3103",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Gateway", "AOA150", "v0.3103",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3103",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3201",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3302",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
 	/* Packard Bell */
-	{"Packard Bell", "DOA150",  "v0.3104",  0x55, 0x58, {0x21, 0x00} },
-	{"Packard Bell", "DOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00} },
-	{"Packard Bell", "AOA110",  "v0.3105",  0x55, 0x58, {0x21, 0x00} },
-	{"Packard Bell", "AOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00} },
-	{"Packard Bell", "ENBFT",   "V1.3118",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "ENBFT",   "V1.3127",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v1.3303",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3120",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3108",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3113",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3115",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3117",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3119",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v1.3204",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3201",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3302",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTVR46", "v1.3308",  0x55, 0x58, {0x9e, 0x00} },
+	{"Packard Bell", "DOA150",  "v0.3104",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Packard Bell", "DOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Packard Bell", "AOA110",  "v0.3105",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Packard Bell", "AOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Packard Bell", "ENBFT",   "V1.3118",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "ENBFT",   "V1.3127",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v1.3303",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3120",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3108",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3113",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3115",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3117",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3119",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v1.3204",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3201",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3302",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTVR46", "v1.3308",  0x55, 0x58, {0x9e, 0x00}, 0},
 	/* pewpew-terminator */
-	{"", "", "", 0, 0, {0, 0} }
+	{"", "", "", 0, 0, {0, 0}, 0}
 };
 
-static const struct bios_settings_t *bios_cfg __read_mostly;
+static const struct bios_settings *bios_cfg __read_mostly;
+
+/*
+ * This struct instructs the thermal layer to use the bang_bang governor
+ * instead of the default governor for acerhdf.
+ */
+static struct thermal_zone_params acerhdf_zone_params = {
+	.governor_name = "bang_bang",
+};
 
 static int acerhdf_get_temp(int *temp)
 {
@@ -275,6 +311,12 @@ static void acerhdf_change_fanstate(int state)
 	fanstate = state;
 
 	ec_write(bios_cfg->fanreg, cmd);
+
+	if (bios_cfg->mcmd_enable && state == ACERHDF_FAN_OFF) {
+		if (verbose)
+			pr_notice("turning off fan manually\n");
+		ec_write(mcmd.mreg, mcmd.moff);
+	}
 }
 
 static void acerhdf_check_param(struct thermal_zone_device *thermal)
@@ -401,6 +443,21 @@ static int acerhdf_get_trip_type(struct thermal_zone_device *thermal, int trip,
 {
 	if (trip == 0)
 		*type = THERMAL_TRIP_ACTIVE;
+	else if (trip == 1)
+		*type = THERMAL_TRIP_CRITICAL;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip,
+				 unsigned long *temp)
+{
+	if (trip != 0)
+		return -EINVAL;
+
+	*temp = fanon - fanoff;
 
 	return 0;
 }
@@ -410,6 +467,10 @@ static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip,
 {
 	if (trip == 0)
 		*temp = fanon;
+	else if (trip == 1)
+		*temp = ACERHDF_TEMP_CRIT;
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -429,6 +490,7 @@ static struct thermal_zone_device_ops acerhdf_dev_ops = {
 	.get_mode = acerhdf_get_mode,
 	.set_mode = acerhdf_set_mode,
 	.get_trip_type = acerhdf_get_trip_type,
+	.get_trip_hyst = acerhdf_get_trip_hyst,
 	.get_trip_temp = acerhdf_get_trip_temp,
 	.get_crit_temp = acerhdf_get_crit_temp,
 };
@@ -481,9 +543,7 @@ static int acerhdf_set_cur_state(struct thermal_cooling_device *cdev,
 	}
 
 	if (state == 0) {
-		/* turn fan off only if below fanoff temperature */
-		if ((cur_state == ACERHDF_FAN_AUTO) &&
-		    (cur_temp < fanoff))
+		if (cur_state == ACERHDF_FAN_AUTO)
 			acerhdf_change_fanstate(ACERHDF_FAN_OFF);
 	} else {
 		if (cur_state == ACERHDF_FAN_OFF)
@@ -558,7 +618,7 @@ static int str_starts_with(const char *str, const char *start)
 static int acerhdf_check_hardware(void)
 {
 	char const *vendor, *version, *product;
-	const struct bios_settings_t *bt = NULL;
+	const struct bios_settings *bt = NULL;
 
 	/* get BIOS data */
 	vendor  = dmi_get_system_info(DMI_SYS_VENDOR);
@@ -660,12 +720,20 @@ static int acerhdf_register_thermal(void)
 	if (IS_ERR(cl_dev))
 		return -EINVAL;
 
-	thz_dev = thermal_zone_device_register("acerhdf", 1, 0, NULL,
-					      &acerhdf_dev_ops, NULL, 0,
+	thz_dev = thermal_zone_device_register("acerhdf", 2, 0, NULL,
+					      &acerhdf_dev_ops,
+					      &acerhdf_zone_params, 0,
 					      (kernelmode) ? interval*1000 : 0);
 	if (IS_ERR(thz_dev))
 		return -EINVAL;
 
+	if (strcmp(thz_dev->governor->name,
+				acerhdf_zone_params.governor_name)) {
+		pr_err("Didn't get thermal governor %s, perhaps not compiled into thermal subsystem.\n",
+				acerhdf_zone_params.governor_name);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -722,9 +790,15 @@ MODULE_ALIAS("dmi:*:*Acer*:pnAOA*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAO751h*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1410*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1810*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5755G*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1825PTZ:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAO521*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAO531*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5739G*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*One*753*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5315*:");
 MODULE_ALIAS("dmi:*:*Acer*:TravelMate*7730G:");
+MODULE_ALIAS("dmi:*:*Acer*:pnTM8573T*:");
 MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:");
 MODULE_ALIAS("dmi:*:*Gateway*:pnLT31*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnAOA*:");
@@ -733,6 +807,7 @@ MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMU*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnENBFT*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMA*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTVR46*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnExtensa 5420*:");
 
 module_init(acerhdf_init);
 module_exit(acerhdf_exit);
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 05647f1..f71700e 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -843,8 +843,7 @@ static int asus_backlight_init(struct asus_laptop *asus)
 
 static void asus_backlight_exit(struct asus_laptop *asus)
 {
-	if (asus->backlight_device)
-		backlight_device_unregister(asus->backlight_device);
+	backlight_device_unregister(asus->backlight_device);
 	asus->backlight_device = NULL;
 }
 
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index c1a6cd6..abdaed3 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -191,6 +191,15 @@ static const struct dmi_system_id asus_quirks[] = {
 	},
 	{
 		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X551CA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X551CA"),
+		},
+		.driver_data = &quirk_asus_wapf4,
+	},
+	{
+		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. X55A",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 21fc932..7543a56 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -1308,8 +1308,7 @@ static int asus_wmi_backlight_init(struct asus_wmi *asus)
 
 static void asus_wmi_backlight_exit(struct asus_wmi *asus)
 {
-	if (asus->backlight_device)
-		backlight_device_unregister(asus->backlight_device);
+	backlight_device_unregister(asus->backlight_device);
 
 	asus->backlight_device = NULL;
 }
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index f6a28d7..9411eae3 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -2,9 +2,11 @@
  *  Driver for Dell laptop extras
  *
  *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
  *
- *  Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
- *  Inc.
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -32,6 +34,13 @@
 #include "../../firmware/dcdbas.h"
 
 #define BRIGHTNESS_TOKEN 0x7d
+#define KBD_LED_OFF_TOKEN 0x01E1
+#define KBD_LED_ON_TOKEN 0x01E2
+#define KBD_LED_AUTO_TOKEN 0x01E3
+#define KBD_LED_AUTO_25_TOKEN 0x02EA
+#define KBD_LED_AUTO_50_TOKEN 0x02EB
+#define KBD_LED_AUTO_75_TOKEN 0x02EC
+#define KBD_LED_AUTO_100_TOKEN 0x02F6
 
 /* This structure will be modified by the firmware when we enter
  * system management mode, hence the volatiles */
@@ -62,6 +71,13 @@ struct calling_interface_structure {
 
 struct quirk_entry {
 	u8 touchpad_led;
+
+	int needs_kbd_timeouts;
+	/*
+	 * Ordered list of timeouts expressed in seconds.
+	 * The list must end with -1
+	 */
+	int kbd_timeouts[];
 };
 
 static struct quirk_entry *quirks;
@@ -76,6 +92,15 @@ static int __init dmi_matched(const struct dmi_system_id *dmi)
 	return 1;
 }
 
+/*
+ * These values come from Windows utility provided by Dell. If any other value
+ * is used then the BIOS silently sets timeout to 0 without any error message.
+ */
+static struct quirk_entry quirk_dell_xps13_9333 = {
+	.needs_kbd_timeouts = 1,
+	.kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
+};
+
 static int da_command_address;
 static int da_command_code;
 static int da_num_tokens;
@@ -267,6 +292,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
 		},
 		.driver_data = &quirk_dell_vostro_v130,
 	},
+	{
+		.callback = dmi_matched,
+		.ident = "Dell XPS13 9333",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+		},
+		.driver_data = &quirk_dell_xps13_9333,
+	},
 	{ }
 };
 
@@ -331,17 +365,29 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy)
 	}
 }
 
-static int find_token_location(int tokenid)
+static int find_token_id(int tokenid)
 {
 	int i;
+
 	for (i = 0; i < da_num_tokens; i++) {
 		if (da_tokens[i].tokenID == tokenid)
-			return da_tokens[i].location;
+			return i;
 	}
 
 	return -1;
 }
 
+static int find_token_location(int tokenid)
+{
+	int id;
+
+	id = find_token_id(tokenid);
+	if (id == -1)
+		return -1;
+
+	return da_tokens[id].location;
+}
+
 static struct calling_interface_buffer *
 dell_send_request(struct calling_interface_buffer *buffer, int class,
 		  int select)
@@ -362,6 +408,20 @@ dell_send_request(struct calling_interface_buffer *buffer, int class,
 	return buffer;
 }
 
+static inline int dell_smi_error(int value)
+{
+	switch (value) {
+	case 0: /* Completed successfully */
+		return 0;
+	case -1: /* Completed with error */
+		return -EIO;
+	case -2: /* Function not supported */
+		return -ENXIO;
+	default: /* Unknown error */
+		return -EINVAL;
+	}
+}
+
 /* Derived from information in DellWirelessCtl.cpp:
    Class 17, select 11 is radio control. It returns an array of 32-bit values.
 
@@ -563,7 +623,7 @@ static bool dell_laptop_i8042_filter(unsigned char data, unsigned char str,
 {
 	static bool extended;
 
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	if (unlikely(data == 0xe0)) {
@@ -716,7 +776,7 @@ static int dell_send_intensity(struct backlight_device *bd)
 	else
 		dell_send_request(buffer, 1, 1);
 
-out:
+ out:
 	release_buffer();
 	return ret;
 }
@@ -740,7 +800,7 @@ static int dell_get_intensity(struct backlight_device *bd)
 
 	ret = buffer->output[1];
 
-out:
+ out:
 	release_buffer();
 	return ret;
 }
@@ -789,6 +849,984 @@ static void touchpad_led_exit(void)
 	led_classdev_unregister(&touchpad_led);
 }
 
+/*
+ * Derived from information in smbios-keyboard-ctl:
+ *
+ * cbClass 4
+ * cbSelect 11
+ * Keyboard illumination
+ * cbArg1 determines the function to be performed
+ *
+ * cbArg1 0x0 = Get Feature Information
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of user-selectable modes
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *  cbRES2, byte2  Reserved for future use
+ *  cbRES2, byte3  Keyboard illumination type
+ *     0         Reserved
+ *     1         Tasklight
+ *     2         Backlight
+ *     3-255     Reserved for future use
+ *  cbRES3, byte0  Supported auto keyboard illumination trigger bitmap.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte1  Supported timeout unit bitmap
+ *     bit 0     Seconds
+ *     bit 1     Minutes
+ *     bit 2     Hours
+ *     bit 3     Days
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte2  Number of keyboard light brightness levels
+ *  cbRES4, byte0  Maximum acceptable seconds value (0 if seconds not supported).
+ *  cbRES4, byte1  Maximum acceptable minutes value (0 if minutes not supported).
+ *  cbRES4, byte2  Maximum acceptable hours value (0 if hours not supported).
+ *  cbRES4, byte3  Maximum acceptable days value (0 if days not supported)
+ *
+ * cbArg1 0x1 = Get Current State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only one bit can be set
+ *  cbRES2, byte2  Currently active auto keyboard illumination triggers.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES2, byte3  Current Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *     NOTE: A value of 0 means always on (no timeout) if any bits of RES3 byte
+ *     are set upon return from the [Get feature information] call.
+ *  cbRES3, byte0  Current setting of ALS value that turns the light on or off.
+ *  cbRES3, byte1  Current ALS reading
+ *  cbRES3, byte2  Current keyboard light level.
+ *
+ * cbArg1 0x2 = Set New State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbArg2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only one bit can be set
+ *  cbArg2, byte2  Desired auto keyboard illumination triggers. Must remain inactive to allow
+ *                 keyboard to turn off automatically.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbArg2, byte3  Desired Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *  cbArg3, byte0  Desired setting of ALS value that turns the light on or off.
+ *  cbArg3, byte2  Desired keyboard light level.
+ */
+
+
+enum kbd_timeout_unit {
+	KBD_TIMEOUT_SECONDS = 0,
+	KBD_TIMEOUT_MINUTES,
+	KBD_TIMEOUT_HOURS,
+	KBD_TIMEOUT_DAYS,
+};
+
+enum kbd_mode_bit {
+	KBD_MODE_BIT_OFF = 0,
+	KBD_MODE_BIT_ON,
+	KBD_MODE_BIT_ALS,
+	KBD_MODE_BIT_TRIGGER_ALS,
+	KBD_MODE_BIT_TRIGGER,
+	KBD_MODE_BIT_TRIGGER_25,
+	KBD_MODE_BIT_TRIGGER_50,
+	KBD_MODE_BIT_TRIGGER_75,
+	KBD_MODE_BIT_TRIGGER_100,
+};
+
+#define kbd_is_als_mode_bit(bit) \
+	((bit) == KBD_MODE_BIT_ALS || (bit) == KBD_MODE_BIT_TRIGGER_ALS)
+#define kbd_is_trigger_mode_bit(bit) \
+	((bit) >= KBD_MODE_BIT_TRIGGER_ALS && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+#define kbd_is_level_mode_bit(bit) \
+	((bit) >= KBD_MODE_BIT_TRIGGER_25 && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+
+struct kbd_info {
+	u16 modes;
+	u8 type;
+	u8 triggers;
+	u8 levels;
+	u8 seconds;
+	u8 minutes;
+	u8 hours;
+	u8 days;
+};
+
+struct kbd_state {
+	u8 mode_bit;
+	u8 triggers;
+	u8 timeout_value;
+	u8 timeout_unit;
+	u8 als_setting;
+	u8 als_value;
+	u8 level;
+};
+
+static const int kbd_tokens[] = {
+	KBD_LED_OFF_TOKEN,
+	KBD_LED_AUTO_25_TOKEN,
+	KBD_LED_AUTO_50_TOKEN,
+	KBD_LED_AUTO_75_TOKEN,
+	KBD_LED_AUTO_100_TOKEN,
+	KBD_LED_ON_TOKEN,
+};
+
+static u16 kbd_token_bits;
+
+static struct kbd_info kbd_info;
+static bool kbd_als_supported;
+static bool kbd_triggers_supported;
+
+static u8 kbd_mode_levels[16];
+static int kbd_mode_levels_count;
+
+static u8 kbd_previous_level;
+static u8 kbd_previous_mode_bit;
+
+static bool kbd_led_present;
+
+/*
+ * NOTE: there are three ways to set the keyboard backlight level.
+ * First, via kbd_state.mode_bit (assigning KBD_MODE_BIT_TRIGGER_* value).
+ * Second, via kbd_state.level (assigning numerical value <= kbd_info.levels).
+ * Third, via SMBIOS tokens (KBD_LED_* in kbd_tokens)
+ *
+ * There are laptops which support only one of these methods. If we want to
+ * support as many machines as possible we need to implement all three methods.
+ * The first two methods use the kbd_state structure. The third uses SMBIOS
+ * tokens. If kbd_info.levels == 0, the machine does not support setting the
+ * keyboard backlight level via kbd_state.level.
+ */
+
+static int kbd_get_info(struct kbd_info *info)
+{
+	u8 units;
+	int ret;
+
+	get_buffer();
+
+	buffer->input[0] = 0x0;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+
+	if (ret) {
+		ret = dell_smi_error(ret);
+		goto out;
+	}
+
+	info->modes = buffer->output[1] & 0xFFFF;
+	info->type = (buffer->output[1] >> 24) & 0xFF;
+	info->triggers = buffer->output[2] & 0xFF;
+	units = (buffer->output[2] >> 8) & 0xFF;
+	info->levels = (buffer->output[2] >> 16) & 0xFF;
+
+	if (units & BIT(0))
+		info->seconds = (buffer->output[3] >> 0) & 0xFF;
+	if (units & BIT(1))
+		info->minutes = (buffer->output[3] >> 8) & 0xFF;
+	if (units & BIT(2))
+		info->hours = (buffer->output[3] >> 16) & 0xFF;
+	if (units & BIT(3))
+		info->days = (buffer->output[3] >> 24) & 0xFF;
+
+ out:
+	release_buffer();
+	return ret;
+}
+
+static unsigned int kbd_get_max_level(void)
+{
+	if (kbd_info.levels != 0)
+		return kbd_info.levels;
+	if (kbd_mode_levels_count > 0)
+		return kbd_mode_levels_count - 1;
+	return 0;
+}
+
+static int kbd_get_level(struct kbd_state *state)
+{
+	int i;
+
+	if (kbd_info.levels != 0)
+		return state->level;
+
+	if (kbd_mode_levels_count > 0) {
+		for (i = 0; i < kbd_mode_levels_count; ++i)
+			if (kbd_mode_levels[i] == state->mode_bit)
+				return i;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int kbd_set_level(struct kbd_state *state, u8 level)
+{
+	if (kbd_info.levels != 0) {
+		if (level != 0)
+			kbd_previous_level = level;
+		if (state->level == level)
+			return 0;
+		state->level = level;
+		if (level != 0 && state->mode_bit == KBD_MODE_BIT_OFF)
+			state->mode_bit = kbd_previous_mode_bit;
+		else if (level == 0 && state->mode_bit != KBD_MODE_BIT_OFF) {
+			kbd_previous_mode_bit = state->mode_bit;
+			state->mode_bit = KBD_MODE_BIT_OFF;
+		}
+		return 0;
+	}
+
+	if (kbd_mode_levels_count > 0 && level < kbd_mode_levels_count) {
+		if (level != 0)
+			kbd_previous_level = level;
+		state->mode_bit = kbd_mode_levels[level];
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int kbd_get_state(struct kbd_state *state)
+{
+	int ret;
+
+	get_buffer();
+
+	buffer->input[0] = 0x1;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+
+	if (ret) {
+		ret = dell_smi_error(ret);
+		goto out;
+	}
+
+	state->mode_bit = ffs(buffer->output[1] & 0xFFFF);
+	if (state->mode_bit != 0)
+		state->mode_bit--;
+
+	state->triggers = (buffer->output[1] >> 16) & 0xFF;
+	state->timeout_value = (buffer->output[1] >> 24) & 0x3F;
+	state->timeout_unit = (buffer->output[1] >> 30) & 0x3;
+	state->als_setting = buffer->output[2] & 0xFF;
+	state->als_value = (buffer->output[2] >> 8) & 0xFF;
+	state->level = (buffer->output[2] >> 16) & 0xFF;
+
+ out:
+	release_buffer();
+	return ret;
+}
+
+static int kbd_set_state(struct kbd_state *state)
+{
+	int ret;
+
+	get_buffer();
+	buffer->input[0] = 0x2;
+	buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
+	buffer->input[1] |= (state->triggers & 0xFF) << 16;
+	buffer->input[1] |= (state->timeout_value & 0x3F) << 24;
+	buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
+	buffer->input[2] = state->als_setting & 0xFF;
+	buffer->input[2] |= (state->level & 0xFF) << 16;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+	release_buffer();
+
+	return dell_smi_error(ret);
+}
+
+static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
+{
+	int ret;
+
+	ret = kbd_set_state(state);
+	if (ret == 0)
+		return 0;
+
+	/*
+	 * When setting the new state fails, try to restore the previous one.
+	 * This is needed on some machines where BIOS sets a default state when
+	 * setting a new state fails. This default state could be all off.
+	 */
+
+	if (kbd_set_state(old))
+		pr_err("Setting old previous keyboard state failed\n");
+
+	return ret;
+}
+
+static int kbd_set_token_bit(u8 bit)
+{
+	int id;
+	int ret;
+
+	if (bit >= ARRAY_SIZE(kbd_tokens))
+		return -EINVAL;
+
+	id = find_token_id(kbd_tokens[bit]);
+	if (id == -1)
+		return -EINVAL;
+
+	get_buffer();
+	buffer->input[0] = da_tokens[id].location;
+	buffer->input[1] = da_tokens[id].value;
+	dell_send_request(buffer, 1, 0);
+	ret = buffer->output[0];
+	release_buffer();
+
+	return dell_smi_error(ret);
+}
+
+static int kbd_get_token_bit(u8 bit)
+{
+	int id;
+	int ret;
+	int val;
+
+	if (bit >= ARRAY_SIZE(kbd_tokens))
+		return -EINVAL;
+
+	id = find_token_id(kbd_tokens[bit]);
+	if (id == -1)
+		return -EINVAL;
+
+	get_buffer();
+	buffer->input[0] = da_tokens[id].location;
+	dell_send_request(buffer, 0, 0);
+	ret = buffer->output[0];
+	val = buffer->output[1];
+	release_buffer();
+
+	if (ret)
+		return dell_smi_error(ret);
+
+	return (val == da_tokens[id].value);
+}
+
+static int kbd_get_first_active_token_bit(void)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i) {
+		ret = kbd_get_token_bit(i);
+		if (ret == 1)
+			return i;
+	}
+
+	return ret;
+}
+
+static int kbd_get_valid_token_counts(void)
+{
+	return hweight16(kbd_token_bits);
+}
+
+static inline int kbd_init_info(void)
+{
+	struct kbd_state state;
+	int ret;
+	int i;
+
+	ret = kbd_get_info(&kbd_info);
+	if (ret)
+		return ret;
+
+	kbd_get_state(&state);
+
+	/* NOTE: timeout value is stored in 6 bits so max value is 63 */
+	if (kbd_info.seconds > 63)
+		kbd_info.seconds = 63;
+	if (kbd_info.minutes > 63)
+		kbd_info.minutes = 63;
+	if (kbd_info.hours > 63)
+		kbd_info.hours = 63;
+	if (kbd_info.days > 63)
+		kbd_info.days = 63;
+
+	/* NOTE: On tested machines ON mode did not work and caused
+	 *       problems (turned backlight off) so do not use it
+	 */
+	kbd_info.modes &= ~BIT(KBD_MODE_BIT_ON);
+
+	kbd_previous_level = kbd_get_level(&state);
+	kbd_previous_mode_bit = state.mode_bit;
+
+	if (kbd_previous_level == 0 && kbd_get_max_level() != 0)
+		kbd_previous_level = 1;
+
+	if (kbd_previous_mode_bit == KBD_MODE_BIT_OFF) {
+		kbd_previous_mode_bit =
+			ffs(kbd_info.modes & ~BIT(KBD_MODE_BIT_OFF));
+		if (kbd_previous_mode_bit != 0)
+			kbd_previous_mode_bit--;
+	}
+
+	if (kbd_info.modes & (BIT(KBD_MODE_BIT_ALS) |
+			      BIT(KBD_MODE_BIT_TRIGGER_ALS)))
+		kbd_als_supported = true;
+
+	if (kbd_info.modes & (
+	    BIT(KBD_MODE_BIT_TRIGGER_ALS) | BIT(KBD_MODE_BIT_TRIGGER) |
+	    BIT(KBD_MODE_BIT_TRIGGER_25) | BIT(KBD_MODE_BIT_TRIGGER_50) |
+	    BIT(KBD_MODE_BIT_TRIGGER_75) | BIT(KBD_MODE_BIT_TRIGGER_100)
+	   ))
+		kbd_triggers_supported = true;
+
+	/* kbd_mode_levels[0] is reserved, see below */
+	for (i = 0; i < 16; ++i)
+		if (kbd_is_level_mode_bit(i) && (BIT(i) & kbd_info.modes))
+			kbd_mode_levels[1 + kbd_mode_levels_count++] = i;
+
+	/*
+	 * Find the first supported mode and assign to kbd_mode_levels[0].
+	 * This should be 0 (off), but we cannot depend on the BIOS to
+	 * support 0.
+	 */
+	if (kbd_mode_levels_count > 0) {
+		for (i = 0; i < 16; ++i) {
+			if (BIT(i) & kbd_info.modes) {
+				kbd_mode_levels[0] = i;
+				break;
+			}
+		}
+		kbd_mode_levels_count++;
+	}
+
+	return 0;
+
+}
+
+static inline void kbd_init_tokens(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i)
+		if (find_token_id(kbd_tokens[i]) != -1)
+			kbd_token_bits |= BIT(i);
+}
+
+static void kbd_init(void)
+{
+	int ret;
+
+	ret = kbd_init_info();
+	kbd_init_tokens();
+
+	if (kbd_token_bits != 0 || ret == 0)
+		kbd_led_present = true;
+}
+
+static ssize_t kbd_led_timeout_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct kbd_state new_state;
+	struct kbd_state state;
+	bool convert;
+	int value;
+	int ret;
+	char ch;
+	u8 unit;
+	int i;
+
+	ret = sscanf(buf, "%d %c", &value, &ch);
+	if (ret < 1)
+		return -EINVAL;
+	else if (ret == 1)
+		ch = 's';
+
+	if (value < 0)
+		return -EINVAL;
+
+	convert = false;
+
+	switch (ch) {
+	case 's':
+		if (value > kbd_info.seconds)
+			convert = true;
+		unit = KBD_TIMEOUT_SECONDS;
+		break;
+	case 'm':
+		if (value > kbd_info.minutes)
+			convert = true;
+		unit = KBD_TIMEOUT_MINUTES;
+		break;
+	case 'h':
+		if (value > kbd_info.hours)
+			convert = true;
+		unit = KBD_TIMEOUT_HOURS;
+		break;
+	case 'd':
+		if (value > kbd_info.days)
+			convert = true;
+		unit = KBD_TIMEOUT_DAYS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (quirks && quirks->needs_kbd_timeouts)
+		convert = true;
+
+	if (convert) {
+		/* Convert value from current units to seconds */
+		switch (unit) {
+		case KBD_TIMEOUT_DAYS:
+			value *= 24;
+		case KBD_TIMEOUT_HOURS:
+			value *= 60;
+		case KBD_TIMEOUT_MINUTES:
+			value *= 60;
+			unit = KBD_TIMEOUT_SECONDS;
+		}
+
+		if (quirks && quirks->needs_kbd_timeouts) {
+			for (i = 0; quirks->kbd_timeouts[i] != -1; i++) {
+				if (value <= quirks->kbd_timeouts[i]) {
+					value = quirks->kbd_timeouts[i];
+					break;
+				}
+			}
+		}
+
+		if (value <= kbd_info.seconds && kbd_info.seconds) {
+			unit = KBD_TIMEOUT_SECONDS;
+		} else if (value / 60 <= kbd_info.minutes && kbd_info.minutes) {
+			value /= 60;
+			unit = KBD_TIMEOUT_MINUTES;
+		} else if (value / (60 * 60) <= kbd_info.hours && kbd_info.hours) {
+			value /= (60 * 60);
+			unit = KBD_TIMEOUT_HOURS;
+		} else if (value / (60 * 60 * 24) <= kbd_info.days && kbd_info.days) {
+			value /= (60 * 60 * 24);
+			unit = KBD_TIMEOUT_DAYS;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	new_state = state;
+	new_state.timeout_value = value;
+	new_state.timeout_unit = unit;
+
+	ret = kbd_set_state_safe(&new_state, &state);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t kbd_led_timeout_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct kbd_state state;
+	int ret;
+	int len;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	len = sprintf(buf, "%d", state.timeout_value);
+
+	switch (state.timeout_unit) {
+	case KBD_TIMEOUT_SECONDS:
+		return len + sprintf(buf+len, "s\n");
+	case KBD_TIMEOUT_MINUTES:
+		return len + sprintf(buf+len, "m\n");
+	case KBD_TIMEOUT_HOURS:
+		return len + sprintf(buf+len, "h\n");
+	case KBD_TIMEOUT_DAYS:
+		return len + sprintf(buf+len, "d\n");
+	default:
+		return -EINVAL;
+	}
+
+	return len;
+}
+
+static DEVICE_ATTR(stop_timeout, S_IRUGO | S_IWUSR,
+		   kbd_led_timeout_show, kbd_led_timeout_store);
+
+static const char * const kbd_led_triggers[] = {
+	"keyboard",
+	"touchpad",
+	/*"trackstick"*/ NULL, /* NOTE: trackstick is just alias for touchpad */
+	"mouse",
+};
+
+static ssize_t kbd_led_triggers_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbd_state new_state;
+	struct kbd_state state;
+	bool triggers_enabled = false;
+	bool als_enabled = false;
+	bool disable_als = false;
+	bool enable_als = false;
+	int trigger_bit = -1;
+	char trigger[21];
+	int i, ret;
+
+	ret = sscanf(buf, "%20s", trigger);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (trigger[0] != '+' && trigger[0] != '-')
+		return -EINVAL;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	if (kbd_als_supported)
+		als_enabled = kbd_is_als_mode_bit(state.mode_bit);
+
+	if (kbd_triggers_supported)
+		triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+	if (kbd_als_supported) {
+		if (strcmp(trigger, "+als") == 0) {
+			if (als_enabled)
+				return count;
+			enable_als = true;
+		} else if (strcmp(trigger, "-als") == 0) {
+			if (!als_enabled)
+				return count;
+			disable_als = true;
+		}
+	}
+
+	if (enable_als || disable_als) {
+		new_state = state;
+		if (enable_als) {
+			if (triggers_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+			else
+				new_state.mode_bit = KBD_MODE_BIT_ALS;
+		} else {
+			if (triggers_enabled) {
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+				kbd_set_level(&new_state, kbd_previous_level);
+			} else {
+				new_state.mode_bit = KBD_MODE_BIT_ON;
+			}
+		}
+		if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+			return -EINVAL;
+		ret = kbd_set_state_safe(&new_state, &state);
+		if (ret)
+			return ret;
+		kbd_previous_mode_bit = new_state.mode_bit;
+		return count;
+	}
+
+	if (kbd_triggers_supported) {
+		for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+			if (!(kbd_info.triggers & BIT(i)))
+				continue;
+			if (!kbd_led_triggers[i])
+				continue;
+			if (strcmp(trigger+1, kbd_led_triggers[i]) != 0)
+				continue;
+			if (trigger[0] == '+' &&
+			    triggers_enabled && (state.triggers & BIT(i)))
+				return count;
+			if (trigger[0] == '-' &&
+			    (!triggers_enabled || !(state.triggers & BIT(i))))
+				return count;
+			trigger_bit = i;
+			break;
+		}
+	}
+
+	if (trigger_bit != -1) {
+		new_state = state;
+		if (trigger[0] == '+')
+			new_state.triggers |= BIT(trigger_bit);
+		else {
+			new_state.triggers &= ~BIT(trigger_bit);
+			/* NOTE: trackstick bit (2) must be disabled when
+			 *       disabling touchpad bit (1), otherwise touchpad
+			 *       bit (1) will not be disabled */
+			if (trigger_bit == 1)
+				new_state.triggers &= ~BIT(2);
+		}
+		if ((kbd_info.triggers & new_state.triggers) !=
+		    new_state.triggers)
+			return -EINVAL;
+		if (new_state.triggers && !triggers_enabled) {
+			if (als_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+			else {
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+				kbd_set_level(&new_state, kbd_previous_level);
+			}
+		} else if (new_state.triggers == 0) {
+			if (als_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_ALS;
+			else
+				kbd_set_level(&new_state, 0);
+		}
+		if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+			return -EINVAL;
+		ret = kbd_set_state_safe(&new_state, &state);
+		if (ret)
+			return ret;
+		if (new_state.mode_bit != KBD_MODE_BIT_OFF)
+			kbd_previous_mode_bit = new_state.mode_bit;
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t kbd_led_triggers_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct kbd_state state;
+	bool triggers_enabled;
+	int level, i, ret;
+	int len = 0;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	len = 0;
+
+	if (kbd_triggers_supported) {
+		triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+		level = kbd_get_level(&state);
+		for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+			if (!(kbd_info.triggers & BIT(i)))
+				continue;
+			if (!kbd_led_triggers[i])
+				continue;
+			if ((triggers_enabled || level <= 0) &&
+			    (state.triggers & BIT(i)))
+				buf[len++] = '+';
+			else
+				buf[len++] = '-';
+			len += sprintf(buf+len, "%s ", kbd_led_triggers[i]);
+		}
+	}
+
+	if (kbd_als_supported) {
+		if (kbd_is_als_mode_bit(state.mode_bit))
+			len += sprintf(buf+len, "+als ");
+		else
+			len += sprintf(buf+len, "-als ");
+	}
+
+	if (len)
+		buf[len - 1] = '\n';
+
+	return len;
+}
+
+static DEVICE_ATTR(start_triggers, S_IRUGO | S_IWUSR,
+		   kbd_led_triggers_show, kbd_led_triggers_store);
+
+static ssize_t kbd_led_als_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct kbd_state state;
+	struct kbd_state new_state;
+	u8 setting;
+	int ret;
+
+	ret = kstrtou8(buf, 10, &setting);
+	if (ret)
+		return ret;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	new_state = state;
+	new_state.als_setting = setting;
+
+	ret = kbd_set_state_safe(&new_state, &state);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t kbd_led_als_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbd_state state;
+	int ret;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	return sprintf(buf, "%d\n", state.als_setting);
+}
+
+static DEVICE_ATTR(als_setting, S_IRUGO | S_IWUSR,
+		   kbd_led_als_show, kbd_led_als_store);
+
+static struct attribute *kbd_led_attrs[] = {
+	&dev_attr_stop_timeout.attr,
+	&dev_attr_start_triggers.attr,
+	&dev_attr_als_setting.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(kbd_led);
+
+static enum led_brightness kbd_led_level_get(struct led_classdev *led_cdev)
+{
+	int ret;
+	u16 num;
+	struct kbd_state state;
+
+	if (kbd_get_max_level()) {
+		ret = kbd_get_state(&state);
+		if (ret)
+			return 0;
+		ret = kbd_get_level(&state);
+		if (ret < 0)
+			return 0;
+		return ret;
+	}
+
+	if (kbd_get_valid_token_counts()) {
+		ret = kbd_get_first_active_token_bit();
+		if (ret < 0)
+			return 0;
+		for (num = kbd_token_bits; num != 0 && ret > 0; --ret)
+			num &= num - 1; /* clear the first bit set */
+		if (num == 0)
+			return 0;
+		return ffs(num) - 1;
+	}
+
+	pr_warn("Keyboard brightness level control not supported\n");
+	return 0;
+}
+
+static void kbd_led_level_set(struct led_classdev *led_cdev,
+			      enum led_brightness value)
+{
+	struct kbd_state state;
+	struct kbd_state new_state;
+	u16 num;
+
+	if (kbd_get_max_level()) {
+		if (kbd_get_state(&state))
+			return;
+		new_state = state;
+		if (kbd_set_level(&new_state, value))
+			return;
+		kbd_set_state_safe(&new_state, &state);
+		return;
+	}
+
+	if (kbd_get_valid_token_counts()) {
+		for (num = kbd_token_bits; num != 0 && value > 0; --value)
+			num &= num - 1; /* clear the first bit set */
+		if (num == 0)
+			return;
+		kbd_set_token_bit(ffs(num) - 1);
+		return;
+	}
+
+	pr_warn("Keyboard brightness level control not supported\n");
+}
+
+static struct led_classdev kbd_led = {
+	.name           = "dell::kbd_backlight",
+	.brightness_set = kbd_led_level_set,
+	.brightness_get = kbd_led_level_get,
+	.groups         = kbd_led_groups,
+};
+
+static int __init kbd_led_init(struct device *dev)
+{
+	kbd_init();
+	if (!kbd_led_present)
+		return -ENODEV;
+	kbd_led.max_brightness = kbd_get_max_level();
+	if (!kbd_led.max_brightness) {
+		kbd_led.max_brightness = kbd_get_valid_token_counts();
+		if (kbd_led.max_brightness)
+			kbd_led.max_brightness--;
+	}
+	return led_classdev_register(dev, &kbd_led);
+}
+
+static void brightness_set_exit(struct led_classdev *led_cdev,
+				enum led_brightness value)
+{
+	/* Don't change backlight level on exit */
+};
+
+static void kbd_led_exit(void)
+{
+	if (!kbd_led_present)
+		return;
+	kbd_led.brightness_set = brightness_set_exit;
+	led_classdev_unregister(&kbd_led);
+}
+
 static int __init dell_init(void)
 {
 	int max_intensity = 0;
@@ -841,6 +1879,10 @@ static int __init dell_init(void)
 	if (quirks && quirks->touchpad_led)
 		touchpad_led_init(&platform_device->dev);
 
+	/* A failed registration must not be unregistered in kbd_led_exit() */
+	if (kbd_led_init(&platform_device->dev))
+		kbd_led_present = false;
+
 	dell_laptop_dir = debugfs_create_dir("dell_laptop", NULL);
 	if (dell_laptop_dir != NULL)
 		debugfs_create_file("rfkill", 0444, dell_laptop_dir, NULL,
@@ -908,6 +1948,7 @@ static void __exit dell_exit(void)
 	debugfs_remove_recursive(dell_laptop_dir);
 	if (quirks && quirks->touchpad_led)
 		touchpad_led_exit();
+	kbd_led_exit();
 	i8042_remove_filter(dell_laptop_i8042_filter);
 	cancel_delayed_work_sync(&dell_rfkill_work);
 	backlight_device_unregister(dell_backlight_device);
@@ -924,5 +1965,7 @@ module_init(dell_init);
 module_exit(dell_exit);
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
 MODULE_DESCRIPTION("Dell laptop driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c
index a653716..0aec4fd 100644
--- a/drivers/platform/x86/dell-smo8800.c
+++ b/drivers/platform/x86/dell-smo8800.c
@@ -1,5 +1,5 @@
 /*
- *  dell-smo8800.c - Dell Latitude ACPI SMO8800/SMO8810 freefall sensor driver
+ *  dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver
  *
  *  Copyright (C) 2012 Sonal Santan <sonal.santan@gmail.com>
  *  Copyright (C) 2014 Pali Rohár <pali.rohar@gmail.com>
@@ -209,7 +209,13 @@ static int smo8800_remove(struct acpi_device *device)
 
 static const struct acpi_device_id smo8800_ids[] = {
 	{ "SMO8800", 0 },
+	{ "SMO8801", 0 },
 	{ "SMO8810", 0 },
+	{ "SMO8811", 0 },
+	{ "SMO8820", 0 },
+	{ "SMO8821", 0 },
+	{ "SMO8830", 0 },
+	{ "SMO8831", 0 },
 	{ "", 0 },
 };
 
@@ -228,6 +234,6 @@ static struct acpi_driver smo8800_driver = {
 
 module_acpi_driver(smo8800_driver);
 
-MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO8800/SMO8810)");
+MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO88XX)");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sonal Santan, Pali Rohár");
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 25721bf..6512a06 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -65,10 +65,8 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = {
 	/* Battery health status button */
 	{ KE_KEY, 0xe007, { KEY_BATTERY } },
 
-	/* This is actually for all radios. Although physically a
-	 * switch, the notification does not provide an indication of
-	 * state and so it should be reported as a key */
-	{ KE_KEY, 0xe008, { KEY_WLAN } },
+	/* Radio devices state change */
+	{ KE_IGNORE, 0xe008, { KEY_RFKILL } },
 
 	/* The next device is at offset 6, the active devices are at
 	   offset 8 and the attached devices at offset 10 */
@@ -145,57 +143,154 @@ static const u16 bios_to_linux_keycode[256] __initconst = {
 
 static struct input_dev *dell_wmi_input_dev;
 
+static void dell_wmi_process_key(int reported_key)
+{
+	const struct key_entry *key;
+
+	key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
+						reported_key);
+	if (!key) {
+		pr_info("Unknown key %x pressed\n", reported_key);
+		return;
+	}
+
+	pr_debug("Key %x pressed\n", reported_key);
+
+	/* Don't report brightness notifications that will also come via ACPI */
+	if ((key->keycode == KEY_BRIGHTNESSUP ||
+	     key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video)
+		return;
+
+	sparse_keymap_report_entry(dell_wmi_input_dev, key, 1, true);
+}
+
 static void dell_wmi_notify(u32 value, void *context)
 {
 	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	acpi_status status;
+	acpi_size buffer_size;
+	u16 *buffer_entry, *buffer_end;
+	int len, i;
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		pr_info("bad event status 0x%x\n", status);
+		pr_warn("bad event status 0x%x\n", status);
 		return;
 	}
 
 	obj = (union acpi_object *)response.pointer;
+	if (!obj) {
+		pr_warn("no response\n");
+		return;
+	}
 
-	if (obj && obj->type == ACPI_TYPE_BUFFER) {
-		const struct key_entry *key;
-		int reported_key;
-		u16 *buffer_entry = (u16 *)obj->buffer.pointer;
-		int buffer_size = obj->buffer.length/2;
-
-		if (buffer_size >= 2 && dell_new_hk_type && buffer_entry[1] != 0x10) {
-			pr_info("Received unknown WMI event (0x%x)\n",
-				buffer_entry[1]);
-			kfree(obj);
-			return;
-		}
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		pr_warn("bad response type %x\n", obj->type);
+		kfree(obj);
+		return;
+	}
 
-		if (buffer_size >= 3 && (dell_new_hk_type || buffer_entry[1] == 0x0))
-			reported_key = (int)buffer_entry[2];
+	pr_debug("Received WMI event (%*ph)\n",
+		obj->buffer.length, obj->buffer.pointer);
+
+	buffer_entry = (u16 *)obj->buffer.pointer;
+	buffer_size = obj->buffer.length/2;
+
+	if (!dell_new_hk_type) {
+		if (buffer_size >= 3 && buffer_entry[1] == 0x0)
+			dell_wmi_process_key(buffer_entry[2]);
 		else if (buffer_size >= 2)
-			reported_key = (int)buffer_entry[1] & 0xffff;
-		else {
+			dell_wmi_process_key(buffer_entry[1]);
+		else
 			pr_info("Received unknown WMI event\n");
-			kfree(obj);
-			return;
+		kfree(obj);
+		return;
+	}
+
+	buffer_end = buffer_entry + buffer_size;
+
+	while (buffer_entry < buffer_end) {
+
+		len = buffer_entry[0];
+		if (len == 0)
+			break;
+
+		len++;
+
+		if (buffer_entry + len > buffer_end) {
+			pr_warn("Invalid length of WMI event\n");
+			break;
 		}
 
-		key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
-							reported_key);
-		if (!key) {
-			pr_info("Unknown key %x pressed\n", reported_key);
-		} else if ((key->keycode == KEY_BRIGHTNESSUP ||
-			    key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video) {
-			/* Don't report brightness notifications that will also
-			 * come via ACPI */
-			;
-		} else {
-			sparse_keymap_report_entry(dell_wmi_input_dev, key,
-						   1, true);
+		pr_debug("Process buffer (%*ph)\n", len*2, buffer_entry);
+
+		switch (buffer_entry[1]) {
+		case 0x00:
+			for (i = 2; i < len; ++i) {
+				switch (buffer_entry[i]) {
+				case 0xe043:
+					/* NIC Link is Up */
+					pr_debug("NIC Link is Up\n");
+					break;
+				case 0xe044:
+					/* NIC Link is Down */
+					pr_debug("NIC Link is Down\n");
+					break;
+				case 0xe045:
+					/* Unknown event but defined in DSDT */
+				default:
+					/* Unknown event */
+					pr_info("Unknown WMI event type 0x00: "
+						"0x%x\n", (int)buffer_entry[i]);
+					break;
+				}
+			}
+			break;
+		case 0x10:
+			/* Keys pressed */
+			for (i = 2; i < len; ++i)
+				dell_wmi_process_key(buffer_entry[i]);
+			break;
+		case 0x11:
+			for (i = 2; i < len; ++i) {
+				switch (buffer_entry[i]) {
+				case 0xfff0:
+					/* Battery unplugged */
+					pr_debug("Battery unplugged\n");
+					break;
+				case 0xfff1:
+					/* Battery inserted */
+					pr_debug("Battery inserted\n");
+					break;
+				case 0x01e1:
+				case 0x02ea:
+				case 0x02eb:
+				case 0x02ec:
+				case 0x02f6:
+					/* Keyboard backlight level changed */
+					pr_debug("Keyboard backlight level "
+						 "changed\n");
+					break;
+				default:
+					/* Unknown event */
+					pr_info("Unknown WMI event type 0x11: "
+						"0x%x\n", (int)buffer_entry[i]);
+					break;
+				}
+			}
+			break;
+		default:
+			/* Unknown event */
+			pr_info("Unknown WMI event type 0x%x\n",
+				(int)buffer_entry[1]);
+			break;
 		}
+
+		buffer_entry += len;
+
 	}
+
 	kfree(obj);
 }
 
@@ -213,11 +308,16 @@ static const struct key_entry * __init dell_wmi_prepare_new_keymap(void)
 	for (i = 0; i < hotkey_num; i++) {
 		const struct dell_bios_keymap_entry *bios_entry =
 					&dell_bios_hotkey_table->keymap[i];
-		keymap[i].type = KE_KEY;
-		keymap[i].code = bios_entry->scancode;
-		keymap[i].keycode = bios_entry->keycode < 256 ?
+		u16 keycode = bios_entry->keycode < 256 ?
 				    bios_to_linux_keycode[bios_entry->keycode] :
 				    KEY_RESERVED;
+
+		if (keycode == KEY_KBDILLUMTOGGLE)
+			keymap[i].type = KE_IGNORE;
+		else
+			keymap[i].type = KE_KEY;
+		keymap[i].code = bios_entry->scancode;
+		keymap[i].keycode = keycode;
 	}
 
 	keymap[hotkey_num].type = KE_END;
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 5a54d35..844c209 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -417,8 +417,7 @@ static ssize_t cpufv_disabled_store(struct device *dev,
 	switch (value) {
 	case 0:
 		if (eeepc->cpufv_disabled)
-			pr_warn("cpufv enabled (not officially supported "
-				"on this model)\n");
+			pr_warn("cpufv enabled (not officially supported on this model)\n");
 		eeepc->cpufv_disabled = false;
 		return count;
 	case 1:
@@ -580,59 +579,58 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle)
 	mutex_lock(&eeepc->hotplug_lock);
 	pci_lock_rescan_remove();
 
-	if (eeepc->hotplug_slot) {
-		port = acpi_get_pci_dev(handle);
-		if (!port) {
-			pr_warning("Unable to find port\n");
-			goto out_unlock;
-		}
+	if (!eeepc->hotplug_slot)
+		goto out_unlock;
 
-		bus = port->subordinate;
+	port = acpi_get_pci_dev(handle);
+	if (!port) {
+		pr_warning("Unable to find port\n");
+		goto out_unlock;
+	}
 
-		if (!bus) {
-			pr_warn("Unable to find PCI bus 1?\n");
-			goto out_put_dev;
-		}
+	bus = port->subordinate;
 
-		if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
-			pr_err("Unable to read PCI config space?\n");
-			goto out_put_dev;
-		}
+	if (!bus) {
+		pr_warn("Unable to find PCI bus 1?\n");
+		goto out_put_dev;
+	}
+
+	if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
+		pr_err("Unable to read PCI config space?\n");
+		goto out_put_dev;
+	}
 
-		absent = (l == 0xffffffff);
+	absent = (l == 0xffffffff);
 
-		if (blocked != absent) {
-			pr_warn("BIOS says wireless lan is %s, "
-				"but the pci device is %s\n",
-				blocked ? "blocked" : "unblocked",
-				absent ? "absent" : "present");
-			pr_warn("skipped wireless hotplug as probably "
-				"inappropriate for this model\n");
+	if (blocked != absent) {
+		pr_warn("BIOS says wireless lan is %s, but the pci device is %s\n",
+			blocked ? "blocked" : "unblocked",
+			absent ? "absent" : "present");
+		pr_warn("skipped wireless hotplug as probably inappropriate for this model\n");
+		goto out_put_dev;
+	}
+
+	if (!blocked) {
+		dev = pci_get_slot(bus, 0);
+		if (dev) {
+			/* Device already present */
+			pci_dev_put(dev);
 			goto out_put_dev;
 		}
-
-		if (!blocked) {
-			dev = pci_get_slot(bus, 0);
-			if (dev) {
-				/* Device already present */
-				pci_dev_put(dev);
-				goto out_put_dev;
-			}
-			dev = pci_scan_single_device(bus, 0);
-			if (dev) {
-				pci_bus_assign_resources(bus);
-				pci_bus_add_device(dev);
-			}
-		} else {
-			dev = pci_get_slot(bus, 0);
-			if (dev) {
-				pci_stop_and_remove_bus_device(dev);
-				pci_dev_put(dev);
-			}
+		dev = pci_scan_single_device(bus, 0);
+		if (dev) {
+			pci_bus_assign_resources(bus);
+			pci_bus_add_device(dev);
+		}
+	} else {
+		dev = pci_get_slot(bus, 0);
+		if (dev) {
+			pci_stop_and_remove_bus_device(dev);
+			pci_dev_put(dev);
 		}
-out_put_dev:
-		pci_dev_put(port);
 	}
+out_put_dev:
+	pci_dev_put(port);
 
 out_unlock:
 	pci_unlock_rescan_remove();
@@ -821,11 +819,15 @@ static int eeepc_new_rfkill(struct eeepc_laptop *eeepc,
 	return 0;
 }
 
+static char EEEPC_RFKILL_NODE_1[] = "\\_SB.PCI0.P0P5";
+static char EEEPC_RFKILL_NODE_2[] = "\\_SB.PCI0.P0P6";
+static char EEEPC_RFKILL_NODE_3[] = "\\_SB.PCI0.P0P7";
+
 static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 {
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5");
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6");
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7");
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_1);
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_2);
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_3);
 	if (eeepc->wlan_rfkill) {
 		rfkill_unregister(eeepc->wlan_rfkill);
 		rfkill_destroy(eeepc->wlan_rfkill);
@@ -897,9 +899,9 @@ static int eeepc_rfkill_init(struct eeepc_laptop *eeepc)
 	if (result == -EBUSY)
 		result = 0;
 
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5");
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6");
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7");
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_1);
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_2);
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_3);
 
 exit:
 	if (result && result != -ENODEV)
@@ -915,7 +917,7 @@ static int eeepc_hotk_thaw(struct device *device)
 	struct eeepc_laptop *eeepc = dev_get_drvdata(device);
 
 	if (eeepc->wlan_rfkill) {
-		bool wlan;
+		int wlan;
 
 		/*
 		 * Work around bios bug - acpi _PTS turns off the wireless led
@@ -923,7 +925,8 @@ static int eeepc_hotk_thaw(struct device *device)
 		 * we should kick it ourselves in case hibernation is aborted.
 		 */
 		wlan = get_acpi(eeepc, CM_ASL_WLAN);
-		set_acpi(eeepc, CM_ASL_WLAN, wlan);
+		if (wlan >= 0)
+			set_acpi(eeepc, CM_ASL_WLAN, wlan);
 	}
 
 	return 0;
@@ -935,9 +938,9 @@ static int eeepc_hotk_restore(struct device *device)
 
 	/* Refresh both wlan rfkill state and pci hotplug */
 	if (eeepc->wlan_rfkill) {
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P5");
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P6");
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P7");
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_1);
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_2);
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_3);
 	}
 
 	if (eeepc->bluetooth_rfkill)
@@ -977,18 +980,28 @@ static struct platform_driver platform_driver = {
 #define EEEPC_EC_SFB0      0xD0
 #define EEEPC_EC_FAN_CTRL  (EEEPC_EC_SFB0 + 3) /* Byte containing SF25  */
 
+static inline int eeepc_pwm_to_lmsensors(int value)
+{
+	return value * 255 / 100;
+}
+
+static inline int eeepc_lmsensors_to_pwm(int value)
+{
+	value = clamp_val(value, 0, 255);
+	return value * 100 / 255;
+}
+
 static int eeepc_get_fan_pwm(void)
 {
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_PWM, &value);
-	return value * 255 / 100;
+	return eeepc_pwm_to_lmsensors(value);
 }
 
 static void eeepc_set_fan_pwm(int value)
 {
-	value = clamp_val(value, 0, 255);
-	value = value * 100 / 255;
+	value = eeepc_lmsensors_to_pwm(value);
 	ec_write(EEEPC_EC_FAN_PWM, value);
 }
 
@@ -1002,15 +1015,19 @@ static int eeepc_get_fan_rpm(void)
 	return high << 8 | low;
 }
 
+#define EEEPC_EC_FAN_CTRL_BIT	0x02
+#define EEEPC_FAN_CTRL_MANUAL	1
+#define EEEPC_FAN_CTRL_AUTO	2
+
 static int eeepc_get_fan_ctrl(void)
 {
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_CTRL, &value);
-	if (value & 0x02)
-		return 1; /* manual */
+	if (value & EEEPC_EC_FAN_CTRL_BIT)
+		return EEEPC_FAN_CTRL_MANUAL;
 	else
-		return 2; /* automatic */
+		return EEEPC_FAN_CTRL_AUTO;
 }
 
 static void eeepc_set_fan_ctrl(int manual)
@@ -1018,10 +1035,10 @@ static void eeepc_set_fan_ctrl(int manual)
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_CTRL, &value);
-	if (manual == 1)
-		value |= 0x02;
+	if (manual == EEEPC_FAN_CTRL_MANUAL)
+		value |= EEEPC_EC_FAN_CTRL_BIT;
 	else
-		value &= ~0x02;
+		value &= ~EEEPC_EC_FAN_CTRL_BIT;
 	ec_write(EEEPC_EC_FAN_CTRL, value);
 }
 
@@ -1156,8 +1173,7 @@ static int eeepc_backlight_init(struct eeepc_laptop *eeepc)
 
 static void eeepc_backlight_exit(struct eeepc_laptop *eeepc)
 {
-	if (eeepc->backlight_device)
-		backlight_device_unregister(eeepc->backlight_device);
+	backlight_device_unregister(eeepc->backlight_device);
 	eeepc->backlight_device = NULL;
 }
 
@@ -1216,7 +1232,7 @@ static void eeepc_input_exit(struct eeepc_laptop *eeepc)
 static void eeepc_input_notify(struct eeepc_laptop *eeepc, int event)
 {
 	if (!eeepc->inputdev)
-		return ;
+		return;
 	if (!sparse_keymap_report_event(eeepc->inputdev, event, 1, true))
 		pr_info("Unknown key %x pressed\n", event);
 }
@@ -1224,6 +1240,7 @@ static void eeepc_input_notify(struct eeepc_laptop *eeepc, int event)
 static void eeepc_acpi_notify(struct acpi_device *device, u32 event)
 {
 	struct eeepc_laptop *eeepc = acpi_driver_data(device);
+	int old_brightness, new_brightness;
 	u16 count;
 
 	if (event > ACPI_MAX_SYS_NOTIFY)
@@ -1234,34 +1251,32 @@ static void eeepc_acpi_notify(struct acpi_device *device, u32 event)
 					count);
 
 	/* Brightness events are special */
-	if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) {
-
-		/* Ignore them completely if the acpi video driver is used */
-		if (eeepc->backlight_device != NULL) {
-			int old_brightness, new_brightness;
-
-			/* Update the backlight device. */
-			old_brightness = eeepc_backlight_notify(eeepc);
-
-			/* Convert event to keypress (obsolescent hack) */
-			new_brightness = event - NOTIFY_BRN_MIN;
-
-			if (new_brightness < old_brightness) {
-				event = NOTIFY_BRN_MIN; /* brightness down */
-			} else if (new_brightness > old_brightness) {
-				event = NOTIFY_BRN_MAX; /* brightness up */
-			} else {
-				/*
-				* no change in brightness - already at min/max,
-				* event will be desired value (or else ignored)
-				*/
-			}
-			eeepc_input_notify(eeepc, event);
-		}
-	} else {
-		/* Everything else is a bona-fide keypress event */
+	if (event < NOTIFY_BRN_MIN || event > NOTIFY_BRN_MAX) {
 		eeepc_input_notify(eeepc, event);
+		return;
+	}
+
+	/* Ignore them completely if the acpi video driver is used */
+	if (!eeepc->backlight_device)
+		return;
+
+	/* Update the backlight device. */
+	old_brightness = eeepc_backlight_notify(eeepc);
+
+	/* Convert event to keypress (obsolescent hack) */
+	new_brightness = event - NOTIFY_BRN_MIN;
+
+	if (new_brightness < old_brightness) {
+		event = NOTIFY_BRN_MIN; /* brightness down */
+	} else if (new_brightness > old_brightness) {
+		event = NOTIFY_BRN_MAX; /* brightness up */
+	} else {
+		/*
+		 * no change in brightness - already at min/max,
+		 * event will be desired value (or else ignored)
+		 */
 	}
+	eeepc_input_notify(eeepc, event);
 }
 
 static void eeepc_dmi_check(struct eeepc_laptop *eeepc)
@@ -1293,8 +1308,8 @@ static void eeepc_dmi_check(struct eeepc_laptop *eeepc)
 	 */
 	if (strcmp(model, "701") == 0 || strcmp(model, "702") == 0) {
 		eeepc->cpufv_disabled = true;
-		pr_info("model %s does not officially support setting cpu "
-			"speed\n", model);
+		pr_info("model %s does not officially support setting cpu speed\n",
+			model);
 		pr_info("cpufv disabled to avoid instability\n");
 	}
 
@@ -1320,8 +1335,8 @@ static void cmsg_quirk(struct eeepc_laptop *eeepc, int cm, const char *name)
 	   Check if cm_getv[cm] works and, if yes, assume cm should be set. */
 	if (!(eeepc->cm_supported & (1 << cm))
 	    && !read_acpi_int(eeepc->handle, cm_getv[cm], &dummy)) {
-		pr_info("%s (%x) not reported by BIOS,"
-			" enabling anyway\n", name, 1 << cm);
+		pr_info("%s (%x) not reported by BIOS, enabling anyway\n",
+			name, 1 << cm);
 		eeepc->cm_supported |= 1 << cm;
 	}
 }
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index be55bd7..7c21c1c 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1153,8 +1153,7 @@ fail_hotkey1:
 fail_hotkey:
 	platform_driver_unregister(&fujitsupf_driver);
 fail_backlight:
-	if (fujitsu->bl_device)
-		backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu->bl_device);
 fail_sysfs_group:
 	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);
@@ -1178,8 +1177,7 @@ static void __exit fujitsu_cleanup(void)
 
 	platform_driver_unregister(&fujitsupf_driver);
 
-	if (fujitsu->bl_device)
-		backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu->bl_device);
 
 	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);
diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c
index 415348f..4e4cc8b 100644
--- a/drivers/platform/x86/hp-wireless.c
+++ b/drivers/platform/x86/hp-wireless.c
@@ -85,6 +85,9 @@ static int hpwl_add(struct acpi_device *device)
 	int err;
 
 	err = hp_wireless_input_setup();
+	if (err)
+		pr_err("Failed to setup hp wireless hotkeys\n");
+
 	return err;
 }
 
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 6bec745..10ce6cb 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -246,6 +246,7 @@ static const struct dmi_system_id lis3lv02d_dmi_ids[] = {
 	AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap),
 	AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap),
 	AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted),
+	AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted),
 	{ NULL, }
 /* Laptop models without axis info (yet):
  * "NC6910" "HP Compaq 6910"
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index c860eac..b3d419a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -729,8 +729,7 @@ static int ideapad_backlight_init(struct ideapad_private *priv)
 
 static void ideapad_backlight_exit(struct ideapad_private *priv)
 {
-	if (priv->blightdev)
-		backlight_device_unregister(priv->blightdev);
+	backlight_device_unregister(priv->blightdev);
 	priv->blightdev = NULL;
 }
 
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index ecd36e3..e2065e0 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -33,7 +33,7 @@
  * performance by allocating more power or thermal budget to the CPU or GPU
  * based on available headroom and activity.
  *
- * The basic algorithm is driven by a 5s moving average of tempurature.  If
+ * The basic algorithm is driven by a 5s moving average of temperature.  If
  * thermal headroom is available, the CPU and/or GPU power clamps may be
  * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
  * we scale back the clamp.  Aside from trigger events (when we're critically
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
index 0afaaef..a4a4258 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c
@@ -271,8 +271,7 @@ static int oaktrail_backlight_init(void)
 
 static void oaktrail_backlight_exit(void)
 {
-	if (oaktrail_bl_device)
-		backlight_device_unregister(oaktrail_bl_device);
+	backlight_device_unregister(oaktrail_bl_device);
 }
 
 static int oaktrail_probe(struct platform_device *pdev)
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index a3f06cb..0859877 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -820,7 +820,7 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
 {
 	static bool extended;
 
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	/* 0x54 wwan, 0x62 bluetooth, 0x76 wlan, 0xE4 touchpad toggle*/
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index 70222f2..6d2bac0 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -354,8 +354,7 @@ static void __exit msi_wmi_exit(void)
 		sparse_keymap_free(msi_wmi_input_dev);
 		input_unregister_device(msi_wmi_input_dev);
 	}
-	if (backlight)
-		backlight_device_unregister(backlight);
+	backlight_device_unregister(backlight);
 }
 
 module_init(msi_wmi_init);
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a1a0fd7..6dd1c0e 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -3140,8 +3140,7 @@ static void sony_nc_backlight_setup(void)
 
 static void sony_nc_backlight_cleanup(void)
 {
-	if (sony_bl_props.dev)
-		backlight_device_unregister(sony_bl_props.dev);
+	backlight_device_unregister(sony_bl_props.dev);
 }
 
 static int sony_nc_add(struct acpi_device *device)
@@ -3716,8 +3715,7 @@ static void sony_pic_detect_device_type(struct sony_pic_dev *dev)
 	dev->event_types = type2_events;
 
 out:
-	if (pcidev)
-		pci_dev_put(pcidev);
+	pci_dev_put(pcidev);
 
 	pr_info("detected Type%d model\n",
 		dev->model == SONYPI_DEVICE_TYPE1 ? 1 :
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 6414cfe..c3d11fa 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -6557,6 +6557,17 @@ static struct ibm_struct brightness_driver_data = {
  * bits 3-0 (volume).  Other bits in NVRAM may have other functions,
  * such as bit 7 which is used to detect repeated presses of MUTE,
  * and we leave them unchanged.
+ *
+ * On newer Lenovo ThinkPads, the EC can automatically change the volume
+ * in response to user input.  Unfortunately, this rarely works well.
+ * The laptop changes the state of its internal MUTE gate and, on some
+ * models, sends KEY_MUTE, causing any user code that responds to the
+ * mute button to get confused.  The hardware MUTE gate is also
+ * unnecessary, since user code can handle the mute button without
+ * kernel or EC help.
+ *
+ * To avoid confusing userspace, we simply disable all EC-based mute
+ * and volume controls when possible.
  */
 
 #ifdef CONFIG_THINKPAD_ACPI_ALSA_SUPPORT
@@ -6611,11 +6622,21 @@ enum tpacpi_volume_capabilities {
 	TPACPI_VOL_CAP_MAX
 };
 
+enum tpacpi_mute_btn_mode {
+	TP_EC_MUTE_BTN_LATCH  = 0,	/* Mute mutes; up/down unmutes */
+	/* We don't know what mode 1 is. */
+	TP_EC_MUTE_BTN_NONE   = 2,	/* Mute and up/down are just keys */
+	TP_EC_MUTE_BTN_TOGGLE = 3,	/* Mute toggles; up/down unmutes */
+};
+
 static enum tpacpi_volume_access_mode volume_mode =
 	TPACPI_VOL_MODE_MAX;
 
 static enum tpacpi_volume_capabilities volume_capabilities;
 static bool volume_control_allowed;
+static bool software_mute_requested = true;
+static bool software_mute_active;
+static int software_mute_orig_mode;
 
 /*
  * Used to syncronize writers to TP_EC_AUDIO and
@@ -6633,6 +6654,8 @@ static void tpacpi_volume_checkpoint_nvram(void)
 		return;
 	if (!volume_control_allowed)
 		return;
+	if (software_mute_active)
+		return;
 
 	vdbg_printk(TPACPI_DBG_MIXER,
 		"trying to checkpoint mixer state to NVRAM...\n");
@@ -6694,6 +6717,12 @@ static int volume_set_status_ec(const u8 status)
 
 	dbg_printk(TPACPI_DBG_MIXER, "set EC mixer to 0x%02x\n", status);
 
+	/*
+	 * On X200s, and possibly on others, it can take a while for
+	 * reads to become correct.
+	 */
+	msleep(1);
+
 	return 0;
 }
 
@@ -6776,6 +6805,57 @@ unlock:
 	return rc;
 }
 
+static int volume_set_software_mute(bool startup)
+{
+	int result;
+
+	if (!tpacpi_is_lenovo())
+		return -ENODEV;
+
+	if (startup) {
+		if (!acpi_evalf(ec_handle, &software_mute_orig_mode,
+				"HAUM", "qd"))
+			return -EIO;
+
+		dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_MIXER,
+			    "Initial HAUM setting was %d\n",
+			    software_mute_orig_mode);
+	}
+
+	if (!acpi_evalf(ec_handle, &result, "SAUM", "qdd",
+			(int)TP_EC_MUTE_BTN_NONE))
+		return -EIO;
+
+	if (result != TP_EC_MUTE_BTN_NONE)
+		pr_warn("Unexpected SAUM result %d\n",
+			result);
+
+	/*
+	 * In software mute mode, the standard codec controls take
+	 * precedence, so we unmute the ThinkPad HW switch at
+	 * startup.  Just in case there are SAUM-capable ThinkPads
+	 * with level controls, set max HW volume as well.
+	 */
+	if (tp_features.mixer_no_level_control)
+		result = volume_set_mute(false);
+	else
+		result = volume_set_status(TP_EC_VOLUME_MAX);
+
+	if (result != 0)
+		pr_warn("Failed to unmute the HW mute switch\n");
+
+	return 0;
+}
+
+static void volume_exit_software_mute(void)
+{
+	int r;
+
+	if (!acpi_evalf(ec_handle, &r, "SAUM", "qdd", software_mute_orig_mode)
+	    || r != software_mute_orig_mode)
+		pr_warn("Failed to restore mute mode\n");
+}
+
 static int volume_alsa_set_volume(const u8 vol)
 {
 	dbg_printk(TPACPI_DBG_MIXER,
@@ -6883,7 +6963,12 @@ static void volume_suspend(void)
 
 static void volume_resume(void)
 {
-	volume_alsa_notify_change();
+	if (software_mute_active) {
+		if (volume_set_software_mute(false) < 0)
+			pr_warn("Failed to restore software mute\n");
+	} else {
+		volume_alsa_notify_change();
+	}
 }
 
 static void volume_shutdown(void)
@@ -6899,6 +6984,9 @@ static void volume_exit(void)
 	}
 
 	tpacpi_volume_checkpoint_nvram();
+
+	if (software_mute_active)
+		volume_exit_software_mute();
 }
 
 static int __init volume_create_alsa_mixer(void)
@@ -7083,16 +7171,20 @@ static int __init volume_init(struct ibm_init_struct *iibm)
 			"mute is supported, volume control is %s\n",
 			str_supported(!tp_features.mixer_no_level_control));
 
-	rc = volume_create_alsa_mixer();
-	if (rc) {
-		pr_err("Could not create the ALSA mixer interface\n");
-		return rc;
-	}
+	if (software_mute_requested && volume_set_software_mute(true) == 0) {
+		software_mute_active = true;
+	} else {
+		rc = volume_create_alsa_mixer();
+		if (rc) {
+			pr_err("Could not create the ALSA mixer interface\n");
+			return rc;
+		}
 
-	pr_info("Console audio control enabled, mode: %s\n",
-		(volume_control_allowed) ?
-			"override (read/write)" :
-			"monitor (read only)");
+		pr_info("Console audio control enabled, mode: %s\n",
+			(volume_control_allowed) ?
+				"override (read/write)" :
+				"monitor (read only)");
+	}
 
 	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_MIXER,
 		"registering volume hotkeys as change notification\n");
@@ -9089,6 +9181,10 @@ MODULE_PARM_DESC(volume_control,
 		 "Enables software override for the console audio "
 		 "control when true");
 
+module_param_named(software_mute, software_mute_requested, bool, 0444);
+MODULE_PARM_DESC(software_mute,
+		 "Request full software mute control");
+
 /* ALSA module API parameters */
 module_param_named(index, alsa_index, int, 0444);
 MODULE_PARM_DESC(index, "ALSA index for the ACPI EC Mixer");
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index ab6151f..fc34a71 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -186,6 +186,7 @@ static struct toshiba_acpi_dev *toshiba_acpi;
 
 static const struct acpi_device_id toshiba_device_ids[] = {
 	{"TOS6200", 0},
+	{"TOS6207", 0},
 	{"TOS6208", 0},
 	{"TOS1900", 0},
 	{"", 0},
@@ -928,9 +929,7 @@ static int lcd_proc_open(struct inode *inode, struct file *file)
 
 static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value)
 {
-	u32 in[TCI_WORDS] = { HCI_SET, HCI_LCD_BRIGHTNESS, 0, 0, 0, 0 };
-	u32 out[TCI_WORDS];
-	acpi_status status;
+	u32 hci_result;
 
 	if (dev->tr_backlight_supported) {
 		bool enable = !value;
@@ -941,20 +940,9 @@ static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value)
 			value--;
 	}
 
-	in[2] = value << HCI_LCD_BRIGHTNESS_SHIFT;
-	status = tci_raw(dev, in, out);
-	if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-		pr_err("ACPI call to set brightness failed");
-		return -EIO;
-	}
-	/* Extra check for "incomplete" backlight method, where the AML code
-	 * doesn't check for HCI_SET or HCI_GET and returns TOS_SUCCESS,
-	 * the actual brightness, and in some cases the max brightness.
-	 */
-	if (out[2] > 0  || out[3] == 0xE000)
-		return -ENODEV;
-
-	return out[0] == TOS_SUCCESS ? 0 : -EIO;
+	value = value << HCI_LCD_BRIGHTNESS_SHIFT;
+	hci_result = hci_write1(dev, HCI_LCD_BRIGHTNESS, value);
+	return hci_result == TOS_SUCCESS ? 0 : -EIO;
 }
 
 static int set_lcd_status(struct backlight_device *bd)
@@ -1406,12 +1394,6 @@ static ssize_t toshiba_kbd_bl_mode_store(struct device *dev,
 		if (ret)
 			return ret;
 
-		/* Update sysfs entries on successful mode change*/
-		ret = sysfs_update_group(&toshiba->acpi_dev->dev.kobj,
-					 &toshiba_attr_group);
-		if (ret)
-			return ret;
-
 		toshiba->kbd_mode = mode;
 	}
 
@@ -1586,10 +1568,32 @@ static umode_t toshiba_sysfs_is_visible(struct kobject *kobj,
 	return exists ? attr->mode : 0;
 }
 
+/*
+ * Hotkeys
+ */
+static int toshiba_acpi_enable_hotkeys(struct toshiba_acpi_dev *dev)
+{
+	acpi_handle handle = dev->acpi_dev->handle;
+
+	/* The ACPI "ENAB" method has to succeed before HCI is touched. */
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "ENAB", NULL, NULL)))
+		return -ENODEV;
+
+	/* Any HCI status other than the two below counts as success. */
+	switch (hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE)) {
+	case TOS_FAILURE:
+		return -EIO;
+	case TOS_NOT_SUPPORTED:
+		return -ENODEV;
+	default:
+		return 0;
+	}
+}
+
 static bool toshiba_acpi_i8042_filter(unsigned char data, unsigned char str,
 				      struct serio *port)
 {
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	if (unlikely(data == 0xe0))
@@ -1648,9 +1652,45 @@ static void toshiba_acpi_report_hotkey(struct toshiba_acpi_dev *dev,
 		pr_info("Unknown key %x\n", scancode);
 }
 
+static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev)
+{
+	u32 hci_result, value;
+	int retries = 3;
+	int scancode;
+
+	if (dev->info_supported) {
+		scancode = toshiba_acpi_query_hotkey(dev);
+		if (scancode < 0)
+			pr_err("Failed to query hotkey event\n");
+		else if (scancode != 0)
+			toshiba_acpi_report_hotkey(dev, scancode);
+	} else if (dev->system_event_supported) {
+		do {
+			hci_result = hci_read1(dev, HCI_SYSTEM_EVENT, &value);
+			switch (hci_result) {
+			case TOS_SUCCESS:
+				toshiba_acpi_report_hotkey(dev, (int)value);
+				break;
+			case TOS_NOT_SUPPORTED:
+				/*
+				 * This is a workaround for an unresolved
+				 * issue on some machines where system events
+				 * sporadically become disabled.
+				 */
+				hci_result =
+					hci_write1(dev, HCI_SYSTEM_EVENT, 1);
+				pr_notice("Re-enabled hotkeys\n");
+				/* fall through */
+			default:
+				retries--;
+				break;
+			}
+		} while (retries && hci_result != TOS_FIFO_EMPTY);
+	}
+}
+
 static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 {
-	acpi_status status;
 	acpi_handle ec_handle;
 	int error;
 	u32 hci_result;
@@ -1677,7 +1717,6 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 	 * supported, so if it's present set up an i8042 key filter
 	 * for this purpose.
 	 */
-	status = AE_ERROR;
 	ec_handle = ec_get_handle();
 	if (ec_handle && acpi_has_method(ec_handle, "NTFY")) {
 		INIT_WORK(&dev->hotkey_work, toshiba_acpi_hotkey_work);
@@ -1708,10 +1747,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 		goto err_remove_filter;
 	}
 
-	status = acpi_evaluate_object(dev->acpi_dev->handle, "ENAB", NULL, NULL);
-	if (ACPI_FAILURE(status)) {
+	error = toshiba_acpi_enable_hotkeys(dev);
+	if (error) {
 		pr_info("Unable to enable hotkeys\n");
-		error = -ENODEV;
 		goto err_remove_filter;
 	}
 
@@ -1721,7 +1759,6 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 		goto err_remove_filter;
 	}
 
-	hci_result = hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE);
 	return 0;
 
  err_remove_filter:
@@ -1810,8 +1847,7 @@ static int toshiba_acpi_remove(struct acpi_device *acpi_dev)
 		rfkill_destroy(dev->bt_rfk);
 	}
 
-	if (dev->backlight_dev)
-		backlight_device_unregister(dev->backlight_dev);
+	backlight_device_unregister(dev->backlight_dev);
 
 	if (dev->illumination_supported)
 		led_classdev_unregister(&dev->led_dev);
@@ -1967,41 +2003,29 @@ error:
 static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
 {
 	struct toshiba_acpi_dev *dev = acpi_driver_data(acpi_dev);
-	u32 hci_result, value;
-	int retries = 3;
-	int scancode;
-
-	if (event != 0x80)
-		return;
+	int ret;
 
-	if (dev->info_supported) {
-		scancode = toshiba_acpi_query_hotkey(dev);
-		if (scancode < 0)
-			pr_err("Failed to query hotkey event\n");
-		else if (scancode != 0)
-			toshiba_acpi_report_hotkey(dev, scancode);
-	} else if (dev->system_event_supported) {
-		do {
-			hci_result = hci_read1(dev, HCI_SYSTEM_EVENT, &value);
-			switch (hci_result) {
-			case TOS_SUCCESS:
-				toshiba_acpi_report_hotkey(dev, (int)value);
-				break;
-			case TOS_NOT_SUPPORTED:
-				/*
-				 * This is a workaround for an unresolved
-				 * issue on some machines where system events
-				 * sporadically become disabled.
-				 */
-				hci_result =
-					hci_write1(dev, HCI_SYSTEM_EVENT, 1);
-				pr_notice("Re-enabled hotkeys\n");
-				/* fall through */
-			default:
-				retries--;
-				break;
-			}
-		} while (retries && hci_result != TOS_FIFO_EMPTY);
+	switch (event) {
+	case 0x80: /* Hotkeys and some system events */
+		toshiba_acpi_process_hotkeys(dev);
+		break;
+	case 0x92: /* Keyboard backlight mode changed */
+		/* Update sysfs entries */
+		ret = sysfs_update_group(&acpi_dev->dev.kobj,
+					 &toshiba_attr_group);
+		if (ret)
+			pr_err("Unable to update sysfs entries\n");
+		break;
+	case 0x81: /* Unknown */
+	case 0x82: /* Unknown */
+	case 0x83: /* Unknown */
+	case 0x8c: /* Unknown */
+	case 0x8e: /* Unknown */
+	case 0x8f: /* Unknown */
+	case 0x90: /* Unknown */
+	default:
+		pr_info("Unknown event received %x\n", event);
+		break;
 	}
 }
 
@@ -2020,16 +2044,12 @@ static int toshiba_acpi_suspend(struct device *device)
 static int toshiba_acpi_resume(struct device *device)
 {
 	struct toshiba_acpi_dev *dev = acpi_driver_data(to_acpi_device(device));
-	u32 result;
-	acpi_status status;
+	int error;
 
 	if (dev->hotkey_dev) {
-		status = acpi_evaluate_object(dev->acpi_dev->handle, "ENAB",
-				NULL, NULL);
-		if (ACPI_FAILURE(status))
+		error = toshiba_acpi_enable_hotkeys(dev);
+		if (error)
 			pr_info("Unable to re-enable hotkeys\n");
-
-		result = hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE);
 	}
 
 	return 0;
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 62c15af..7773249 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -951,8 +951,6 @@ static int pm2xxx_wall_charger_suspend(struct device *dev)
 
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
-
 static int  pm2xxx_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *pm2xxx_i2c_client = to_i2c_client(dev);
@@ -977,8 +975,6 @@ static int  pm2xxx_runtime_resume(struct device *dev)
 	return 0;
 }
 
-#endif
-
 static const struct dev_pm_ops pm2xxx_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm2xxx_wall_charger_suspend,
 		pm2xxx_wall_charger_resume)
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index ef2dd2e..a3ecf58 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -50,6 +50,17 @@ config PWM_ATMEL
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-atmel.
 
+config PWM_ATMEL_HLCDC_PWM
+	tristate "Atmel HLCDC PWM support"
+	depends on MFD_ATMEL_HLCDC
+	help
+	  Generic PWM framework driver for the PWM output of the HLCDC
+	  (Atmel High-end LCD Controller). This PWM output is mainly used
+	  to control the LCD backlight.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-atmel-hlcdc.
+
 config PWM_ATMEL_TCB
 	tristate "Atmel TC Block PWM support"
 	depends on ATMEL_TCLIB && OF
@@ -71,6 +82,15 @@ config PWM_BCM_KONA
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-bcm-kona.
 
+config PWM_BCM2835
+	tristate "BCM2835 PWM support"
+	depends on ARCH_BCM2835
+	help
+	  PWM framework driver for BCM2835 controller (Raspberry Pi).
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called pwm-bcm2835.
+
 config PWM_BFIN
 	tristate "Blackfin PWM support"
 	depends on BFIN_GPTIMERS
@@ -235,7 +255,7 @@ config PWM_ROCKCHIP
 
 config PWM_SAMSUNG
 	tristate "Samsung PWM support"
-	depends on PLAT_SAMSUNG
+	depends on PLAT_SAMSUNG || ARCH_EXYNOS
 	help
 	  Generic PWM framework driver for Samsung.
 
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c458606..65259ac 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -2,8 +2,10 @@ obj-$(CONFIG_PWM)		+= core.o
 obj-$(CONFIG_PWM_SYSFS)		+= sysfs.o
 obj-$(CONFIG_PWM_AB8500)	+= pwm-ab8500.o
 obj-$(CONFIG_PWM_ATMEL)		+= pwm-atmel.o
+obj-$(CONFIG_PWM_ATMEL_HLCDC_PWM)	+= pwm-atmel-hlcdc.o
 obj-$(CONFIG_PWM_ATMEL_TCB)	+= pwm-atmel-tcb.o
 obj-$(CONFIG_PWM_BCM_KONA)	+= pwm-bcm-kona.o
+obj-$(CONFIG_PWM_BCM2835)	+= pwm-bcm2835.o
 obj-$(CONFIG_PWM_BFIN)		+= pwm-bfin.o
 obj-$(CONFIG_PWM_CLPS711X)	+= pwm-clps711x.o
 obj-$(CONFIG_PWM_EP93XX)	+= pwm-ep93xx.o
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
new file mode 100644
index 0000000..e7a785f
--- /dev/null
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2014 Free Electrons
+ * Copyright (C) 2014 Atmel
+ *
+ * Author: Boris BREZILLON <boris.brezillon@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/mfd/atmel-hlcdc.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/regmap.h>
+
+#define ATMEL_HLCDC_PWMCVAL_MASK	GENMASK(15, 8)
+#define ATMEL_HLCDC_PWMCVAL(x)		(((x) << 8) & ATMEL_HLCDC_PWMCVAL_MASK)
+#define ATMEL_HLCDC_PWMPOL		BIT(4)
+#define ATMEL_HLCDC_PWMPS_MASK		GENMASK(2, 0)
+#define ATMEL_HLCDC_PWMPS_MAX		0x6
+#define ATMEL_HLCDC_PWMPS(x)		((x) & ATMEL_HLCDC_PWMPS_MASK)
+
+struct atmel_hlcdc_pwm_errata {
+	bool slow_clk_erratum;
+	bool div1_clk_erratum;
+};
+
+struct atmel_hlcdc_pwm {
+	struct pwm_chip chip;
+	struct atmel_hlcdc *hlcdc;
+	struct clk *cur_clk;
+	const struct atmel_hlcdc_pwm_errata *errata;
+};
+
+static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
+{
+	return container_of(chip, struct atmel_hlcdc_pwm, chip);
+}
+
+static int atmel_hlcdc_pwm_config(struct pwm_chip *c,
+				  struct pwm_device *pwm,
+				  int duty_ns, int period_ns)
+{
+	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
+	struct atmel_hlcdc *hlcdc = chip->hlcdc;
+	struct clk *new_clk = hlcdc->slow_clk;
+	u64 pwmcval = (u64)duty_ns * 256;	/* avoid int overflow */
+	unsigned long clk_freq;
+	u64 clk_period_ns;	/* length of 256 clk cycles, in ns */
+	u32 pwmcfg;
+	int pres;
+
+	if (!chip->errata || !chip->errata->slow_clk_erratum) {
+		clk_freq = clk_get_rate(new_clk);
+		clk_period_ns = (u64)NSEC_PER_SEC * 256;
+		do_div(clk_period_ns, clk_freq);
+	}
+
+	/* Errata: cannot use slow clk on some IP revisions */
+	if ((chip->errata && chip->errata->slow_clk_erratum) ||
+	    clk_period_ns > period_ns) {
+		new_clk = hlcdc->sys_clk;
+		clk_freq = clk_get_rate(new_clk);
+		clk_period_ns = (u64)NSEC_PER_SEC * 256;
+		do_div(clk_period_ns, clk_freq);
+	}
+
+	for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
+		/* Errata: cannot divide by 1 on some IP revisions */
+		if (!pres && chip->errata && chip->errata->div1_clk_erratum)
+			continue;
+
+		if ((clk_period_ns << pres) >= period_ns)
+			break;
+	}
+
+	if (pres > ATMEL_HLCDC_PWMPS_MAX)
+		return -EINVAL;
+
+	pwmcfg = ATMEL_HLCDC_PWMPS(pres);
+
+	if (new_clk != chip->cur_clk) {
+		u32 gencfg = 0;
+		int ret;
+
+		ret = clk_prepare_enable(new_clk);
+		if (ret)
+			return ret;
+
+		clk_disable_unprepare(chip->cur_clk);
+		chip->cur_clk = new_clk;
+
+		if (new_clk == hlcdc->sys_clk)
+			gencfg = ATMEL_HLCDC_CLKPWMSEL;
+
+		ret = regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(0),
+					 ATMEL_HLCDC_CLKPWMSEL, gencfg);
+		if (ret)
+			return ret;
+	}
+
+	do_div(pwmcval, period_ns);
+
+	/*
+	 * The PWM duty cycle is configurable from 0/256 to 255/256 of the
+	 * period cycle. Hence we can't set a duty cycle occupying the
+	 * whole period cycle if we're asked to.
+	 * Clamp pwmcval to 255 whenever it comes out larger.
+	 */
+	if (pwmcval > 255)
+		pwmcval = 255;
+
+	pwmcfg |= ATMEL_HLCDC_PWMCVAL(pwmcval);
+
+	return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6),
+				  ATMEL_HLCDC_PWMCVAL_MASK |
+				  ATMEL_HLCDC_PWMPS_MASK,
+				  pwmcfg);
+}
+
+static int atmel_hlcdc_pwm_set_polarity(struct pwm_chip *c,
+					struct pwm_device *pwm,
+					enum pwm_polarity polarity)
+{
+	struct atmel_hlcdc *hlcdc = to_atmel_hlcdc_pwm(c)->hlcdc;
+	u32 pol_bit;
+
+	/* PWMPOL is set for normal polarity and cleared otherwise. */
+	pol_bit = (polarity == PWM_POLARITY_NORMAL) ? ATMEL_HLCDC_PWMPOL : 0;
+
+	/* Only the polarity bit of CFG(6) is covered by the update mask. */
+	return regmap_update_bits(hlcdc->regmap, ATMEL_HLCDC_CFG(6),
+				  ATMEL_HLCDC_PWMPOL, pol_bit);
+}
+
+static int atmel_hlcdc_pwm_enable(struct pwm_chip *c, struct pwm_device *pwm)
+{
+	struct atmel_hlcdc_pwm *priv = to_atmel_hlcdc_pwm(c);
+	struct atmel_hlcdc *hlcdc = priv->hlcdc;
+	u32 sr;
+	int err;
+
+	/* Write the PWM bit to the enable register. */
+	err = regmap_write(hlcdc->regmap, ATMEL_HLCDC_EN, ATMEL_HLCDC_PWM);
+	if (err)
+		return err;
+
+	/* Poll the status register until the PWM bit reads back as set. */
+	for (;;) {
+		err = regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &sr);
+		if (err)
+			return err;
+
+		if (sr & ATMEL_HLCDC_PWM)
+			return 0;
+
+		usleep_range(1, 10);
+	}
+}
+
+static void atmel_hlcdc_pwm_disable(struct pwm_chip *c,
+				    struct pwm_device *pwm)
+{
+	struct atmel_hlcdc_pwm *priv = to_atmel_hlcdc_pwm(c);
+	struct atmel_hlcdc *hlcdc = priv->hlcdc;
+	u32 sr;
+
+	/* Write the PWM bit to the disable register; give up on failure. */
+	if (regmap_write(hlcdc->regmap, ATMEL_HLCDC_DIS, ATMEL_HLCDC_PWM))
+		return;
+
+	/*
+	 * Poll the status register until the PWM bit reads back as clear,
+	 * or until the register can no longer be read.
+	 */
+	for (;;) {
+		if (regmap_read(hlcdc->regmap, ATMEL_HLCDC_SR, &sr) ||
+		    !(sr & ATMEL_HLCDC_PWM))
+			return;
+
+		usleep_range(1, 10);
+	}
+}
+
+static const struct pwm_ops atmel_hlcdc_pwm_ops = {
+	.config = atmel_hlcdc_pwm_config,
+	.set_polarity = atmel_hlcdc_pwm_set_polarity,
+	.enable = atmel_hlcdc_pwm_enable,
+	.disable = atmel_hlcdc_pwm_disable,
+	.owner = THIS_MODULE,
+};
+
+static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = {
+	.slow_clk_erratum = true,
+};
+
+static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = {
+	.div1_clk_erratum = true,
+};
+
+static const struct of_device_id atmel_hlcdc_dt_ids[] = {
+	{
+		.compatible = "atmel,at91sam9x5-hlcdc",
+		.data = &atmel_hlcdc_pwm_at91sam9x5_errata,
+	},
+	{
+		.compatible = "atmel,sama5d3-hlcdc",
+		.data = &atmel_hlcdc_pwm_sama5d3_errata,
+	},
+	{ /* sentinel */ },
+};
+
+static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+	struct atmel_hlcdc_pwm *chip;
+	struct atmel_hlcdc *hlcdc;
+	int ret;
+
+	hlcdc = dev_get_drvdata(dev->parent);
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	ret = clk_prepare_enable(hlcdc->periph_clk);
+	if (ret)
+		return ret;
+
+	match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node);
+	if (match)
+		chip->errata = match->data;
+
+	chip->hlcdc = hlcdc;
+	chip->chip.ops = &atmel_hlcdc_pwm_ops;
+	chip->chip.dev = dev;
+	chip->chip.base = -1;
+	chip->chip.npwm = 1;
+	chip->chip.of_xlate = of_pwm_xlate_with_flags;
+	chip->chip.of_pwm_n_cells = 3;
+	chip->chip.can_sleep = 1;
+
+	ret = pwmchip_add(&chip->chip);
+	if (ret) {
+		clk_disable_unprepare(hlcdc->periph_clk);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, chip);
+
+	return 0;
+}
+
+static int atmel_hlcdc_pwm_remove(struct platform_device *pdev)
+{
+	struct atmel_hlcdc_pwm *priv = platform_get_drvdata(pdev);
+	int err = pwmchip_remove(&priv->chip);
+
+	/*
+	 * Keep the peripheral clock enabled if the chip could not be removed.
+	 */
+	if (!err)
+		clk_disable_unprepare(priv->hlcdc->periph_clk);
+
+	return err;
+}
+
+static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = {
+	{ .compatible = "atmel,hlcdc-pwm" },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver atmel_hlcdc_pwm_driver = {
+	.driver = {
+		.name = "atmel-hlcdc-pwm",
+		.of_match_table = atmel_hlcdc_pwm_dt_ids,
+	},
+	.probe = atmel_hlcdc_pwm_probe,
+	.remove = atmel_hlcdc_pwm_remove,
+};
+module_platform_driver(atmel_hlcdc_pwm_driver);
+
+MODULE_ALIAS("platform:atmel-hlcdc-pwm");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_DESCRIPTION("Atmel HLCDC PWM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c
new file mode 100644
index 0000000..b4c7f95
--- /dev/null
+++ b/drivers/pwm/pwm-bcm2835.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2014 Bart Tanghe <bart.tanghe@thomasmore.be>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+
+#define PWM_CONTROL		0x000
+#define PWM_CONTROL_SHIFT(x)	((x) * 8)
+#define PWM_CONTROL_MASK	0xff
+#define PWM_MODE		0x80		/* set timer in PWM mode */
+#define PWM_ENABLE		(1 << 0)
+#define PWM_POLARITY		(1 << 4)
+
+#define PERIOD(x)		(((x) * 0x10) + 0x10)
+#define DUTY(x)			(((x) * 0x10) + 0x14)
+
+#define MIN_PERIOD		108		/* 9.2 MHz max. PWM clock */
+
+struct bcm2835_pwm {
+	struct pwm_chip chip;
+	struct device *dev;
+	unsigned long scaler;
+	void __iomem *base;
+	struct clk *clk;
+};
+
+static inline struct bcm2835_pwm *to_bcm2835_pwm(struct pwm_chip *chip)
+{
+	return container_of(chip, struct bcm2835_pwm, chip);
+}
+
+static int bcm2835_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	void __iomem *ctrl = to_bcm2835_pwm(chip)->base + PWM_CONTROL;
+	unsigned int shift = PWM_CONTROL_SHIFT(pwm->hwpwm);
+	u32 reg = readl(ctrl);
+
+	/* Wipe this channel's control byte, then select PWM mode in it. */
+	reg &= ~(PWM_CONTROL_MASK << shift);
+	reg |= PWM_MODE << shift;
+	writel(reg, ctrl);
+	return 0;
+}
+
+static void bcm2835_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	void __iomem *ctrl = to_bcm2835_pwm(chip)->base + PWM_CONTROL;
+	unsigned int shift = PWM_CONTROL_SHIFT(pwm->hwpwm);
+	u32 reg = readl(ctrl);
+
+	/* Zero the whole control byte that belongs to this channel. */
+	writel(reg & ~(PWM_CONTROL_MASK << shift), ctrl);
+}
+
+static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
+			      int duty_ns, int period_ns)
+{
+	struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
+
+	if (period_ns <= MIN_PERIOD) {
+		dev_err(pc->dev, "period %d not supported, minimum %d\n",
+			period_ns, MIN_PERIOD);
+		return -EINVAL;
+	}
+
+	writel(duty_ns / pc->scaler, pc->base + DUTY(pwm->hwpwm));
+	writel(period_ns / pc->scaler, pc->base + PERIOD(pwm->hwpwm));
+
+	return 0;
+}
+
+static int bcm2835_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	void __iomem *ctrl = to_bcm2835_pwm(chip)->base + PWM_CONTROL;
+	unsigned int shift = PWM_CONTROL_SHIFT(pwm->hwpwm);
+	u32 reg = readl(ctrl);
+
+	/* Read-modify-write: set only this channel's enable bit. */
+	writel(reg | (PWM_ENABLE << shift), ctrl);
+
+	return 0;
+}
+
+static void bcm2835_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+	void __iomem *ctrl = to_bcm2835_pwm(chip)->base + PWM_CONTROL;
+	unsigned int shift = PWM_CONTROL_SHIFT(pwm->hwpwm);
+	u32 reg = readl(ctrl);
+
+	/* Read-modify-write: clear only this channel's enable bit. */
+	writel(reg & ~(PWM_ENABLE << shift), ctrl);
+}
+
+static int bcm2835_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
+				enum pwm_polarity polarity)
+{
+	void __iomem *ctrl = to_bcm2835_pwm(chip)->base + PWM_CONTROL;
+	u32 pol_bit = PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm);
+	u32 reg = readl(ctrl);
+
+	/*
+	 * The channel's polarity bit is left clear for normal polarity and
+	 * set for every other polarity value.
+	 */
+	reg &= ~pol_bit;
+	if (polarity != PWM_POLARITY_NORMAL)
+		reg |= pol_bit;
+	writel(reg, ctrl);
+	return 0;
+}
+
+static const struct pwm_ops bcm2835_pwm_ops = {
+	.request = bcm2835_pwm_request,
+	.free = bcm2835_pwm_free,
+	.config = bcm2835_pwm_config,
+	.enable = bcm2835_pwm_enable,
+	.disable = bcm2835_pwm_disable,
+	.set_polarity = bcm2835_set_polarity,
+	.owner = THIS_MODULE,
+};
+
+static int bcm2835_pwm_probe(struct platform_device *pdev)
+{
+	struct bcm2835_pwm *pc;
+	struct resource *res;
+	int ret;
+
+	pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
+	if (!pc)
+		return -ENOMEM;
+
+	pc->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pc->base))
+		return PTR_ERR(pc->base);
+
+	pc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pc->clk)) {
+		dev_err(&pdev->dev, "clock not found: %ld\n", PTR_ERR(pc->clk));
+		return PTR_ERR(pc->clk);
+	}
+
+	ret = clk_prepare_enable(pc->clk);
+	if (ret)
+		return ret;
+
+	pc->scaler = NSEC_PER_SEC / clk_get_rate(pc->clk);
+
+	pc->chip.dev = &pdev->dev;
+	pc->chip.ops = &bcm2835_pwm_ops;
+	pc->chip.npwm = 2;
+
+	platform_set_drvdata(pdev, pc);
+
+	ret = pwmchip_add(&pc->chip);
+	if (ret < 0)
+		goto add_fail;
+
+	return 0;
+
+add_fail:
+	clk_disable_unprepare(pc->clk);
+	return ret;
+}
+
+static int bcm2835_pwm_remove(struct platform_device *pdev)
+{
+	struct bcm2835_pwm *pc = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(pc->clk);
+
+	return pwmchip_remove(&pc->chip);
+}
+
+static const struct of_device_id bcm2835_pwm_of_match[] = {
+	{ .compatible = "brcm,bcm2835-pwm", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, bcm2835_pwm_of_match);
+
+static struct platform_driver bcm2835_pwm_driver = {
+	.driver = {
+		.name = "bcm2835-pwm",
+		.of_match_table = bcm2835_pwm_of_match,
+	},
+	.probe = bcm2835_pwm_probe,
+	.remove = bcm2835_pwm_remove,
+};
+module_platform_driver(bcm2835_pwm_driver);
+
+MODULE_AUTHOR("Bart Tanghe <bart.tanghe@thomasmore.be>");
+MODULE_DESCRIPTION("Broadcom BCM2835 PWM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 0f2cc7e..f9dfc8b 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
@@ -299,7 +300,7 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
 {
 	int ret;
 
-	if (fpc->use_count != 0)
+	if (fpc->use_count++ != 0)
 		return 0;
 
 	/* select counter clock source */
@@ -316,8 +317,6 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
 		return ret;
 	}
 
-	fpc->use_count++;
-
 	return 0;
 }
 
@@ -399,12 +398,23 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc)
 	return 0;
 }
 
+static bool fsl_pwm_volatile_reg(struct device *dev, unsigned int reg)
+{
+	/*
+	 * FTM_CNT is the only register reported as volatile; every other
+	 * register number yields false.
+	 */
+	return reg == FTM_CNT;
+}
+
 static const struct regmap_config fsl_pwm_regmap_config = {
 	.reg_bits = 32,
 	.reg_stride = 4,
 	.val_bits = 32,
 
 	.max_register = FTM_PWMLOAD,
+	.volatile_reg = fsl_pwm_volatile_reg,
+	.cache_type = REGCACHE_RBTREE,
 };
 
 static int fsl_pwm_probe(struct platform_device *pdev)
@@ -427,7 +437,7 @@ static int fsl_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-	fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base,
+	fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "ftm_sys", base,
 						&fsl_pwm_regmap_config);
 	if (IS_ERR(fpc->regmap)) {
 		dev_err(&pdev->dev, "regmap init failed\n");
@@ -478,6 +488,51 @@ static int fsl_pwm_remove(struct platform_device *pdev)
 	return pwmchip_remove(&fpc->chip);
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int fsl_pwm_suspend(struct device *dev)
+{
+	struct fsl_pwm_chip *fpc = dev_get_drvdata(dev);
+	u32 val;
+
+	regcache_cache_only(fpc->regmap, true);
+	regcache_mark_dirty(fpc->regmap);
+
+	/* read from cache */
+	regmap_read(fpc->regmap, FTM_OUTMASK, &val);
+	if ((val & 0xFF) != 0xFF) {
+		clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
+		clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
+		clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]);
+	}
+
+	return 0;
+}
+
+static int fsl_pwm_resume(struct device *dev)
+{
+	struct fsl_pwm_chip *fpc = dev_get_drvdata(dev);
+	u32 val;
+
+	/* read from cache */
+	regmap_read(fpc->regmap, FTM_OUTMASK, &val);
+	if ((val & 0xFF) != 0xFF) {
+		clk_prepare_enable(fpc->clk[FSL_PWM_CLK_SYS]);
+		clk_prepare_enable(fpc->clk[fpc->cnt_select]);
+		clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]);
+	}
+
+	/* restore all registers from cache */
+	regcache_cache_only(fpc->regmap, false);
+	regcache_sync(fpc->regmap);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops fsl_pwm_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(fsl_pwm_suspend, fsl_pwm_resume)
+};
+
 static const struct of_device_id fsl_pwm_dt_ids[] = {
 	{ .compatible = "fsl,vf610-ftm-pwm", },
 	{ /* sentinel */ }
@@ -488,6 +543,7 @@ static struct platform_driver fsl_pwm_driver = {
 	.driver = {
 		.name = "fsl-ftm-pwm",
 		.of_match_table = fsl_pwm_dt_ids,
+		.pm = &fsl_pwm_pm_ops,
 	},
 	.probe = fsl_pwm_probe,
 	.remove = fsl_pwm_remove,
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index aa915da..82abfce 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -176,7 +176,6 @@ STATIC int NCR_700_slave_alloc(struct scsi_device *SDpnt);
 STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt);
 STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt);
 static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth);
-static int NCR_700_change_queue_type(struct scsi_device *SDpnt, int depth);
 
 STATIC struct device_attribute *NCR_700_dev_attrs[];
 
@@ -326,7 +325,6 @@ NCR_700_detect(struct scsi_host_template *tpnt,
 	tpnt->slave_destroy = NCR_700_slave_destroy;
 	tpnt->slave_alloc = NCR_700_slave_alloc;
 	tpnt->change_queue_depth = NCR_700_change_queue_depth;
-	tpnt->change_queue_type = NCR_700_change_queue_type;
 	tpnt->use_blk_tags = 1;
 
 	if(tpnt->name == NULL)
@@ -904,8 +902,8 @@ process_message(struct Scsi_Host *host,	struct NCR_700_Host_Parameters *hostdata
 			hostdata->tag_negotiated &= ~(1<<scmd_id(SCp));
 
 			SCp->device->tagged_supported = 0;
+			SCp->device->simple_tags = 0;
 			scsi_change_queue_depth(SCp->device, host->cmd_per_lun);
-			scsi_set_tag_type(SCp->device, 0);
 		} else {
 			shost_printk(KERN_WARNING, host,
 				"(%d:%d) Unexpected REJECT Message %s\n",
@@ -1818,8 +1816,8 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 		hostdata->tag_negotiated &= ~(1<<scmd_id(SCp));
 	}
 
-	if((hostdata->tag_negotiated &(1<<scmd_id(SCp)))
-	   && scsi_get_tag_type(SCp->device)) {
+	if ((hostdata->tag_negotiated & (1<<scmd_id(SCp))) &&
+	    SCp->device->simple_tags) {
 		slot->tag = SCp->request->tag;
 		CDEBUG(KERN_DEBUG, SCp, "sending out tag %d, slot %p\n",
 		       slot->tag, slot);
@@ -2082,39 +2080,6 @@ NCR_700_change_queue_depth(struct scsi_device *SDp, int depth)
 	return scsi_change_queue_depth(SDp, depth);
 }
 
-static int NCR_700_change_queue_type(struct scsi_device *SDp, int tag_type)
-{
-	int change_tag = ((tag_type ==0 &&  scsi_get_tag_type(SDp) != 0)
-			  || (tag_type != 0 && scsi_get_tag_type(SDp) == 0));
-	struct NCR_700_Host_Parameters *hostdata = 
-		(struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
-
-	/* We have a global (per target) flag to track whether TCQ is
-	 * enabled, so we'll be turning it off for the entire target here.
-	 * our tag algorithm will fail if we mix tagged and untagged commands,
-	 * so quiesce the device before doing this */
-	if (change_tag)
-		scsi_target_quiesce(SDp->sdev_target);
-
-	scsi_set_tag_type(SDp, tag_type);
-	if (!tag_type) {
-		/* shift back to the default unqueued number of commands
-		 * (the user can still raise this) */
-		scsi_change_queue_depth(SDp, SDp->host->cmd_per_lun);
-		hostdata->tag_negotiated &= ~(1 << sdev_id(SDp));
-	} else {
-		/* Here, we cleared the negotiation flag above, so this
-		 * will force the driver to renegotiate */
-		scsi_change_queue_depth(SDp, SDp->queue_depth);
-		if (change_tag)
-			NCR_700_set_tag_neg_state(SDp, NCR_700_START_TAG_NEGOTIATION);
-	}
-	if (change_tag)
-		scsi_target_resume(SDp->sdev_target);
-
-	return tag_type;
-}
-
 static ssize_t
 NCR_700_show_active_tags(struct device *dev, struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 86cf3d6..9c92f41 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1462,18 +1462,17 @@ config SCSI_WD719X
 	  SCSI controllers (based on WD33C296A chip).
 
 config SCSI_DEBUG
-	tristate "SCSI debugging host simulator"
+	tristate "SCSI debugging host and device simulator"
 	depends on SCSI
 	select CRC_T10DIF
 	help
-	  This is a host adapter simulator that can simulate multiple hosts
-	  each with multiple dummy SCSI devices (disks). It defaults to one
-	  host adapter with one dummy SCSI disk. Each dummy disk uses kernel
-	  RAM as storage (i.e. it is a ramdisk). To save space when multiple
-	  dummy disks are simulated, they share the same kernel RAM for 
-	  their storage. See <http://sg.danny.cz/sg/sdebug26.html> for more
-	  information. This driver is primarily of use to those testing the
-	  SCSI and block subsystems. If unsure, say N.
+	  This pseudo driver simulates one or more hosts (SCSI initiators),
+	  each with one or more targets, each with one or more logical units.
+	  Defaults to one of each, creating a small RAM disk device. Many
+	  parameters found in the /sys/bus/pseudo/drivers/scsi_debug
+	  directory can be tweaked at run time.
+	  See <http://sg.danny.cz/sg/sdebug26.html> for more information.
+	  Mainly used for testing and best as a module. If unsure, say N.
 
 config SCSI_MESH
 	tristate "MESH (Power Mac internal SCSI) support"
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 6719a33..2c5ce48 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -7921,9 +7921,9 @@ static int asc_build_req(struct asc_board *boardp, struct scsi_cmnd *scp,
 	 */
 	if ((asc_dvc->cur_dvc_qng[scp->device->id] > 0) &&
 	    (boardp->reqcnt[scp->device->id] % 255) == 0) {
-		asc_scsi_q->q2.tag_code = MSG_ORDERED_TAG;
+		asc_scsi_q->q2.tag_code = ORDERED_QUEUE_TAG;
 	} else {
-		asc_scsi_q->q2.tag_code = MSG_SIMPLE_TAG;
+		asc_scsi_q->q2.tag_code = SIMPLE_QUEUE_TAG;
 	}
 
 	/* Build ASC_SCSI_Q */
@@ -8351,7 +8351,7 @@ static int AscPutReadyQueue(ASC_DVC_VAR *asc_dvc, ASC_SCSI_Q *scsiq, uchar q_no)
 	}
 	q_addr = ASC_QNO_TO_QADDR(q_no);
 	if ((scsiq->q1.target_id & asc_dvc->use_tagged_qng) == 0) {
-		scsiq->q2.tag_code &= ~MSG_SIMPLE_TAG;
+		scsiq->q2.tag_code &= ~SIMPLE_QUEUE_TAG;
 	}
 	scsiq->q1.status = QS_FREE;
 	AscMemWordCopyPtrToLram(iop_base,
@@ -8669,7 +8669,7 @@ static int AscExeScsiQueue(ASC_DVC_VAR *asc_dvc, ASC_SCSI_Q *scsiq)
 		}
 	}
 	if (disable_syn_offset_one_fix) {
-		scsiq->q2.tag_code &= ~MSG_SIMPLE_TAG;
+		scsiq->q2.tag_code &= ~SIMPLE_QUEUE_TAG;
 		scsiq->q2.tag_code |= (ASC_TAG_FLAG_DISABLE_ASYN_USE_SYN_FIX |
 				       ASC_TAG_FLAG_DISABLE_DISCONNECT);
 	} else {
diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 14fc018..02a2512 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c
@@ -63,7 +63,6 @@ static struct scsi_host_template aic94xx_sht = {
 	.scan_finished		= asd_scan_finished,
 	.scan_start		= asd_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index e861f28..98d06d1 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2792,7 +2792,6 @@ static struct scsi_host_template bnx2fc_shost_template = {
 	.eh_host_reset_handler	= fc_eh_host_reset,
 	.slave_alloc		= fc_slave_alloc,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 4b56858..9ecca85 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1737,11 +1737,7 @@ void bnx2fc_build_fcp_cmnd(struct bnx2fc_cmd *io_req,
 	fcp_cmnd->fc_pri_ta = 0;
 	fcp_cmnd->fc_tm_flags = io_req->mp_req.tm_flags;
 	fcp_cmnd->fc_flags = io_req->io_req_flags;
-
-	if (sc_cmd->flags & SCMD_TAGGED)
-		fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
-	else
-		fcp_cmnd->fc_pri_ta = 0;
+	fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
 }
 
 static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req,
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 51ea5dc..3987284 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -172,10 +172,7 @@ csio_scsi_fcp_cmnd(struct csio_ioreq *req, void *addr)
 		fcp_cmnd->fc_cmdref = 0;
 
 		memcpy(fcp_cmnd->fc_cdb, scmnd->cmnd, 16);
-		if (scmnd->flags & SCMD_TAGGED)
-			fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
-		else
-			fcp_cmnd->fc_pri_ta = 0;
+		fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
 		fcp_cmnd->fc_dl = cpu_to_be32(scsi_bufflen(scmnd));
 
 		if (req->nsge)
diff --git a/drivers/scsi/esas2r/esas2r_flash.c b/drivers/scsi/esas2r/esas2r_flash.c
index b7dc59f..7bd376d 100644
--- a/drivers/scsi/esas2r/esas2r_flash.c
+++ b/drivers/scsi/esas2r/esas2r_flash.c
@@ -684,9 +684,9 @@ static u16 calc_fi_checksum(struct esas2r_flash_context *fc)
  *              1)  verify the fi_version is correct
  *              2)  verify the checksum of the entire image.
  *              3)  validate the adap_typ, action and length fields.
- *              4)  valdiate each component header. check the img_type and
+ *              4)  validate each component header. check the img_type and
  *                  length fields
- *              5)  valdiate each component image.  validate signatures and
+ *              5)  validate each component image.  validate signatures and
  *                  local checksums
  */
 static bool verify_fi(struct esas2r_adapter *a,
diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index 593ff8a..7e1c21e 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -255,7 +255,6 @@ static struct scsi_host_template driver_template = {
 	.emulated			= 0,
 	.proc_name			= ESAS2R_DRVR_NAME,
 	.change_queue_depth		= scsi_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.max_sectors			= 0xFFFF,
 	.use_blk_tags			= 1,
 };
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index cd00a6c..ec193a8 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -281,7 +281,6 @@ static struct scsi_host_template fcoe_shost_template = {
 	.eh_host_reset_handler = fc_eh_host_reset,
 	.slave_alloc = fc_slave_alloc,
 	.change_queue_depth = scsi_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.this_id = -1,
 	.cmd_per_lun = 3,
 	.can_queue = FCOE_MAX_OUTSTANDING_COMMANDS,
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 0c1f817..8a0d4d7 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -111,7 +111,6 @@ static struct scsi_host_template fnic_host_template = {
 	.eh_host_reset_handler = fnic_host_reset,
 	.slave_alloc = fnic_slave_alloc,
 	.change_queue_depth = scsi_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.this_id = -1,
 	.cmd_per_lun = 3,
 	.can_queue = FNIC_DFLT_IO_REQ,
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index f58c6d8..057d277 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1615,7 +1615,6 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd,
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
 	struct ibmvfc_cmd *vfc_cmd;
 	struct ibmvfc_event *evt;
-	u8 tag[2];
 	int rc;
 
 	if (unlikely((rc = fc_remote_port_chkready(rport))) ||
@@ -3089,7 +3088,6 @@ static struct scsi_host_template driver_template = {
 	.target_alloc = ibmvfc_target_alloc,
 	.scan_finished = ibmvfc_scan_finished,
 	.change_queue_depth = ibmvfc_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.cmd_per_lun = 16,
 	.can_queue = IBMVFC_MAX_REQUESTS_DEFAULT,
 	.this_id = -1,
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 5402943..df4e27c 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -1426,16 +1426,14 @@ static void ipr_handle_config_change(struct ipr_ioa_cfg *ioa_cfg,
 		if (res->sdev) {
 			res->del_from_ml = 1;
 			res->res_handle = IPR_INVALID_RES_HANDLE;
-			if (ioa_cfg->allow_ml_add_del)
-				schedule_work(&ioa_cfg->work_q);
+			schedule_work(&ioa_cfg->work_q);
 		} else {
 			ipr_clear_res_target(res);
 			list_move_tail(&res->queue, &ioa_cfg->free_res_q);
 		}
 	} else if (!res->sdev || res->del_from_ml) {
 		res->add_to_ml = 1;
-		if (ioa_cfg->allow_ml_add_del)
-			schedule_work(&ioa_cfg->work_q);
+		schedule_work(&ioa_cfg->work_q);
 	}
 
 	ipr_send_hcam(ioa_cfg, IPR_HCAM_CDB_OP_CODE_CONFIG_CHANGE, hostrcb);
@@ -3273,8 +3271,7 @@ static void ipr_worker_thread(struct work_struct *work)
 restart:
 	do {
 		did_work = 0;
-		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds ||
-		    !ioa_cfg->allow_ml_add_del) {
+		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
 			spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 			return;
 		}
@@ -3311,6 +3308,7 @@ restart:
 		}
 	}
 
+	ioa_cfg->scan_done = 1;
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 	kobject_uevent(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE);
 	LEAVE;
@@ -4346,30 +4344,6 @@ static int ipr_change_queue_depth(struct scsi_device *sdev, int qdepth)
 }
 
 /**
- * ipr_change_queue_type - Change the device's queue type
- * @dsev:		scsi device struct
- * @tag_type:	type of tags to use
- *
- * Return value:
- * 	actual queue type set
- **/
-static int ipr_change_queue_type(struct scsi_device *sdev, int tag_type)
-{
-	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata;
-	struct ipr_resource_entry *res;
-	unsigned long lock_flags = 0;
-
-	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-	res = (struct ipr_resource_entry *)sdev->hostdata;
-	if (res && ipr_is_gscsi(res))
-		tag_type = scsi_change_queue_type(sdev, tag_type);
-	else
-		tag_type = 0;
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-	return tag_type;
-}
-
-/**
  * ipr_show_adapter_handle - Show the adapter's resource handle for this device
  * @dev:	device struct
  * @attr:	device attribute structure
@@ -4739,6 +4713,7 @@ static int ipr_slave_configure(struct scsi_device *sdev)
 			sdev->no_uld_attach = 1;
 		}
 		if (ipr_is_vset_device(res)) {
+			sdev->scsi_level = SCSI_SPC_3;
 			blk_queue_rq_timeout(sdev->request_queue,
 					     IPR_VSET_RW_TIMEOUT);
 			blk_queue_max_hw_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
@@ -5231,6 +5206,28 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
  * @scsi_cmd:	scsi command struct
  *
  * Return value:
+ *	0 if scan in progress / 1 if scan is complete
+ **/
+static int ipr_scan_finished(struct Scsi_Host *shost, unsigned long elapsed_time)
+{
+	unsigned long lock_flags;
+	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *) shost->hostdata;
+	int rc = 0;
+
+	spin_lock_irqsave(shost->host_lock, lock_flags);
+	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead || ioa_cfg->scan_done)
+		rc = 1;
+	if ((elapsed_time/HZ) > (ioa_cfg->transop_timeout * 2))
+		rc = 1;
+	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	return rc;
+}
+
+/**
+ * ipr_eh_abort - Abort a SCSI command
+ * @scsi_cmd:	scsi command struct
+ *
+ * Return value:
  * 	SUCCESS / FAILED
  **/
 static int ipr_eh_abort(struct scsi_cmnd *scsi_cmd)
@@ -5779,7 +5776,7 @@ static void ipr_erp_cancel_all(struct ipr_cmnd *ipr_cmd)
 
 	ipr_reinit_ipr_cmnd_for_erp(ipr_cmd);
 
-	if (!scsi_get_tag_type(scsi_cmd->device)) {
+	if (!scsi_cmd->device->simple_tags) {
 		ipr_erp_request_sense(ipr_cmd);
 		return;
 	}
@@ -6299,10 +6296,10 @@ static struct scsi_host_template driver_template = {
 	.slave_alloc = ipr_slave_alloc,
 	.slave_configure = ipr_slave_configure,
 	.slave_destroy = ipr_slave_destroy,
+	.scan_finished = ipr_scan_finished,
 	.target_alloc = ipr_target_alloc,
 	.target_destroy = ipr_target_destroy,
 	.change_queue_depth = ipr_change_queue_depth,
-	.change_queue_type = ipr_change_queue_type,
 	.bios_param = ipr_biosparam,
 	.can_queue = IPR_MAX_COMMANDS,
 	.this_id = -1,
@@ -6841,7 +6838,7 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
 	ioa_cfg->doorbell |= IPR_RUNTIME_RESET;
 
 	list_for_each_entry(res, &ioa_cfg->used_res_q, queue) {
-		if (ioa_cfg->allow_ml_add_del && (res->add_to_ml || res->del_from_ml)) {
+		if (res->add_to_ml || res->del_from_ml) {
 			ipr_trace;
 			break;
 		}
@@ -6870,6 +6867,7 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
 	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
 		scsi_block_requests(ioa_cfg->host);
 
+	schedule_work(&ioa_cfg->work_q);
 	LEAVE;
 	return IPR_RC_JOB_RETURN;
 }
@@ -7610,6 +7608,19 @@ static int ipr_ioafp_page0_inquiry(struct ipr_cmnd *ipr_cmd)
 	type[4] = '\0';
 	ioa_cfg->type = simple_strtoul((char *)type, NULL, 16);
 
+	if (ipr_invalid_adapter(ioa_cfg)) {
+		dev_err(&ioa_cfg->pdev->dev,
+			"Adapter not supported in this hardware configuration.\n");
+
+		if (!ipr_testmode) {
+			ioa_cfg->reset_retries += IPR_NUM_RESET_RELOAD_RETRIES;
+			ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
+			list_add_tail(&ipr_cmd->queue,
+					&ioa_cfg->hrrq->hrrq_free_q);
+			return IPR_RC_JOB_RETURN;
+		}
+	}
+
 	ipr_cmd->job_step = ipr_ioafp_page3_inquiry;
 
 	ipr_ioafp_inquiry(ipr_cmd, 1, 0,
@@ -8797,20 +8808,6 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
 		_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa,
 					IPR_SHUTDOWN_NONE);
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
-	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
-	spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
-
-	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
-		rc = -EIO;
-	} else if (ipr_invalid_adapter(ioa_cfg)) {
-		if (!ipr_testmode)
-			rc = -EIO;
-
-		dev_err(&ioa_cfg->pdev->dev,
-			"Adapter not supported in this hardware configuration.\n");
-	}
-
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
 
 	LEAVE;
 	return rc;
@@ -9264,7 +9261,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
 					       * ioa_cfg->max_devs_supported)));
 	}
 
-	host->max_channel = IPR_MAX_BUS_TO_SCAN;
+	host->max_channel = IPR_VSET_BUS;
 	host->unique_id = host->host_no;
 	host->max_cmd_len = IPR_MAX_CDB_LEN;
 	host->can_queue = ioa_cfg->max_cmds;
@@ -9764,25 +9761,6 @@ out_scsi_host_put:
 }
 
 /**
- * ipr_scan_vsets - Scans for VSET devices
- * @ioa_cfg:	ioa config struct
- *
- * Description: Since the VSET resources do not follow SAM in that we can have
- * sparse LUNs with no LUN 0, we have to scan for these ourselves.
- *
- * Return value:
- * 	none
- **/
-static void ipr_scan_vsets(struct ipr_ioa_cfg *ioa_cfg)
-{
-	int target, lun;
-
-	for (target = 0; target < IPR_MAX_NUM_TARGETS_PER_BUS; target++)
-		for (lun = 0; lun < IPR_MAX_NUM_VSET_LUNS_PER_TARGET; lun++)
-			scsi_add_device(ioa_cfg->host, IPR_VSET_BUS, target, lun);
-}
-
-/**
  * ipr_initiate_ioa_bringdown - Bring down an adapter
  * @ioa_cfg:		ioa config struct
  * @shutdown_type:	shutdown type
@@ -9937,10 +9915,6 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 	}
 
 	scsi_scan_host(ioa_cfg->host);
-	ipr_scan_vsets(ioa_cfg);
-	scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN);
-	ioa_cfg->allow_ml_add_del = 1;
-	ioa_cfg->host->max_channel = IPR_VSET_BUS;
 	ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
 
 	if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 9ebdebd..b4f3eec 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -157,13 +157,11 @@
 
 #define IPR_MAX_NUM_TARGETS_PER_BUS			256
 #define IPR_MAX_NUM_LUNS_PER_TARGET			256
-#define IPR_MAX_NUM_VSET_LUNS_PER_TARGET	8
 #define IPR_VSET_BUS					0xff
 #define IPR_IOA_BUS						0xff
 #define IPR_IOA_TARGET					0xff
 #define IPR_IOA_LUN						0xff
 #define IPR_MAX_NUM_BUSES				16
-#define IPR_MAX_BUS_TO_SCAN				IPR_MAX_NUM_BUSES
 
 #define IPR_NUM_RESET_RELOAD_RETRIES		3
 
@@ -1453,7 +1451,7 @@ struct ipr_ioa_cfg {
 	u8 in_ioa_bringdown:1;
 	u8 ioa_unit_checked:1;
 	u8 dump_taken:1;
-	u8 allow_ml_add_del:1;
+	u8 scan_done:1;
 	u8 needs_hard_reset:1;
 	u8 dual_raid:1;
 	u8 needs_warm_reset:1;
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index 724c626..cd41b63 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -158,7 +158,6 @@ static struct scsi_host_template isci_sht = {
 	.scan_finished			= isci_host_scan_finished,
 	.scan_start			= isci_host_start,
 	.change_queue_depth		= sas_change_queue_depth,
-	.change_queue_type		= sas_change_queue_type,
 	.bios_param			= sas_bios_param,
 	.can_queue			= ISCI_CAN_QUEUE_VAL,
 	.cmd_per_lun			= 1,
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 72918d2..519dac4 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -906,13 +906,6 @@ int sas_change_queue_depth(struct scsi_device *sdev, int depth)
 	return scsi_change_queue_depth(sdev, depth);
 }
 
-int sas_change_queue_type(struct scsi_device *scsi_dev, int type)
-{
-	if (dev_is_sata(sdev_to_domain_dev(scsi_dev)))
-		return -EINVAL;
-	return scsi_change_queue_type(scsi_dev, type);
-}
-
 int sas_bios_param(struct scsi_device *scsi_dev,
 			  struct block_device *bdev,
 			  sector_t capacity, int *hsc)
@@ -1011,7 +1004,6 @@ EXPORT_SYMBOL_GPL(sas_queuecommand);
 EXPORT_SYMBOL_GPL(sas_target_alloc);
 EXPORT_SYMBOL_GPL(sas_slave_configure);
 EXPORT_SYMBOL_GPL(sas_change_queue_depth);
-EXPORT_SYMBOL_GPL(sas_change_queue_type);
 EXPORT_SYMBOL_GPL(sas_bios_param);
 EXPORT_SYMBOL_GPL(sas_task_abort);
 EXPORT_SYMBOL_GPL(sas_phy_reset);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index fd85952..4f9222e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -5879,7 +5879,6 @@ struct scsi_host_template lpfc_template = {
 	.max_sectors		= 0xFFFF,
 	.vendor_id		= LPFC_NL_VENDOR_ID,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.use_blk_tags		= 1,
 	.track_queue_depth	= 1,
 };
@@ -5904,7 +5903,6 @@ struct scsi_host_template lpfc_vport_template = {
 	.shost_attrs		= lpfc_vport_attrs,
 	.max_sectors		= 0xFFFF,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.use_blk_tags		= 1,
 	.track_queue_depth	= 1,
 };
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 8431eb1..6a1c036 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -7592,7 +7592,6 @@ static struct scsi_host_template scsih_driver_template = {
 	.scan_finished			= _scsih_scan_finished,
 	.scan_start			= _scsih_scan_start,
 	.change_queue_depth 		= _scsih_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.eh_abort_handler		= _scsih_abort,
 	.eh_device_reset_handler	= _scsih_dev_reset,
 	.eh_target_reset_handler	= _scsih_target_reset,
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 0d1d064..e689bf2 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1006,12 +1006,9 @@ mpt2sas_transport_update_links(struct MPT2SAS_ADAPTER *ioc,
 		    &mpt2sas_phy->remote_identify);
 		_transport_add_phy_to_an_existing_port(ioc, sas_node,
 		    mpt2sas_phy, mpt2sas_phy->remote_identify.sas_address);
-	} else {
+	} else
 		memset(&mpt2sas_phy->remote_identify, 0 , sizeof(struct
 		    sas_identify));
-		_transport_del_phy_from_an_existing_port(ioc, sas_node,
-		    mpt2sas_phy);
-	}
 
 	if (mpt2sas_phy->phy)
 		mpt2sas_phy->phy->negotiated_linkrate =
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index a2b6099..94261ee 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -7229,7 +7229,6 @@ static struct scsi_host_template scsih_driver_template = {
 	.scan_finished			= _scsih_scan_finished,
 	.scan_start			= _scsih_scan_start,
 	.change_queue_depth		= _scsih_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.eh_abort_handler		= _scsih_abort,
 	.eh_device_reset_handler	= _scsih_dev_reset,
 	.eh_target_reset_handler	= _scsih_target_reset,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index d4bafaa..3637ae6 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -1003,12 +1003,9 @@ mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc,
 		    &mpt3sas_phy->remote_identify);
 		_transport_add_phy_to_an_existing_port(ioc, sas_node,
 		    mpt3sas_phy, mpt3sas_phy->remote_identify.sas_address);
-	} else {
+	} else
 		memset(&mpt3sas_phy->remote_identify, 0 , sizeof(struct
 		    sas_identify));
-		_transport_del_phy_from_an_existing_port(ioc, sas_node,
-		    mpt3sas_phy);
-	}
 
 	if (mpt3sas_phy->phy)
 		mpt3sas_phy->phy->negotiated_linkrate =
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index f15df3d..53030b0 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -54,7 +54,6 @@ static struct scsi_host_template mvs_sht = {
 	.scan_finished		= mvs_scan_finished,
 	.scan_start		= mvs_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 329aba0..6555591 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -76,7 +76,6 @@ static struct scsi_host_template pm8001_sht = {
 	.scan_finished		= pm8001_scan_finished,
 	.scan_start		= pm8001_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index b1b1f66..8c27b6a 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -4251,7 +4251,6 @@ static struct scsi_host_template pmcraid_host_template = {
 	.slave_configure = pmcraid_slave_configure,
 	.slave_destroy = pmcraid_slave_destroy,
 	.change_queue_depth = pmcraid_change_queue_depth,
-	.change_queue_type  = scsi_change_queue_type,
 	.can_queue = PMCRAID_MAX_IO_CMD,
 	.this_id = -1,
 	.sg_tablesize = PMCRAID_MAX_IOADLS,
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a4dde7e..e59f25b 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3237,8 +3237,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
 	struct fc_rport *rport;
 	unsigned long flags;
 
-	qla2x00_rport_del(fcport);
-
 	rport_ids.node_name = wwn_to_u64(fcport->node_name);
 	rport_ids.port_name = wwn_to_u64(fcport->port_name);
 	rport_ids.port_id = fcport->d_id.b.domain << 16 |
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 6b4d923..12ca291 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -258,7 +258,6 @@ struct scsi_host_template qla2xxx_driver_template = {
 	.scan_finished		= qla2xxx_scan_finished,
 	.scan_start		= qla2xxx_scan_start,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index a902fa1..5741825 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3218,25 +3218,25 @@ static inline int qlt_get_fcp_task_attr(struct scsi_qla_host *vha,
 
 	switch (task_codes) {
 	case ATIO_SIMPLE_QUEUE:
-		fcp_task_attr = MSG_SIMPLE_TAG;
+		fcp_task_attr = TCM_SIMPLE_TAG;
 		break;
 	case ATIO_HEAD_OF_QUEUE:
-		fcp_task_attr = MSG_HEAD_TAG;
+		fcp_task_attr = TCM_HEAD_TAG;
 		break;
 	case ATIO_ORDERED_QUEUE:
-		fcp_task_attr = MSG_ORDERED_TAG;
+		fcp_task_attr = TCM_ORDERED_TAG;
 		break;
 	case ATIO_ACA_QUEUE:
-		fcp_task_attr = MSG_ACA_TAG;
+		fcp_task_attr = TCM_ACA_TAG;
 		break;
 	case ATIO_UNTAGGED:
-		fcp_task_attr = MSG_SIMPLE_TAG;
+		fcp_task_attr = TCM_SIMPLE_TAG;
 		break;
 	default:
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05d,
 		    "qla_target: unknown task code %x, use ORDERED instead\n",
 		    task_codes);
-		fcp_task_attr = MSG_ORDERED_TAG;
+		fcp_task_attr = TCM_ORDERED_TAG;
 		break;
 	}
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1ad0c36..e028854 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -739,34 +739,12 @@ int scsi_track_queue_full(struct scsi_device *sdev, int depth)
 
 	if (sdev->last_queue_full_count <= 10)
 		return 0;
-	if (sdev->last_queue_full_depth < 8) {
-		/* Drop back to untagged */
-		scsi_set_tag_type(sdev, 0);
-		scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
-		return -1;
-	}
 
 	return scsi_change_queue_depth(sdev, depth);
 }
 EXPORT_SYMBOL(scsi_track_queue_full);
 
 /**
- * scsi_change_queue_type() - Change a device's queue type
- * @sdev:     The SCSI device whose queue depth is to change
- * @tag_type: Identifier for queue type
- */
-int scsi_change_queue_type(struct scsi_device *sdev, int tag_type)
-{
-	if (!sdev->tagged_supported)
-		return 0;
-
-	scsi_set_tag_type(sdev, tag_type);
-	return tag_type;
-
-}
-EXPORT_SYMBOL(scsi_change_queue_type);
-
-/**
  * scsi_vpd_inquiry - Request a device provide us with a VPD page
  * @sdev: The device to ask
  * @buffer: Where to put the result
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index aa4b6b8..7b8b51b 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -128,7 +128,6 @@ static const char *scsi_debug_version_date = "20141022";
 #define DEF_REMOVABLE false
 #define DEF_SCSI_LEVEL   6    /* INQUIRY, byte2 [6->SPC-4] */
 #define DEF_SECTOR_SIZE 512
-#define DEF_TAGGED_QUEUING 0 /* 0 | MSG_SIMPLE_TAG | MSG_ORDERED_TAG */
 #define DEF_UNMAP_ALIGNMENT 0
 #define DEF_UNMAP_GRANULARITY 1
 #define DEF_UNMAP_MAX_BLOCKS 0xFFFFFFFF
@@ -817,6 +816,7 @@ static int check_readiness(struct scsi_cmnd *SCpnt, int uas_only,
 					UA_CHANGED_ASC, CAPACITY_CHANGED_ASCQ);
 			if (debug)
 				cp = "capacity data changed";
+			break;
 		default:
 			pr_warn("%s: unexpected unit attention code=%d\n",
 				__func__, k);
@@ -3045,18 +3045,12 @@ resp_comp_write(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	u8 num;
 	unsigned long iflags;
 	int ret;
+	int retval = 0;
 
-	lba = get_unaligned_be32(cmd + 2);
+	lba = get_unaligned_be64(cmd + 2);
 	num = cmd[13];		/* 1 to a maximum of 255 logical blocks */
 	if (0 == num)
 		return 0;	/* degenerate case, not an error */
-	dnum = 2 * num;
-	arr = kzalloc(dnum * lb_size, GFP_ATOMIC);
-	if (NULL == arr) {
-		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
-				INSUFF_RES_ASCQ);
-		return check_condition_result;
-	}
 	if (scsi_debug_dif == SD_DIF_TYPE2_PROTECTION &&
 	    (cmd[1] & 0xe0)) {
 		mk_sense_invalid_opcode(scp);
@@ -3079,6 +3073,13 @@ resp_comp_write(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
 		return check_condition_result;
 	}
+	dnum = 2 * num;
+	arr = kzalloc(dnum * lb_size, GFP_ATOMIC);
+	if (NULL == arr) {
+		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
+				INSUFF_RES_ASCQ);
+		return check_condition_result;
+	}
 
 	write_lock_irqsave(&atomic_rw, iflags);
 
@@ -3089,24 +3090,24 @@ resp_comp_write(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	ret = do_device_access(scp, 0, dnum, true);
 	fake_storep = fake_storep_hold;
 	if (ret == -1) {
-		write_unlock_irqrestore(&atomic_rw, iflags);
-		kfree(arr);
-		return DID_ERROR << 16;
+		retval = DID_ERROR << 16;
+		goto cleanup;
 	} else if ((ret < (dnum * lb_size)) &&
 		 (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts))
 		sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb "
 			    "indicated=%u, IO sent=%d bytes\n", my_name,
 			    dnum * lb_size, ret);
 	if (!comp_write_worker(lba, num, arr)) {
-		write_unlock_irqrestore(&atomic_rw, iflags);
-		kfree(arr);
 		mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0);
-		return check_condition_result;
+		retval = check_condition_result;
+		goto cleanup;
 	}
 	if (scsi_debug_lbp())
 		map_region(lba, num);
+cleanup:
 	write_unlock_irqrestore(&atomic_rw, iflags);
-	return 0;
+	kfree(arr);
+	return retval;
 }
 
 struct unmap_block_desc {
@@ -4438,6 +4439,7 @@ static ssize_t virtual_gb_store(struct device_driver *ddp, const char *buf,
 			struct sdebug_host_info *sdhp;
 			struct sdebug_dev_info *dp;
 
+			spin_lock(&sdebug_host_list_lock);
 			list_for_each_entry(sdhp, &sdebug_host_list,
 					    host_list) {
 				list_for_each_entry(dp, &sdhp->dev_info_list,
@@ -4446,6 +4448,7 @@ static ssize_t virtual_gb_store(struct device_driver *ddp, const char *buf,
 						dp->uas_bm);
 				}
 			}
+			spin_unlock(&sdebug_host_list_lock);
 		}
 		return count;
 	}
@@ -4988,32 +4991,6 @@ sdebug_change_qdepth(struct scsi_device *sdev, int qdepth)
 }
 
 static int
-sdebug_change_qtype(struct scsi_device *sdev, int qtype)
-{
-	qtype = scsi_change_queue_type(sdev, qtype);
-	if (SCSI_DEBUG_OPT_Q_NOISE & scsi_debug_opts) {
-		const char *cp;
-
-		switch (qtype) {
-		case 0:
-			cp = "untagged";
-			break;
-		case MSG_SIMPLE_TAG:
-			cp = "simple tags";
-			break;
-		case MSG_ORDERED_TAG:
-			cp = "ordered tags";
-			break;
-		default:
-			cp = "unknown";
-			break;
-		}
-		sdev_printk(KERN_INFO, sdev, "%s: to %s\n", __func__, cp);
-	}
-	return qtype;
-}
-
-static int
 check_inject(struct scsi_cmnd *scp)
 {
 	struct sdebug_scmd_extra_t *ep = scsi_cmd_priv(scp);
@@ -5212,7 +5189,6 @@ static struct scsi_host_template sdebug_driver_template = {
 	.ioctl =		scsi_debug_ioctl,
 	.queuecommand =		sdebug_queuecommand_lock_or_not,
 	.change_queue_depth =	sdebug_change_qdepth,
-	.change_queue_type =	sdebug_change_qtype,
 	.eh_abort_handler =	scsi_debug_abort,
 	.eh_device_reset_handler = scsi_debug_device_reset,
 	.eh_target_reset_handler = scsi_debug_target_reset,
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index c1d04d4..262ab83 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -211,6 +211,7 @@ static struct {
 	{"Medion", "Flash XL  MMC/SD", "2.6D", BLIST_FORCELUN},
 	{"MegaRAID", "LD", NULL, BLIST_FORCELUN},
 	{"MICROP", "4110", NULL, BLIST_NOTQ},
+	{"MSFT", "Virtual HD", NULL, BLIST_NO_RSOC},
 	{"MYLEX", "DACARMRB", "*", BLIST_REPORTLUN2},
 	{"nCipher", "Fastness Crypto", NULL, BLIST_FORCELUN},
 	{"NAKAMICH", "MJ-4.8S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 43318d5..9ea95dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1918,7 +1918,9 @@ static int scsi_mq_prep_fn(struct request *req)
 
 	if (scsi_host_get_prot(shost)) {
 		cmd->prot_sdb = (void *)sg +
-			shost->sg_tablesize * sizeof(struct scatterlist);
+			min_t(unsigned int,
+			      shost->sg_tablesize, SCSI_MAX_SG_SEGMENTS) *
+			sizeof(struct scatterlist);
 		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));
 
 		cmd->prot_sdb->table.sgl =
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index 7454498..9e43ae1 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -213,8 +213,6 @@ static int scsi_bus_restore(struct device *dev)
 
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
-
 static int sdev_runtime_suspend(struct device *dev)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
@@ -332,14 +330,6 @@ void scsi_autopm_put_host(struct Scsi_Host *shost)
 	pm_runtime_put_sync(&shost->shost_gendev);
 }
 
-#else
-
-#define scsi_runtime_suspend	NULL
-#define scsi_runtime_resume	NULL
-#define scsi_runtime_idle	NULL
-
-#endif /* CONFIG_PM_RUNTIME */
-
 const struct dev_pm_ops scsi_bus_pm_ops = {
 	.prepare =		scsi_bus_prepare,
 	.suspend =		scsi_bus_suspend,
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 2dc4a83..e3902fc 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -155,8 +155,7 @@ static inline void scsi_netlink_exit(void) {}
 /* scsi_pm.c */
 #ifdef CONFIG_PM
 extern const struct dev_pm_ops scsi_bus_pm_ops;
-#endif
-#ifdef CONFIG_PM_RUNTIME
+
 extern void scsi_autopm_get_target(struct scsi_target *);
 extern void scsi_autopm_put_target(struct scsi_target *);
 extern int scsi_autopm_get_host(struct Scsi_Host *);
@@ -166,7 +165,7 @@ static inline void scsi_autopm_get_target(struct scsi_target *t) {}
 static inline void scsi_autopm_put_target(struct scsi_target *t) {}
 static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
 static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 extern struct async_domain scsi_sd_pm_domain;
 extern struct async_domain scsi_sd_probe_domain;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 1cb64a8..1ac38e7 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -738,30 +738,12 @@ store_queue_type_field(struct device *dev, struct device_attribute *attr,
 		       const char *buf, size_t count)
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
-	struct scsi_host_template *sht = sdev->host->hostt;
-	int tag_type = 0, retval;
-	int prev_tag_type = scsi_get_tag_type(sdev);
-
-	if (!sdev->tagged_supported || !sht->change_queue_type)
-		return -EINVAL;
 
-	/*
-	 * We're never issueing order tags these days, but allow the value
-	 * for backwards compatibility.
-	 */
-	if (strncmp(buf, "ordered", 7) == 0 ||
-	    strncmp(buf, "simple", 6) == 0)
-		tag_type = MSG_SIMPLE_TAG;
-	else if (strncmp(buf, "none", 4) != 0)
+	if (!sdev->tagged_supported)
 		return -EINVAL;
-
-	if (tag_type == prev_tag_type)
-		return count;
-
-	retval = sht->change_queue_type(sdev, tag_type);
-	if (retval < 0)
-		return retval;
-
+		
+	sdev_printk(KERN_INFO, sdev,
+		    "ignoring write to deprecated queue_type attribute");
 	return count;
 }
 
@@ -938,10 +920,6 @@ static umode_t scsi_sdev_attr_is_visible(struct kobject *kobj,
 	    !sdev->host->hostt->change_queue_depth)
 		return 0;
 
-	if (attr == &dev_attr_queue_type.attr &&
-	    !sdev->host->hostt->change_queue_type)
-		return S_IRUGO;
-
 	return attr->mode;
 }
 
diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index fa2aece..31bbb0d 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -1221,7 +1221,7 @@ EXPORT_SYMBOL_GPL(spi_populate_ppr_msg);
 int spi_populate_tag_msg(unsigned char *msg, struct scsi_cmnd *cmd)
 {
         if (cmd->flags & SCMD_TAGGED) {
-		*msg++ = MSG_SIMPLE_TAG;
+		*msg++ = SIMPLE_QUEUE_TAG;
         	*msg++ = cmd->request->tag;
         	return 2;
 	}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index e3ba251..4cff0dd 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1688,13 +1688,12 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	if (ret == -EAGAIN) {
 		/* no more space */
 
-		if (cmd_request->bounce_sgl_count) {
+		if (cmd_request->bounce_sgl_count)
 			destroy_bounce_buffer(cmd_request->bounce_sgl,
 					cmd_request->bounce_sgl_count);
 
-			ret = SCSI_MLQUEUE_DEVICE_BUSY;
-			goto queue_error;
-		}
+		ret = SCSI_MLQUEUE_DEVICE_BUSY;
+		goto queue_error;
 	}
 
 	return 0;
diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 955ed55..d15eaa4 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -62,12 +62,7 @@ static int ufshcd_pci_resume(struct device *dev)
 {
 	return ufshcd_system_resume(dev_get_drvdata(dev));
 }
-#else
-#define ufshcd_pci_suspend	NULL
-#define ufshcd_pci_resume	NULL
-#endif /* CONFIG_PM */
 
-#ifdef CONFIG_PM_RUNTIME
 static int ufshcd_pci_runtime_suspend(struct device *dev)
 {
 	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
@@ -80,11 +75,13 @@ static int ufshcd_pci_runtime_idle(struct device *dev)
 {
 	return ufshcd_runtime_idle(dev_get_drvdata(dev));
 }
-#else /* !CONFIG_PM_RUNTIME */
+#else /* !CONFIG_PM */
+#define ufshcd_pci_suspend	NULL
+#define ufshcd_pci_resume	NULL
 #define ufshcd_pci_runtime_suspend	NULL
 #define ufshcd_pci_runtime_resume	NULL
 #define ufshcd_pci_runtime_idle	NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 /**
  * ufshcd_pci_shutdown - main function to put the controller in reset state
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 0c030ad..7db9564 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -261,12 +261,7 @@ static int ufshcd_pltfrm_resume(struct device *dev)
 {
 	return ufshcd_system_resume(dev_get_drvdata(dev));
 }
-#else
-#define ufshcd_pltfrm_suspend	NULL
-#define ufshcd_pltfrm_resume	NULL
-#endif
 
-#ifdef CONFIG_PM_RUNTIME
 static int ufshcd_pltfrm_runtime_suspend(struct device *dev)
 {
 	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
@@ -279,11 +274,13 @@ static int ufshcd_pltfrm_runtime_idle(struct device *dev)
 {
 	return ufshcd_runtime_idle(dev_get_drvdata(dev));
 }
-#else /* !CONFIG_PM_RUNTIME */
+#else /* !CONFIG_PM */
+#define ufshcd_pltfrm_suspend	NULL
+#define ufshcd_pltfrm_resume	NULL
 #define ufshcd_pltfrm_runtime_suspend	NULL
 #define ufshcd_pltfrm_runtime_resume	NULL
 #define ufshcd_pltfrm_runtime_idle	NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static void ufshcd_pltfrm_shutdown(struct platform_device *pdev)
 {
diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index e2fa628..41b5dc4 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -491,7 +491,7 @@ static int mcfqspi_resume(struct device *dev)
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int mcfqspi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index 43781c9..b410499 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -663,7 +663,7 @@ static int img_spfi_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int img_spfi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -692,7 +692,7 @@ static int img_spfi_runtime_resume(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int img_spfi_suspend(struct device *dev)
diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
index 0e48f8c..1bbac03 100644
--- a/drivers/spi/spi-meson-spifc.c
+++ b/drivers/spi/spi-meson-spifc.c
@@ -413,7 +413,7 @@ static int meson_spifc_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int meson_spifc_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -431,7 +431,7 @@ static int meson_spifc_runtime_resume(struct device *dev)
 
 	return clk_prepare_enable(spifc->clk);
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops meson_spifc_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(meson_spifc_suspend, meson_spifc_resume)
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 932da48..3dec9e0 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -523,7 +523,7 @@ static int orion_spi_remove(struct platform_device *pdev)
 
 MODULE_ALIAS("platform:" DRIVER_NAME);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int orion_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 2a41b2d..05c623c 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1531,7 +1531,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int pxa2xx_spi_runtime_suspend(struct device *dev)
 {
 	struct driver_data *drv_data = dev_get_drvdata(dev);
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 390ed71..e7fb5a0 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -646,7 +646,7 @@ error:
 	return ret;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int spi_qup_pm_suspend_runtime(struct device *device)
 {
 	struct spi_master *master = dev_get_drvdata(device);
@@ -672,7 +672,7 @@ static int spi_qup_pm_resume_runtime(struct device *device)
 	writel_relaxed(config, controller->base + QUP_CONFIG);
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int spi_qup_suspend(struct device *device)
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 44c1225..daabbab 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -799,7 +799,7 @@ static int rockchip_spi_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int rockchip_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -827,7 +827,7 @@ static int rockchip_spi_runtime_resume(struct device *dev)
 
 	return ret;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops rockchip_spi_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 197bcf0..37b1983 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -1267,7 +1267,7 @@ static int s3c64xx_spi_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int s3c64xx_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -1297,7 +1297,7 @@ static int s3c64xx_spi_runtime_resume(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops s3c64xx_spi_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(s3c64xx_spi_suspend, s3c64xx_spi_resume)
diff --git a/drivers/staging/gdm72xx/Kconfig b/drivers/staging/gdm72xx/Kconfig
index 5836503..bf11a7f 100644
--- a/drivers/staging/gdm72xx/Kconfig
+++ b/drivers/staging/gdm72xx/Kconfig
@@ -53,7 +53,7 @@ if WIMAX_GDM72XX_USB
 
 config WIMAX_GDM72XX_USB_PM
 	bool "Enable power management support"
-	depends on PM_RUNTIME
+	depends on PM
 	help
 	  Enable USB power management in order to reduce power consumption
 	  while the interface is not in use.
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 8156b4c..3925db1 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
@@ -42,28 +42,6 @@
 
 #include "lustre_patchless_compat.h"
 
-# define LOCK_FS_STRUCT(fs)	spin_lock(&(fs)->lock)
-# define UNLOCK_FS_STRUCT(fs)	spin_unlock(&(fs)->lock)
-
-static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-				 struct dentry *dentry)
-{
-	struct path path;
-	struct path old_pwd;
-
-	path.mnt = mnt;
-	path.dentry = dentry;
-	LOCK_FS_STRUCT(fs);
-	old_pwd = fs->pwd;
-	path_get(&path);
-	fs->pwd = path;
-	UNLOCK_FS_STRUCT(fs);
-
-	if (old_pwd.dentry)
-		path_put(&old_pwd);
-}
-
-
 /*
  * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
  * ATTR_* attributes (see bug 13828)
@@ -110,8 +88,6 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
 #define cfs_bio_io_error(a, b)   bio_io_error((a))
 #define cfs_bio_endio(a, b, c)    bio_endio((a), (c))
 
-#define cfs_fs_pwd(fs)       ((fs)->pwd.dentry)
-#define cfs_fs_mnt(fs)       ((fs)->pwd.mnt)
 #define cfs_path_put(nd)     path_put(&(nd)->path)
 
 
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 407718a..1ac7a70 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -661,7 +661,7 @@ int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
 	int mode;
 	int err;
 
-	mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
+	mode = (0755 & ~current_umask()) | S_IFDIR;
 	op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
 				     strlen(filename), mode, LUSTRE_OPC_MKDIR,
 				     lump);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 6e423aa..a3367bf 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -2372,21 +2372,6 @@ char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
 	return buf;
 }
 
-static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize)
-{
-	char *path = NULL;
-
-	struct path p;
-
-	p.dentry = dentry;
-	p.mnt = current->fs->root.mnt;
-	path_get(&p);
-	path = d_path(&p, buf, bufsize);
-	path_put(&p);
-
-	return path;
-}
-
 void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
 	char *buf, *path = NULL;
@@ -2398,7 +2383,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 	if (buf != NULL) {
 		dentry = d_find_alias(page->mapping->host);
 		if (dentry != NULL)
-			path = ll_d_path(dentry, buf, PAGE_SIZE);
+			path = dentry_path_raw(dentry, buf, PAGE_SIZE);
 	}
 
 	CDEBUG(D_WARNING,
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index 96498b7..2a054a9 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -27,12 +27,18 @@ source "drivers/staging/media/davinci_vpfe/Kconfig"
 
 source "drivers/staging/media/dt3155v4l/Kconfig"
 
+source "drivers/staging/media/tlg2300/Kconfig"
+
 source "drivers/staging/media/mn88472/Kconfig"
 
 source "drivers/staging/media/mn88473/Kconfig"
 
 source "drivers/staging/media/omap4iss/Kconfig"
 
+source "drivers/staging/media/parport/Kconfig"
+
+source "drivers/staging/media/vino/Kconfig"
+
 # Keep LIRC at the end, as it has sub-menus
 source "drivers/staging/media/lirc/Kconfig"
 
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index 30fb352..412b284 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -6,4 +6,7 @@ obj-$(CONFIG_VIDEO_DM365_VPFE)	+= davinci_vpfe/
 obj-$(CONFIG_VIDEO_OMAP4)	+= omap4iss/
 obj-$(CONFIG_DVB_MN88472)       += mn88472/
 obj-$(CONFIG_DVB_MN88473)       += mn88473/
+obj-y				+= parport/
+obj-$(CONFIG_VIDEO_TLG2300)	+= tlg2300/
+obj-y                           += vino/
 
diff --git a/drivers/media/parport/Kconfig b/drivers/staging/media/parport/Kconfig
index 948c981..15974ef 100644
--- a/drivers/media/parport/Kconfig
+++ b/drivers/staging/media/parport/Kconfig
@@ -7,18 +7,22 @@ menuconfig MEDIA_PARPORT_SUPPORT
 
 if MEDIA_PARPORT_SUPPORT
 config VIDEO_BWQCAM
-	tristate "Quickcam BW Video For Linux"
+	tristate "Quickcam BW Video For Linux (Deprecated)"
 	depends on PARPORT && VIDEO_V4L2
 	select VIDEOBUF2_VMALLOC
 	help
 	  Say Y have if you the black and white version of the QuickCam
 	  camera. See the next option for the color version.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called bw-qcam.
 
 config VIDEO_CQCAM
-	tristate "QuickCam Colour Video For Linux"
+	tristate "QuickCam Colour Video For Linux (Deprecated)"
 	depends on PARPORT && VIDEO_V4L2
 	help
 	  This is the video4linux driver for the colour version of the
@@ -28,18 +32,26 @@ config VIDEO_CQCAM
 	  as a module (c-qcam).
 	  Read <file:Documentation/video4linux/CQcam.txt> for more information.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
 config VIDEO_PMS
-	tristate "Mediavision Pro Movie Studio Video For Linux"
+	tristate "Mediavision Pro Movie Studio Video For Linux (Deprecated)"
 	depends on ISA && VIDEO_V4L2
 	help
 	  Say Y if you have the ISA Mediavision Pro Movie Studio
 	  capture card.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called pms.
 
 config VIDEO_W9966
-	tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux"
+	tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux (Deprecated)"
 	depends on PARPORT_1284 && PARPORT && VIDEO_V4L2
 	help
 	  Video4linux driver for Winbond's w9966 based Webcams.
@@ -50,4 +62,8 @@ config VIDEO_W9966
 
 	  Check out <file:Documentation/video4linux/w9966.txt> for more
 	  information.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
 endif
diff --git a/drivers/media/parport/Makefile b/drivers/staging/media/parport/Makefile
index 4eea06d..4eea06d 100644
--- a/drivers/media/parport/Makefile
+++ b/drivers/staging/media/parport/Makefile
diff --git a/drivers/media/parport/bw-qcam.c b/drivers/staging/media/parport/bw-qcam.c
index 67b9da1..67b9da1 100644
--- a/drivers/media/parport/bw-qcam.c
+++ b/drivers/staging/media/parport/bw-qcam.c
diff --git a/drivers/media/parport/c-qcam.c b/drivers/staging/media/parport/c-qcam.c
index b9010bd..b9010bd 100644
--- a/drivers/media/parport/c-qcam.c
+++ b/drivers/staging/media/parport/c-qcam.c
diff --git a/drivers/media/parport/pms.c b/drivers/staging/media/parport/pms.c
index e6b4975..e6b4975 100644
--- a/drivers/media/parport/pms.c
+++ b/drivers/staging/media/parport/pms.c
diff --git a/drivers/media/parport/w9966.c b/drivers/staging/media/parport/w9966.c
index f7502f3..f7502f3 100644
--- a/drivers/media/parport/w9966.c
+++ b/drivers/staging/media/parport/w9966.c
diff --git a/drivers/media/usb/tlg2300/Kconfig b/drivers/staging/media/tlg2300/Kconfig
index 645d915..81784c6 100644
--- a/drivers/media/usb/tlg2300/Kconfig
+++ b/drivers/staging/media/tlg2300/Kconfig
@@ -1,5 +1,5 @@
 config VIDEO_TLG2300
-	tristate "Telegent TLG2300 USB video capture support"
+	tristate "Telegent TLG2300 USB video capture support (Deprecated)"
 	depends on VIDEO_DEV && I2C && SND && DVB_CORE
 	select VIDEO_TUNER
 	select VIDEO_TVEEPROM
@@ -12,5 +12,9 @@ config VIDEO_TLG2300
 	  This is a video4linux driver for Telegent tlg2300 based TV cards.
 	  The driver supports V4L2, DVB-T and radio.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called poseidon
diff --git a/drivers/media/usb/tlg2300/Makefile b/drivers/staging/media/tlg2300/Makefile
index 137f8e3..137f8e3 100644
--- a/drivers/media/usb/tlg2300/Makefile
+++ b/drivers/staging/media/tlg2300/Makefile
diff --git a/drivers/media/usb/tlg2300/pd-alsa.c b/drivers/staging/media/tlg2300/pd-alsa.c
index dd8fe10..dd8fe10 100644
--- a/drivers/media/usb/tlg2300/pd-alsa.c
+++ b/drivers/staging/media/tlg2300/pd-alsa.c
diff --git a/drivers/media/usb/tlg2300/pd-common.h b/drivers/staging/media/tlg2300/pd-common.h
index 9e23ad32..9e23ad32 100644
--- a/drivers/media/usb/tlg2300/pd-common.h
+++ b/drivers/staging/media/tlg2300/pd-common.h
diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/staging/media/tlg2300/pd-dvb.c
index ca4994a..ca4994a 100644
--- a/drivers/media/usb/tlg2300/pd-dvb.c
+++ b/drivers/staging/media/tlg2300/pd-dvb.c
diff --git a/drivers/media/usb/tlg2300/pd-main.c b/drivers/staging/media/tlg2300/pd-main.c
index b31f479..b31f479 100644
--- a/drivers/media/usb/tlg2300/pd-main.c
+++ b/drivers/staging/media/tlg2300/pd-main.c
diff --git a/drivers/media/usb/tlg2300/pd-radio.c b/drivers/staging/media/tlg2300/pd-radio.c
index b391194..b391194 100644
--- a/drivers/media/usb/tlg2300/pd-radio.c
+++ b/drivers/staging/media/tlg2300/pd-radio.c
diff --git a/drivers/media/usb/tlg2300/pd-video.c b/drivers/staging/media/tlg2300/pd-video.c
index 8cd7f02..8cd7f02 100644
--- a/drivers/media/usb/tlg2300/pd-video.c
+++ b/drivers/staging/media/tlg2300/pd-video.c
diff --git a/drivers/media/usb/tlg2300/vendorcmds.h b/drivers/staging/media/tlg2300/vendorcmds.h
index ba6f4ae..ba6f4ae 100644
--- a/drivers/media/usb/tlg2300/vendorcmds.h
+++ b/drivers/staging/media/tlg2300/vendorcmds.h
diff --git a/drivers/staging/media/vino/Kconfig b/drivers/staging/media/vino/Kconfig
new file mode 100644
index 0000000..03700da
--- /dev/null
+++ b/drivers/staging/media/vino/Kconfig
@@ -0,0 +1,24 @@
+config VIDEO_VINO
+	tristate "SGI Vino Video For Linux (Deprecated)"
+	depends on I2C && SGI_IP22 && VIDEO_V4L2
+	select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
+	help
+	  Say Y here to build in support for the Vino video input system found
+	  on SGI Indy machines.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
+config VIDEO_SAA7191
+	tristate "Philips SAA7191 video decoder (Deprecated)"
+	depends on VIDEO_V4L2 && I2C
+	---help---
+	  Support for the Philips SAA7191 video decoder.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailing list.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called saa7191.
diff --git a/drivers/staging/media/vino/Makefile b/drivers/staging/media/vino/Makefile
new file mode 100644
index 0000000..914c251
--- /dev/null
+++ b/drivers/staging/media/vino/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_VIDEO_VINO) += indycam.o
+obj-$(CONFIG_VIDEO_VINO) += vino.o
+obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
diff --git a/drivers/media/platform/indycam.c b/drivers/staging/media/vino/indycam.c
index f1d192b..f1d192b 100644
--- a/drivers/media/platform/indycam.c
+++ b/drivers/staging/media/vino/indycam.c
diff --git a/drivers/media/platform/indycam.h b/drivers/staging/media/vino/indycam.h
index 881f21c..881f21c 100644
--- a/drivers/media/platform/indycam.h
+++ b/drivers/staging/media/vino/indycam.h
diff --git a/drivers/media/i2c/saa7191.c b/drivers/staging/media/vino/saa7191.c
index 8e96992..8e96992 100644
--- a/drivers/media/i2c/saa7191.c
+++ b/drivers/staging/media/vino/saa7191.c
diff --git a/drivers/media/i2c/saa7191.h b/drivers/staging/media/vino/saa7191.h
index 803c74d..803c74d 100644
--- a/drivers/media/i2c/saa7191.h
+++ b/drivers/staging/media/vino/saa7191.h
diff --git a/drivers/media/platform/vino.c b/drivers/staging/media/vino/vino.c
index 2c85357..2c85357 100644
--- a/drivers/media/platform/vino.c
+++ b/drivers/staging/media/vino/vino.c
diff --git a/drivers/media/platform/vino.h b/drivers/staging/media/vino/vino.h
index de2d615..de2d615 100644
--- a/drivers/media/platform/vino.h
+++ b/drivers/staging/media/vino/vino.h
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 73e58d2..55f6774 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -609,6 +609,7 @@ static int __init iscsi_target_init_module(void)
 
 	return ret;
 r2t_out:
+	iscsit_unregister_transport(&iscsi_target_transport);
 	kmem_cache_destroy(lio_r2t_cache);
 ooo_out:
 	kmem_cache_destroy(lio_ooo_cache);
@@ -943,17 +944,17 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	 */
 	if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) ||
 	    (iscsi_task_attr == ISCSI_ATTR_SIMPLE))
-		sam_task_attr = MSG_SIMPLE_TAG;
+		sam_task_attr = TCM_SIMPLE_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_ORDERED)
-		sam_task_attr = MSG_ORDERED_TAG;
+		sam_task_attr = TCM_ORDERED_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE)
-		sam_task_attr = MSG_HEAD_TAG;
+		sam_task_attr = TCM_HEAD_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_ACA)
-		sam_task_attr = MSG_ACA_TAG;
+		sam_task_attr = TCM_ACA_TAG;
 	else {
 		pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using"
-			" MSG_SIMPLE_TAG\n", iscsi_task_attr);
-		sam_task_attr = MSG_SIMPLE_TAG;
+			" TCM_SIMPLE_TAG\n", iscsi_task_attr);
+		sam_task_attr = TCM_SIMPLE_TAG;
 	}
 
 	cmd->iscsi_opcode	= ISCSI_OP_SCSI_CMD;
@@ -1811,7 +1812,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		transport_init_se_cmd(&cmd->se_cmd,
 				      &lio_target_fabric_configfs->tf_ops,
 				      conn->sess->se_sess, 0, DMA_NONE,
-				      MSG_SIMPLE_TAG, cmd->sense_buffer + 2);
+				      TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
 
 		target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true);
 		sess_ref = true;
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
index 302eb3b..09a522b 100644
--- a/drivers/target/iscsi/iscsi_target_core.h
+++ b/drivers/target/iscsi/iscsi_target_core.h
@@ -790,7 +790,6 @@ struct iscsi_np {
 	void			*np_context;
 	struct iscsit_transport *np_transport;
 	struct list_head	np_list;
-	struct iscsi_tpg_np	*tpg_np;
 } ____cacheline_aligned;
 
 struct iscsi_tpg_np {
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 480f2e0..713c0c1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -281,7 +281,6 @@ static int iscsi_login_zero_tsih_s1(
 {
 	struct iscsi_session *sess = NULL;
 	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
-	enum target_prot_op sup_pro_ops;
 	int ret;
 
 	sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
@@ -343,9 +342,8 @@ static int iscsi_login_zero_tsih_s1(
 		kfree(sess);
 		return -ENOMEM;
 	}
-	sup_pro_ops = conn->conn_transport->iscsit_get_sup_prot_ops(conn);
 
-	sess->se_sess = transport_init_session(sup_pro_ops);
+	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(sess->se_sess)) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
@@ -1161,6 +1159,7 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn,
 	}
 	kfree(conn->sess->sess_ops);
 	kfree(conn->sess);
+	conn->sess = NULL;
 
 old_sess_out:
 	iscsi_stop_login_thread_timer(np);
@@ -1204,6 +1203,9 @@ old_sess_out:
 		conn->sock = NULL;
 	}
 
+	if (conn->conn_transport->iscsit_wait_conn)
+		conn->conn_transport->iscsit_wait_conn(conn);
+
 	if (conn->conn_transport->iscsit_free_conn)
 		conn->conn_transport->iscsit_free_conn(conn);
 
@@ -1364,6 +1366,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 	}
 	login->zero_tsih = zero_tsih;
 
+	conn->sess->se_sess->sup_prot_ops =
+		conn->conn_transport->iscsit_get_sup_prot_ops(conn);
+
 	tpg = conn->tpg;
 	if (!tpg) {
 		pr_err("Unable to locate struct iscsi_conn->tpg\n");
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index c3cb5c1..9053a3c 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -501,7 +501,6 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
 	init_completion(&tpg_np->tpg_np_comp);
 	kref_init(&tpg_np->tpg_np_kref);
 	tpg_np->tpg_np		= np;
-	np->tpg_np		= tpg_np;
 	tpg_np->tpg		= tpg;
 
 	spin_lock(&tpg->tpg_np_lock);
diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c
index 882728f..08217d6 100644
--- a/drivers/target/iscsi/iscsi_target_transport.c
+++ b/drivers/target/iscsi/iscsi_target_transport.c
@@ -26,8 +26,7 @@ struct iscsit_transport *iscsit_get_transport(int type)
 
 void iscsit_put_transport(struct iscsit_transport *t)
 {
-	if (t->owner)
-		module_put(t->owner);
+	module_put(t->owner);
 }
 
 int iscsit_register_transport(struct iscsit_transport *t)
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7c6a95b..bcd88ec 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1356,15 +1356,15 @@ static int iscsit_do_tx_data(
 	struct iscsi_conn *conn,
 	struct iscsi_data_count *count)
 {
-	int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+	int ret, iov_len;
 	struct kvec *iov_p;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
 		return -1;
 
-	if (data <= 0) {
-		pr_err("Data length is: %d\n", data);
+	if (count->data_length <= 0) {
+		pr_err("Data length is: %d\n", count->data_length);
 		return -1;
 	}
 
@@ -1373,20 +1373,16 @@ static int iscsit_do_tx_data(
 	iov_p = count->iov;
 	iov_len = count->iov_count;
 
-	while (total_tx < data) {
-		tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
-					(data - total_tx));
-		if (tx_loop <= 0) {
-			pr_debug("tx_loop: %d total_tx %d\n",
-				tx_loop, total_tx);
-			return tx_loop;
-		}
-		total_tx += tx_loop;
-		pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
-					tx_loop, total_tx, data);
+	ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+			     count->data_length);
+	if (ret != count->data_length) {
+		pr_err("Unexpected ret: %d send data %d\n",
+		       ret, count->data_length);
+		return -EPIPE;
 	}
+	pr_debug("ret: %d, sent data: %d\n", ret, count->data_length);
 
-	return total_tx;
+	return ret;
 }
 
 int rx_data(
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 4d1b722..6b3c329 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -138,7 +138,7 @@ static void tcm_loop_submission_work(struct work_struct *work)
 		set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
 		goto out_done;
 	}
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus) {
 		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
 				" does not exist\n");
@@ -168,7 +168,7 @@ static void tcm_loop_submission_work(struct work_struct *work)
 
 	rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
 			&tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
-			transfer_length, MSG_SIMPLE_TAG,
+			transfer_length, TCM_SIMPLE_TAG,
 			sc->sc_data_direction, 0,
 			scsi_sglist(sc), scsi_sg_count(sc),
 			sgl_bidi, sgl_bidi_count,
@@ -218,16 +218,26 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
  * to struct scsi_device
  */
 static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
-			      struct tcm_loop_nexus *tl_nexus,
 			      int lun, int task, enum tcm_tmreq_table tmr)
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
 	struct se_portal_group *se_tpg;
+	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
 	struct tcm_loop_tmr *tl_tmr = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
 
+	/*
+	 * Locate the tl_nexus pointer; without an I_T Nexus the TMR fails
+	 */
+	tl_nexus = tl_tpg->tl_nexus;
+	if (!tl_nexus) {
+		pr_err("Unable to perform device reset without"
+				" active I_T Nexus\n");
+		return ret;
+	}
+
 	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL);
 	if (!tl_cmd) {
 		pr_err("Unable to allocate memory for tl_cmd\n");
@@ -243,12 +253,12 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
-	se_sess = tl_nexus->se_sess;
+	se_sess = tl_tpg->tl_nexus->se_sess;
 	/*
 	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
 	 */
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
-				DMA_NONE, MSG_SIMPLE_TAG,
+				DMA_NONE, TCM_SIMPLE_TAG,
 				&tl_cmd->tl_sense_buf[0]);
 
 	rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL);
@@ -288,7 +298,6 @@ release:
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -296,21 +305,8 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 sc->request->tag, TMR_ABORT_TASK);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -322,7 +318,6 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 static int tcm_loop_device_reset(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -330,20 +325,9 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc)
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 0, TMR_LUN_RESET);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -385,7 +369,6 @@ static struct scsi_host_template tcm_loop_driver_template = {
 	.name			= "TCM_Loopback",
 	.queuecommand		= tcm_loop_queuecommand,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.eh_abort_handler = tcm_loop_abort_task,
 	.eh_device_reset_handler = tcm_loop_device_reset,
 	.eh_target_reset_handler = tcm_loop_target_reset,
@@ -940,8 +923,8 @@ static int tcm_loop_make_nexus(
 	struct tcm_loop_nexus *tl_nexus;
 	int ret = -ENOMEM;
 
-	if (tl_tpg->tl_hba->tl_nexus) {
-		pr_debug("tl_tpg->tl_hba->tl_nexus already exists\n");
+	if (tl_tpg->tl_nexus) {
+		pr_debug("tl_tpg->tl_nexus already exists\n");
 		return -EEXIST;
 	}
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -976,7 +959,7 @@ static int tcm_loop_make_nexus(
 	 */
 	__transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
 			tl_nexus->se_sess, tl_nexus);
-	tl_tpg->tl_hba->tl_nexus = tl_nexus;
+	tl_tpg->tl_nexus = tl_nexus;
 	pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
 		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
 		name);
@@ -992,12 +975,8 @@ static int tcm_loop_drop_nexus(
 {
 	struct se_session *se_sess;
 	struct tcm_loop_nexus *tl_nexus;
-	struct tcm_loop_hba *tl_hba = tpg->tl_hba;
 
-	if (!tl_hba)
-		return -ENODEV;
-
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 
@@ -1013,13 +992,13 @@ static int tcm_loop_drop_nexus(
 	}
 
 	pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated"
-		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
+		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba),
 		tl_nexus->se_sess->se_node_acl->initiatorname);
 	/*
 	 * Release the SCSI I_T Nexus to the emulated SAS Target Port
 	 */
 	transport_deregister_session(tl_nexus->se_sess);
-	tpg->tl_hba->tl_nexus = NULL;
+	tpg->tl_nexus = NULL;
 	kfree(tl_nexus);
 	return 0;
 }
@@ -1035,7 +1014,7 @@ static ssize_t tcm_loop_tpg_show_nexus(
 	struct tcm_loop_nexus *tl_nexus;
 	ssize_t ret;
 
-	tl_nexus = tl_tpg->tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index 54c59d0..6ae49f2 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -27,11 +27,6 @@ struct tcm_loop_tmr {
 };
 
 struct tcm_loop_nexus {
-	int it_nexus_active;
-	/*
-	 * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h
-	 */
-	struct scsi_host *sh;
 	/*
 	 * Pointer to TCM session for I_T Nexus
 	 */
@@ -51,6 +46,7 @@ struct tcm_loop_tpg {
 	atomic_t tl_tpg_port_count;
 	struct se_portal_group tl_se_tpg;
 	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_nexus *tl_nexus;
 };
 
 struct tcm_loop_hba {
@@ -59,7 +55,6 @@ struct tcm_loop_hba {
 	struct se_hba_s *se_hba;
 	struct se_lun *tl_hba_lun;
 	struct se_port *tl_hba_lun_sep;
-	struct tcm_loop_nexus *tl_nexus;
 	struct device dev;
 	struct Scsi_Host *sh;
 	struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA];
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index e7e9372..9512af6 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1237,7 +1237,7 @@ static void sbp_handle_command(struct sbp_target_request *req)
 
 	if (target_submit_cmd(&req->se_cmd, sess->se_sess, req->cmd_buf,
 			      req->sense_buf, unpacked_lun, data_length,
-			      MSG_SIMPLE_TAG, data_dir, 0))
+			      TCM_SIMPLE_TAG, data_dir, 0))
 		goto err;
 
 	return;
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 79f9296..75d89ad 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -50,6 +50,19 @@
 #include "target_core_rd.h"
 #include "target_core_xcopy.h"
 
+#define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs)		\
+static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \
+{									\
+	struct target_backend_cits *tbc = &sa->tb_cits;			\
+	struct config_item_type *cit = &tbc->tb_##_name##_cit;		\
+									\
+	cit->ct_item_ops = _item_ops;					\
+	cit->ct_group_ops = _group_ops;					\
+	cit->ct_attrs = _attrs;						\
+	cit->ct_owner = sa->owner;					\
+	pr_debug("Setup generic %s\n", __stringify(_name));		\
+}
+
 extern struct t10_alua_lu_gp *default_lu_gp;
 
 static LIST_HEAD(g_tf_list);
@@ -126,48 +139,57 @@ static struct config_group *target_core_register_fabric(
 
 	pr_debug("Target_Core_ConfigFS: REGISTER -> group: %p name:"
 			" %s\n", group, name);
-	/*
-	 * Below are some hardcoded request_module() calls to automatically
-	 * local fabric modules when the following is called:
-	 *
-	 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
-	 *
-	 * Note that this does not limit which TCM fabric module can be
-	 * registered, but simply provids auto loading logic for modules with
-	 * mkdir(2) system calls with known TCM fabric modules.
-	 */
-	if (!strncmp(name, "iscsi", 5)) {
+
+	tf = target_core_get_fabric(name);
+	if (!tf) {
+		pr_err("target_core_register_fabric() trying autoload for %s\n",
+			name);
+
 		/*
-		 * Automatically load the LIO Target fabric module when the
-		 * following is called:
+		 * Below are some hardcoded request_module() calls to automatically
+		 * load fabric modules when the following is called:
 		 *
-		 * mkdir -p $CONFIGFS/target/iscsi
-		 */
-		ret = request_module("iscsi_target_mod");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" iscsi_target_mod.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
-	} else if (!strncmp(name, "loopback", 8)) {
-		/*
-		 * Automatically load the tcm_loop fabric module when the
-		 * following is called:
+		 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
 		 *
-		 * mkdir -p $CONFIGFS/target/loopback
+		 * Note that this does not limit which TCM fabric module can be
+		 * registered, but simply provides auto loading logic for modules with
+		 * mkdir(2) system calls with known TCM fabric modules.
 		 */
-		ret = request_module("tcm_loop");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" tcm_loop.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
+
+		if (!strncmp(name, "iscsi", 5)) {
+			/*
+			 * Automatically load the LIO Target fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/iscsi
+			 */
+			ret = request_module("iscsi_target_mod");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " iscsi_target_mod.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
+		} else if (!strncmp(name, "loopback", 8)) {
+			/*
+			 * Automatically load the tcm_loop fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/loopback
+			 */
+			ret = request_module("tcm_loop");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " tcm_loop.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
 		}
+
+		tf = target_core_get_fabric(name);
 	}
 
-	tf = target_core_get_fabric(name);
 	if (!tf) {
 		pr_err("target_core_get_fabric() failed for %s\n",
-			name);
+		       name);
 		return ERR_PTR(-EINVAL);
 	}
 	pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
@@ -562,198 +584,21 @@ EXPORT_SYMBOL(target_fabric_configfs_deregister);
 // Stop functions called by external Target Fabrics Modules
 //############################################################################*/
 
-/* Start functions for struct config_item_type target_core_dev_attrib_cit */
-
-#define DEF_DEV_ATTRIB_SHOW(_name)					\
-static ssize_t target_core_dev_show_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	char *page)							\
-{									\
-	return snprintf(page, PAGE_SIZE, "%u\n",			\
-		(u32)da->da_dev->dev_attrib._name);			\
-}
-
-#define DEF_DEV_ATTRIB_STORE(_name)					\
-static ssize_t target_core_dev_store_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	const char *page,						\
-	size_t count)							\
-{									\
-	unsigned long val;						\
-	int ret;							\
-									\
-	ret = kstrtoul(page, 0, &val);				\
-	if (ret < 0) {							\
-		pr_err("kstrtoul() failed with"		\
-			" ret: %d\n", ret);				\
-		return -EINVAL;						\
-	}								\
-	ret = se_dev_set_##_name(da->da_dev, (u32)val);			\
-									\
-	return (!ret) ? count : -EINVAL;				\
-}
-
-#define DEF_DEV_ATTRIB(_name)						\
-DEF_DEV_ATTRIB_SHOW(_name);						\
-DEF_DEV_ATTRIB_STORE(_name);
-
-#define DEF_DEV_ATTRIB_RO(_name)					\
-DEF_DEV_ATTRIB_SHOW(_name);
+/* Start functions for struct config_item_type tb_dev_attrib_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib);
-#define SE_DEV_ATTR(_name, _mode)					\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-		__CONFIGFS_EATTR(_name, _mode,				\
-		target_core_dev_show_attr_##_name,			\
-		target_core_dev_store_attr_##_name);
-
-#define SE_DEV_ATTR_RO(_name);						\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-	__CONFIGFS_EATTR_RO(_name,					\
-	target_core_dev_show_attr_##_name);
-
-DEF_DEV_ATTRIB(emulate_model_alias);
-SE_DEV_ATTR(emulate_model_alias, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_dpo);
-SE_DEV_ATTR(emulate_dpo, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_write);
-SE_DEV_ATTR(emulate_fua_write, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_read);
-SE_DEV_ATTR(emulate_fua_read, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_write_cache);
-SE_DEV_ATTR(emulate_write_cache, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_ua_intlck_ctrl);
-SE_DEV_ATTR(emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tas);
-SE_DEV_ATTR(emulate_tas, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpu);
-SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpws);
-SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_caw);
-SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_3pc);
-SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(pi_prot_type);
-SE_DEV_ATTR(pi_prot_type, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_pi_prot_type);
-SE_DEV_ATTR_RO(hw_pi_prot_type);
-
-DEF_DEV_ATTRIB(pi_prot_format);
-SE_DEV_ATTR(pi_prot_format, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(enforce_pr_isids);
-SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(is_nonrot);
-SE_DEV_ATTR(is_nonrot, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_rest_reord);
-SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(force_pr_aptpl);
-SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_block_size);
-SE_DEV_ATTR_RO(hw_block_size);
-
-DEF_DEV_ATTRIB(block_size);
-SE_DEV_ATTR(block_size, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_max_sectors);
-SE_DEV_ATTR_RO(hw_max_sectors);
-
-DEF_DEV_ATTRIB(fabric_max_sectors);
-SE_DEV_ATTR(fabric_max_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(optimal_sectors);
-SE_DEV_ATTR(optimal_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_queue_depth);
-SE_DEV_ATTR_RO(hw_queue_depth);
-
-DEF_DEV_ATTRIB(queue_depth);
-SE_DEV_ATTR(queue_depth, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_lba_count);
-SE_DEV_ATTR(max_unmap_lba_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_block_desc_count);
-SE_DEV_ATTR(max_unmap_block_desc_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity);
-SE_DEV_ATTR(unmap_granularity, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity_alignment);
-SE_DEV_ATTR(unmap_granularity_alignment, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_write_same_len);
-SE_DEV_ATTR(max_write_same_len, S_IRUGO | S_IWUSR);
-
 CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group);
 
-static struct configfs_attribute *target_core_dev_attrib_attrs[] = {
-	&target_core_dev_attrib_emulate_model_alias.attr,
-	&target_core_dev_attrib_emulate_dpo.attr,
-	&target_core_dev_attrib_emulate_fua_write.attr,
-	&target_core_dev_attrib_emulate_fua_read.attr,
-	&target_core_dev_attrib_emulate_write_cache.attr,
-	&target_core_dev_attrib_emulate_ua_intlck_ctrl.attr,
-	&target_core_dev_attrib_emulate_tas.attr,
-	&target_core_dev_attrib_emulate_tpu.attr,
-	&target_core_dev_attrib_emulate_tpws.attr,
-	&target_core_dev_attrib_emulate_caw.attr,
-	&target_core_dev_attrib_emulate_3pc.attr,
-	&target_core_dev_attrib_pi_prot_type.attr,
-	&target_core_dev_attrib_hw_pi_prot_type.attr,
-	&target_core_dev_attrib_pi_prot_format.attr,
-	&target_core_dev_attrib_enforce_pr_isids.attr,
-	&target_core_dev_attrib_force_pr_aptpl.attr,
-	&target_core_dev_attrib_is_nonrot.attr,
-	&target_core_dev_attrib_emulate_rest_reord.attr,
-	&target_core_dev_attrib_hw_block_size.attr,
-	&target_core_dev_attrib_block_size.attr,
-	&target_core_dev_attrib_hw_max_sectors.attr,
-	&target_core_dev_attrib_fabric_max_sectors.attr,
-	&target_core_dev_attrib_optimal_sectors.attr,
-	&target_core_dev_attrib_hw_queue_depth.attr,
-	&target_core_dev_attrib_queue_depth.attr,
-	&target_core_dev_attrib_max_unmap_lba_count.attr,
-	&target_core_dev_attrib_max_unmap_block_desc_count.attr,
-	&target_core_dev_attrib_unmap_granularity.attr,
-	&target_core_dev_attrib_unmap_granularity_alignment.attr,
-	&target_core_dev_attrib_max_write_same_len.attr,
-	NULL,
-};
-
 static struct configfs_item_operations target_core_dev_attrib_ops = {
 	.show_attribute		= target_core_dev_attrib_attr_show,
 	.store_attribute	= target_core_dev_attrib_attr_store,
 };
 
-static struct config_item_type target_core_dev_attrib_cit = {
-	.ct_item_ops		= &target_core_dev_attrib_ops,
-	.ct_attrs		= target_core_dev_attrib_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL);
 
-/* End functions for struct config_item_type target_core_dev_attrib_cit */
+/* End functions for struct config_item_type tb_dev_attrib_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_wwn_cit */
+/*  Start functions for struct config_item_type tb_dev_wwn_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_wwn, t10_wwn);
 #define SE_DEV_WWN_ATTR(_name, _mode)					\
@@ -984,15 +829,11 @@ static struct configfs_item_operations target_core_dev_wwn_ops = {
 	.store_attribute	= target_core_dev_wwn_attr_store,
 };
 
-static struct config_item_type target_core_dev_wwn_cit = {
-	.ct_item_ops		= &target_core_dev_wwn_ops,
-	.ct_attrs		= target_core_dev_wwn_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_wwn, &target_core_dev_wwn_ops, NULL, target_core_dev_wwn_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_wwn_cit */
+/*  End functions for struct config_item_type tb_dev_wwn_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_pr_cit */
+/*  Start functions for struct config_item_type tb_dev_pr_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_pr, se_device);
 #define SE_DEV_PR_ATTR(_name, _mode)					\
@@ -1453,15 +1294,11 @@ static struct configfs_item_operations target_core_dev_pr_ops = {
 	.store_attribute	= target_core_dev_pr_attr_store,
 };
 
-static struct config_item_type target_core_dev_pr_cit = {
-	.ct_item_ops		= &target_core_dev_pr_ops,
-	.ct_attrs		= target_core_dev_pr_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_pr_cit */
+/*  End functions for struct config_item_type tb_dev_pr_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_cit */
+/*  Start functions for struct config_item_type tb_dev_cit */
 
 static ssize_t target_core_show_dev_info(void *p, char *page)
 {
@@ -1925,7 +1762,7 @@ static struct target_core_configfs_attribute target_core_attr_dev_lba_map = {
 	.store	= target_core_store_dev_lba_map,
 };
 
-static struct configfs_attribute *lio_core_dev_attrs[] = {
+static struct configfs_attribute *target_core_dev_attrs[] = {
 	&target_core_attr_dev_info.attr,
 	&target_core_attr_dev_control.attr,
 	&target_core_attr_dev_alias.attr,
@@ -1984,13 +1821,9 @@ static struct configfs_item_operations target_core_dev_item_ops = {
 	.store_attribute	= target_core_dev_store,
 };
 
-static struct config_item_type target_core_dev_cit = {
-	.ct_item_ops		= &target_core_dev_item_ops,
-	.ct_attrs		= lio_core_dev_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev, &target_core_dev_item_ops, NULL, target_core_dev_attrs);
 
-/* End functions for struct config_item_type target_core_dev_cit */
+/* End functions for struct config_item_type tb_dev_cit */
 
 /* Start functions for struct config_item_type target_core_alua_lu_gp_cit */
 
@@ -2670,7 +2503,7 @@ static struct config_item_type target_core_alua_tg_pt_gp_cit = {
 
 /* End functions for struct config_item_type target_core_alua_tg_pt_gp_cit */
 
-/* Start functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* Start functions for struct config_item_type tb_alua_tg_pt_gps_cit */
 
 static struct config_group *target_core_alua_create_tg_pt_gp(
 	struct config_group *group,
@@ -2721,12 +2554,9 @@ static struct configfs_group_operations target_core_alua_tg_pt_gps_group_ops = {
 	.drop_item		= &target_core_alua_drop_tg_pt_gp,
 };
 
-static struct config_item_type target_core_alua_tg_pt_gps_cit = {
-	.ct_group_ops		= &target_core_alua_tg_pt_gps_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_alua_tg_pt_gps, NULL, &target_core_alua_tg_pt_gps_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* End functions for struct config_item_type tb_alua_tg_pt_gps_cit */
 
 /* Start functions for struct config_item_type target_core_alua_cit */
 
@@ -2744,7 +2574,7 @@ static struct config_item_type target_core_alua_cit = {
 
 /* End functions for struct config_item_type target_core_alua_cit */
 
-/* Start functions for struct config_item_type target_core_stat_cit */
+/* Start functions for struct config_item_type tb_dev_stat_cit */
 
 static struct config_group *target_core_stat_mkdir(
 	struct config_group *group,
@@ -2765,12 +2595,9 @@ static struct configfs_group_operations target_core_stat_group_ops = {
 	.drop_item		= &target_core_stat_rmdir,
 };
 
-static struct config_item_type target_core_stat_cit = {
-	.ct_group_ops		= &target_core_stat_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_stat, NULL, &target_core_stat_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_stat_cit */
+/* End functions for struct config_item_type tb_dev_stat_cit */
 
 /* Start functions for struct config_item_type target_core_hba_cit */
 
@@ -2806,17 +2633,17 @@ static struct config_group *target_core_make_subdev(
 	if (!dev_cg->default_groups)
 		goto out_free_device;
 
-	config_group_init_type_name(dev_cg, name, &target_core_dev_cit);
+	config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit);
 	config_group_init_type_name(&dev->dev_attrib.da_group, "attrib",
-			&target_core_dev_attrib_cit);
+			&t->tb_cits.tb_dev_attrib_cit);
 	config_group_init_type_name(&dev->dev_pr_group, "pr",
-			&target_core_dev_pr_cit);
+			&t->tb_cits.tb_dev_pr_cit);
 	config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn",
-			&target_core_dev_wwn_cit);
+			&t->tb_cits.tb_dev_wwn_cit);
 	config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group,
-			"alua", &target_core_alua_tg_pt_gps_cit);
+			"alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit);
 	config_group_init_type_name(&dev->dev_stat_grps.stat_group,
-			"statistics", &target_core_stat_cit);
+			"statistics", &t->tb_cits.tb_dev_stat_cit);
 
 	dev_cg->default_groups[0] = &dev->dev_attrib.da_group;
 	dev_cg->default_groups[1] = &dev->dev_pr_group;
@@ -3110,6 +2937,17 @@ static struct config_item_type target_core_cit = {
 
 /* Stop functions for struct config_item_type target_core_hba_cit */
 
+void target_core_setup_sub_cits(struct se_subsystem_api *sa)
+{
+	target_core_setup_dev_cit(sa);
+	target_core_setup_dev_attrib_cit(sa);
+	target_core_setup_dev_pr_cit(sa);
+	target_core_setup_dev_wwn_cit(sa);
+	target_core_setup_dev_alua_tg_pt_gps_cit(sa);
+	target_core_setup_dev_stat_cit(sa);
+}
+EXPORT_SYMBOL(target_core_setup_sub_cits);
+
 static int __init target_core_init_configfs(void)
 {
 	struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c45f9e9..7653cfb 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -659,6 +659,7 @@ int se_dev_set_max_unmap_lba_count(
 			dev, dev->dev_attrib.max_unmap_lba_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count);
 
 int se_dev_set_max_unmap_block_desc_count(
 	struct se_device *dev,
@@ -670,6 +671,7 @@ int se_dev_set_max_unmap_block_desc_count(
 			dev, dev->dev_attrib.max_unmap_block_desc_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count);
 
 int se_dev_set_unmap_granularity(
 	struct se_device *dev,
@@ -680,6 +682,7 @@ int se_dev_set_unmap_granularity(
 			dev, dev->dev_attrib.unmap_granularity);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity);
 
 int se_dev_set_unmap_granularity_alignment(
 	struct se_device *dev,
@@ -690,6 +693,7 @@ int se_dev_set_unmap_granularity_alignment(
 			dev, dev->dev_attrib.unmap_granularity_alignment);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment);
 
 int se_dev_set_max_write_same_len(
 	struct se_device *dev,
@@ -700,6 +704,7 @@ int se_dev_set_max_write_same_len(
 			dev, dev->dev_attrib.max_write_same_len);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_write_same_len);
 
 static void dev_set_t10_wwn_model_alias(struct se_device *dev)
 {
@@ -738,6 +743,7 @@ int se_dev_set_emulate_model_alias(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_model_alias);
 
 int se_dev_set_emulate_dpo(struct se_device *dev, int flag)
 {
@@ -753,6 +759,7 @@ int se_dev_set_emulate_dpo(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_dpo);
 
 int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
 {
@@ -760,17 +767,12 @@ int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
 		pr_err("Illegal value %d\n", flag);
 		return -EINVAL;
 	}
-
-	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_fua_write not supported for pSCSI\n");
-		return -EINVAL;
-	}
 	dev->dev_attrib.emulate_fua_write = flag;
 	pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n",
 			dev, dev->dev_attrib.emulate_fua_write);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_write);
 
 int se_dev_set_emulate_fua_read(struct se_device *dev, int flag)
 {
@@ -786,6 +788,7 @@ int se_dev_set_emulate_fua_read(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
 
 int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 {
@@ -794,11 +797,6 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 		return -EINVAL;
 	}
 	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_write_cache not supported for pSCSI\n");
-		return -EINVAL;
-	}
-	if (flag &&
 	    dev->transport->get_write_cache) {
 		pr_err("emulate_write_cache not supported for this device\n");
 		return -EINVAL;
@@ -809,6 +807,7 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 			dev, dev->dev_attrib.emulate_write_cache);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_write_cache);
 
 int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag)
 {
@@ -829,6 +828,7 @@ int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl);
 
 int se_dev_set_emulate_tas(struct se_device *dev, int flag)
 {
@@ -849,6 +849,7 @@ int se_dev_set_emulate_tas(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tas);
 
 int se_dev_set_emulate_tpu(struct se_device *dev, int flag)
 {
@@ -870,6 +871,7 @@ int se_dev_set_emulate_tpu(struct se_device *dev, int flag)
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpu);
 
 int se_dev_set_emulate_tpws(struct se_device *dev, int flag)
 {
@@ -891,6 +893,7 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag)
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpws);
 
 int se_dev_set_emulate_caw(struct se_device *dev, int flag)
 {
@@ -904,6 +907,7 @@ int se_dev_set_emulate_caw(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_caw);
 
 int se_dev_set_emulate_3pc(struct se_device *dev, int flag)
 {
@@ -917,6 +921,7 @@ int se_dev_set_emulate_3pc(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_3pc);
 
 int se_dev_set_pi_prot_type(struct se_device *dev, int flag)
 {
@@ -970,6 +975,7 @@ int se_dev_set_pi_prot_type(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_type);
 
 int se_dev_set_pi_prot_format(struct se_device *dev, int flag)
 {
@@ -1005,6 +1011,7 @@ int se_dev_set_pi_prot_format(struct se_device *dev, int flag)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_format);
 
 int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
 {
@@ -1017,6 +1024,7 @@ int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
 		(dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled");
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_enforce_pr_isids);
 
 int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
 {
@@ -1034,6 +1042,7 @@ int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
 	pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_force_pr_aptpl);
 
 int se_dev_set_is_nonrot(struct se_device *dev, int flag)
 {
@@ -1046,6 +1055,7 @@ int se_dev_set_is_nonrot(struct se_device *dev, int flag)
 	       dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_is_nonrot);
 
 int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag)
 {
@@ -1058,6 +1068,7 @@ int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag)
 	pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_rest_reord);
 
 /*
  * Note, this can only be called on unexported SE Device Object.
@@ -1076,31 +1087,21 @@ int se_dev_set_queue_depth(struct se_device *dev, u32 queue_depth)
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
+	if (queue_depth > dev->dev_attrib.queue_depth) {
 		if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-			pr_err("dev[%p]: Passed queue_depth: %u"
-				" exceeds TCM/SE_Device TCQ: %u\n",
-				dev, queue_depth,
+			pr_err("dev[%p]: Passed queue_depth:"
+				" %u exceeds TCM/SE_Device MAX"
+				" TCQ: %u\n", dev, queue_depth,
 				dev->dev_attrib.hw_queue_depth);
 			return -EINVAL;
 		}
-	} else {
-		if (queue_depth > dev->dev_attrib.queue_depth) {
-			if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-				pr_err("dev[%p]: Passed queue_depth:"
-					" %u exceeds TCM/SE_Device MAX"
-					" TCQ: %u\n", dev, queue_depth,
-					dev->dev_attrib.hw_queue_depth);
-				return -EINVAL;
-			}
-		}
 	}
-
 	dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth;
 	pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n",
 			dev, queue_depth);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_queue_depth);
 
 int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
 {
@@ -1123,22 +1124,12 @@ int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
 				DA_STATUS_MAX_SECTORS_MIN);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		if (fabric_max_sectors > dev->dev_attrib.hw_max_sectors) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than TCM/SE_Device max_sectors:"
-				" %u\n", dev, fabric_max_sectors,
-				dev->dev_attrib.hw_max_sectors);
-			 return -EINVAL;
-		}
-	} else {
-		if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than DA_STATUS_MAX_SECTORS_MAX:"
-				" %u\n", dev, fabric_max_sectors,
-				DA_STATUS_MAX_SECTORS_MAX);
-			return -EINVAL;
-		}
+	if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
+		pr_err("dev[%p]: Passed fabric_max_sectors: %u"
+			" greater than DA_STATUS_MAX_SECTORS_MAX:"
+			" %u\n", dev, fabric_max_sectors,
+			DA_STATUS_MAX_SECTORS_MAX);
+		return -EINVAL;
 	}
 	/*
 	 * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
@@ -1155,6 +1146,7 @@ int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
 			dev, fabric_max_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_fabric_max_sectors);
 
 int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 {
@@ -1164,11 +1156,6 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 			dev, dev->export_count);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Passed optimal_sectors cannot be"
-				" changed for TCM/pSCSI\n", dev);
-		return -EINVAL;
-	}
 	if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) {
 		pr_err("dev[%p]: Passed optimal_sectors %u cannot be"
 			" greater than fabric_max_sectors: %u\n", dev,
@@ -1181,6 +1168,7 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 			dev, optimal_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_optimal_sectors);
 
 int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 {
@@ -1201,13 +1189,6 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Not allowed to change block_size for"
-			" Physical Device, use for Linux/SCSI to change"
-			" block_size for underlying hardware\n", dev);
-		return -EINVAL;
-	}
-
 	dev->dev_attrib.block_size = block_size;
 	pr_debug("dev[%p]: SE Device block_size changed to %u\n",
 			dev, block_size);
@@ -1218,6 +1199,7 @@ int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_block_size);
 
 struct se_lun *core_dev_add_lun(
 	struct se_portal_group *tpg,
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 72c83d9..c2aea09 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -37,6 +37,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_file.h"
 
@@ -934,6 +935,42 @@ fd_parse_cdb(struct se_cmd *cmd)
 	return sbc_parse_cdb(cmd, &fd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(fileio);
+
+static struct configfs_attribute *fileio_backend_dev_attrs[] = {
+	&fileio_dev_attrib_emulate_model_alias.attr,
+	&fileio_dev_attrib_emulate_dpo.attr,
+	&fileio_dev_attrib_emulate_fua_write.attr,
+	&fileio_dev_attrib_emulate_fua_read.attr,
+	&fileio_dev_attrib_emulate_write_cache.attr,
+	&fileio_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&fileio_dev_attrib_emulate_tas.attr,
+	&fileio_dev_attrib_emulate_tpu.attr,
+	&fileio_dev_attrib_emulate_tpws.attr,
+	&fileio_dev_attrib_emulate_caw.attr,
+	&fileio_dev_attrib_emulate_3pc.attr,
+	&fileio_dev_attrib_pi_prot_type.attr,
+	&fileio_dev_attrib_hw_pi_prot_type.attr,
+	&fileio_dev_attrib_pi_prot_format.attr,
+	&fileio_dev_attrib_enforce_pr_isids.attr,
+	&fileio_dev_attrib_is_nonrot.attr,
+	&fileio_dev_attrib_emulate_rest_reord.attr,
+	&fileio_dev_attrib_force_pr_aptpl.attr,
+	&fileio_dev_attrib_hw_block_size.attr,
+	&fileio_dev_attrib_block_size.attr,
+	&fileio_dev_attrib_hw_max_sectors.attr,
+	&fileio_dev_attrib_fabric_max_sectors.attr,
+	&fileio_dev_attrib_optimal_sectors.attr,
+	&fileio_dev_attrib_hw_queue_depth.attr,
+	&fileio_dev_attrib_queue_depth.attr,
+	&fileio_dev_attrib_max_unmap_lba_count.attr,
+	&fileio_dev_attrib_max_unmap_block_desc_count.attr,
+	&fileio_dev_attrib_unmap_granularity.attr,
+	&fileio_dev_attrib_unmap_granularity_alignment.attr,
+	&fileio_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api fileio_template = {
 	.name			= "fileio",
 	.inquiry_prod		= "FILEIO",
@@ -957,6 +994,11 @@ static struct se_subsystem_api fileio_template = {
 
 static int __init fileio_module_init(void)
 {
+	struct target_backend_cits *tbc = &fileio_template.tb_cits;
+
+	target_core_setup_sub_cits(&fileio_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs;
+
 	return transport_subsystem_register(&fileio_template);
 }
 
diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index a25051a..ff95f95 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c
@@ -36,6 +36,7 @@
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
 #include <target/target_core_fabric.h>
+#include <target/target_core_configfs.h>
 
 #include "target_core_internal.h"
 
@@ -137,8 +138,7 @@ core_alloc_hba(const char *plugin_name, u32 plugin_dep_id, u32 hba_flags)
 	return hba;
 
 out_module_put:
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 	hba->transport = NULL;
 out_free_hba:
 	kfree(hba);
@@ -159,8 +159,7 @@ core_delete_hba(struct se_hba *hba)
 	pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target"
 			" Core\n", hba->hba_id);
 
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 
 	hba->transport = NULL;
 	kfree(hba);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7e6b857..3efff94 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -41,6 +41,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_iblock.h"
 
@@ -858,6 +859,42 @@ static bool iblock_get_write_cache(struct se_device *dev)
 	return q->flush_flags & REQ_FLUSH;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(iblock);
+
+static struct configfs_attribute *iblock_backend_dev_attrs[] = {
+	&iblock_dev_attrib_emulate_model_alias.attr,
+	&iblock_dev_attrib_emulate_dpo.attr,
+	&iblock_dev_attrib_emulate_fua_write.attr,
+	&iblock_dev_attrib_emulate_fua_read.attr,
+	&iblock_dev_attrib_emulate_write_cache.attr,
+	&iblock_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&iblock_dev_attrib_emulate_tas.attr,
+	&iblock_dev_attrib_emulate_tpu.attr,
+	&iblock_dev_attrib_emulate_tpws.attr,
+	&iblock_dev_attrib_emulate_caw.attr,
+	&iblock_dev_attrib_emulate_3pc.attr,
+	&iblock_dev_attrib_pi_prot_type.attr,
+	&iblock_dev_attrib_hw_pi_prot_type.attr,
+	&iblock_dev_attrib_pi_prot_format.attr,
+	&iblock_dev_attrib_enforce_pr_isids.attr,
+	&iblock_dev_attrib_is_nonrot.attr,
+	&iblock_dev_attrib_emulate_rest_reord.attr,
+	&iblock_dev_attrib_force_pr_aptpl.attr,
+	&iblock_dev_attrib_hw_block_size.attr,
+	&iblock_dev_attrib_block_size.attr,
+	&iblock_dev_attrib_hw_max_sectors.attr,
+	&iblock_dev_attrib_fabric_max_sectors.attr,
+	&iblock_dev_attrib_optimal_sectors.attr,
+	&iblock_dev_attrib_hw_queue_depth.attr,
+	&iblock_dev_attrib_queue_depth.attr,
+	&iblock_dev_attrib_max_unmap_lba_count.attr,
+	&iblock_dev_attrib_max_unmap_block_desc_count.attr,
+	&iblock_dev_attrib_unmap_granularity.attr,
+	&iblock_dev_attrib_unmap_granularity_alignment.attr,
+	&iblock_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api iblock_template = {
 	.name			= "iblock",
 	.inquiry_prod		= "IBLOCK",
@@ -883,6 +920,11 @@ static struct se_subsystem_api iblock_template = {
 
 static int __init iblock_module_init(void)
 {
+	struct target_backend_cits *tbc = &iblock_template.tb_cits;
+
+	target_core_setup_sub_cits(&iblock_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs;
+
 	return transport_subsystem_register(&iblock_template);
 }
 
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index e31f42f..60381db 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -18,34 +18,6 @@ int	core_dev_export(struct se_device *, struct se_portal_group *,
 		struct se_lun *);
 void	core_dev_unexport(struct se_device *, struct se_portal_group *,
 		struct se_lun *);
-int	se_dev_set_task_timeout(struct se_device *, u32);
-int	se_dev_set_max_unmap_lba_count(struct se_device *, u32);
-int	se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
-int	se_dev_set_unmap_granularity(struct se_device *, u32);
-int	se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
-int	se_dev_set_max_write_same_len(struct se_device *, u32);
-int	se_dev_set_emulate_model_alias(struct se_device *, int);
-int	se_dev_set_emulate_dpo(struct se_device *, int);
-int	se_dev_set_emulate_fua_write(struct se_device *, int);
-int	se_dev_set_emulate_fua_read(struct se_device *, int);
-int	se_dev_set_emulate_write_cache(struct se_device *, int);
-int	se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
-int	se_dev_set_emulate_tas(struct se_device *, int);
-int	se_dev_set_emulate_tpu(struct se_device *, int);
-int	se_dev_set_emulate_tpws(struct se_device *, int);
-int	se_dev_set_emulate_caw(struct se_device *, int);
-int	se_dev_set_emulate_3pc(struct se_device *, int);
-int	se_dev_set_pi_prot_type(struct se_device *, int);
-int	se_dev_set_pi_prot_format(struct se_device *, int);
-int	se_dev_set_enforce_pr_isids(struct se_device *, int);
-int	se_dev_set_force_pr_aptpl(struct se_device *, int);
-int	se_dev_set_is_nonrot(struct se_device *, int);
-int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
-int	se_dev_set_queue_depth(struct se_device *, u32);
-int	se_dev_set_max_sectors(struct se_device *, u32);
-int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
-int	se_dev_set_optimal_sectors(struct se_device *, u32);
-int	se_dev_set_block_size(struct se_device *, u32);
 struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32);
 void	core_dev_del_lun(struct se_portal_group *, struct se_lun *);
 struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 4c261c3..d56f2aa 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -76,7 +76,7 @@ enum preempt_type {
 };
 
 static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *,
-			struct t10_pr_registration *, int);
+					      struct t10_pr_registration *, int, int);
 
 static sense_reason_t
 target_scsi2_reservation_check(struct se_cmd *cmd)
@@ -1177,7 +1177,7 @@ static int core_scsi3_check_implicit_release(
 		 *    service action with the SERVICE ACTION RESERVATION KEY
 		 *    field set to zero (see 5.7.11.3).
 		 */
-		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0);
+		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0, 1);
 		ret = 1;
 		/*
 		 * For 'All Registrants' reservation types, all existing
@@ -1219,7 +1219,8 @@ static void __core_scsi3_free_registration(
 
 	pr_reg->pr_reg_deve->def_pr_registered = 0;
 	pr_reg->pr_reg_deve->pr_res_key = 0;
-	list_del(&pr_reg->pr_reg_list);
+	if (!list_empty(&pr_reg->pr_reg_list))
+		list_del(&pr_reg->pr_reg_list);
 	/*
 	 * Caller accessing *pr_reg using core_scsi3_locate_pr_reg(),
 	 * so call core_scsi3_put_pr_reg() to decrement our reference.
@@ -1271,6 +1272,7 @@ void core_scsi3_free_pr_reg_from_nacl(
 {
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_res_holder;
+	bool free_reg = false;
 	/*
 	 * If the passed se_node_acl matches the reservation holder,
 	 * release the reservation.
@@ -1278,13 +1280,18 @@ void core_scsi3_free_pr_reg_from_nacl(
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if ((pr_res_holder != NULL) &&
-	    (pr_res_holder->pr_reg_nacl == nacl))
-		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0);
+	    (pr_res_holder->pr_reg_nacl == nacl)) {
+		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0, 1);
+		free_reg = true;
+	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
 	 * Release any registration associated with the struct se_node_acl.
 	 */
 	spin_lock(&pr_tmpl->registration_lock);
+	if (pr_res_holder && free_reg)
+		__core_scsi3_free_registration(dev, pr_res_holder, NULL, 0);
+
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
 
@@ -1307,7 +1314,7 @@ void core_scsi3_free_all_registrations(
 	if (pr_res_holder != NULL) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-				pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -1429,14 +1436,12 @@ core_scsi3_decode_spec_i_port(
 	struct target_core_fabric_ops *tmp_tf_ops;
 	unsigned char *buf;
 	unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	sense_reason_t ret;
 	u32 tpdl, tid_len = 0;
 	int dest_local_nexus;
 	u32 dest_rtpi = 0;
 
-	memset(dest_iport, 0, 64);
-
 	local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Allocate a struct pr_transport_id_holder and setup the
@@ -2105,13 +2110,13 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key,
 		/*
 		 * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus.
 		 */
-		pr_holder = core_scsi3_check_implicit_release(
-				cmd->se_dev, pr_reg);
+		type = pr_reg->pr_res_type;
+		pr_holder = core_scsi3_check_implicit_release(cmd->se_dev,
+							      pr_reg);
 		if (pr_holder < 0) {
 			ret = TCM_RESERVATION_CONFLICT;
 			goto out;
 		}
-		type = pr_reg->pr_res_type;
 
 		spin_lock(&pr_tmpl->registration_lock);
 		/*
@@ -2269,6 +2274,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if (pr_res_holder) {
+		int pr_res_type = pr_res_holder->pr_res_type;
 		/*
 		 * From spc4r17 Section 5.7.9: Reserving:
 		 *
@@ -2279,7 +2285,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
 		 * the logical unit, then the command shall be completed with
 		 * RESERVATION CONFLICT status.
 		 */
-		if (pr_res_holder != pr_reg) {
+		if ((pr_res_holder != pr_reg) &&
+		    (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) &&
+		    (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
 			struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 			pr_err("SPC-3 PR: Attempted RESERVE from"
 				" [%s]: %s while reservation already held by"
@@ -2385,23 +2393,59 @@ static void __core_scsi3_complete_pro_release(
 	struct se_device *dev,
 	struct se_node_acl *se_nacl,
 	struct t10_pr_registration *pr_reg,
-	int explicit)
+	int explicit,
+	int unreg)
 {
 	struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
 	char i_buf[PR_REG_ISID_ID_LEN];
+	int pr_res_type = 0, pr_res_scope = 0;
 
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN);
 	/*
 	 * Go ahead and release the current PR reservation holder.
+	 * If an All Registrants reservation is currently active and
+	 * an unregister operation is requested, replace the current
+	 * dev_pr_res_holder with another active registration.
 	 */
-	dev->dev_pr_res_holder = NULL;
+	if (dev->dev_pr_res_holder) {
+		pr_res_type = dev->dev_pr_res_holder->pr_res_type;
+		pr_res_scope = dev->dev_pr_res_holder->pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_type = 0;
+		dev->dev_pr_res_holder->pr_res_scope = 0;
+		dev->dev_pr_res_holder->pr_res_holder = 0;
+		dev->dev_pr_res_holder = NULL;
+	}
+	if (!unreg)
+		goto out;
 
-	pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
-		" reservation holder TYPE: %s ALL_TG_PT: %d\n",
-		tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit",
-		core_scsi3_pr_dump_type(pr_reg->pr_res_type),
-		(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	spin_lock(&dev->t10_pr.registration_lock);
+	list_del_init(&pr_reg->pr_reg_list);
+	/*
+	 * If the I_T nexus is a reservation holder, the persistent reservation
+	 * is of an all registrants type, and the I_T nexus is the last remaining
+	 * registered I_T nexus, then the device server shall also release the
+	 * persistent reservation.
+	 */
+	if (!list_empty(&dev->t10_pr.registration_list) &&
+	    ((pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
+	     (pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))) {
+		dev->dev_pr_res_holder =
+			list_entry(dev->t10_pr.registration_list.next,
+				   struct t10_pr_registration, pr_reg_list);
+		dev->dev_pr_res_holder->pr_res_type = pr_res_type;
+		dev->dev_pr_res_holder->pr_res_scope = pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_holder = 1;
+	}
+	spin_unlock(&dev->t10_pr.registration_lock);
+out:
+	if (!dev->dev_pr_res_holder) {
+		pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
+			" reservation holder TYPE: %s ALL_TG_PT: %d\n",
+			tfo->get_fabric_name(), (explicit) ? "explicit" :
+			"implicit", core_scsi3_pr_dump_type(pr_res_type),
+			(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	}
 	pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n",
 		tfo->get_fabric_name(), se_nacl->initiatorname,
 		i_buf);
@@ -2532,7 +2576,7 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
 	 *    server shall not establish a unit attention condition.
 	 */
 	__core_scsi3_complete_pro_release(dev, se_sess->se_node_acl,
-			pr_reg, 1);
+					  pr_reg, 1, 0);
 
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -2620,7 +2664,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key)
 	if (pr_res_holder) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
@@ -2679,7 +2723,7 @@ static void __core_scsi3_complete_pro_preempt(
 	 */
 	if (dev->dev_pr_res_holder)
 		__core_scsi3_complete_pro_release(dev, nacl,
-				dev->dev_pr_res_holder, 0);
+						  dev->dev_pr_res_holder, 0, 0);
 
 	dev->dev_pr_res_holder = pr_reg;
 	pr_reg->pr_res_holder = 1;
@@ -2924,8 +2968,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key,
 	 */
 	if (pr_reg_n != pr_res_holder)
 		__core_scsi3_complete_pro_release(dev,
-				pr_res_holder->pr_reg_nacl,
-				dev->dev_pr_res_holder, 0);
+						  pr_res_holder->pr_reg_nacl,
+						  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * b) Remove the registrations for all I_T nexuses identified
 	 *    by the SERVICE ACTION RESERVATION KEY field, except the
@@ -3059,7 +3103,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	unsigned char *buf;
 	unsigned char *initiator_str;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	u32 tid_len, tmp_tid_len;
 	int new_reg = 0, type, scope, matching_iname;
 	sense_reason_t ret;
@@ -3071,7 +3115,6 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 
-	memset(dest_iport, 0, 64);
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	se_tpg = se_sess->se_tpg;
 	tf_ops = se_tpg->se_tpg_tfo;
@@ -3389,7 +3432,7 @@ after_iport_check:
 	 *    holder (i.e., the I_T nexus on which the
 	 */
 	__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			dev->dev_pr_res_holder, 0);
+					  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * g) Move the persistent reservation to the specified I_T nexus using
 	 *    the same scope and type as the persistent reservation released in
@@ -3837,7 +3880,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	unsigned char *buf;
 	u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len;
 	u32 off = 8; /* off into first Full Status descriptor */
-	int format_code = 0;
+	int format_code = 0, pr_res_type = 0, pr_res_scope = 0;
+	bool all_reg = false;
 
 	if (cmd->data_length < 8) {
 		pr_err("PRIN SA READ_FULL_STATUS SCSI Data Length: %u"
@@ -3854,6 +3898,19 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
 	buf[3] = (dev->t10_pr.pr_generation & 0xff);
 
+	spin_lock(&dev->dev_reservation_lock);
+	if (dev->dev_pr_res_holder) {
+		struct t10_pr_registration *pr_holder = dev->dev_pr_res_holder;
+
+		if (pr_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
+		    pr_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG) {
+			all_reg = true;
+			pr_res_type = pr_holder->pr_res_type;
+			pr_res_scope = pr_holder->pr_res_scope;
+		}
+	}
+	spin_unlock(&dev->dev_reservation_lock);
+
 	spin_lock(&pr_tmpl->registration_lock);
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
@@ -3901,14 +3958,20 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		 * reservation holder for PR_HOLDER bit.
 		 *
 		 * Also, if this registration is the reservation
-		 * holder, fill in SCOPE and TYPE in the next byte.
+		 * holder or there is an All Registrants reservation
+		 * active, fill in SCOPE and TYPE in the next byte.
 		 */
 		if (pr_reg->pr_res_holder) {
 			buf[off++] |= 0x01;
 			buf[off++] = (pr_reg->pr_res_scope & 0xf0) |
 				     (pr_reg->pr_res_type & 0x0f);
-		} else
+		} else if (all_reg) {
+			buf[off++] |= 0x01;
+			buf[off++] = (pr_res_scope & 0xf0) |
+				     (pr_res_type & 0x0f);
+		} else {
 			off += 2;
+		}
 
 		off += 4; /* Skip over reserved area */
 		/*
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c8291f..1045dcd 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -44,6 +44,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_alua.h"
 #include "target_core_pscsi.h"
@@ -1094,7 +1095,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 	req->retries = PS_RETRY;
 
 	blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req,
-			(cmd->sam_task_attr == MSG_HEAD_TAG),
+			(cmd->sam_task_attr == TCM_HEAD_TAG),
 			pscsi_req_done);
 
 	return 0;
@@ -1165,6 +1166,26 @@ static void pscsi_req_done(struct request *req, int uptodate)
 	kfree(pt);
 }
 
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type);
+TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size);
+TB_DEV_ATTR_RO(pscsi, hw_block_size);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors);
+TB_DEV_ATTR_RO(pscsi, hw_max_sectors);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth);
+TB_DEV_ATTR_RO(pscsi, hw_queue_depth);
+
+static struct configfs_attribute *pscsi_backend_dev_attrs[] = {
+	&pscsi_dev_attrib_hw_pi_prot_type.attr,
+	&pscsi_dev_attrib_hw_block_size.attr,
+	&pscsi_dev_attrib_hw_max_sectors.attr,
+	&pscsi_dev_attrib_hw_queue_depth.attr,
+	NULL,
+};
+
 static struct se_subsystem_api pscsi_template = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
@@ -1185,6 +1206,11 @@ static struct se_subsystem_api pscsi_template = {
 
 static int __init pscsi_module_init(void)
 {
+	struct target_backend_cits *tbc = &pscsi_template.tb_cits;
+
+	target_core_setup_sub_cits(&pscsi_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs;
+
 	return transport_subsystem_register(&pscsi_template);
 }
 
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index b920db3..60ebd17 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -34,6 +34,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_rd.h"
 
@@ -632,6 +633,42 @@ rd_parse_cdb(struct se_cmd *cmd)
 	return sbc_parse_cdb(cmd, &rd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(rd_mcp);
+
+static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = {
+	&rd_mcp_dev_attrib_emulate_model_alias.attr,
+	&rd_mcp_dev_attrib_emulate_dpo.attr,
+	&rd_mcp_dev_attrib_emulate_fua_write.attr,
+	&rd_mcp_dev_attrib_emulate_fua_read.attr,
+	&rd_mcp_dev_attrib_emulate_write_cache.attr,
+	&rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&rd_mcp_dev_attrib_emulate_tas.attr,
+	&rd_mcp_dev_attrib_emulate_tpu.attr,
+	&rd_mcp_dev_attrib_emulate_tpws.attr,
+	&rd_mcp_dev_attrib_emulate_caw.attr,
+	&rd_mcp_dev_attrib_emulate_3pc.attr,
+	&rd_mcp_dev_attrib_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_hw_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_pi_prot_format.attr,
+	&rd_mcp_dev_attrib_enforce_pr_isids.attr,
+	&rd_mcp_dev_attrib_is_nonrot.attr,
+	&rd_mcp_dev_attrib_emulate_rest_reord.attr,
+	&rd_mcp_dev_attrib_force_pr_aptpl.attr,
+	&rd_mcp_dev_attrib_hw_block_size.attr,
+	&rd_mcp_dev_attrib_block_size.attr,
+	&rd_mcp_dev_attrib_hw_max_sectors.attr,
+	&rd_mcp_dev_attrib_fabric_max_sectors.attr,
+	&rd_mcp_dev_attrib_optimal_sectors.attr,
+	&rd_mcp_dev_attrib_hw_queue_depth.attr,
+	&rd_mcp_dev_attrib_queue_depth.attr,
+	&rd_mcp_dev_attrib_max_unmap_lba_count.attr,
+	&rd_mcp_dev_attrib_max_unmap_block_desc_count.attr,
+	&rd_mcp_dev_attrib_unmap_granularity.attr,
+	&rd_mcp_dev_attrib_unmap_granularity_alignment.attr,
+	&rd_mcp_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api rd_mcp_template = {
 	.name			= "rd_mcp",
 	.inquiry_prod		= "RAMDISK-MCP",
@@ -653,8 +690,12 @@ static struct se_subsystem_api rd_mcp_template = {
 
 int __init rd_module_init(void)
 {
+	struct target_backend_cits *tbc = &rd_mcp_template.tb_cits;
 	int ret;
 
+	target_core_setup_sub_cits(&rd_mcp_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&rd_mcp_template);
 	if (ret < 0) {
 		return ret;
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 8d171ff..11bea19 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -485,7 +485,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
 	cmd->t_data_nents_orig = cmd->t_data_nents;
 	cmd->t_data_nents = 1;
 
-	cmd->sam_task_attr = MSG_HEAD_TAG;
+	cmd->sam_task_attr = TCM_HEAD_TAG;
 	cmd->transport_complete_callback = compare_and_write_post;
 	/*
 	 * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index bc286a6..1307600 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1357,7 +1357,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		 * Do implicit HEAD_OF_QUEUE processing for INQUIRY.
 		 * See spc4r17 section 5.3
 		 */
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		cmd->execute_cmd = spc_emulate_inquiry;
 		break;
 	case SECURITY_PROTOCOL_IN:
@@ -1391,7 +1391,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		 * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS
 		 * See spc4r17 section 5.3
 		 */
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		break;
 	case TEST_UNIT_READY:
 		cmd->execute_cmd = spc_emulate_testunitready;
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index be877bf..0adc0f6 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1159,7 +1159,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd)
 	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
 		return 0;
 
-	if (cmd->sam_task_attr == MSG_ACA_TAG) {
+	if (cmd->sam_task_attr == TCM_ACA_TAG) {
 		pr_debug("SAM Task Attribute ACA"
 			" emulation is not supported\n");
 		return TCM_INVALID_CDB_FIELD;
@@ -1531,7 +1531,7 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 	BUG_ON(!se_tpg);
 
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
-			      0, DMA_NONE, MSG_SIMPLE_TAG, sense);
+			      0, DMA_NONE, TCM_SIMPLE_TAG, sense);
 	/*
 	 * FIXME: Currently expect caller to handle se_cmd->se_tmr_req
 	 * allocation failure.
@@ -1718,12 +1718,12 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
 	 * to allow the passed struct se_cmd list of tasks to the front of the list.
 	 */
 	switch (cmd->sam_task_attr) {
-	case MSG_HEAD_TAG:
+	case TCM_HEAD_TAG:
 		pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x, "
 			 "se_ordered_id: %u\n",
 			 cmd->t_task_cdb[0], cmd->se_ordered_id);
 		return false;
-	case MSG_ORDERED_TAG:
+	case TCM_ORDERED_TAG:
 		atomic_inc_mb(&dev->dev_ordered_sync);
 
 		pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
@@ -1828,7 +1828,7 @@ static void target_restart_delayed_cmds(struct se_device *dev)
 
 		__target_execute_cmd(cmd);
 
-		if (cmd->sam_task_attr == MSG_ORDERED_TAG)
+		if (cmd->sam_task_attr == TCM_ORDERED_TAG)
 			break;
 	}
 }
@@ -1844,18 +1844,18 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
 	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
 		return;
 
-	if (cmd->sam_task_attr == MSG_SIMPLE_TAG) {
+	if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
 		atomic_dec_mb(&dev->simple_cmds);
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
 			" SIMPLE: %u\n", dev->dev_cur_ordered_id,
 			cmd->se_ordered_id);
-	} else if (cmd->sam_task_attr == MSG_HEAD_TAG) {
+	} else if (cmd->sam_task_attr == TCM_HEAD_TAG) {
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev_cur_ordered_id: %u for"
 			" HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id,
 			cmd->se_ordered_id);
-	} else if (cmd->sam_task_attr == MSG_ORDERED_TAG) {
+	} else if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
 		atomic_dec_mb(&dev->dev_ordered_sync);
 
 		dev->dev_cur_ordered_id++;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9a1b314..8bfa61c 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -28,6 +28,8 @@
 #include <target/target_core_base.h>
 #include <target/target_core_fabric.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
+
 #include <linux/target_core_user.h>
 
 /*
@@ -1092,6 +1094,42 @@ tcmu_parse_cdb(struct se_cmd *cmd)
 	return ret;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(tcmu);
+
+static struct configfs_attribute *tcmu_backend_dev_attrs[] = {
+	&tcmu_dev_attrib_emulate_model_alias.attr,
+	&tcmu_dev_attrib_emulate_dpo.attr,
+	&tcmu_dev_attrib_emulate_fua_write.attr,
+	&tcmu_dev_attrib_emulate_fua_read.attr,
+	&tcmu_dev_attrib_emulate_write_cache.attr,
+	&tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&tcmu_dev_attrib_emulate_tas.attr,
+	&tcmu_dev_attrib_emulate_tpu.attr,
+	&tcmu_dev_attrib_emulate_tpws.attr,
+	&tcmu_dev_attrib_emulate_caw.attr,
+	&tcmu_dev_attrib_emulate_3pc.attr,
+	&tcmu_dev_attrib_pi_prot_type.attr,
+	&tcmu_dev_attrib_hw_pi_prot_type.attr,
+	&tcmu_dev_attrib_pi_prot_format.attr,
+	&tcmu_dev_attrib_enforce_pr_isids.attr,
+	&tcmu_dev_attrib_is_nonrot.attr,
+	&tcmu_dev_attrib_emulate_rest_reord.attr,
+	&tcmu_dev_attrib_force_pr_aptpl.attr,
+	&tcmu_dev_attrib_hw_block_size.attr,
+	&tcmu_dev_attrib_block_size.attr,
+	&tcmu_dev_attrib_hw_max_sectors.attr,
+	&tcmu_dev_attrib_fabric_max_sectors.attr,
+	&tcmu_dev_attrib_optimal_sectors.attr,
+	&tcmu_dev_attrib_hw_queue_depth.attr,
+	&tcmu_dev_attrib_queue_depth.attr,
+	&tcmu_dev_attrib_max_unmap_lba_count.attr,
+	&tcmu_dev_attrib_max_unmap_block_desc_count.attr,
+	&tcmu_dev_attrib_unmap_granularity.attr,
+	&tcmu_dev_attrib_unmap_granularity_alignment.attr,
+	&tcmu_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api tcmu_template = {
 	.name			= "user",
 	.inquiry_prod		= "USER",
@@ -1112,6 +1150,7 @@ static struct se_subsystem_api tcmu_template = {
 
 static int __init tcmu_module_init(void)
 {
+	struct target_backend_cits *tbc = &tcmu_template.tb_cits;
 	int ret;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
@@ -1134,6 +1173,9 @@ static int __init tcmu_module_init(void)
 		goto out_unreg_device;
 	}
 
+	target_core_setup_sub_cits(&tcmu_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&tcmu_template);
 	if (ret)
 		goto out_unreg_genl;
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index be0c0d0..edcafa4 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -554,17 +554,17 @@ static void ft_send_work(struct work_struct *work)
 	 */
 	switch (fcp->fc_pri_ta & FCP_PTA_MASK) {
 	case FCP_PTA_HEADQ:
-		task_attr = MSG_HEAD_TAG;
+		task_attr = TCM_HEAD_TAG;
 		break;
 	case FCP_PTA_ORDERED:
-		task_attr = MSG_ORDERED_TAG;
+		task_attr = TCM_ORDERED_TAG;
 		break;
 	case FCP_PTA_ACA:
-		task_attr = MSG_ACA_TAG;
+		task_attr = TCM_ACA_TAG;
 		break;
 	case FCP_PTA_SIMPLE: /* Fallthrough */
 	default:
-		task_attr = MSG_SIMPLE_TAG;
+		task_attr = TCM_SIMPLE_TAG;
 	}
 
 	fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f554d25..af40db0 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -112,6 +112,18 @@ config CPU_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config CLOCK_THERMAL
+	bool "Generic clock cooling support"
+	depends on COMMON_CLK
+	depends on PM_OPP
+	help
+	  This entry implements the generic clock cooling mechanism through
+	  frequency clipping. Typically used to cool off co-processors. The
+	  device that is configured to use this cooling mechanism will be
+	  controlled to reduce clock frequency whenever temperature is high.
+
+	  If you want this support, you should say Y here.
+
 config THERMAL_EMULATION
 	bool "Thermal emulation mode support"
 	help
@@ -143,6 +155,16 @@ config SPEAR_THERMAL
 	  Enable this to plug the SPEAr thermal sensor driver into the Linux
 	  thermal framework.
 
+config ROCKCHIP_THERMAL
+	tristate "Rockchip thermal driver"
+	depends on ARCH_ROCKCHIP
+	depends on RESET_CONTROLLER
+	help
+	  The Rockchip thermal driver provides support for the temperature
+	  sensor ADC (TS-ADC) found on Rockchip SoCs. It supports one critical
+	  trip point. Cpufreq is used as the cooling device and will throttle
+	  CPUs when the temperature crosses the passive trip point.
+
 config RCAR_THERMAL
 	tristate "Renesas R-Car thermal driver"
 	depends on ARCH_SHMOBILE || COMPILE_TEST
@@ -185,6 +207,16 @@ config ARMADA_THERMAL
 	  Enable this option if you want to have support for thermal management
 	  controller present in Armada 370 and Armada XP SoC.
 
+config TEGRA_SOCTHERM
+	tristate "Tegra SOCTHERM thermal management"
+	depends on ARCH_TEGRA
+	help
+	  Enable this option for integrated thermal management support on NVIDIA
+	  Tegra124 systems-on-chip. The driver supports four thermal zones
+	  (CPU, GPU, MEM, PLLX). Cooling devices can be bound to the thermal
+	  zones to manage temperatures. This option is also required for the
+	  emergency thermal reset (thermtrip) feature to function.
+
 config DB8500_CPUFREQ_COOLING
 	tristate "DB8500 cpufreq cooling"
 	depends on ARCH_U8500
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 39c4fe8..fa0dc48 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -18,8 +18,12 @@ thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE)	+= user_space.o
 # cpufreq cooling
 thermal_sys-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o
 
+# clock cooling
+thermal_sys-$(CONFIG_CLOCK_THERMAL)	+= clock_cooling.o
+
 # platform thermal drivers
 obj-$(CONFIG_SPEAR_THERMAL)	+= spear_thermal.o
+obj-$(CONFIG_ROCKCHIP_THERMAL)	+= rockchip_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)	+= rcar_thermal.o
 obj-$(CONFIG_KIRKWOOD_THERMAL)  += kirkwood_thermal.o
 obj-y				+= samsung/
@@ -34,3 +38,4 @@ obj-$(CONFIG_INTEL_SOC_DTS_THERMAL)	+= intel_soc_dts_thermal.o
 obj-$(CONFIG_TI_SOC_THERMAL)	+= ti-soc-thermal/
 obj-$(CONFIG_INT340X_THERMAL)  += int340x_thermal/
 obj-$(CONFIG_ST_THERMAL)	+= st/
+obj-$(CONFIG_TEGRA_SOCTHERM)	+= tegra_soctherm.o
diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index eaaf59c..c2556cf5 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -35,10 +35,6 @@
 #define PMU_TDC0_OTF_CAL_MASK		(0x1 << 30)
 #define PMU_TDC0_START_CAL_MASK		(0x1 << 25)
 
-#define A375_Z1_CAL_RESET_LSB		0x8011e214
-#define A375_Z1_CAL_RESET_MSB		0x30a88019
-#define A375_Z1_WORKAROUND_BIT		BIT(9)
-
 #define A375_UNIT_CONTROL_SHIFT		27
 #define A375_UNIT_CONTROL_MASK		0x7
 #define A375_READOUT_INVERT		BIT(15)
@@ -124,24 +120,12 @@ static void armada375_init_sensor(struct platform_device *pdev,
 				  struct armada_thermal_priv *priv)
 {
 	unsigned long reg;
-	bool quirk_needed =
-		!!of_device_is_compatible(pdev->dev.of_node,
-					  "marvell,armada375-z1-thermal");
-
-	if (quirk_needed) {
-		/* Ensure these registers have the default (reset) values */
-		writel(A375_Z1_CAL_RESET_LSB, priv->control);
-		writel(A375_Z1_CAL_RESET_MSB, priv->control + 0x4);
-	}
 
 	reg = readl(priv->control + 4);
 	reg &= ~(A375_UNIT_CONTROL_MASK << A375_UNIT_CONTROL_SHIFT);
 	reg &= ~A375_READOUT_INVERT;
 	reg &= ~A375_HW_RESETn;
 
-	if (quirk_needed)
-		reg |= A375_Z1_WORKAROUND_BIT;
-
 	writel(reg, priv->control + 4);
 	mdelay(20);
 
@@ -260,10 +244,6 @@ static const struct of_device_id armada_thermal_id_table[] = {
 		.data       = &armada375_data,
 	},
 	{
-		.compatible = "marvell,armada375-z1-thermal",
-		.data       = &armada375_data,
-	},
-	{
 		.compatible = "marvell,armada380-thermal",
 		.data       = &armada380_data,
 	},
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c
new file mode 100644
index 0000000..1b4ff0f
--- /dev/null
+++ b/drivers/thermal/clock_cooling.c
@@ -0,0 +1,485 @@
+/*
+ *  drivers/thermal/clock_cooling.c
+ *
+ *  Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com>
+ *
+ *  Copyright (C) 2013	Texas Instruments Inc.
+ *  Contact:  Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ *  Highly based on cpu_cooling.c.
+ *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
+ *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/mutex.h>
+#include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/clock_cooling.h>
+
+/**
+ * struct clock_cooling_device - data for cooling device with clock
+ * @id: unique integer value corresponding to each clock_cooling_device
+ *	registered.
+ * @dev: struct device pointer to the device being used to cool off using
+ *       clock frequencies.
+ * @cdev: thermal_cooling_device pointer to keep track of the
+ *	registered cooling device.
+ * @clk_rate_change_nb: reference to notifier block used to receive clock
+ *                      rate changes.
+ * @freq_table: frequency table used to keep track of available frequencies.
+ * @clock_state: integer value representing the current state of clock
+ *	cooling	devices.
+ * @clock_val: integer value representing the absolute value of the clipped
+ *	frequency.
+ * @clk: struct clk reference used to enforce clock limits.
+ * @lock: mutex lock to protect this struct.
+ *
+ * This structure is required for keeping information of each
+ * clock_cooling_device registered. In order to prevent corruption of this a
+ * mutex @lock is used.
+ */
+struct clock_cooling_device {
+	int id;
+	struct device *dev;
+	struct thermal_cooling_device *cdev;
+	struct notifier_block clk_rate_change_nb;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned long clock_state;
+	unsigned long clock_val;
+	struct clk *clk;
+	struct mutex lock; /* lock to protect the content of this struct */
+};
+#define to_clock_cooling_device(x) \
+		container_of(x, struct clock_cooling_device, clk_rate_change_nb)
+static DEFINE_IDR(clock_idr);
+static DEFINE_MUTEX(cooling_clock_lock);
+
+/**
+ * clock_cooling_get_idr - function to get a unique id.
+ * @id: int * value generated by this function.
+ *
+ * This function will populate @id with a unique
+ * id, using the idr API.
+ *
+ * Return: 0 on success, an error code on failure.
+ */
+static int clock_cooling_get_idr(int *id)
+{
+	int ret;
+
+	mutex_lock(&cooling_clock_lock);
+	ret = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL);
+	mutex_unlock(&cooling_clock_lock);
+	if (unlikely(ret < 0))
+		return ret;
+	*id = ret;
+
+	return 0;
+}
+
+/**
+ * release_idr - function to free the unique id.
+ * @id: int value representing the unique id.
+ */
+static void release_idr(int id)
+{
+	mutex_lock(&cooling_clock_lock);
+	idr_remove(&clock_idr, id);
+	mutex_unlock(&cooling_clock_lock);
+}
+
+/* Below code defines functions to be used for clock as cooling device */
+
+enum clock_cooling_property {
+	GET_LEVEL,
+	GET_FREQ,
+	GET_MAXL,
+};
+
+/**
+ * clock_cooling_get_property - fetch a property of interest for a given clock.
+ * @ccdev: clock cooling device reference
+ * @input: query parameter
+ * @output: query return
+ * @property: type of query (frequency, level, max level)
+ *
+ * This is the common function to
+ * 1. get maximum clock cooling states
+ * 2. translate frequency to cooling state
+ * 3. translate cooling state to frequency
+ * Note that the code may not be in good shape
+ * but it is written in this way in order to:
+ * a) reduce duplicate code as most of the code can be shared.
+ * b) make sure the logic is consistent when translating between
+ *    cooling states and frequencies.
+ *
+ * Return: 0 on success, -EINVAL when invalid parameters are passed.
+ */
+static int clock_cooling_get_property(struct clock_cooling_device *ccdev,
+				      unsigned long input,
+				      unsigned long *output,
+				      enum clock_cooling_property property)
+{
+	int i;
+	unsigned long max_level = 0, level = 0;
+	unsigned int freq = CPUFREQ_ENTRY_INVALID;
+	int descend = -1;
+	struct cpufreq_frequency_table *pos, *table = ccdev->freq_table;
+
+	if (!output)
+		return -EINVAL;
+
+	if (!table)
+		return -EINVAL;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* get the frequency order */
+		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
+			descend = freq > pos->frequency;
+
+		freq = pos->frequency;
+		max_level++;
+	}
+
+	/* No valid cpu frequency entry */
+	if (max_level == 0)
+		return -EINVAL;
+
+	/* max_level is an index, not a counter */
+	max_level--;
+
+	/* get max level */
+	if (property == GET_MAXL) {
+		*output = max_level;
+		return 0;
+	}
+
+	if (property == GET_FREQ)
+		level = descend ? input : (max_level - input);
+
+	i = 0;
+	cpufreq_for_each_valid_entry(pos, table) {
+		/* ignore duplicate entry */
+		if (freq == pos->frequency)
+			continue;
+
+		/* now we have a valid frequency entry */
+		freq = pos->frequency;
+
+		if (property == GET_LEVEL && (unsigned int)input == freq) {
+			/* get level by frequency */
+			*output = descend ? i : (max_level - i);
+			return 0;
+		}
+		if (property == GET_FREQ && level == i) {
+			/* get frequency by level */
+			*output = freq;
+			return 0;
+		}
+		i++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * clock_cooling_get_level - return the cooling level of given clock cooling.
+ * @cdev: reference to a thermal cooling device used as clock cooling device
+ * @freq: the frequency of interest
+ *
+ * This function will match the cooling level corresponding to the
+ * requested @freq and return it.
+ *
+ * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
+ * otherwise.
+ */
+unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev,
+				      unsigned long freq)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long val;
+
+	if (clock_cooling_get_property(ccdev, (unsigned long)freq, &val,
+				       GET_LEVEL))
+		return THERMAL_CSTATE_INVALID;
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(clock_cooling_get_level);
+
+/**
+ * clock_cooling_get_frequency - get the absolute value of frequency from level.
+ * @ccdev: clock cooling device reference
+ * @level: cooling level
+ *
+ * This function matches a cooling level with a frequency. Given a cooling
+ * level, which equals the cooling state of the clock cooling device, it
+ * will return the corresponding frequency.
+ *	e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
+ *
+ * Return: 0 on error, the corresponding frequency otherwise.
+ */
+static unsigned long
+clock_cooling_get_frequency(struct clock_cooling_device *ccdev,
+			    unsigned long level)
+{
+	int ret = 0;
+	unsigned long freq;
+
+	ret = clock_cooling_get_property(ccdev, level, &freq, GET_FREQ);
+	if (ret)
+		return 0;
+
+	return freq;
+}
+
+/**
+ * clock_cooling_apply - function to apply frequency clipping.
+ * @ccdev: clock_cooling_device pointer containing frequency clipping data.
+ * @cooling_state: value of the cooling state.
+ *
+ * Function used to make sure the clock layer is aware of current thermal
+ * limits. The limits are applied by updating the clock rate in case it is
+ * higher than the corresponding frequency based on the requested cooling_state.
+ *
+ * Return: 0 on success, an error code otherwise (-EINVAL in case of a wrong
+ * cooling state).
+ */
+static int clock_cooling_apply(struct clock_cooling_device *ccdev,
+			       unsigned long cooling_state)
+{
+	unsigned long clip_freq, cur_freq;
+	int ret = 0;
+
+	/* Here we write the clipping */
+	/* Check if the old cooling action is same as new cooling action */
+	if (ccdev->clock_state == cooling_state)
+		return 0;
+
+	clip_freq = clock_cooling_get_frequency(ccdev, cooling_state);
+	if (!clip_freq)
+		return -EINVAL;
+
+	cur_freq = clk_get_rate(ccdev->clk);
+
+	mutex_lock(&ccdev->lock);
+	ccdev->clock_state = cooling_state;
+	ccdev->clock_val = clip_freq;
+	/* enforce clock level */
+	if (cur_freq > clip_freq)
+		ret = clk_set_rate(ccdev->clk, clip_freq);
+	mutex_unlock(&ccdev->lock);
+
+	return ret;
+}
+
+/**
+ * clock_cooling_clock_notifier - notifier callback on clock rate changes.
+ * @nb:	struct notifier_block * with callback info.
+ * @event: value showing clock event for which this function is invoked.
+ * @data: callback-specific data
+ *
+ * Callback to hijack the notification on clock transition.
+ * Every time there is a clock change, we intercept all pre change events
+ * and block the transition in case the new rate infringes thermal limits.
+ *
+ * Return: NOTIFY_DONE (success) or NOTIFY_BAD (new_rate > thermal limit).
+ */
+static int clock_cooling_clock_notifier(struct notifier_block *nb,
+					unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct clock_cooling_device *ccdev = to_clock_cooling_device(nb);
+
+	switch (event) {
+	case PRE_RATE_CHANGE:
+		/*
+		 * checks on current state
+		 * TODO: the current method is not the best we can find, as
+		 * it possibly allows voltage transitions in case the DVFS
+		 * layer is also hijacking clock pre notifications.
+		 */
+		if (ndata->new_rate > ccdev->clock_val)
+			return NOTIFY_BAD;
+		/* fall through */
+	case POST_RATE_CHANGE:
+	case ABORT_RATE_CHANGE:
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+/* clock cooling device thermal callback functions are defined below */
+
+/**
+ * clock_cooling_get_max_state - callback function to get the max cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the max cooling state.
+ *
+ * Callback for the thermal cooling device to return the clock
+ * max cooling state.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_get_max_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+	unsigned long count = 0;
+	int ret;
+
+	ret = clock_cooling_get_property(ccdev, 0, &count, GET_MAXL);
+	if (!ret)
+		*state = count;
+
+	return ret;
+}
+
+/**
+ * clock_cooling_get_cur_state - function to get the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: fill this variable with the current cooling state.
+ *
+ * Callback for the thermal cooling device to return the clock
+ * current cooling state.
+ *
+ * Return: 0 (success)
+ */
+static int clock_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long *state)
+{
+	struct clock_cooling_device *ccdev = cdev->devdata;
+
+	*state = ccdev->clock_state;
+
+	return 0;
+}
+
+/**
+ * clock_cooling_set_cur_state - function to set the current cooling state.
+ * @cdev: thermal cooling device pointer.
+ * @state: set this variable to the current cooling state.
+ *
+ * Callback for the thermal cooling device to change the clock cooling
+ * current cooling state.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+static int clock_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long state)
+{
+	struct clock_cooling_device *clock_device = cdev->devdata;
+
+	return clock_cooling_apply(clock_device, state);
+}
+
+/* Bind clock callbacks to thermal cooling device ops */
+static struct thermal_cooling_device_ops const clock_cooling_ops = {
+	.get_max_state = clock_cooling_get_max_state,
+	.get_cur_state = clock_cooling_get_cur_state,
+	.set_cur_state = clock_cooling_set_cur_state,
+};
+
+/**
+ * clock_cooling_register - function to create clock cooling device.
+ * @dev: struct device pointer to the device used as clock cooling device.
+ * @clock_name: string containing the clock used as cooling mechanism.
+ *
+ * Registers a cooling device named "thermal-clock-%d" whose cooling states
+ * map onto the frequencies the OPP layer reports for @dev, and installs a
+ * clock notifier to limit clock transitions to the state requested by the
+ * thermal framework. @dev is assumed to be capable of DVFS transitions.
+ *
+ * Return: the new thermal_cooling_device on success, ERR_PTR() otherwise.
+ */
+struct thermal_cooling_device *
+clock_cooling_register(struct device *dev, const char *clock_name)
+{
+	struct thermal_cooling_device *cdev;
+	struct clock_cooling_device *ccdev;
+	char dev_name[THERMAL_NAME_LENGTH];
+	int ret;
+
+	ccdev = devm_kzalloc(dev, sizeof(*ccdev), GFP_KERNEL);
+	if (!ccdev)
+		return ERR_PTR(-ENOMEM);
+
+	/* ->set_cur_state() takes this lock once the cdev is registered */
+	mutex_init(&ccdev->lock);
+	ccdev->dev = dev;
+	ccdev->clk = devm_clk_get(dev, clock_name);
+	if (IS_ERR(ccdev->clk))
+		return ERR_CAST(ccdev->clk);
+
+	ret = clock_cooling_get_idr(&ccdev->id);
+	if (ret)
+		return ERR_PTR(ret);
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id);
+
+	cdev = thermal_cooling_device_register(dev_name, ccdev,
+					       &clock_cooling_ops);
+	if (IS_ERR(cdev)) {
+		release_idr(ccdev->id);
+		return cdev;
+	}
+	ccdev->cdev = cdev;
+	ccdev->clk_rate_change_nb.notifier_call = clock_cooling_clock_notifier;
+
+	/* Assuming someone has already filled the opp table for this device */
+	ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table);
+	if (ret) {
+		/* do not leave a cooling device behind without a table */
+		thermal_cooling_device_unregister(cdev);
+		release_idr(ccdev->id);
+		return ERR_PTR(ret);
+	}
+	ccdev->clock_state = 0;
+	ccdev->clock_val = clock_cooling_get_frequency(ccdev, 0);
+
+	clk_notifier_register(ccdev->clk, &ccdev->clk_rate_change_nb);
+
+	return cdev;
+}
+EXPORT_SYMBOL_GPL(clock_cooling_register);
+
+/**
+ * clock_cooling_unregister - function to remove clock cooling device.
+ * @cdev: thermal cooling device pointer; NULL is accepted and ignored.
+ *
+ * This interface function unregisters the "thermal-clock-%d" cooling device.
+ */
+void clock_cooling_unregister(struct thermal_cooling_device *cdev)
+{
+	struct clock_cooling_device *ccdev;
+
+	if (!cdev)
+		return;
+
+	ccdev = cdev->devdata;
+
+	clk_notifier_unregister(ccdev->clk, &ccdev->clk_rate_change_nb);
+	/* drop the cooling device first: its callbacks walk freq_table */
+	thermal_cooling_device_unregister(ccdev->cdev);
+	dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table);
+	release_idr(ccdev->id);
+}
+EXPORT_SYMBOL_GPL(clock_cooling_unregister);
diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
index 0d8db80..e4e61b3 100644
--- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
@@ -131,6 +131,8 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
 			pr_warn("Failed to get target ACPI device\n");
 	}
 
+	result = 0;
+
 	*trtp = trts;
 	/* don't count bad entries */
 	*trt_count -= nr_bad_entries;
@@ -317,21 +319,21 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd,
 {
 	int ret = 0;
 	unsigned long length = 0;
-	unsigned long count = 0;
+	int count = 0;
 	char __user *arg = (void __user *)__arg;
 	struct trt *trts;
 	struct art *arts;
 
 	switch (cmd) {
 	case ACPI_THERMAL_GET_TRT_COUNT:
-		ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count,
+		ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
 				&trts, false);
 		kfree(trts);
 		if (!ret)
 			return put_user(count, (unsigned long __user *)__arg);
 		return ret;
 	case ACPI_THERMAL_GET_TRT_LEN:
-		ret = acpi_parse_trt(acpi_thermal_rel_handle, (int *)&count,
+		ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
 				&trts, false);
 		kfree(trts);
 		length = count * sizeof(union trt_object);
@@ -341,14 +343,14 @@ static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd,
 	case ACPI_THERMAL_GET_TRT:
 		return fill_trt(arg);
 	case ACPI_THERMAL_GET_ART_COUNT:
-		ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count,
+		ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
 				&arts, false);
 		kfree(arts);
 		if (!ret)
 			return put_user(count, (unsigned long __user *)__arg);
 		return ret;
 	case ACPI_THERMAL_GET_ART_LEN:
-		ret = acpi_parse_art(acpi_thermal_rel_handle, (int *)&count,
+		ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
 				&arts, false);
 		kfree(arts);
 		length = count * sizeof(union art_object);
diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
index edc1cce..dcb306e 100644
--- a/drivers/thermal/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c
@@ -43,6 +43,74 @@ struct int3400_thermal_priv {
 	struct trt *trts;
 	u8 uuid_bitmap;
 	int rel_misc_dev_res;
+	int current_uuid_index;
+};
+
+/* sysfs show: print each UUID whose bit is set in uuid_bitmap, one per line */
+static ssize_t available_uuids_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct int3400_thermal_priv *priv = platform_get_drvdata(pdev);
+	int i;
+	int length = 0;
+
+	for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; i++) {
+		if (!(priv->uuid_bitmap & (1 << i)))
+			continue;
+		/* scnprintf() returns what was stored, so length < PAGE_SIZE */
+		length += scnprintf(&buf[length], PAGE_SIZE - length,
+				    "%s\n", int3400_thermal_uuids[i]);
+	}
+
+	return length;
+}
+
+static ssize_t current_uuid_show(struct device *dev,
+				 struct device_attribute *devattr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct int3400_thermal_priv *priv = platform_get_drvdata(pdev);
+
+	if (priv->uuid_bitmap & (1 << priv->current_uuid_index))
+		return sprintf(buf, "%s\n",
+			       int3400_thermal_uuids[priv->current_uuid_index]);
+	else
+		return sprintf(buf, "INVALID\n");
+}
+
+static ssize_t current_uuid_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct int3400_thermal_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) {
+		/* compare the full UUID; sizeof() may only be a pointer size */
+		if ((priv->uuid_bitmap & (1 << i)) &&
+		    !strncmp(buf, int3400_thermal_uuids[i],
+			     strlen(int3400_thermal_uuids[i]))) {
+			priv->current_uuid_index = i;
+			return count;
+		}
+	}
+	return -EINVAL;
+}
+
+static DEVICE_ATTR(current_uuid, 0644, current_uuid_show, current_uuid_store);
+static DEVICE_ATTR_RO(available_uuids);
+static struct attribute *uuid_attrs[] = {
+	&dev_attr_available_uuids.attr,
+	&dev_attr_current_uuid.attr,
+	NULL
+};
+
+static struct attribute_group uuid_attribute_group = {
+	.attrs = uuid_attrs,
+	.name = "uuids"
 };
 
 static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
@@ -160,9 +228,9 @@ static int int3400_thermal_set_mode(struct thermal_zone_device *thermal,
 
 	if (enable != priv->mode) {
 		priv->mode = enable;
-		/* currently, only PASSIVE COOLING is supported */
 		result = int3400_thermal_run_osc(priv->adev->handle,
-					INT3400_THERMAL_PASSIVE_1, enable);
+						 priv->current_uuid_index,
+						 enable);
 	}
 	return result;
 }
@@ -223,7 +291,14 @@ static int int3400_thermal_probe(struct platform_device *pdev)
 	priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add(
 							priv->adev->handle);
 
+	result = sysfs_create_group(&pdev->dev.kobj, &uuid_attribute_group);
+	if (result)
+		goto free_zone;
+
 	return 0;
+
+free_zone:
+	thermal_zone_device_unregister(priv->thermal);
 free_trt:
 	kfree(priv->trts);
 free_art:
@@ -240,6 +315,7 @@ static int int3400_thermal_remove(struct platform_device *pdev)
 	if (!priv->rel_misc_dev_res)
 		acpi_thermal_rel_misc_device_remove(priv->adev->handle);
 
+	sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
 	thermal_zone_device_unregister(priv->thermal);
 	kfree(priv->trts);
 	kfree(priv->arts);
diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index 6e9fb62..1bfa6a6 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c
@@ -293,8 +293,7 @@ static int int3403_sensor_add(struct int3403_priv *priv)
 	return 0;
 
  err_free_obj:
-	if (obj->tzone)
-		thermal_zone_device_unregister(obj->tzone);
+	thermal_zone_device_unregister(obj->tzone);
 	return result;
 }
 
@@ -471,7 +470,6 @@ static struct platform_driver int3403_driver = {
 	.remove = int3403_remove,
 	.driver = {
 		.name = "int3403 thermal",
-		.owner  = THIS_MODULE,
 		.acpi_match_table = int3403_device_ids,
 	},
 };
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 95cb7fc..e98b424 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -435,7 +435,6 @@ static int clamp_thread(void *arg)
 		 * allowed. thus jiffies are updated properly.
 		 */
 		preempt_disable();
-		tick_nohz_idle_enter();
 		/* mwait until target jiffies is reached */
 		while (time_before(jiffies, target_jiffies)) {
 			unsigned long ecx = 1;
@@ -451,7 +450,6 @@ static int clamp_thread(void *arg)
 			start_critical_timings();
 			atomic_inc(&idle_wakeup_counter);
 		}
-		tick_nohz_idle_exit();
 		preempt_enable();
 	}
 	del_timer_sync(&wakeup_timer);
@@ -689,6 +687,7 @@ static const struct x86_cpu_id intel_powerclamp_ids[] = {
 	{ X86_VENDOR_INTEL, 6, 0x3f},
 	{ X86_VENDOR_INTEL, 6, 0x45},
 	{ X86_VENDOR_INTEL, 6, 0x46},
+	{ X86_VENDOR_INTEL, 6, 0x4c},
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
index a6a0a18..5580f5b 100644
--- a/drivers/thermal/intel_soc_dts_thermal.c
+++ b/drivers/thermal/intel_soc_dts_thermal.c
@@ -360,6 +360,9 @@ static void proc_thermal_interrupt(void)
 	u32 sticky_out;
 	int status;
 	u32 ptmc_out;
+	unsigned long flags;
+
+	spin_lock_irqsave(&intr_notify_lock, flags);
 
 	/* Clear APIC interrupt */
 	status = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ,
@@ -378,21 +381,20 @@ static void proc_thermal_interrupt(void)
 		/* reset sticky bit */
 		status = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE,
 					SOC_DTS_OFFSET_PTTSS, sticky_out);
+		spin_unlock_irqrestore(&intr_notify_lock, flags);
+
 		for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
 			pr_debug("TZD update for zone %d\n", i);
 			thermal_zone_device_update(soc_dts[i]->tzone);
 		}
-	}
+	} else
+		spin_unlock_irqrestore(&intr_notify_lock, flags);
 
 }
 
 static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&intr_notify_lock, flags);
 	proc_thermal_interrupt();
-	spin_unlock_irqrestore(&intr_notify_lock, flags);
 	pr_debug("proc_thermal_interrupt\n");
 
 	return IRQ_HANDLED;
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 62143ba..e145b66 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -30,27 +30,13 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/string.h>
+#include <linux/thermal.h>
 
 #include "thermal_core.h"
 
 /***   Private data structures to represent thermal device tree data ***/
 
 /**
- * struct __thermal_trip - representation of a point in temperature domain
- * @np: pointer to struct device_node that this trip point was created from
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- */
-
-struct __thermal_trip {
-	struct device_node *np;
-	unsigned long int temperature;
-	unsigned long int hysteresis;
-	enum thermal_trip_type type;
-};
-
-/**
  * struct __thermal_bind_param - a match between trip and cooling device
  * @cooling_device: a pointer to identify the referred cooling device
  * @trip_id: the trip point index
@@ -77,8 +63,7 @@ struct __thermal_bind_params {
  * @num_tbps: number of thermal bind params
  * @tbps: an array of thermal bind params (0..num_tbps - 1)
  * @sensor_data: sensor private data used while reading temperature and trend
- * @get_temp: sensor callback to read temperature
- * @get_trend: sensor callback to read temperature trend
+ * @ops: set of callbacks to handle the thermal zone based on DT
  */
 
 struct __thermal_zone {
@@ -88,7 +73,7 @@ struct __thermal_zone {
 
 	/* trip data */
 	int ntrips;
-	struct __thermal_trip *trips;
+	struct thermal_trip *trips;
 
 	/* cooling binding data */
 	int num_tbps;
@@ -96,8 +81,7 @@ struct __thermal_zone {
 
 	/* sensor interface */
 	void *sensor_data;
-	int (*get_temp)(void *, long *);
-	int (*get_trend)(void *, long *);
+	const struct thermal_zone_of_device_ops *ops;
 };
 
 /***   DT thermal zone device callbacks   ***/
@@ -107,10 +91,96 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz,
 {
 	struct __thermal_zone *data = tz->devdata;
 
-	if (!data->get_temp)
+	if (!data->ops->get_temp)
 		return -EINVAL;
 
-	return data->get_temp(data->sensor_data, temp);
+	return data->ops->get_temp(data->sensor_data, temp);
+}
+
+/**
+ * of_thermal_get_ntrips - report how many trip points a zone has.
+ *
+ * @tz: pointer to a thermal zone
+ *
+ * Globally visible accessor for the trip point count kept in the zone's
+ * private struct __thermal_zone (tz->devdata).
+ *
+ * Return: number of available trip points, -ENODEV when data not available
+ */
+int of_thermal_get_ntrips(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *zone_data = tz->devdata;
+
+	if (IS_ERR_OR_NULL(zone_data))
+		return -ENODEV;
+
+	return zone_data->ntrips;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_ntrips);
+
+/**
+ * of_thermal_is_trip_valid - function to check if trip point is valid
+ *
+ * @tz:	pointer to a thermal zone
+ * @trip:	trip point to evaluate
+ *
+ * This function is responsible for checking if passed trip point is valid
+ *
+ * Return: true if trip point is valid, false otherwise
+ */
+bool of_thermal_is_trip_valid(struct thermal_zone_device *tz, int trip)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data || trip >= data->ntrips || trip < 0)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(of_thermal_is_trip_valid);
+
+/**
+ * of_thermal_get_trip_points - function to get access to a globally exported
+ *				trip points
+ *
+ * @tz:	pointer to a thermal zone
+ *
+ * This function provides a pointer to trip points table
+ *
+ * Return: pointer to trip points table, NULL otherwise
+ */
+const struct thermal_trip * const
+of_thermal_get_trip_points(struct thermal_zone_device *tz)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data)
+		return NULL;
+
+	return data->trips;
+}
+EXPORT_SYMBOL_GPL(of_thermal_get_trip_points);
+
+/**
+ * of_thermal_set_emul_temp - function to set emulated temperature
+ *
+ * @tz:	pointer to a thermal zone
+ * @temp:	temperature to set
+ *
+ * This function gives the ability to set emulated value of temperature,
+ * which is handy for debugging
+ *
+ * Return: zero on success, error code otherwise
+ */
+static int of_thermal_set_emul_temp(struct thermal_zone_device *tz,
+				    unsigned long temp)
+{
+	struct __thermal_zone *data = tz->devdata;
+
+	if (!data->ops || !data->ops->set_emul_temp)
+		return -EINVAL;
+
+	return data->ops->set_emul_temp(data->sensor_data, temp);
 }
 
 static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
@@ -120,10 +190,10 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip,
 	long dev_trend;
 	int r;
 
-	if (!data->get_trend)
+	if (!data->ops->get_trend)
 		return -EINVAL;
 
-	r = data->get_trend(data->sensor_data, &dev_trend);
+	r = data->ops->get_trend(data->sensor_data, &dev_trend);
 	if (r)
 		return r;
 
@@ -324,8 +394,7 @@ static struct thermal_zone_device_ops of_thermal_ops = {
 static struct thermal_zone_device *
 thermal_zone_of_add_sensor(struct device_node *zone,
 			   struct device_node *sensor, void *data,
-			   int (*get_temp)(void *, long *),
-			   int (*get_trend)(void *, long *))
+			   const struct thermal_zone_of_device_ops *ops)
 {
 	struct thermal_zone_device *tzd;
 	struct __thermal_zone *tz;
@@ -336,13 +405,16 @@ thermal_zone_of_add_sensor(struct device_node *zone,
 
 	tz = tzd->devdata;
 
+	if (!ops)
+		return ERR_PTR(-EINVAL);
+
 	mutex_lock(&tzd->lock);
-	tz->get_temp = get_temp;
-	tz->get_trend = get_trend;
+	tz->ops = ops;
 	tz->sensor_data = data;
 
 	tzd->ops->get_temp = of_thermal_get_temp;
 	tzd->ops->get_trend = of_thermal_get_trend;
+	tzd->ops->set_emul_temp = of_thermal_set_emul_temp;
 	mutex_unlock(&tzd->lock);
 
 	return tzd;
@@ -356,8 +428,7 @@ thermal_zone_of_add_sensor(struct device_node *zone,
  *             than one sensors
  * @data: a private pointer (owned by the caller) that will be passed
  *        back, when a temperature reading is needed.
- * @get_temp: a pointer to a function that reads the sensor temperature.
- * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp.
  *
  * This function will search the list of thermal zones described in device
  * tree and look for the zone that refer to the sensor device pointed by
@@ -382,9 +453,8 @@ thermal_zone_of_add_sensor(struct device_node *zone,
  * check the return value with help of IS_ERR() helper.
  */
 struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
-				void *data, int (*get_temp)(void *, long *),
-				int (*get_trend)(void *, long *))
+thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
+				const struct thermal_zone_of_device_ops *ops)
 {
 	struct device_node *np, *child, *sensor_np;
 	struct thermal_zone_device *tzd = ERR_PTR(-ENODEV);
@@ -426,9 +496,7 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
 
 		if (sensor_specs.np == sensor_np && id == sensor_id) {
 			tzd = thermal_zone_of_add_sensor(child, sensor_np,
-							 data,
-							 get_temp,
-							 get_trend);
+							 data, ops);
 			of_node_put(sensor_specs.np);
 			of_node_put(child);
 			goto exit;
@@ -475,9 +543,9 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
 	mutex_lock(&tzd->lock);
 	tzd->ops->get_temp = NULL;
 	tzd->ops->get_trend = NULL;
+	tzd->ops->set_emul_temp = NULL;
 
-	tz->get_temp = NULL;
-	tz->get_trend = NULL;
+	tz->ops = NULL;
 	tz->sensor_data = NULL;
 	mutex_unlock(&tzd->lock);
 }
@@ -501,7 +569,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
  */
 static int thermal_of_populate_bind_params(struct device_node *np,
 					   struct __thermal_bind_params *__tbp,
-					   struct __thermal_trip *trips,
+					   struct thermal_trip *trips,
 					   int ntrips)
 {
 	struct of_phandle_args cooling_spec;
@@ -604,7 +672,7 @@ static int thermal_of_get_trip_type(struct device_node *np,
  * Return: 0 on success, proper error code otherwise
  */
 static int thermal_of_populate_trip(struct device_node *np,
-				    struct __thermal_trip *trip)
+				    struct thermal_trip *trip)
 {
 	int prop;
 	int ret;
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
new file mode 100644
index 0000000..1bcddfc
--- /dev/null
+++ b/drivers/thermal/rockchip_thermal.c
@@ -0,0 +1,693 @@
+/*
+ * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/thermal.h>
+
+/**
+ * When the temperature stays too high for a period of time, the resulting
+ * TSHUT signal is routed either to the CRU module, which resets the entire
+ * chip, or to a GPIO that notifies the PMIC.
+ */
+enum tshut_mode {
+	TSHUT_MODE_CRU = 0,
+	TSHUT_MODE_GPIO,
+};
+
+/**
+ * Polarity of the temperature sensors' TSHUT output.
+ * Bit 8 of TSADCV2_AUTO_CON is the tshut polarity.
+ * 0: low active, 1: high active
+ */
+enum tshut_polarity {
+	TSHUT_LOW_ACTIVE = 0,
+	TSHUT_HIGH_ACTIVE,
+};
+
+/**
+ * The system has three temperature sensors: channel 0 is reserved,
+ * channel 1 is for the CPU, and channel 2 is for the GPU.
+ */
+enum sensor_id {
+	SENSOR_CPU = 1,
+	SENSOR_GPU,
+};
+
+struct rockchip_tsadc_chip {
+	/* Default TSHUT settings, used when the device tree omits them */
+	long tshut_temp;
+	enum tshut_mode tshut_mode;
+	enum tshut_polarity tshut_polarity;
+
+	/* Chip-wide methods */
+	void (*initialize)(void __iomem *reg, enum tshut_polarity p);
+	void (*irq_ack)(void __iomem *reg);
+	void (*control)(void __iomem *reg, bool on);
+
+	/* Per-sensor methods, chn is the sensor's hardware channel number */
+	int (*get_temp)(int chn, void __iomem *reg, long *temp);
+	void (*set_tshut_temp)(int chn, void __iomem *reg, long temp);
+	void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
+};
+
+struct rockchip_thermal_sensor {
+	struct rockchip_thermal_data *thermal;	/* owning controller data */
+	struct thermal_zone_device *tzd;	/* zone from sensor registration */
+	enum sensor_id id;			/* TSADC channel of this sensor */
+};
+
+#define NUM_SENSORS	2 /* Ignore unused sensor 0 */
+
+struct rockchip_thermal_data {
+	const struct rockchip_tsadc_chip *chip;	/* from the OF match data */
+	struct platform_device *pdev;
+	struct reset_control *reset;		/* "tsadc-apb" reset control */
+
+	struct rockchip_thermal_sensor sensors[NUM_SENSORS];
+
+	struct clk *clk;			/* "tsadc" clock */
+	struct clk *pclk;			/* "apb_pclk" clock */
+
+	void __iomem *regs;
+
+	long tshut_temp;			/* from DT, else chip default */
+	enum tshut_mode tshut_mode;		/* from DT, else chip default */
+	enum tshut_polarity tshut_polarity;	/* from DT, else chip default */
+};
+
+/* TSADC V2 Sensor info define: */
+#define TSADCV2_AUTO_CON			0x04
+#define TSADCV2_INT_EN				0x08
+#define TSADCV2_INT_PD				0x0c
+#define TSADCV2_DATA(chn)			(0x20 + (chn) * 0x04)
+#define TSADCV2_COMP_SHUT(chn)		        (0x40 + (chn) * 0x04)
+#define TSADCV2_HIGHT_INT_DEBOUNCE		0x60
+#define TSADCV2_HIGHT_TSHUT_DEBOUNCE		0x64
+#define TSADCV2_AUTO_PERIOD			0x68
+#define TSADCV2_AUTO_PERIOD_HT			0x6c
+
+#define TSADCV2_AUTO_EN				BIT(0)
+#define TSADCV2_AUTO_DISABLE			~BIT(0)
+#define TSADCV2_AUTO_SRC_EN(chn)		BIT(4 + (chn))
+#define TSADCV2_AUTO_TSHUT_POLARITY_HIGH	BIT(8)
+#define TSADCV2_AUTO_TSHUT_POLARITY_LOW		~BIT(8)
+
+#define TSADCV2_INT_SRC_EN(chn)			BIT(chn)
+#define TSADCV2_SHUT_2GPIO_SRC_EN(chn)		BIT(4 + (chn))
+#define TSADCV2_SHUT_2CRU_SRC_EN(chn)		BIT(8 + (chn))
+
+#define TSADCV2_INT_PD_CLEAR			~BIT(8)
+
+#define TSADCV2_DATA_MASK			0xfff
+#define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT	4
+#define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT	4
+#define TSADCV2_AUTO_PERIOD_TIME		250 /* msec */
+#define TSADCV2_AUTO_PERIOD_HT_TIME		50  /* msec */
+
+struct tsadc_table {
+	unsigned long code;
+	long temp;
+};
+
+static const struct tsadc_table v2_code_table[] = {
+	{TSADCV2_DATA_MASK, -40000},
+	{3800, -40000},
+	{3792, -35000},
+	{3783, -30000},
+	{3774, -25000},
+	{3765, -20000},
+	{3756, -15000},
+	{3747, -10000},
+	{3737, -5000},
+	{3728, 0},
+	{3718, 5000},
+	{3708, 10000},
+	{3698, 15000},
+	{3688, 20000},
+	{3678, 25000},
+	{3667, 30000},
+	{3656, 35000},
+	{3645, 40000},
+	{3634, 45000},
+	{3623, 50000},
+	{3611, 55000},
+	{3600, 60000},
+	{3588, 65000},
+	{3575, 70000},
+	{3563, 75000},
+	{3550, 80000},
+	{3537, 85000},
+	{3524, 90000},
+	{3510, 95000},
+	{3496, 100000},
+	{3482, 105000},
+	{3467, 110000},
+	{3452, 115000},
+	{3437, 120000},
+	{3421, 125000},
+	{0, 125000},
+};
+
+/* Binary-search v2_code_table for an exact temperature; 0 means no match. */
+static u32 rk_tsadcv2_temp_to_code(long temp)
+{
+	int first = 0, last = ARRAY_SIZE(v2_code_table) - 1;
+
+	if (temp < v2_code_table[first].temp ||
+	    temp > v2_code_table[last].temp)
+		return 0;
+
+	while (first <= last) {
+		int probe = (first + last) / 2;
+		const struct tsadc_table *entry = &v2_code_table[probe];
+
+		if (temp == entry->temp)
+			return entry->code;
+		if (temp < entry->temp)
+			last = probe - 1;
+		else
+			first = probe + 1;
+	}
+
+	return 0;
+}
+
+/* Convert a raw TSADC code to the matching v2_code_table temperature. */
+static long rk_tsadcv2_code_to_temp(u32 code)
+{
+	int low = 0;
+	int high = ARRAY_SIZE(v2_code_table) - 1;
+	int mid = (high + low) / 2;
+
+	if (code > v2_code_table[low].code || code < v2_code_table[high].code)
+		return 125000; /* No code available, return max temperature */
+
+	while (low <= high) {
+		/* Entry 0 has no predecessor: never read v2_code_table[-1] */
+		if (code >= v2_code_table[mid].code &&
+		    (mid == 0 || code < v2_code_table[mid - 1].code))
+			return v2_code_table[mid].temp;
+		else if (code < v2_code_table[mid].code)
+			low = mid + 1;
+		else
+			high = mid - 1;
+		mid = (low + high) / 2;
+	}
+
+	return 125000;
+}
+
+/**
+ * rk_tsadcv2_initialize - initialize TSADC Controller
+ * (1) Set the TSHUT polarity in TSADCV2_AUTO_CON with every other bit
+ * cleared, so nothing else in that register is enabled by this write.
+ * (2) Set TSADCV2_AUTO_PERIOD and TSADCV2_AUTO_PERIOD_HT, the intervals
+ * between two TSADC accesses in normal operation and after the
+ * temperature is higher than COMP_SHUT or COMP_INT.
+ * (3) Set TSADCV2_HIGHT_INT_DEBOUNCE and TSADCV2_HIGHT_TSHUT_DEBOUNCE,
+ * if the temperature is higher than COMP_INT or COMP_SHUT for
+ * "debounce" times, TSADC controller will generate interrupt or TSHUT.
+ */
+static void rk_tsadcv2_initialize(void __iomem *regs,
+				  enum tshut_polarity tshut_polarity)
+{
+	if (tshut_polarity == TSHUT_HIGH_ACTIVE)
+		writel_relaxed(0U | TSADCV2_AUTO_TSHUT_POLARITY_HIGH,
+			       regs + TSADCV2_AUTO_CON);
+	else
+		writel_relaxed(0U & ~TSADCV2_AUTO_TSHUT_POLARITY_HIGH,
+			       regs + TSADCV2_AUTO_CON);
+
+	writel_relaxed(TSADCV2_AUTO_PERIOD_TIME, regs + TSADCV2_AUTO_PERIOD);
+	writel_relaxed(TSADCV2_HIGHT_INT_DEBOUNCE_COUNT,
+		       regs + TSADCV2_HIGHT_INT_DEBOUNCE);
+	writel_relaxed(TSADCV2_AUTO_PERIOD_HT_TIME,
+		       regs + TSADCV2_AUTO_PERIOD_HT);
+	writel_relaxed(TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT,
+		       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
+}
+
+/* Acknowledge the interrupt: write INT_PD back with bit 8 cleared. */
+static void rk_tsadcv2_irq_ack(void __iomem *regs)
+{
+	u32 pending = readl_relaxed(regs + TSADCV2_INT_PD);
+
+	writel_relaxed(pending & TSADCV2_INT_PD_CLEAR, regs + TSADCV2_INT_PD);
+}
+
+/* Set or clear the AUTO_EN bit of TSADCV2_AUTO_CON according to enable. */
+static void rk_tsadcv2_control(void __iomem *regs, bool enable)
+{
+	void __iomem *auto_con = regs + TSADCV2_AUTO_CON;
+	u32 con = readl_relaxed(auto_con);
+
+	con &= ~TSADCV2_AUTO_EN;
+	if (enable)
+		con |= TSADCV2_AUTO_EN;
+
+	writel_relaxed(con, auto_con);
+}
+
+/* Read the data register of channel chn and convert it to a temperature. */
+static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp)
+{
+	u32 code = readl_relaxed(regs + TSADCV2_DATA(chn));
+
+	/* A conversion needs some time; a zero value is treated as not ready */
+	if (!code)
+		return -EAGAIN;
+
+	*temp = rk_tsadcv2_code_to_temp(code);
+
+	return 0;
+}
+
+/* Program the TSHUT threshold of channel chn, then enable its source bit. */
+static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp)
+{
+	u32 con;
+
+	writel_relaxed(rk_tsadcv2_temp_to_code(temp),
+		       regs + TSADCV2_COMP_SHUT(chn));
+
+	con = readl_relaxed(regs + TSADCV2_AUTO_CON) | TSADCV2_AUTO_SRC_EN(chn);
+	writel_relaxed(con, regs + TSADCV2_AUTO_CON);
+}
+
+/* Route TSHUT of channel chn to either the GPIO or the CRU, never both. */
+static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
+				  enum tshut_mode mode)
+{
+	const u32 to_gpio = TSADCV2_SHUT_2GPIO_SRC_EN(chn);
+	const u32 to_cru = TSADCV2_SHUT_2CRU_SRC_EN(chn);
+	u32 int_en = readl_relaxed(regs + TSADCV2_INT_EN);
+
+	int_en &= ~(to_gpio | to_cru);
+	if (mode == TSHUT_MODE_GPIO)
+		int_en |= to_gpio;
+	else
+		int_en |= to_cru;
+
+	writel_relaxed(int_en, regs + TSADCV2_INT_EN);
+}
+
+static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
+	.tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+	.tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+	.tshut_temp = 95000,
+
+	.initialize = rk_tsadcv2_initialize,
+	.irq_ack = rk_tsadcv2_irq_ack,
+	.control = rk_tsadcv2_control,
+	.get_temp = rk_tsadcv2_get_temp,
+	.set_tshut_temp = rk_tsadcv2_tshut_temp,
+	.set_tshut_mode = rk_tsadcv2_tshut_mode,
+};
+
+static const struct of_device_id of_rockchip_thermal_match[] = {
+	{
+		.compatible = "rockchip,rk3288-tsadc",
+		.data = (void *)&rk3288_tsadc_data,
+	},
+	{ /* end */ },
+};
+MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
+
+static void
+rockchip_thermal_toggle_sensor(struct rockchip_thermal_sensor *sensor, bool on)
+{
+	struct thermal_zone_device *tzd = sensor->tzd;
+
+	tzd->ops->set_mode(tzd,
+		on ? THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED);
+}
+
+static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev)
+{
+	struct rockchip_thermal_data *thermal = dev;
+	int i;
+
+	dev_dbg(&thermal->pdev->dev, "thermal alarm\n");
+
+	thermal->chip->irq_ack(thermal->regs);
+
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+		thermal_zone_device_update(thermal->sensors[i].tzd);
+
+	return IRQ_HANDLED;
+}
+
+static int rockchip_thermal_get_temp(void *_sensor, long *out_temp)
+{
+	struct rockchip_thermal_sensor *sensor = _sensor;
+	struct rockchip_thermal_data *thermal = sensor->thermal;
+	const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip;
+	int retval;
+
+	retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp);
+	dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n",
+		sensor->id, *out_temp, retval);
+
+	return retval;
+}
+
+static const struct thermal_zone_of_device_ops rockchip_of_thermal_ops = {
+	.get_temp = rockchip_thermal_get_temp,
+};
+
+static int rockchip_configure_from_dt(struct device *dev,
+				      struct device_node *np,
+				      struct rockchip_thermal_data *thermal)
+{
+	u32 shut_temp, tshut_mode, tshut_polarity;
+
+	if (of_property_read_u32(np, "rockchip,hw-tshut-temp", &shut_temp)) {
+		dev_warn(dev,
+			 "Missing tshut temp property, using default %ld\n",
+			 thermal->chip->tshut_temp);
+		thermal->tshut_temp = thermal->chip->tshut_temp;
+	} else {
+		thermal->tshut_temp = shut_temp;
+	}
+
+	if (thermal->tshut_temp > INT_MAX) {
+		dev_err(dev, "Invalid tshut temperature specified: %ld\n",
+			thermal->tshut_temp);
+		return -ERANGE;
+	}
+
+	if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) {
+		dev_warn(dev,
+			 "Missing tshut mode property, using default (%s)\n",
+			 thermal->chip->tshut_mode == TSHUT_MODE_GPIO ?
+				"gpio" : "cru");
+		thermal->tshut_mode = thermal->chip->tshut_mode;
+	} else {
+		thermal->tshut_mode = tshut_mode;
+	}
+
+	if (thermal->tshut_mode > 1) {
+		dev_err(dev, "Invalid tshut mode specified: %d\n",
+			thermal->tshut_mode);
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(np, "rockchip,hw-tshut-polarity",
+				 &tshut_polarity)) {
+		dev_warn(dev,
+			 "Missing tshut-polarity property, using default (%s)\n",
+			 thermal->chip->tshut_polarity == TSHUT_LOW_ACTIVE ?
+				"low" : "high");
+		thermal->tshut_polarity = thermal->chip->tshut_polarity;
+	} else {
+		thermal->tshut_polarity = tshut_polarity;
+	}
+
+	if (thermal->tshut_polarity > 1) {
+		dev_err(dev, "Invalid tshut-polarity specified: %d\n",
+			thermal->tshut_polarity);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+rockchip_thermal_register_sensor(struct platform_device *pdev,
+				 struct rockchip_thermal_data *thermal,
+				 struct rockchip_thermal_sensor *sensor,
+				 enum sensor_id id)
+{
+	const struct rockchip_tsadc_chip *tsadc = thermal->chip;
+	int error;
+
+	tsadc->set_tshut_mode(id, thermal->regs, thermal->tshut_mode);
+	tsadc->set_tshut_temp(id, thermal->regs, thermal->tshut_temp);
+
+	sensor->thermal = thermal;
+	sensor->id = id;
+	sensor->tzd = thermal_zone_of_sensor_register(&pdev->dev, id, sensor,
+						      &rockchip_of_thermal_ops);
+	if (IS_ERR(sensor->tzd)) {
+		error = PTR_ERR(sensor->tzd);
+		dev_err(&pdev->dev, "failed to register sensor %d: %d\n",
+			id, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Reset TSADC Controller, reset all tsadc registers.
+ */
+static void rockchip_thermal_reset_controller(struct reset_control *reset)
+{
+	reset_control_assert(reset);
+	usleep_range(10, 20);
+	reset_control_deassert(reset);
+}
+
+/* Acquire resources, program the TSADC and register both thermal sensors. */
+static int rockchip_thermal_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct rockchip_thermal_data *thermal;
+	const struct of_device_id *match;
+	struct resource *res;
+	int irq, i;
+	int error;
+
+	match = of_match_node(of_rockchip_thermal_match, np);
+	if (!match)
+		return -ENXIO;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq resource?\n");
+		return -EINVAL;
+	}
+
+	thermal = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_thermal_data),
+			       GFP_KERNEL);
+	if (!thermal)
+		return -ENOMEM;
+
+	thermal->pdev = pdev;
+
+	thermal->chip = (const struct rockchip_tsadc_chip *)match->data;
+	if (!thermal->chip)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	thermal->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(thermal->regs))
+		return PTR_ERR(thermal->regs);
+
+	thermal->reset = devm_reset_control_get(&pdev->dev, "tsadc-apb");
+	if (IS_ERR(thermal->reset)) {
+		error = PTR_ERR(thermal->reset);
+		dev_err(&pdev->dev, "failed to get tsadc reset: %d\n", error);
+		return error;
+	}
+
+	thermal->clk = devm_clk_get(&pdev->dev, "tsadc");
+	if (IS_ERR(thermal->clk)) {
+		error = PTR_ERR(thermal->clk);
+		dev_err(&pdev->dev, "failed to get tsadc clock: %d\n", error);
+		return error;
+	}
+
+	thermal->pclk = devm_clk_get(&pdev->dev, "apb_pclk");
+	if (IS_ERR(thermal->pclk)) {
+		error = PTR_ERR(thermal->pclk);
+		dev_err(&pdev->dev, "failed to get apb_pclk clock: %d\n",
+			error);
+		return error;
+	}
+
+	error = clk_prepare_enable(thermal->clk);
+	if (error) {
+		dev_err(&pdev->dev, "failed to enable converter clock: %d\n",
+			error);
+		return error;
+	}
+
+	error = clk_prepare_enable(thermal->pclk);
+	if (error) {
+		dev_err(&pdev->dev, "failed to enable pclk: %d\n", error);
+		goto err_disable_clk;
+	}
+
+	rockchip_thermal_reset_controller(thermal->reset);
+
+	error = rockchip_configure_from_dt(&pdev->dev, np, thermal);
+	if (error) {
+		dev_err(&pdev->dev, "failed to parse device tree data: %d\n",
+			error);
+		goto err_disable_pclk;
+	}
+
+	thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
+
+	error = rockchip_thermal_register_sensor(pdev, thermal,
+						 &thermal->sensors[0],
+						 SENSOR_CPU);
+	if (error) {
+		dev_err(&pdev->dev,
+			"failed to register CPU thermal sensor: %d\n", error);
+		goto err_disable_pclk;
+	}
+
+	error = rockchip_thermal_register_sensor(pdev, thermal,
+						 &thermal->sensors[1],
+						 SENSOR_GPU);
+	if (error) {
+		dev_err(&pdev->dev,
+			"failed to register GPU thermal sensor: %d\n", error);
+		goto err_unregister_cpu_sensor;
+	}
+
+	error = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+					  &rockchip_thermal_alarm_irq_thread,
+					  IRQF_ONESHOT,
+					  "rockchip_thermal", thermal);
+	if (error) {
+		dev_err(&pdev->dev,
+			"failed to request tsadc irq: %d\n", error);
+		goto err_unregister_gpu_sensor;
+	}
+
+	thermal->chip->control(thermal->regs, true);
+
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+		rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
+
+	platform_set_drvdata(pdev, thermal);
+
+	return 0;
+
+err_unregister_gpu_sensor:
+	thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[1].tzd);
+err_unregister_cpu_sensor:
+	thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[0].tzd);
+err_disable_pclk:
+	clk_disable_unprepare(thermal->pclk);
+err_disable_clk:
+	clk_disable_unprepare(thermal->clk);
+
+	return error;
+}
+
+static int rockchip_thermal_remove(struct platform_device *pdev)
+{
+	struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
+		struct rockchip_thermal_sensor *sensor = &thermal->sensors[i];
+
+		rockchip_thermal_toggle_sensor(sensor, false);
+		thermal_zone_of_sensor_unregister(&pdev->dev, sensor->tzd);
+	}
+
+	thermal->chip->control(thermal->regs, false);
+
+	clk_disable_unprepare(thermal->pclk);
+	clk_disable_unprepare(thermal->clk);
+
+	return 0;
+}
+
+static int __maybe_unused rockchip_thermal_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+		rockchip_thermal_toggle_sensor(&thermal->sensors[i], false);
+
+	thermal->chip->control(thermal->regs, false);
+
+	clk_disable(thermal->pclk);
+	clk_disable(thermal->clk);
+
+	return 0;
+}
+
+/* Re-enable the clocks, then reset and fully reprogram the TSADC. */
+static int __maybe_unused rockchip_thermal_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
+	int i, error;
+
+	error = clk_enable(thermal->clk);
+	if (error)
+		return error;
+
+	error = clk_enable(thermal->pclk);
+	if (error) {
+		clk_disable(thermal->clk);	/* keep enable counts balanced */
+		return error;
+	}
+
+	rockchip_thermal_reset_controller(thermal->reset);
+	thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
+
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
+		enum sensor_id id = thermal->sensors[i].id;
+
+		thermal->chip->set_tshut_mode(id, thermal->regs,
+					      thermal->tshut_mode);
+		thermal->chip->set_tshut_temp(id, thermal->regs,
+					      thermal->tshut_temp);
+	}
+
+	thermal->chip->control(thermal->regs, true);
+	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+		rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rockchip_thermal_pm_ops,
+			 rockchip_thermal_suspend, rockchip_thermal_resume);
+
+static struct platform_driver rockchip_thermal_driver = {
+	.driver = {
+		.name = "rockchip-thermal",
+		.owner = THIS_MODULE,
+		.pm = &rockchip_thermal_pm_ops,
+		.of_match_table = of_rockchip_thermal_match,
+	},
+	.probe = rockchip_thermal_probe,
+	.remove = rockchip_thermal_remove,
+};
+
+module_platform_driver(rockchip_thermal_driver);
+
+MODULE_DESCRIPTION("ROCKCHIP THERMAL Driver");
+MODULE_AUTHOR("Rockchip, Inc.");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:rockchip-thermal");
diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h
index 158f5aa..cd44719 100644
--- a/drivers/thermal/samsung/exynos_thermal_common.h
+++ b/drivers/thermal/samsung/exynos_thermal_common.h
@@ -27,7 +27,6 @@
 #define SENSOR_NAME_LEN	16
 #define MAX_TRIP_COUNT	8
 #define MAX_COOLING_DEVICE 4
-#define MAX_TRIMINFO_CTRL_REG	2
 
 #define ACTIVE_INTERVAL 500
 #define IDLE_INTERVAL 10000
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 1e7d073..d44d91d 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -33,7 +33,87 @@
 
 #include "exynos_thermal_common.h"
 #include "exynos_tmu.h"
-#include "exynos_tmu_data.h"
+
+/* Exynos generic registers */
+#define EXYNOS_TMU_REG_TRIMINFO		0x0
+#define EXYNOS_TMU_REG_CONTROL		0x20
+#define EXYNOS_TMU_REG_STATUS		0x28
+#define EXYNOS_TMU_REG_CURRENT_TEMP	0x40
+#define EXYNOS_TMU_REG_INTEN		0x70
+#define EXYNOS_TMU_REG_INTSTAT		0x74
+#define EXYNOS_TMU_REG_INTCLEAR		0x78
+
+#define EXYNOS_TMU_TEMP_MASK		0xff
+#define EXYNOS_TMU_REF_VOLTAGE_SHIFT	24
+#define EXYNOS_TMU_REF_VOLTAGE_MASK	0x1f
+#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK	0xf
+#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT	8
+#define EXYNOS_TMU_CORE_EN_SHIFT	0
+
+/* Exynos3250 specific registers */
+#define EXYNOS_TMU_TRIMINFO_CON1	0x10
+
+/* Exynos4210 specific registers */
+#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP	0x44
+#define EXYNOS4210_TMU_REG_TRIG_LEVEL0	0x50
+
+/* Exynos5250, Exynos4412, Exynos3250 specific registers */
+#define EXYNOS_TMU_TRIMINFO_CON2	0x14
+#define EXYNOS_THD_TEMP_RISE		0x50
+#define EXYNOS_THD_TEMP_FALL		0x54
+#define EXYNOS_EMUL_CON		0x80
+
+#define EXYNOS_TRIMINFO_RELOAD_ENABLE	1
+#define EXYNOS_TRIMINFO_25_SHIFT	0
+#define EXYNOS_TRIMINFO_85_SHIFT	8
+#define EXYNOS_TMU_TRIP_MODE_SHIFT	13
+#define EXYNOS_TMU_TRIP_MODE_MASK	0x7
+#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT	12
+
+#define EXYNOS_TMU_INTEN_RISE0_SHIFT	0
+#define EXYNOS_TMU_INTEN_RISE1_SHIFT	4
+#define EXYNOS_TMU_INTEN_RISE2_SHIFT	8
+#define EXYNOS_TMU_INTEN_RISE3_SHIFT	12
+#define EXYNOS_TMU_INTEN_FALL0_SHIFT	16
+
+#define EXYNOS_EMUL_TIME	0x57F0
+#define EXYNOS_EMUL_TIME_MASK	0xffff
+#define EXYNOS_EMUL_TIME_SHIFT	16
+#define EXYNOS_EMUL_DATA_SHIFT	8
+#define EXYNOS_EMUL_DATA_MASK	0xFF
+#define EXYNOS_EMUL_ENABLE	0x1
+
+/* Exynos5260 specific */
+#define EXYNOS5260_TMU_REG_INTEN		0xC0
+#define EXYNOS5260_TMU_REG_INTSTAT		0xC4
+#define EXYNOS5260_TMU_REG_INTCLEAR		0xC8
+#define EXYNOS5260_EMUL_CON			0x100
+
+/* Exynos4412 specific */
+#define EXYNOS4412_MUX_ADDR_VALUE          6
+#define EXYNOS4412_MUX_ADDR_SHIFT          20
+
+/* Exynos5440 specific registers */
+#define EXYNOS5440_TMU_S0_7_TRIM		0x000
+#define EXYNOS5440_TMU_S0_7_CTRL		0x020
+#define EXYNOS5440_TMU_S0_7_DEBUG		0x040
+#define EXYNOS5440_TMU_S0_7_TEMP		0x0f0
+#define EXYNOS5440_TMU_S0_7_TH0			0x110
+#define EXYNOS5440_TMU_S0_7_TH1			0x130
+#define EXYNOS5440_TMU_S0_7_TH2			0x150
+#define EXYNOS5440_TMU_S0_7_IRQEN		0x210
+#define EXYNOS5440_TMU_S0_7_IRQ			0x230
+/* exynos5440 common registers */
+#define EXYNOS5440_TMU_IRQ_STATUS		0x000
+#define EXYNOS5440_TMU_PMIN			0x004
+
+#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT	0
+#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT	1
+#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT	2
+#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT	3
+#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT	4
+#define EXYNOS5440_TMU_TH_RISE4_SHIFT		24
+#define EXYNOS5440_EFUSE_SWAP_OFFSET		8
 
 /**
  * struct exynos_tmu_data : A structure to hold the private data of the TMU
@@ -52,6 +132,11 @@
  * @temp_error2: fused value of the second point trim.
  * @regulator: pointer to the TMU regulator structure.
  * @reg_conf: pointer to structure to register with core thermal.
+ * @tmu_initialize: SoC specific TMU initialization method
+ * @tmu_control: SoC specific TMU control method
+ * @tmu_read: SoC specific TMU temperature read method
+ * @tmu_set_emulation: SoC specific TMU emulation setting method
+ * @tmu_clear_irqs: SoC specific TMU interrupts clearing method
  */
 struct exynos_tmu_data {
 	int id;
@@ -66,6 +151,12 @@ struct exynos_tmu_data {
 	u8 temp_error1, temp_error2;
 	struct regulator *regulator;
 	struct thermal_sensor_conf *reg_conf;
+	int (*tmu_initialize)(struct platform_device *pdev);
+	void (*tmu_control)(struct platform_device *pdev, bool on);
+	int (*tmu_read)(struct exynos_tmu_data *data);
+	void (*tmu_set_emulation)(struct exynos_tmu_data *data,
+				  unsigned long temp);
+	void (*tmu_clear_irqs)(struct exynos_tmu_data *data);
 };
 
 /*
@@ -122,83 +213,10 @@ static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code)
 	return temp;
 }
 
-static void exynos_tmu_clear_irqs(struct exynos_tmu_data *data)
-{
-	const struct exynos_tmu_registers *reg = data->pdata->registers;
-	unsigned int val_irq;
-
-	val_irq = readl(data->base + reg->tmu_intstat);
-	/*
-	 * Clear the interrupts.  Please note that the documentation for
-	 * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly
-	 * states that INTCLEAR register has a different placing of bits
-	 * responsible for FALL IRQs than INTSTAT register.  Exynos5420
-	 * and Exynos5440 documentation is correct (Exynos4210 doesn't
-	 * support FALL IRQs at all).
-	 */
-	writel(val_irq, data->base + reg->tmu_intclear);
-}
-
-static int exynos_tmu_initialize(struct platform_device *pdev)
+static void sanitize_temp_error(struct exynos_tmu_data *data, u32 trim_info)
 {
-	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	const struct exynos_tmu_registers *reg = pdata->registers;
-	unsigned int status, trim_info = 0, con, ctrl;
-	unsigned int rising_threshold = 0, falling_threshold = 0;
-	int ret = 0, threshold_code, i;
-
-	mutex_lock(&data->lock);
-	clk_enable(data->clk);
-	if (!IS_ERR(data->clk_sec))
-		clk_enable(data->clk_sec);
 
-	if (TMU_SUPPORTS(pdata, READY_STATUS)) {
-		status = readb(data->base + reg->tmu_status);
-		if (!status) {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
-	if (TMU_SUPPORTS(pdata, TRIM_RELOAD)) {
-		for (i = 0; i < reg->triminfo_ctrl_count; i++) {
-			if (pdata->triminfo_reload[i]) {
-				ctrl = readl(data->base +
-						reg->triminfo_ctrl[i]);
-				ctrl |= pdata->triminfo_reload[i];
-				writel(ctrl, data->base +
-						reg->triminfo_ctrl[i]);
-			}
-		}
-	}
-
-	/* Save trimming info in order to perform calibration */
-	if (data->soc == SOC_ARCH_EXYNOS5440) {
-		/*
-		 * For exynos5440 soc triminfo value is swapped between TMU0 and
-		 * TMU2, so the below logic is needed.
-		 */
-		switch (data->id) {
-		case 0:
-			trim_info = readl(data->base +
-			EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
-			break;
-		case 1:
-			trim_info = readl(data->base + reg->triminfo_data);
-			break;
-		case 2:
-			trim_info = readl(data->base -
-			EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
-		}
-	} else {
-		/* On exynos5420 the triminfo register is in the shared space */
-		if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO)
-			trim_info = readl(data->base_second +
-							reg->triminfo_data);
-		else
-			trim_info = readl(data->base + reg->triminfo_data);
-	}
 	data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK;
 	data->temp_error2 = ((trim_info >> EXYNOS_TRIMINFO_85_SHIFT) &
 				EXYNOS_TMU_TEMP_MASK);
@@ -212,69 +230,37 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 		data->temp_error2 =
 			(pdata->efuse_value >> EXYNOS_TRIMINFO_85_SHIFT) &
 			EXYNOS_TMU_TEMP_MASK;
+}
 
-	rising_threshold = readl(data->base + reg->threshold_th0);
+static u32 get_th_reg(struct exynos_tmu_data *data, u32 threshold, bool falling)
+{
+	struct exynos_tmu_platform_data *pdata = data->pdata;
+	int i;
 
-	if (data->soc == SOC_ARCH_EXYNOS4210) {
-		/* Write temperature code for threshold */
-		threshold_code = temp_to_code(data, pdata->threshold);
-		writeb(threshold_code,
-			data->base + reg->threshold_temp);
-		for (i = 0; i < pdata->non_hw_trigger_levels; i++)
-			writeb(pdata->trigger_levels[i], data->base +
-			reg->threshold_th0 + i * sizeof(reg->threshold_th0));
+	for (i = 0; i < pdata->non_hw_trigger_levels; i++) {
+		u8 temp = pdata->trigger_levels[i];
 
-		exynos_tmu_clear_irqs(data);
-	} else {
-		/* Write temperature code for rising and falling threshold */
-		for (i = 0; i < pdata->non_hw_trigger_levels; i++) {
-			threshold_code = temp_to_code(data,
-						pdata->trigger_levels[i]);
-			rising_threshold &= ~(0xff << 8 * i);
-			rising_threshold |= threshold_code << 8 * i;
-			if (pdata->threshold_falling) {
-				threshold_code = temp_to_code(data,
-						pdata->trigger_levels[i] -
-						pdata->threshold_falling);
-				falling_threshold |= threshold_code << 8 * i;
-			}
-		}
+		if (falling)
+			temp -= pdata->threshold_falling;
+		else
+			threshold &= ~(0xff << 8 * i);
 
-		writel(rising_threshold,
-				data->base + reg->threshold_th0);
-		writel(falling_threshold,
-				data->base + reg->threshold_th1);
-
-		exynos_tmu_clear_irqs(data);
-
-		/* if last threshold limit is also present */
-		i = pdata->max_trigger_level - 1;
-		if (pdata->trigger_levels[i] &&
-				(pdata->trigger_type[i] == HW_TRIP)) {
-			threshold_code = temp_to_code(data,
-						pdata->trigger_levels[i]);
-			if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) {
-				/* 1-4 level to be assigned in th0 reg */
-				rising_threshold &= ~(0xff << 8 * i);
-				rising_threshold |= threshold_code << 8 * i;
-				writel(rising_threshold,
-					data->base + reg->threshold_th0);
-			} else if (i == EXYNOS_MAX_TRIGGER_PER_REG) {
-				/* 5th level to be assigned in th2 reg */
-				rising_threshold =
-				threshold_code << reg->threshold_th3_l0_shift;
-				writel(rising_threshold,
-					data->base + reg->threshold_th2);
-			}
-			con = readl(data->base + reg->tmu_ctrl);
-			con |= (1 << reg->therm_trip_en_shift);
-			writel(con, data->base + reg->tmu_ctrl);
-		}
+		threshold |= temp_to_code(data, temp) << 8 * i;
 	}
-	/*Clear the PMIN in the common TMU register*/
-	if (reg->tmu_pmin && !data->id)
-		writel(0, data->base_second + reg->tmu_pmin);
-out:
+
+	return threshold;
+}
+
+static int exynos_tmu_initialize(struct platform_device *pdev)
+{
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	int ret;
+
+	mutex_lock(&data->lock);
+	clk_enable(data->clk);
+	if (!IS_ERR(data->clk_sec))
+		clk_enable(data->clk_sec);
+	ret = data->tmu_initialize(pdev);
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
 	if (!IS_ERR(data->clk_sec))
@@ -283,20 +269,13 @@ out:
 	return ret;
 }
 
-static void exynos_tmu_control(struct platform_device *pdev, bool on)
+static u32 get_con_reg(struct exynos_tmu_data *data, u32 con)
 {
-	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	const struct exynos_tmu_registers *reg = pdata->registers;
-	unsigned int con, interrupt_en;
 
-	mutex_lock(&data->lock);
-	clk_enable(data->clk);
-
-	con = readl(data->base + reg->tmu_ctrl);
-
-	if (pdata->test_mux)
-		con |= (pdata->test_mux << reg->test_mux_addr_shift);
+	if (data->soc == SOC_ARCH_EXYNOS4412 ||
+	    data->soc == SOC_ARCH_EXYNOS3250)
+		con |= (EXYNOS4412_MUX_ADDR_VALUE << EXYNOS4412_MUX_ADDR_SHIFT);
 
 	con &= ~(EXYNOS_TMU_REF_VOLTAGE_MASK << EXYNOS_TMU_REF_VOLTAGE_SHIFT);
 	con |= pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT;
@@ -305,95 +284,287 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
 	con |= (pdata->gain << EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT);
 
 	if (pdata->noise_cancel_mode) {
-		con &= ~(reg->therm_trip_mode_mask <<
-					reg->therm_trip_mode_shift);
-		con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift);
+		con &= ~(EXYNOS_TMU_TRIP_MODE_MASK << EXYNOS_TMU_TRIP_MODE_SHIFT);
+		con |= (pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT);
 	}
 
-	if (on) {
-		con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT);
-		interrupt_en =
-			pdata->trigger_enable[3] << reg->inten_rise3_shift |
-			pdata->trigger_enable[2] << reg->inten_rise2_shift |
-			pdata->trigger_enable[1] << reg->inten_rise1_shift |
-			pdata->trigger_enable[0] << reg->inten_rise0_shift;
-		if (TMU_SUPPORTS(pdata, FALLING_TRIP))
-			interrupt_en |=
-				interrupt_en << reg->inten_fall0_shift;
-	} else {
-		con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT);
-		interrupt_en = 0; /* Disable all interrupts */
-	}
-	writel(interrupt_en, data->base + reg->tmu_inten);
-	writel(con, data->base + reg->tmu_ctrl);
+	return con;
+}
+
+static void exynos_tmu_control(struct platform_device *pdev, bool on)
+{
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 
+	mutex_lock(&data->lock);
+	clk_enable(data->clk);
+	data->tmu_control(pdev, on);
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
 }
 
-static int exynos_tmu_read(struct exynos_tmu_data *data)
+static int exynos4210_tmu_initialize(struct platform_device *pdev)
 {
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	const struct exynos_tmu_registers *reg = pdata->registers;
-	u8 temp_code;
-	int temp;
+	unsigned int status;
+	int ret = 0, threshold_code, i;
 
-	mutex_lock(&data->lock);
-	clk_enable(data->clk);
+	status = readb(data->base + EXYNOS_TMU_REG_STATUS);
+	if (!status) {
+		ret = -EBUSY;
+		goto out;
+	}
 
-	temp_code = readb(data->base + reg->tmu_cur_temp);
+	sanitize_temp_error(data, readl(data->base + EXYNOS_TMU_REG_TRIMINFO));
 
-	if (data->soc == SOC_ARCH_EXYNOS4210)
-		/* temp_code should range between 75 and 175 */
-		if (temp_code < 75 || temp_code > 175) {
-			temp = -ENODATA;
-			goto out;
+	/* Write temperature code for threshold */
+	threshold_code = temp_to_code(data, pdata->threshold);
+	writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP);
+
+	for (i = 0; i < pdata->non_hw_trigger_levels; i++)
+		writeb(pdata->trigger_levels[i], data->base +
+		       EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4);
+
+	data->tmu_clear_irqs(data);
+out:
+	return ret;
+}
+
+static int exynos4412_tmu_initialize(struct platform_device *pdev)
+{
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	struct exynos_tmu_platform_data *pdata = data->pdata;
+	unsigned int status, trim_info, con, ctrl, rising_threshold;
+	int ret = 0, threshold_code, i;
+
+	status = readb(data->base + EXYNOS_TMU_REG_STATUS);
+	if (!status) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (data->soc == SOC_ARCH_EXYNOS3250 ||
+	    data->soc == SOC_ARCH_EXYNOS4412 ||
+	    data->soc == SOC_ARCH_EXYNOS5250) {
+		if (data->soc == SOC_ARCH_EXYNOS3250) {
+			ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON1);
+			ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE;
+			writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON1);
 		}
+		ctrl = readl(data->base + EXYNOS_TMU_TRIMINFO_CON2);
+		ctrl |= EXYNOS_TRIMINFO_RELOAD_ENABLE;
+		writel(ctrl, data->base + EXYNOS_TMU_TRIMINFO_CON2);
+	}
 
-	temp = code_to_temp(data, temp_code);
+	/* On exynos5420 the triminfo register is in the shared space */
+	if (data->soc == SOC_ARCH_EXYNOS5420_TRIMINFO)
+		trim_info = readl(data->base_second + EXYNOS_TMU_REG_TRIMINFO);
+	else
+		trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO);
+
+	sanitize_temp_error(data, trim_info);
+
+	/* Write temperature code for rising and falling threshold */
+	rising_threshold = readl(data->base + EXYNOS_THD_TEMP_RISE);
+	rising_threshold = get_th_reg(data, rising_threshold, false);
+	writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE);
+	writel(get_th_reg(data, 0, true), data->base + EXYNOS_THD_TEMP_FALL);
+
+	data->tmu_clear_irqs(data);
+
+	/* if last threshold limit is also present */
+	i = pdata->max_trigger_level - 1;
+	if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) {
+		threshold_code = temp_to_code(data, pdata->trigger_levels[i]);
+		/* 1-4 level to be assigned in th0 reg */
+		rising_threshold &= ~(0xff << 8 * i);
+		rising_threshold |= threshold_code << 8 * i;
+		writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE);
+		con = readl(data->base + EXYNOS_TMU_REG_CONTROL);
+		con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT);
+		writel(con, data->base + EXYNOS_TMU_REG_CONTROL);
+	}
 out:
-	clk_disable(data->clk);
-	mutex_unlock(&data->lock);
+	return ret;
+}
 
-	return temp;
+static int exynos5440_tmu_initialize(struct platform_device *pdev)
+{
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	struct exynos_tmu_platform_data *pdata = data->pdata;
+	unsigned int trim_info = 0, con, rising_threshold;
+	int ret = 0, threshold_code, i;
+
+	/*
+	 * For exynos5440 soc triminfo value is swapped between TMU0 and
+	 * TMU2, so the below logic is needed.
+	 */
+	switch (data->id) {
+	case 0:
+		trim_info = readl(data->base + EXYNOS5440_EFUSE_SWAP_OFFSET +
+				 EXYNOS5440_TMU_S0_7_TRIM);
+		break;
+	case 1:
+		trim_info = readl(data->base + EXYNOS5440_TMU_S0_7_TRIM);
+		break;
+	case 2:
+		trim_info = readl(data->base - EXYNOS5440_EFUSE_SWAP_OFFSET +
+				  EXYNOS5440_TMU_S0_7_TRIM);
+	}
+	sanitize_temp_error(data, trim_info);
+
+	/* Write temperature code for rising and falling threshold */
+	rising_threshold = readl(data->base + EXYNOS5440_TMU_S0_7_TH0);
+	rising_threshold = get_th_reg(data, rising_threshold, false);
+	writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH0);
+	writel(0, data->base + EXYNOS5440_TMU_S0_7_TH1);
+
+	data->tmu_clear_irqs(data);
+
+	/* if last threshold limit is also present */
+	i = pdata->max_trigger_level - 1;
+	if (pdata->trigger_levels[i] && pdata->trigger_type[i] == HW_TRIP) {
+		threshold_code = temp_to_code(data, pdata->trigger_levels[i]);
+		/* 5th level to be assigned in th2 reg */
+		rising_threshold =
+			threshold_code << EXYNOS5440_TMU_TH_RISE4_SHIFT;
+		writel(rising_threshold, data->base + EXYNOS5440_TMU_S0_7_TH2);
+		con = readl(data->base + EXYNOS5440_TMU_S0_7_CTRL);
+		con |= (1 << EXYNOS_TMU_THERM_TRIP_EN_SHIFT);
+		writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL);
+	}
+	/* Clear the PMIN in the common TMU register */
+	if (!data->id)
+		writel(0, data->base_second + EXYNOS5440_TMU_PMIN);
+	return ret;
 }
 
-#ifdef CONFIG_THERMAL_EMULATION
-static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
+static void exynos4210_tmu_control(struct platform_device *pdev, bool on)
 {
-	struct exynos_tmu_data *data = drv_data;
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	const struct exynos_tmu_registers *reg = pdata->registers;
-	unsigned int val;
-	int ret = -EINVAL;
+	unsigned int con, interrupt_en;
 
-	if (!TMU_SUPPORTS(pdata, EMULATION))
-		goto out;
+	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
 
-	if (temp && temp < MCELSIUS)
-		goto out;
+	if (on) {
+		con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT);
+		interrupt_en =
+			pdata->trigger_enable[3] << EXYNOS_TMU_INTEN_RISE3_SHIFT |
+			pdata->trigger_enable[2] << EXYNOS_TMU_INTEN_RISE2_SHIFT |
+			pdata->trigger_enable[1] << EXYNOS_TMU_INTEN_RISE1_SHIFT |
+			pdata->trigger_enable[0] << EXYNOS_TMU_INTEN_RISE0_SHIFT;
+		if (data->soc != SOC_ARCH_EXYNOS4210)
+			interrupt_en |=
+				interrupt_en << EXYNOS_TMU_INTEN_FALL0_SHIFT;
+	} else {
+		con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT);
+		interrupt_en = 0; /* Disable all interrupts */
+	}
+	writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN);
+	writel(con, data->base + EXYNOS_TMU_REG_CONTROL);
+}
+
+static void exynos5440_tmu_control(struct platform_device *pdev, bool on)
+{
+	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+	struct exynos_tmu_platform_data *pdata = data->pdata;
+	unsigned int con, interrupt_en;
+
+	con = get_con_reg(data, readl(data->base + EXYNOS5440_TMU_S0_7_CTRL));
+
+	if (on) {
+		con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT);
+		interrupt_en =
+			pdata->trigger_enable[3] << EXYNOS5440_TMU_INTEN_RISE3_SHIFT |
+			pdata->trigger_enable[2] << EXYNOS5440_TMU_INTEN_RISE2_SHIFT |
+			pdata->trigger_enable[1] << EXYNOS5440_TMU_INTEN_RISE1_SHIFT |
+			pdata->trigger_enable[0] << EXYNOS5440_TMU_INTEN_RISE0_SHIFT;
+		interrupt_en |= interrupt_en << EXYNOS5440_TMU_INTEN_FALL0_SHIFT;
+	} else {
+		con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT);
+		interrupt_en = 0; /* Disable all interrupts */
+	}
+	writel(interrupt_en, data->base + EXYNOS5440_TMU_S0_7_IRQEN);
+	writel(con, data->base + EXYNOS5440_TMU_S0_7_CTRL);
+}
+
+static int exynos_tmu_read(struct exynos_tmu_data *data)
+{
+	int ret;
 
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
+	ret = data->tmu_read(data);
+	if (ret >= 0)
+		ret = code_to_temp(data, ret);
+	clk_disable(data->clk);
+	mutex_unlock(&data->lock);
 
-	val = readl(data->base + reg->emul_con);
+	return ret;
+}
 
+#ifdef CONFIG_THERMAL_EMULATION
+static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val,
+			    unsigned long temp)
+{
 	if (temp) {
 		temp /= MCELSIUS;
 
-		if (TMU_SUPPORTS(pdata, EMUL_TIME)) {
-			val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift);
-			val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift);
+		if (data->soc != SOC_ARCH_EXYNOS5440) {
+			val &= ~(EXYNOS_EMUL_TIME_MASK << EXYNOS_EMUL_TIME_SHIFT);
+			val |= (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT);
 		}
-		val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift);
-		val |= (temp_to_code(data, temp) << reg->emul_temp_shift) |
+		val &= ~(EXYNOS_EMUL_DATA_MASK << EXYNOS_EMUL_DATA_SHIFT);
+		val |= (temp_to_code(data, temp) << EXYNOS_EMUL_DATA_SHIFT) |
 			EXYNOS_EMUL_ENABLE;
 	} else {
 		val &= ~EXYNOS_EMUL_ENABLE;
 	}
 
-	writel(val, data->base + reg->emul_con);
+	return val;
+}
+
+static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data,
+					 unsigned long temp)
+{
+	unsigned int val;
+	u32 emul_con;
+
+	if (data->soc == SOC_ARCH_EXYNOS5260)
+		emul_con = EXYNOS5260_EMUL_CON;
+	else
+		emul_con = EXYNOS_EMUL_CON;
+
+	val = readl(data->base + emul_con);
+	val = get_emul_con_reg(data, val, temp);
+	writel(val, data->base + emul_con);
+}
+
+static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data,
+					 unsigned long temp)
+{
+	unsigned int val;
+
+	val = readl(data->base + EXYNOS5440_TMU_S0_7_DEBUG);
+	val = get_emul_con_reg(data, val, temp);
+	writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG);
+}
+
+static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
+{
+	struct exynos_tmu_data *data = drv_data;
+	int ret = -EINVAL;
+
+	if (data->soc == SOC_ARCH_EXYNOS4210)
+		goto out;
 
+	if (temp && temp < MCELSIUS)
+		goto out;
+
+	mutex_lock(&data->lock);
+	clk_enable(data->clk);
+	data->tmu_set_emulation(data, temp);
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
 	return 0;
@@ -401,23 +572,41 @@ out:
 	return ret;
 }
 #else
+#define exynos4412_tmu_set_emulation NULL
+#define exynos5440_tmu_set_emulation NULL
 static int exynos_tmu_set_emulation(void *drv_data,	unsigned long temp)
 	{ return -EINVAL; }
 #endif/*CONFIG_THERMAL_EMULATION*/
 
+static int exynos4210_tmu_read(struct exynos_tmu_data *data)
+{
+	int ret = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP);
+
+	/* "temp_code" should range between 75 and 175 */
+	return (ret < 75 || ret > 175) ? -ENODATA : ret;
+}
+
+static int exynos4412_tmu_read(struct exynos_tmu_data *data)
+{
+	return readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP);
+}
+
+static int exynos5440_tmu_read(struct exynos_tmu_data *data)
+{
+	return readb(data->base + EXYNOS5440_TMU_S0_7_TEMP);
+}
+
 static void exynos_tmu_work(struct work_struct *work)
 {
 	struct exynos_tmu_data *data = container_of(work,
 			struct exynos_tmu_data, irq_work);
-	struct exynos_tmu_platform_data *pdata = data->pdata;
-	const struct exynos_tmu_registers *reg = pdata->registers;
 	unsigned int val_type;
 
 	if (!IS_ERR(data->clk_sec))
 		clk_enable(data->clk_sec);
 	/* Find which sensor generated this interrupt */
-	if (reg->tmu_irqstatus) {
-		val_type = readl(data->base_second + reg->tmu_irqstatus);
+	if (data->soc == SOC_ARCH_EXYNOS5440) {
+		val_type = readl(data->base_second + EXYNOS5440_TMU_IRQ_STATUS);
 		if (!((val_type >> data->id) & 0x1))
 			goto out;
 	}
@@ -429,7 +618,7 @@ static void exynos_tmu_work(struct work_struct *work)
 	clk_enable(data->clk);
 
 	/* TODO: take action based on particular interrupt */
-	exynos_tmu_clear_irqs(data);
+	data->tmu_clear_irqs(data);
 
 	clk_disable(data->clk);
 	mutex_unlock(&data->lock);
@@ -437,6 +626,40 @@ out:
 	enable_irq(data->irq);
 }
 
+static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data)
+{
+	unsigned int val_irq;
+	u32 tmu_intstat, tmu_intclear;
+
+	if (data->soc == SOC_ARCH_EXYNOS5260) {
+		tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT;
+		tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR;
+	} else {
+		tmu_intstat = EXYNOS_TMU_REG_INTSTAT;
+		tmu_intclear = EXYNOS_TMU_REG_INTCLEAR;
+	}
+
+	val_irq = readl(data->base + tmu_intstat);
+	/*
+	 * Clear the interrupts.  Please note that the documentation for
+	 * Exynos3250, Exynos4412, Exynos5250 and Exynos5260 incorrectly
+	 * states that INTCLEAR register has a different placing of bits
+	 * responsible for FALL IRQs than INTSTAT register.  Exynos5420
+	 * and Exynos5440 documentation is correct (Exynos4210 doesn't
+	 * support FALL IRQs at all).
+	 */
+	writel(val_irq, data->base + tmu_intclear);
+}
+
+static void exynos5440_tmu_clear_irqs(struct exynos_tmu_data *data)
+{
+	unsigned int val_irq;
+
+	val_irq = readl(data->base + EXYNOS5440_TMU_S0_7_IRQ);
+	/* clear the interrupts */
+	writel(val_irq, data->base + EXYNOS5440_TMU_S0_7_IRQ);
+}
+
 static irqreturn_t exynos_tmu_irq(int irq, void *id)
 {
 	struct exynos_tmu_data *data = id;
@@ -450,35 +673,35 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id)
 static const struct of_device_id exynos_tmu_match[] = {
 	{
 		.compatible = "samsung,exynos3250-tmu",
-		.data = (void *)EXYNOS3250_TMU_DRV_DATA,
+		.data = &exynos3250_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos4210-tmu",
-		.data = (void *)EXYNOS4210_TMU_DRV_DATA,
+		.data = &exynos4210_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos4412-tmu",
-		.data = (void *)EXYNOS4412_TMU_DRV_DATA,
+		.data = &exynos4412_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos5250-tmu",
-		.data = (void *)EXYNOS5250_TMU_DRV_DATA,
+		.data = &exynos5250_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos5260-tmu",
-		.data = (void *)EXYNOS5260_TMU_DRV_DATA,
+		.data = &exynos5260_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos5420-tmu",
-		.data = (void *)EXYNOS5420_TMU_DRV_DATA,
+		.data = &exynos5420_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos5420-tmu-ext-triminfo",
-		.data = (void *)EXYNOS5420_TMU_DRV_DATA,
+		.data = &exynos5420_default_tmu_data,
 	},
 	{
 		.compatible = "samsung,exynos5440-tmu",
-		.data = (void *)EXYNOS5440_TMU_DRV_DATA,
+		.data = &exynos5440_default_tmu_data,
 	},
 	{},
 };
@@ -553,12 +776,47 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		dev_err(&pdev->dev, "No platform init data supplied.\n");
 		return -ENODEV;
 	}
+
 	data->pdata = pdata;
+	data->soc = pdata->type;
+
+	switch (data->soc) {
+	case SOC_ARCH_EXYNOS4210:
+		data->tmu_initialize = exynos4210_tmu_initialize;
+		data->tmu_control = exynos4210_tmu_control;
+		data->tmu_read = exynos4210_tmu_read;
+		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		break;
+	case SOC_ARCH_EXYNOS3250:
+	case SOC_ARCH_EXYNOS4412:
+	case SOC_ARCH_EXYNOS5250:
+	case SOC_ARCH_EXYNOS5260:
+	case SOC_ARCH_EXYNOS5420:
+	case SOC_ARCH_EXYNOS5420_TRIMINFO:
+		data->tmu_initialize = exynos4412_tmu_initialize;
+		data->tmu_control = exynos4210_tmu_control;
+		data->tmu_read = exynos4412_tmu_read;
+		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
+		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		break;
+	case SOC_ARCH_EXYNOS5440:
+		data->tmu_initialize = exynos5440_tmu_initialize;
+		data->tmu_control = exynos5440_tmu_control;
+		data->tmu_read = exynos5440_tmu_read;
+		data->tmu_set_emulation = exynos5440_tmu_set_emulation;
+		data->tmu_clear_irqs = exynos5440_tmu_clear_irqs;
+		break;
+	default:
+		dev_err(&pdev->dev, "Platform not supported\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Check if the TMU shares some registers and then try to map the
 	 * memory of common registers.
 	 */
-	if (!TMU_SUPPORTS(pdata, ADDRESS_MULTIPLE))
+	if (data->soc != SOC_ARCH_EXYNOS5420_TRIMINFO &&
+	    data->soc != SOC_ARCH_EXYNOS5440)
 		return 0;
 
 	if (of_address_to_resource(pdev->dev.of_node, 1, &res)) {
@@ -625,20 +883,6 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 		goto err_clk_sec;
 	}
 
-	if (pdata->type == SOC_ARCH_EXYNOS3250 ||
-	    pdata->type == SOC_ARCH_EXYNOS4210 ||
-	    pdata->type == SOC_ARCH_EXYNOS4412 ||
-	    pdata->type == SOC_ARCH_EXYNOS5250 ||
-	    pdata->type == SOC_ARCH_EXYNOS5260 ||
-	    pdata->type == SOC_ARCH_EXYNOS5420_TRIMINFO ||
-	    pdata->type == SOC_ARCH_EXYNOS5440)
-		data->soc = pdata->type;
-	else {
-		ret = -EINVAL;
-		dev_err(&pdev->dev, "Platform not supported\n");
-		goto err_clk;
-	}
-
 	ret = exynos_tmu_initialize(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to initialize TMU\n");
diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h
index c58c766..da3009b 100644
--- a/drivers/thermal/samsung/exynos_tmu.h
+++ b/drivers/thermal/samsung/exynos_tmu.h
@@ -40,115 +40,12 @@ enum soc_type {
 	SOC_ARCH_EXYNOS4412,
 	SOC_ARCH_EXYNOS5250,
 	SOC_ARCH_EXYNOS5260,
+	SOC_ARCH_EXYNOS5420,
 	SOC_ARCH_EXYNOS5420_TRIMINFO,
 	SOC_ARCH_EXYNOS5440,
 };
 
 /**
- * EXYNOS TMU supported features.
- * TMU_SUPPORT_EMULATION - This features is used to set user defined
- *			temperature to the TMU controller.
- * TMU_SUPPORT_MULTI_INST - This features denotes that the soc
- *			has many instances of TMU.
- * TMU_SUPPORT_TRIM_RELOAD - This features shows that trimming can
- *			be reloaded.
- * TMU_SUPPORT_FALLING_TRIP - This features shows that interrupt can
- *			be registered for falling trips also.
- * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current
- *			state(active/idle) can be checked.
- * TMU_SUPPORT_EMUL_TIME - This features allows to set next temp emulation
- *			sample time.
- * TMU_SUPPORT_ADDRESS_MULTIPLE - This feature tells that the different TMU
- *			sensors shares some common registers.
- * TMU_SUPPORT - macro to compare the above features with the supplied.
- */
-#define TMU_SUPPORT_EMULATION			BIT(0)
-#define TMU_SUPPORT_MULTI_INST			BIT(1)
-#define TMU_SUPPORT_TRIM_RELOAD			BIT(2)
-#define TMU_SUPPORT_FALLING_TRIP		BIT(3)
-#define TMU_SUPPORT_READY_STATUS		BIT(4)
-#define TMU_SUPPORT_EMUL_TIME			BIT(5)
-#define TMU_SUPPORT_ADDRESS_MULTIPLE		BIT(6)
-
-#define TMU_SUPPORTS(a, b)	(a->features & TMU_SUPPORT_ ## b)
-
-/**
- * struct exynos_tmu_register - register descriptors to access registers and
- * bitfields. The register validity, offsets and bitfield values may vary
- * slightly across different exynos SOC's.
- * @triminfo_data: register containing 2 pont trimming data
- * @triminfo_ctrl: trim info controller register.
- * @triminfo_ctrl_count: the number of trim info controller register.
- * @tmu_ctrl: TMU main controller register.
- * @test_mux_addr_shift: shift bits of test mux address.
- * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register.
- * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register.
- * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register.
- * @tmu_status: register drescribing the TMU status.
- * @tmu_cur_temp: register containing the current temperature of the TMU.
- * @threshold_temp: register containing the base threshold level.
- * @threshold_th0: Register containing first set of rising levels.
- * @threshold_th1: Register containing second set of rising levels.
- * @threshold_th2: Register containing third set of rising levels.
- * @threshold_th3_l0_shift: shift bits of level0 threshold temperature.
- * @tmu_inten: register containing the different threshold interrupt
-	enable bits.
- * @inten_rise0_shift: shift bits of rising 0 interrupt bits.
- * @inten_rise1_shift: shift bits of rising 1 interrupt bits.
- * @inten_rise2_shift: shift bits of rising 2 interrupt bits.
- * @inten_rise3_shift: shift bits of rising 3 interrupt bits.
- * @inten_fall0_shift: shift bits of falling 0 interrupt bits.
- * @tmu_intstat: Register containing the interrupt status values.
- * @tmu_intclear: Register for clearing the raised interrupt status.
- * @emul_con: TMU emulation controller register.
- * @emul_temp_shift: shift bits of emulation temperature.
- * @emul_time_shift: shift bits of emulation time.
- * @tmu_irqstatus: register to find which TMU generated interrupts.
- * @tmu_pmin: register to get/set the Pmin value.
- */
-struct exynos_tmu_registers {
-	u32	triminfo_data;
-
-	u32	triminfo_ctrl[MAX_TRIMINFO_CTRL_REG];
-	u32	triminfo_ctrl_count;
-
-	u32	tmu_ctrl;
-	u32     test_mux_addr_shift;
-	u32	therm_trip_mode_shift;
-	u32	therm_trip_mode_mask;
-	u32	therm_trip_en_shift;
-
-	u32	tmu_status;
-
-	u32	tmu_cur_temp;
-
-	u32	threshold_temp;
-
-	u32	threshold_th0;
-	u32	threshold_th1;
-	u32	threshold_th2;
-	u32	threshold_th3_l0_shift;
-
-	u32	tmu_inten;
-	u32	inten_rise0_shift;
-	u32	inten_rise1_shift;
-	u32	inten_rise2_shift;
-	u32	inten_rise3_shift;
-	u32	inten_fall0_shift;
-
-	u32	tmu_intstat;
-
-	u32	tmu_intclear;
-
-	u32	emul_con;
-	u32	emul_temp_shift;
-	u32	emul_time_shift;
-
-	u32	tmu_irqstatus;
-	u32	tmu_pmin;
-};
-
-/**
  * struct exynos_tmu_platform_data
  * @threshold: basic temperature for generating interrupt
  *	       25 <= threshold <= 125 [unit: degree Celsius]
@@ -192,16 +89,10 @@ struct exynos_tmu_registers {
  * @first_point_trim: temp value of the first point trimming
  * @second_point_trim: temp value of the second point trimming
  * @default_temp_offset: default temperature offset in case of no trimming
- * @test_mux; information if SoC supports test MUX
- * @triminfo_reload: reload value to read TRIMINFO register
  * @cal_type: calibration type for temperature
  * @freq_clip_table: Table representing frequency reduction percentage.
  * @freq_tab_count: Count of the above table as frequency reduction may
  *	applicable to only some of the trigger levels.
- * @registers: Pointer to structure containing all the TMU controller registers
- *	and bitfields shifts and masks.
- * @features: a bitfield value indicating the features supported in SOC like
- *	emulation, multi instance etc
  *
  * This structure is required for configuration of exynos_tmu driver.
  */
@@ -223,15 +114,11 @@ struct exynos_tmu_platform_data {
 	u8 first_point_trim;
 	u8 second_point_trim;
 	u8 default_temp_offset;
-	u8 test_mux;
-	u8 triminfo_reload[MAX_TRIMINFO_CTRL_REG];
 
 	enum calibration_type cal_type;
 	enum soc_type type;
 	struct freq_clip_table freq_tab[4];
 	unsigned int freq_tab_count;
-	const struct exynos_tmu_registers *registers;
-	unsigned int features;
 };
 
 /**
@@ -246,4 +133,12 @@ struct exynos_tmu_init_data {
 	struct exynos_tmu_platform_data tmu_data[];
 };
 
+extern struct exynos_tmu_init_data const exynos3250_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos4210_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos4412_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos5250_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos5260_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos5420_default_tmu_data;
+extern struct exynos_tmu_init_data const exynos5440_default_tmu_data;
+
 #endif /* _EXYNOS_TMU_H */
diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c
index 1724f6c..b239100 100644
--- a/drivers/thermal/samsung/exynos_tmu_data.c
+++ b/drivers/thermal/samsung/exynos_tmu_data.c
@@ -22,24 +22,6 @@
 
 #include "exynos_thermal_common.h"
 #include "exynos_tmu.h"
-#include "exynos_tmu_data.h"
-
-#if defined(CONFIG_CPU_EXYNOS4210)
-static const struct exynos_tmu_registers exynos4210_tmu_registers = {
-	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
-	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
-	.tmu_status = EXYNOS_TMU_REG_STATUS,
-	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
-	.threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP,
-	.threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0,
-	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
-	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
-	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
-	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
-};
 
 struct exynos_tmu_init_data const exynos4210_default_tmu_data = {
 	.tmu_data = {
@@ -75,40 +57,10 @@ struct exynos_tmu_init_data const exynos4210_default_tmu_data = {
 		},
 		.freq_tab_count = 2,
 		.type = SOC_ARCH_EXYNOS4210,
-		.registers = &exynos4210_tmu_registers,
-		.features = TMU_SUPPORT_READY_STATUS,
 		},
 	},
 	.tmu_count = 1,
 };
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS3250)
-static const struct exynos_tmu_registers exynos3250_tmu_registers = {
-	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
-	.triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON1,
-	.triminfo_ctrl[1] = EXYNOS_TMU_TRIMINFO_CON2,
-	.triminfo_ctrl_count = 2,
-	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
-	.test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT,
-	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
-	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
-	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
-	.tmu_status = EXYNOS_TMU_REG_STATUS,
-	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
-	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
-	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
-	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
-	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
-	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
-	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
-	.emul_con = EXYNOS_EMUL_CON,
-	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
-	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
-};
 
 #define EXYNOS3250_TMU_DATA \
 	.threshold_falling = 10, \
@@ -144,54 +96,17 @@ static const struct exynos_tmu_registers exynos3250_tmu_registers = {
 		.freq_clip_max = 400 * 1000, \
 		.temp_level = 95, \
 	}, \
-	.freq_tab_count = 2, \
-	.triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \
-	.triminfo_reload[1] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \
-	.registers = &exynos3250_tmu_registers, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
-			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
-			TMU_SUPPORT_EMUL_TIME)
-#endif
+	.freq_tab_count = 2
 
-#if defined(CONFIG_SOC_EXYNOS3250)
 struct exynos_tmu_init_data const exynos3250_default_tmu_data = {
 	.tmu_data = {
 		{
 			EXYNOS3250_TMU_DATA,
 			.type = SOC_ARCH_EXYNOS3250,
-			.test_mux = EXYNOS4412_MUX_ADDR_VALUE,
 		},
 	},
 	.tmu_count = 1,
 };
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS4412) || defined(CONFIG_SOC_EXYNOS5250)
-static const struct exynos_tmu_registers exynos4412_tmu_registers = {
-	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
-	.triminfo_ctrl[0] = EXYNOS_TMU_TRIMINFO_CON2,
-	.triminfo_ctrl_count = 1,
-	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
-	.test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT,
-	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
-	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
-	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
-	.tmu_status = EXYNOS_TMU_REG_STATUS,
-	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
-	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
-	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
-	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
-	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
-	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
-	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
-	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
-	.emul_con = EXYNOS_EMUL_CON,
-	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
-	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
-};
 
 #define EXYNOS4412_TMU_DATA \
 	.threshold_falling = 10, \
@@ -227,28 +142,18 @@ static const struct exynos_tmu_registers exynos4412_tmu_registers = {
 		.freq_clip_max = 400 * 1000, \
 		.temp_level = 95, \
 	}, \
-	.freq_tab_count = 2, \
-	.triminfo_reload[0] = EXYNOS_TRIMINFO_RELOAD_ENABLE, \
-	.registers = &exynos4412_tmu_registers, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
-			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
-			TMU_SUPPORT_EMUL_TIME)
-#endif
+	.freq_tab_count = 2
 
-#if defined(CONFIG_SOC_EXYNOS4412)
 struct exynos_tmu_init_data const exynos4412_default_tmu_data = {
 	.tmu_data = {
 		{
 			EXYNOS4412_TMU_DATA,
 			.type = SOC_ARCH_EXYNOS4412,
-			.test_mux = EXYNOS4412_MUX_ADDR_VALUE,
 		},
 	},
 	.tmu_count = 1,
 };
-#endif
 
-#if defined(CONFIG_SOC_EXYNOS5250)
 struct exynos_tmu_init_data const exynos5250_default_tmu_data = {
 	.tmu_data = {
 		{
@@ -258,31 +163,6 @@ struct exynos_tmu_init_data const exynos5250_default_tmu_data = {
 	},
 	.tmu_count = 1,
 };
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5260)
-static const struct exynos_tmu_registers exynos5260_tmu_registers = {
-	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
-	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
-	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
-	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
-	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
-	.tmu_status = EXYNOS_TMU_REG_STATUS,
-	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
-	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
-	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
-	.tmu_inten = EXYNOS5260_TMU_REG_INTEN,
-	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
-	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
-	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
-	.tmu_intstat = EXYNOS5260_TMU_REG_INTSTAT,
-	.tmu_intclear = EXYNOS5260_TMU_REG_INTCLEAR,
-	.emul_con = EXYNOS5260_EMUL_CON,
-	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
-	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
-};
 
 #define __EXYNOS5260_TMU_DATA	\
 	.threshold_falling = 10, \
@@ -319,13 +199,10 @@ static const struct exynos_tmu_registers exynos5260_tmu_registers = {
 		.temp_level = 103, \
 	}, \
 	.freq_tab_count = 2, \
-	.registers = &exynos5260_tmu_registers, \
 
 #define EXYNOS5260_TMU_DATA \
 	__EXYNOS5260_TMU_DATA \
-	.type = SOC_ARCH_EXYNOS5260, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
-			TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME)
+	.type = SOC_ARCH_EXYNOS5260
 
 struct exynos_tmu_init_data const exynos5260_default_tmu_data = {
 	.tmu_data = {
@@ -337,82 +214,14 @@ struct exynos_tmu_init_data const exynos5260_default_tmu_data = {
 	},
 	.tmu_count = 5,
 };
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5420)
-static const struct exynos_tmu_registers exynos5420_tmu_registers = {
-	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
-	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
-	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
-	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
-	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
-	.tmu_status = EXYNOS_TMU_REG_STATUS,
-	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
-	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
-	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
-	.tmu_inten = EXYNOS_TMU_REG_INTEN,
-	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
-	/* INTEN_RISE3 Not availble in exynos5420 */
-	.inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
-	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
-	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
-	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
-	.emul_con = EXYNOS_EMUL_CON,
-	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
-	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
-};
-
-#define __EXYNOS5420_TMU_DATA	\
-	.threshold_falling = 10, \
-	.trigger_levels[0] = 85, \
-	.trigger_levels[1] = 103, \
-	.trigger_levels[2] = 110, \
-	.trigger_levels[3] = 120, \
-	.trigger_enable[0] = true, \
-	.trigger_enable[1] = true, \
-	.trigger_enable[2] = true, \
-	.trigger_enable[3] = false, \
-	.trigger_type[0] = THROTTLE_ACTIVE, \
-	.trigger_type[1] = THROTTLE_ACTIVE, \
-	.trigger_type[2] = SW_TRIP, \
-	.trigger_type[3] = HW_TRIP, \
-	.max_trigger_level = 4, \
-	.non_hw_trigger_levels = 3, \
-	.gain = 8, \
-	.reference_voltage = 16, \
-	.noise_cancel_mode = 4, \
-	.cal_type = TYPE_ONE_POINT_TRIMMING, \
-	.efuse_value = 55, \
-	.min_efuse_value = 40, \
-	.max_efuse_value = 100, \
-	.first_point_trim = 25, \
-	.second_point_trim = 85, \
-	.default_temp_offset = 50, \
-	.freq_tab[0] = { \
-		.freq_clip_max = 800 * 1000, \
-		.temp_level = 85, \
-	}, \
-	.freq_tab[1] = { \
-		.freq_clip_max = 200 * 1000, \
-		.temp_level = 103, \
-	}, \
-	.freq_tab_count = 2, \
-	.registers = &exynos5420_tmu_registers, \
 
 #define EXYNOS5420_TMU_DATA \
-	__EXYNOS5420_TMU_DATA \
-	.type = SOC_ARCH_EXYNOS5250, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
-			TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME)
+	__EXYNOS5260_TMU_DATA \
+	.type = SOC_ARCH_EXYNOS5420
 
 #define EXYNOS5420_TMU_DATA_SHARED \
-	__EXYNOS5420_TMU_DATA \
-	.type = SOC_ARCH_EXYNOS5420_TRIMINFO, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
-			TMU_SUPPORT_READY_STATUS | TMU_SUPPORT_EMUL_TIME | \
-			TMU_SUPPORT_ADDRESS_MULTIPLE)
+	__EXYNOS5260_TMU_DATA \
+	.type = SOC_ARCH_EXYNOS5420_TRIMINFO
 
 struct exynos_tmu_init_data const exynos5420_default_tmu_data = {
 	.tmu_data = {
@@ -424,34 +233,6 @@ struct exynos_tmu_init_data const exynos5420_default_tmu_data = {
 	},
 	.tmu_count = 5,
 };
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5440)
-static const struct exynos_tmu_registers exynos5440_tmu_registers = {
-	.triminfo_data = EXYNOS5440_TMU_S0_7_TRIM,
-	.tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL,
-	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
-	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
-	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
-	.tmu_status = EXYNOS5440_TMU_S0_7_STATUS,
-	.tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP,
-	.threshold_th0 = EXYNOS5440_TMU_S0_7_TH0,
-	.threshold_th1 = EXYNOS5440_TMU_S0_7_TH1,
-	.threshold_th2 = EXYNOS5440_TMU_S0_7_TH2,
-	.threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT,
-	.tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN,
-	.inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT,
-	.inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT,
-	.inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT,
-	.inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT,
-	.inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT,
-	.tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ,
-	.tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ,
-	.tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS,
-	.emul_con = EXYNOS5440_TMU_S0_7_DEBUG,
-	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
-	.tmu_pmin = EXYNOS5440_TMU_PMIN,
-};
 
 #define EXYNOS5440_TMU_DATA \
 	.trigger_levels[0] = 100, \
@@ -471,10 +252,7 @@ static const struct exynos_tmu_registers exynos5440_tmu_registers = {
 	.first_point_trim = 25, \
 	.second_point_trim = 70, \
 	.default_temp_offset = 25, \
-	.type = SOC_ARCH_EXYNOS5440, \
-	.registers = &exynos5440_tmu_registers, \
-	.features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
-			TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_ADDRESS_MULTIPLE),
+	.type = SOC_ARCH_EXYNOS5440
 
 struct exynos_tmu_init_data const exynos5440_default_tmu_data = {
 	.tmu_data = {
@@ -484,4 +262,3 @@ struct exynos_tmu_init_data const exynos5440_default_tmu_data = {
 	},
 	.tmu_count = 3,
 };
-#endif
diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h
deleted file mode 100644
index 63de598..0000000
--- a/drivers/thermal/samsung/exynos_tmu_data.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * exynos_tmu_data.h - Samsung EXYNOS tmu data header file
- *
- *  Copyright (C) 2013 Samsung Electronics
- *  Amit Daniel Kachhap <amit.daniel@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#ifndef _EXYNOS_TMU_DATA_H
-#define _EXYNOS_TMU_DATA_H
-
-/* Exynos generic registers */
-#define EXYNOS_TMU_REG_TRIMINFO		0x0
-#define EXYNOS_TMU_REG_CONTROL		0x20
-#define EXYNOS_TMU_REG_STATUS		0x28
-#define EXYNOS_TMU_REG_CURRENT_TEMP	0x40
-#define EXYNOS_TMU_REG_INTEN		0x70
-#define EXYNOS_TMU_REG_INTSTAT		0x74
-#define EXYNOS_TMU_REG_INTCLEAR		0x78
-
-#define EXYNOS_TMU_TEMP_MASK		0xff
-#define EXYNOS_TMU_REF_VOLTAGE_SHIFT	24
-#define EXYNOS_TMU_REF_VOLTAGE_MASK	0x1f
-#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK	0xf
-#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT	8
-#define EXYNOS_TMU_CORE_EN_SHIFT	0
-
-/* Exynos3250 specific registers */
-#define EXYNOS_TMU_TRIMINFO_CON1	0x10
-
-/* Exynos4210 specific registers */
-#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP	0x44
-#define EXYNOS4210_TMU_REG_TRIG_LEVEL0	0x50
-
-/* Exynos5250, Exynos4412, Exynos3250 specific registers */
-#define EXYNOS_TMU_TRIMINFO_CON2	0x14
-#define EXYNOS_THD_TEMP_RISE		0x50
-#define EXYNOS_THD_TEMP_FALL		0x54
-#define EXYNOS_EMUL_CON		0x80
-
-#define EXYNOS_TRIMINFO_RELOAD_ENABLE	1
-#define EXYNOS_TRIMINFO_25_SHIFT	0
-#define EXYNOS_TRIMINFO_85_SHIFT	8
-#define EXYNOS_TMU_TRIP_MODE_SHIFT	13
-#define EXYNOS_TMU_TRIP_MODE_MASK	0x7
-#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT	12
-
-#define EXYNOS_TMU_INTEN_RISE0_SHIFT	0
-#define EXYNOS_TMU_INTEN_RISE1_SHIFT	4
-#define EXYNOS_TMU_INTEN_RISE2_SHIFT	8
-#define EXYNOS_TMU_INTEN_RISE3_SHIFT	12
-#define EXYNOS_TMU_INTEN_FALL0_SHIFT	16
-
-#define EXYNOS_EMUL_TIME	0x57F0
-#define EXYNOS_EMUL_TIME_MASK	0xffff
-#define EXYNOS_EMUL_TIME_SHIFT	16
-#define EXYNOS_EMUL_DATA_SHIFT	8
-#define EXYNOS_EMUL_DATA_MASK	0xFF
-#define EXYNOS_EMUL_ENABLE	0x1
-
-#define EXYNOS_MAX_TRIGGER_PER_REG	4
-
-/* Exynos5260 specific */
-#define EXYNOS5260_TMU_REG_INTEN		0xC0
-#define EXYNOS5260_TMU_REG_INTSTAT		0xC4
-#define EXYNOS5260_TMU_REG_INTCLEAR		0xC8
-#define EXYNOS5260_EMUL_CON			0x100
-
-/* Exynos4412 specific */
-#define EXYNOS4412_MUX_ADDR_VALUE          6
-#define EXYNOS4412_MUX_ADDR_SHIFT          20
-
-/*exynos5440 specific registers*/
-#define EXYNOS5440_TMU_S0_7_TRIM		0x000
-#define EXYNOS5440_TMU_S0_7_CTRL		0x020
-#define EXYNOS5440_TMU_S0_7_DEBUG		0x040
-#define EXYNOS5440_TMU_S0_7_STATUS		0x060
-#define EXYNOS5440_TMU_S0_7_TEMP		0x0f0
-#define EXYNOS5440_TMU_S0_7_TH0			0x110
-#define EXYNOS5440_TMU_S0_7_TH1			0x130
-#define EXYNOS5440_TMU_S0_7_TH2			0x150
-#define EXYNOS5440_TMU_S0_7_IRQEN		0x210
-#define EXYNOS5440_TMU_S0_7_IRQ			0x230
-/* exynos5440 common registers */
-#define EXYNOS5440_TMU_IRQ_STATUS		0x000
-#define EXYNOS5440_TMU_PMIN			0x004
-
-#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT	0
-#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT	1
-#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT	2
-#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT	3
-#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT	4
-#define EXYNOS5440_TMU_TH_RISE4_SHIFT		24
-#define EXYNOS5440_EFUSE_SWAP_OFFSET		8
-
-#if defined(CONFIG_SOC_EXYNOS3250)
-extern struct exynos_tmu_init_data const exynos3250_default_tmu_data;
-#define EXYNOS3250_TMU_DRV_DATA (&exynos3250_default_tmu_data)
-#else
-#define EXYNOS3250_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_CPU_EXYNOS4210)
-extern struct exynos_tmu_init_data const exynos4210_default_tmu_data;
-#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data)
-#else
-#define EXYNOS4210_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS4412)
-extern struct exynos_tmu_init_data const exynos4412_default_tmu_data;
-#define EXYNOS4412_TMU_DRV_DATA (&exynos4412_default_tmu_data)
-#else
-#define EXYNOS4412_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5250)
-extern struct exynos_tmu_init_data const exynos5250_default_tmu_data;
-#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data)
-#else
-#define EXYNOS5250_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5260)
-extern struct exynos_tmu_init_data const exynos5260_default_tmu_data;
-#define EXYNOS5260_TMU_DRV_DATA (&exynos5260_default_tmu_data)
-#else
-#define EXYNOS5260_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5420)
-extern struct exynos_tmu_init_data const exynos5420_default_tmu_data;
-#define EXYNOS5420_TMU_DRV_DATA (&exynos5420_default_tmu_data)
-#else
-#define EXYNOS5420_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5440)
-extern struct exynos_tmu_init_data const exynos5440_default_tmu_data;
-#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data)
-#else
-#define EXYNOS5440_TMU_DRV_DATA (NULL)
-#endif
-
-#endif /*_EXYNOS_TMU_DATA_H*/
diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
new file mode 100644
index 0000000..9197fc0
--- /dev/null
+++ b/drivers/thermal/tegra_soctherm.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Author:
+ *	Mikko Perttunen <mperttunen@nvidia.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/thermal.h>
+
+#include <soc/tegra/fuse.h>
+
+#define SENSOR_CONFIG0				0
+#define SENSOR_CONFIG0_STOP			BIT(0)
+#define SENSOR_CONFIG0_TALL_SHIFT		8
+#define SENSOR_CONFIG0_TCALC_OVER		BIT(4)
+#define SENSOR_CONFIG0_OVER			BIT(3)
+#define SENSOR_CONFIG0_CPTR_OVER		BIT(2)
+
+#define SENSOR_CONFIG1				4
+#define SENSOR_CONFIG1_TSAMPLE_SHIFT		0
+#define SENSOR_CONFIG1_TIDDQ_EN_SHIFT		15
+#define SENSOR_CONFIG1_TEN_COUNT_SHIFT		24
+#define SENSOR_CONFIG1_TEMP_ENABLE		BIT(31)
+
+#define SENSOR_CONFIG2				8
+#define SENSOR_CONFIG2_THERMA_SHIFT		16
+#define SENSOR_CONFIG2_THERMB_SHIFT		0
+
+#define SENSOR_PDIV				0x1c0
+#define SENSOR_PDIV_T124			0x8888
+#define SENSOR_HOTSPOT_OFF			0x1c4
+#define SENSOR_HOTSPOT_OFF_T124			0x00060600
+#define SENSOR_TEMP1				0x1c8
+#define SENSOR_TEMP2				0x1cc
+
+#define SENSOR_TEMP_MASK			0xffff
+#define READBACK_VALUE_MASK			0xff00
+#define READBACK_VALUE_SHIFT			8
+#define READBACK_ADD_HALF			BIT(7)
+#define READBACK_NEGATE				BIT(1)
+
+#define FUSE_TSENSOR8_CALIB			0x180
+#define FUSE_SPARE_REALIGNMENT_REG_0		0x1fc
+
+#define FUSE_TSENSOR_CALIB_CP_TS_BASE_MASK	0x1fff
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK	(0x1fff << 13)
+#define FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT	13
+
+#define FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK	0x3ff
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK	(0x7ff << 10)
+#define FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT	10
+
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_CP_MASK 0x3f
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK (0x1f << 21)
+#define FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT 21
+
+#define NOMINAL_CALIB_FT_T124			105
+#define NOMINAL_CALIB_CP_T124			25
+
+struct tegra_tsensor_configuration {
+	u32 tall, tsample, tiddq_en, ten_count, pdiv, tsample_ate, pdiv_ate;
+};
+
+struct tegra_tsensor {
+	const struct tegra_tsensor_configuration *config;
+	u32 base, calib_fuse_offset;
+	/* Correction values used to modify values read from calibration fuses */
+	s32 fuse_corr_alpha, fuse_corr_beta;
+};
+
+struct tegra_thermctl_zone {
+	void __iomem *reg;
+	unsigned int shift;
+};
+
+static const struct tegra_tsensor_configuration t124_tsensor_config = {
+	.tall = 16300,
+	.tsample = 120,
+	.tiddq_en = 1,
+	.ten_count = 1,
+	.pdiv = 8,
+	.tsample_ate = 480,
+	.pdiv_ate = 8
+};
+
+static const struct tegra_tsensor t124_tsensors[] = {
+	{
+		.config = &t124_tsensor_config,
+		.base = 0xc0,
+		.calib_fuse_offset = 0x098,
+		.fuse_corr_alpha = 1135400,
+		.fuse_corr_beta = -6266900,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0xe0,
+		.calib_fuse_offset = 0x084,
+		.fuse_corr_alpha = 1122220,
+		.fuse_corr_beta = -5700700,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x100,
+		.calib_fuse_offset = 0x088,
+		.fuse_corr_alpha = 1127000,
+		.fuse_corr_beta = -6768200,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x120,
+		.calib_fuse_offset = 0x12c,
+		.fuse_corr_alpha = 1110900,
+		.fuse_corr_beta = -6232000,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x140,
+		.calib_fuse_offset = 0x158,
+		.fuse_corr_alpha = 1122300,
+		.fuse_corr_beta = -5936400,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x160,
+		.calib_fuse_offset = 0x15c,
+		.fuse_corr_alpha = 1145700,
+		.fuse_corr_beta = -7124600,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x180,
+		.calib_fuse_offset = 0x154,
+		.fuse_corr_alpha = 1120100,
+		.fuse_corr_beta = -6000500,
+	},
+	{
+		.config = &t124_tsensor_config,
+		.base = 0x1a0,
+		.calib_fuse_offset = 0x160,
+		.fuse_corr_alpha = 1106500,
+		.fuse_corr_beta = -6729300,
+	},
+};
+
+struct tegra_soctherm {
+	struct reset_control *reset;
+	struct clk *clock_tsensor;
+	struct clk *clock_soctherm;
+	void __iomem *regs;
+
+	struct thermal_zone_device *thermctl_tzs[4];
+};
+
+struct tsensor_shared_calibration {
+	u32 base_cp, base_ft;
+	u32 actual_temp_cp, actual_temp_ft;
+};
+
+static int calculate_shared_calibration(struct tsensor_shared_calibration *r)
+{
+	u32 val, shifted_cp, shifted_ft;
+	int err;
+
+	err = tegra_fuse_readl(FUSE_TSENSOR8_CALIB, &val);
+	if (err)
+		return err;
+	r->base_cp = val & FUSE_TSENSOR8_CALIB_CP_TS_BASE_MASK;
+	r->base_ft = (val & FUSE_TSENSOR8_CALIB_FT_TS_BASE_MASK)
+		>> FUSE_TSENSOR8_CALIB_FT_TS_BASE_SHIFT;
+	val = ((val & FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_MASK)
+		>> FUSE_SPARE_REALIGNMENT_REG_SHIFT_FT_SHIFT);
+	shifted_ft = sign_extend32(val, 4);
+
+	err = tegra_fuse_readl(FUSE_SPARE_REALIGNMENT_REG_0, &val);
+	if (err)
+		return err;
+	shifted_cp = sign_extend32(val, 5);
+
+	r->actual_temp_cp = 2 * NOMINAL_CALIB_CP_T124 + shifted_cp;
+	r->actual_temp_ft = 2 * NOMINAL_CALIB_FT_T124 + shifted_ft;
+
+	return 0;
+}
+
+static s64 div64_s64_precise(s64 a, s64 b)
+{
+	s64 r, al;
+
+	/* Scale up for increased precision division */
+	al = a << 16;
+
+	r = div64_s64(al * 2 + 1, 2 * b);
+	return r >> 16;
+}
+
+static int
+calculate_tsensor_calibration(const struct tegra_tsensor *sensor,
+			      const struct tsensor_shared_calibration *shared,
+			      u32 *calib)
+{
+	u32 val;
+	s32 actual_tsensor_ft, actual_tsensor_cp, delta_sens, delta_temp,
+	    mult, div;
+	s16 therma, thermb;
+	s64 tmp;
+	int err;
+
+	err = tegra_fuse_readl(sensor->calib_fuse_offset, &val);
+	if (err)
+		return err;
+
+	actual_tsensor_cp = (shared->base_cp * 64) + sign_extend32(val, 12);
+	val = (val & FUSE_TSENSOR_CALIB_FT_TS_BASE_MASK)
+		>> FUSE_TSENSOR_CALIB_FT_TS_BASE_SHIFT;
+	actual_tsensor_ft = (shared->base_ft * 32) + sign_extend32(val, 12);
+
+	delta_sens = actual_tsensor_ft - actual_tsensor_cp;
+	delta_temp = shared->actual_temp_ft - shared->actual_temp_cp;
+
+	mult = sensor->config->pdiv * sensor->config->tsample_ate;
+	div = sensor->config->tsample * sensor->config->pdiv_ate;
+
+	therma = div64_s64_precise((s64) delta_temp * (1LL << 13) * mult,
+				   (s64) delta_sens * div);
+
+	tmp = (s64)actual_tsensor_ft * shared->actual_temp_cp -
+	      (s64)actual_tsensor_cp * shared->actual_temp_ft;
+	thermb = div64_s64_precise(tmp, (s64)delta_sens);
+
+	therma = div64_s64_precise((s64)therma * sensor->fuse_corr_alpha,
+				   (s64)1000000LL);
+	thermb = div64_s64_precise((s64)thermb * sensor->fuse_corr_alpha +
+				   sensor->fuse_corr_beta, (s64)1000000LL);
+
+	*calib = ((u16)therma << SENSOR_CONFIG2_THERMA_SHIFT) |
+		 ((u16)thermb << SENSOR_CONFIG2_THERMB_SHIFT);
+
+	return 0;
+}
+
+static int enable_tsensor(struct tegra_soctherm *tegra,
+			  const struct tegra_tsensor *sensor,
+			  const struct tsensor_shared_calibration *shared)
+{
+	void __iomem *base = tegra->regs + sensor->base;
+	unsigned int val;
+	u32 calib;
+	int err;
+
+	err = calculate_tsensor_calibration(sensor, shared, &calib);
+	if (err)
+		return err;
+
+	val = sensor->config->tall << SENSOR_CONFIG0_TALL_SHIFT;
+	writel(val, base + SENSOR_CONFIG0);
+
+	val  = (sensor->config->tsample - 1) << SENSOR_CONFIG1_TSAMPLE_SHIFT;
+	val |= sensor->config->tiddq_en << SENSOR_CONFIG1_TIDDQ_EN_SHIFT;
+	val |= sensor->config->ten_count << SENSOR_CONFIG1_TEN_COUNT_SHIFT;
+	val |= SENSOR_CONFIG1_TEMP_ENABLE;
+	writel(val, base + SENSOR_CONFIG1);
+
+	writel(calib, base + SENSOR_CONFIG2);
+
+	return 0;
+}
+
+/*
+ * Translate from soctherm readback format to millicelsius.
+ * The soctherm readback format in bits is as follows:
+ *   TTTTTTTT H______N
+ * where T's contain the temperature in Celsius,
+ * H denotes an addition of 0.5 Celsius and N denotes negation
+ * of the final value.
+ */
+static long translate_temp(u16 val)
+{
+	long t;
+
+	t = ((val & READBACK_VALUE_MASK) >> READBACK_VALUE_SHIFT) * 1000;
+	if (val & READBACK_ADD_HALF)
+		t += 500;
+	if (val & READBACK_NEGATE)
+		t *= -1;
+
+	return t;
+}
+
+static int tegra_thermctl_get_temp(void *data, long *out_temp)
+{
+	struct tegra_thermctl_zone *zone = data;
+	u32 val;
+
+	val = (readl(zone->reg) >> zone->shift) & SENSOR_TEMP_MASK;
+	*out_temp = translate_temp(val);
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops tegra_of_thermal_ops = {
+	.get_temp = tegra_thermctl_get_temp,
+};
+
+static const struct of_device_id tegra_soctherm_of_match[] = {
+	{ .compatible = "nvidia,tegra124-soctherm" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_soctherm_of_match);
+
+struct thermctl_zone_desc {
+	unsigned int offset;
+	unsigned int shift;
+};
+
+static const struct thermctl_zone_desc t124_thermctl_temp_zones[] = {
+	{ SENSOR_TEMP1, 16 },
+	{ SENSOR_TEMP2, 16 },
+	{ SENSOR_TEMP1, 0 },
+	{ SENSOR_TEMP2, 0 }
+};
+
+static int tegra_soctherm_probe(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra;
+	struct thermal_zone_device *tz;
+	struct tsensor_shared_calibration shared_calib;
+	struct resource *res;
+	unsigned int i;
+	int err;
+
+	const struct tegra_tsensor *tsensors = t124_tsensors;
+
+	tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
+	if (!tegra)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tegra->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tegra->regs))
+		return PTR_ERR(tegra->regs);
+
+	tegra->reset = devm_reset_control_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->reset)) {
+		dev_err(&pdev->dev, "can't get soctherm reset\n");
+		return PTR_ERR(tegra->reset);
+	}
+
+	tegra->clock_tsensor = devm_clk_get(&pdev->dev, "tsensor");
+	if (IS_ERR(tegra->clock_tsensor)) {
+		dev_err(&pdev->dev, "can't get tsensor clock\n");
+		return PTR_ERR(tegra->clock_tsensor);
+	}
+
+	tegra->clock_soctherm = devm_clk_get(&pdev->dev, "soctherm");
+	if (IS_ERR(tegra->clock_soctherm)) {
+		dev_err(&pdev->dev, "can't get soctherm clock\n");
+		return PTR_ERR(tegra->clock_soctherm);
+	}
+
+	reset_control_assert(tegra->reset);
+
+	err = clk_prepare_enable(tegra->clock_soctherm);
+	if (err)
+		return err;
+
+	err = clk_prepare_enable(tegra->clock_tsensor);
+	if (err) {
+		clk_disable_unprepare(tegra->clock_soctherm);
+		return err;
+	}
+
+	reset_control_deassert(tegra->reset);
+
+	/* Initialize raw sensors */
+
+	err = calculate_shared_calibration(&shared_calib);
+	if (err)
+		goto disable_clocks;
+
+	for (i = 0; i < ARRAY_SIZE(t124_tsensors); ++i) {
+		err = enable_tsensor(tegra, tsensors + i, &shared_calib);
+		if (err)
+			goto disable_clocks;
+	}
+
+	writel(SENSOR_PDIV_T124, tegra->regs + SENSOR_PDIV);
+	writel(SENSOR_HOTSPOT_OFF_T124, tegra->regs + SENSOR_HOTSPOT_OFF);
+
+	/* Initialize thermctl sensors */
+
+	for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) {
+		struct tegra_thermctl_zone *zone =
+			devm_kzalloc(&pdev->dev, sizeof(*zone), GFP_KERNEL);
+		if (!zone) {
+			err = -ENOMEM;
+			goto unregister_tzs;
+		}
+
+		zone->reg = tegra->regs + t124_thermctl_temp_zones[i].offset;
+		zone->shift = t124_thermctl_temp_zones[i].shift;
+
+		tz = thermal_zone_of_sensor_register(&pdev->dev, i, zone,
+						     &tegra_of_thermal_ops);
+		if (IS_ERR(tz)) {
+			err = PTR_ERR(tz);
+			dev_err(&pdev->dev, "failed to register sensor: %d\n",
+				err);
+			goto unregister_tzs;
+		}
+
+		tegra->thermctl_tzs[i] = tz;
+	}
+
+	return 0;
+
+unregister_tzs:
+	while (i--)
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  tegra->thermctl_tzs[i]);
+
+disable_clocks:
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return err;
+}
+
+static int tegra_soctherm_remove(struct platform_device *pdev)
+{
+	struct tegra_soctherm *tegra = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tegra->thermctl_tzs); ++i) {
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  tegra->thermctl_tzs[i]);
+	}
+
+	clk_disable_unprepare(tegra->clock_tsensor);
+	clk_disable_unprepare(tegra->clock_soctherm);
+
+	return 0;
+}
+
+static struct platform_driver tegra_soctherm_driver = {
+	.probe = tegra_soctherm_probe,
+	.remove = tegra_soctherm_remove,
+	.driver = {
+		.name = "tegra-soctherm",
+		.of_match_table = tegra_soctherm_of_match,
+	},
+};
+module_platform_driver(tegra_soctherm_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_DESCRIPTION("NVIDIA Tegra SOCTHERM thermal management driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 43b9070..84fdf07 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -368,7 +368,7 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 	tz->ops->get_trip_temp(tz, trip, &trip_temp);
 
 	/* If we have not crossed the trip_temp, we do not care. */
-	if (tz->temperature < trip_temp)
+	if (trip_temp <= 0 || tz->temperature < trip_temp)
 		return;
 
 	trace_thermal_zone_trip(tz, trip, trip_type);
@@ -757,6 +757,7 @@ policy_store(struct device *dev, struct device_attribute *attr,
 	snprintf(name, sizeof(name), "%s", buf);
 
 	mutex_lock(&thermal_governor_lock);
+	mutex_lock(&tz->lock);
 
 	gov = __find_governor(strim(name));
 	if (!gov)
@@ -766,6 +767,7 @@ policy_store(struct device *dev, struct device_attribute *attr,
 	ret = count;
 
 exit:
+	mutex_unlock(&tz->lock);
 	mutex_unlock(&thermal_governor_lock);
 	return ret;
 }
@@ -1835,10 +1837,10 @@ static int __init thermal_init(void)
 
 exit_netlink:
 	genetlink_exit();
-unregister_governors:
-	thermal_unregister_governors();
 unregister_class:
 	class_unregister(&thermal_class);
+unregister_governors:
+	thermal_unregister_governors();
 error:
 	idr_destroy(&thermal_tz_idr);
 	idr_destroy(&thermal_cdev_idr);
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d15d243..9083e75 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -89,9 +89,27 @@ static inline void thermal_gov_user_space_unregister(void) {}
 #ifdef CONFIG_THERMAL_OF
 int of_parse_thermal_zones(void);
 void of_thermal_destroy_zones(void);
+int of_thermal_get_ntrips(struct thermal_zone_device *);
+bool of_thermal_is_trip_valid(struct thermal_zone_device *, int);
+const struct thermal_trip * const
+of_thermal_get_trip_points(struct thermal_zone_device *);
 #else
 static inline int of_parse_thermal_zones(void) { return 0; }
 static inline void of_thermal_destroy_zones(void) { }
+static inline int of_thermal_get_ntrips(struct thermal_zone_device *tz)
+{
+	return 0;
+}
+static inline bool of_thermal_is_trip_valid(struct thermal_zone_device *tz,
+					    int trip)
+{
+	return 0;
+}
+static inline const struct thermal_trip * const
+of_thermal_get_trip_points(struct thermal_zone_device *tz)
+{
+	return NULL;
+}
 #endif
 
 #endif /* __THERMAL_CORE_H__ */
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 9eec26d..5fd0386 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -286,6 +286,11 @@ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal,
 	return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp);
 }
 
+static const struct thermal_zone_of_device_ops ti_of_thermal_ops = {
+	.get_temp = __ti_thermal_get_temp,
+	.get_trend = __ti_thermal_get_trend,
+};
+
 static struct thermal_zone_device_ops ti_thermal_ops = {
 	.get_temp = ti_thermal_get_temp,
 	.get_trend = ti_thermal_get_trend,
@@ -333,8 +338,7 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
 
 	/* in case this is specified by DT */
 	data->ti_thermal = thermal_zone_of_sensor_register(bgp->dev, id,
-					data, __ti_thermal_get_temp,
-					__ti_thermal_get_trend);
+					data, &ti_of_thermal_ops);
 	if (IS_ERR(data->ti_thermal)) {
 		/* Create thermal zone */
 		data->ti_thermal = thermal_zone_device_register(domain,
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index b4b58ae..555de07 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -530,7 +530,7 @@ static int dw8250_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int dw8250_runtime_suspend(struct device *dev)
 {
 	struct dw8250_data *data = dev_get_drvdata(dev);
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index 6f93123..7a11fac 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -244,7 +244,7 @@ static int mtk8250_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int mtk8250_runtime_suspend(struct device *dev)
 {
 	struct mtk8250_data *data = dev_get_drvdata(dev);
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 336602e..96b69bf 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -561,7 +561,7 @@ static int omap_8250_startup(struct uart_port *port)
 	if (ret)
 		goto err;
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	up->capabilities |= UART_CAP_RPM;
 #endif
 
@@ -997,12 +997,12 @@ static int omap8250_probe(struct platform_device *pdev)
 	up.port.fifosize = 64;
 	up.tx_loadsz = 64;
 	up.capabilities = UART_CAP_FIFO;
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	/*
-	 * PM_RUNTIME is mostly transparent. However to do it right we need to a
+	 * Runtime PM is mostly transparent. However to do it right we need to a
 	 * TX empty interrupt before we can put the device to auto idle. So if
-	 * PM_RUNTIME is not enabled we don't add that flag and can spare that
-	 * one extra interrupt in the TX path.
+	 * PM is not enabled we don't add that flag and can spare that one extra
+	 * interrupt in the TX path.
 	 */
 	up.capabilities |= UART_CAP_RPM;
 #endif
@@ -1105,7 +1105,7 @@ static int omap8250_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 
 static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv,
 					   bool enable)
@@ -1179,7 +1179,7 @@ static int omap8250_resume(struct device *dev)
 #define omap8250_complete NULL
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int omap8250_lost_context(struct uart_8250_port *up)
 {
 	u32 val;
diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c
index e1f4fda..8fe4501 100644
--- a/drivers/tty/serial/mfd.c
+++ b/drivers/tty/serial/mfd.c
@@ -1252,12 +1252,7 @@ static int serial_hsu_resume(struct pci_dev *pdev)
 	}
 	return 0;
 }
-#else
-#define serial_hsu_suspend	NULL
-#define serial_hsu_resume	NULL
-#endif
 
-#ifdef CONFIG_PM_RUNTIME
 static int serial_hsu_runtime_idle(struct device *dev)
 {
 	pm_schedule_suspend(dev, 500);
@@ -1274,6 +1269,8 @@ static int serial_hsu_runtime_resume(struct device *dev)
 	return 0;
 }
 #else
+#define serial_hsu_suspend		NULL
+#define serial_hsu_resume		NULL
 #define serial_hsu_runtime_idle		NULL
 #define serial_hsu_runtime_suspend	NULL
 #define serial_hsu_runtime_resume	NULL
diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index 8abe8ea..62da853 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c
@@ -1792,7 +1792,7 @@ static void __exit msm_serial_hs_exit(void)
 }
 module_exit(msm_serial_hs_exit);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int msm_hs_runtime_idle(struct device *dev)
 {
 	/*
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 435478a..2e1073d 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1776,7 +1776,7 @@ static void serial_omap_mdr1_errataset(struct uart_omap_port *up, u8 mdr1)
 	}
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static void serial_omap_restore_context(struct uart_omap_port *up)
 {
 	if (up->errata & UART_ERRATA_i202_MDR1_ACCESS)
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index 9cfda6a..cc0ced0 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -43,7 +43,7 @@ config USB_DYNAMIC_MINORS
 
 config USB_OTG
 	bool "OTG support"
-	depends on PM_RUNTIME
+	depends on PM
 	default n
 	help
 	  The most notable feature of USB OTG is support for a
diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c
index 024f584..3a49416 100644
--- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c
+++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c
@@ -1131,19 +1131,19 @@ static int usbg_submit_command(struct f_uas *fu,
 
 	switch (cmd_iu->prio_attr & 0x7) {
 	case UAS_HEAD_TAG:
-		cmd->prio_attr = MSG_HEAD_TAG;
+		cmd->prio_attr = TCM_HEAD_TAG;
 		break;
 	case UAS_ORDERED_TAG:
-		cmd->prio_attr = MSG_ORDERED_TAG;
+		cmd->prio_attr = TCM_ORDERED_TAG;
 		break;
 	case UAS_ACA:
-		cmd->prio_attr = MSG_ACA_TAG;
+		cmd->prio_attr = TCM_ACA_TAG;
 		break;
 	default:
 		pr_debug_once("Unsupported prio_attr: %02x.\n",
 				cmd_iu->prio_attr);
 	case UAS_SIMPLE_TAG:
-		cmd->prio_attr = MSG_SIMPLE_TAG;
+		cmd->prio_attr = TCM_SIMPLE_TAG;
 		break;
 	}
 
@@ -1240,7 +1240,7 @@ static int bot_submit_command(struct f_uas *fu,
 		goto err;
 	}
 
-	cmd->prio_attr = MSG_SIMPLE_TAG;
+	cmd->prio_attr = TCM_SIMPLE_TAG;
 	se_cmd = &cmd->se_cmd;
 	cmd->unpacked_lun = cbw->Lun;
 	cmd->is_read = cbw->Flags & US_BULK_FLAG_IN ? 1 : 0;
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index e752c30..395649f 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -1739,7 +1739,7 @@ static int isp1760_hub_status_data(struct usb_hcd *hcd, char *buf)
 	int retval = 1;
 	unsigned long flags;
 
-	/* if !PM_RUNTIME, root hub timers won't get shut down ... */
+	/* if !PM, root hub timers won't get shut down ... */
 	if (!HC_IS_RUNNING(hcd->state))
 		return 0;
 
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 75811dd..036924e 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -3087,7 +3087,7 @@ static int oxu_hub_status_data(struct usb_hcd *hcd, char *buf)
 	int ports, i, retval = 1;
 	unsigned long flags;
 
-	/* if !PM_RUNTIME, root hub timers won't get shut down ... */
+	/* if !PM, root hub timers won't get shut down ... */
 	if (!HC_IS_RUNNING(hcd->state))
 		return 0;
 
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 0cd1f44..c6d0c8e74 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -20,7 +20,7 @@ config AB8500_USB
 
 config FSL_USB2_OTG
 	bool "Freescale USB OTG Transceiver Driver"
-	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM_RUNTIME
+	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
 	select USB_OTG
 	select USB_PHY
 	help
@@ -153,7 +153,7 @@ config USB_MSM_OTG
 
 config USB_MV_OTG
 	tristate "Marvell USB OTG support"
-	depends on USB_EHCI_MV && USB_MV_UDC && PM_RUNTIME
+	depends on USB_EHCI_MV && USB_MV_UDC && PM
 	select USB_OTG
 	select USB_PHY
 	help
diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index 715f299..ec84758 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -41,7 +41,7 @@ config USB_STORAGE_REALTEK
 
 config REALTEK_AUTOPM
 	bool "Realtek Card Reader autosuspend support"
-	depends on USB_STORAGE_REALTEK && PM_RUNTIME
+	depends on USB_STORAGE_REALTEK && PM
 	default y
 
 config USB_STORAGE_DATAFAB
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index d8c5763..14e27ab 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -16,7 +16,7 @@ config VFIO_SPAPR_EEH
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	depends on IOMMU_API
-	select VFIO_IOMMU_TYPE1 if X86
+	select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU)
 	select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
 	select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
 	select ANON_INODES
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index c41b01e..c6bb5da 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -16,3 +16,11 @@ config VFIO_PCI_VGA
 	  BIOS and generic video drivers.
 
 	  If you don't know what to do here, say N.
+
+config VFIO_PCI_MMAP
+	depends on VFIO_PCI
+	def_bool y if !S390
+
+config VFIO_PCI_INTX
+	depends on VFIO_PCI
+	def_bool y if !S390
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 9558da3..255201f 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -215,7 +215,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
 		u8 pin;
 		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
-		if (pin)
+		if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && pin)
 			return 1;
 
 	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
@@ -406,7 +406,8 @@ static long vfio_pci_ioctl(void *device_data,
 
 			info.flags = VFIO_REGION_INFO_FLAG_READ |
 				     VFIO_REGION_INFO_FLAG_WRITE;
-			if (pci_resource_flags(pdev, info.index) &
+			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
+			    pci_resource_flags(pdev, info.index) &
 			    IORESOURCE_MEM && info.size >= PAGE_SIZE)
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
 			break;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 1de3f94..ff75ca3 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -609,6 +609,10 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
 
 	/* Sometimes used by sw, just virtualize */
 	p_setb(perm, PCI_INTERRUPT_LINE, (u8)ALL_VIRT, (u8)ALL_WRITE);
+
+	/* Virtualize interrupt pin to allow hiding INTx */
+	p_setb(perm, PCI_INTERRUPT_PIN, (u8)ALL_VIRT, (u8)NO_WRITE);
+
 	return 0;
 }
 
@@ -1445,6 +1449,9 @@ int vfio_config_init(struct vfio_pci_device *vdev)
 		*(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
 	}
 
+	if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX))
+		vconfig[PCI_INTERRUPT_PIN] = 0;
+
 	ret = vfio_cap_init(vdev);
 	if (ret)
 		goto out;
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 5174eba..3bb02c6 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <uapi/linux/virtio_config.h>
 
 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
 {
@@ -28,13 +29,14 @@ static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
 
 /* Returns vring->num if empty, -ve on error. */
 static inline int __vringh_get_head(const struct vringh *vrh,
-				    int (*getu16)(u16 *val, const u16 *p),
+				    int (*getu16)(const struct vringh *vrh,
+						  u16 *val, const __virtio16 *p),
 				    u16 *last_avail_idx)
 {
 	u16 avail_idx, i, head;
 	int err;
 
-	err = getu16(&avail_idx, &vrh->vring.avail->idx);
+	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
 	if (err) {
 		vringh_bad("Failed to access avail idx at %p",
 			   &vrh->vring.avail->idx);
@@ -49,7 +51,7 @@ static inline int __vringh_get_head(const struct vringh *vrh,
 
 	i = *last_avail_idx & (vrh->vring.num - 1);
 
-	err = getu16(&head, &vrh->vring.avail->ring[i]);
+	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
 	if (err) {
 		vringh_bad("Failed to read head: idx %d address %p",
 			   *last_avail_idx, &vrh->vring.avail->ring[i]);
@@ -144,28 +146,32 @@ static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
 }
 
 /* No reason for this code to be inline. */
-static int move_to_indirect(int *up_next, u16 *i, void *addr,
+static int move_to_indirect(const struct vringh *vrh,
+			    int *up_next, u16 *i, void *addr,
 			    const struct vring_desc *desc,
 			    struct vring_desc **descs, int *desc_max)
 {
+	u32 len;
+
 	/* Indirect tables can't have indirect. */
 	if (*up_next != -1) {
 		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
 		return -EINVAL;
 	}
 
-	if (unlikely(desc->len % sizeof(struct vring_desc))) {
-		vringh_bad("Strange indirect len %u", desc->len);
+	len = vringh32_to_cpu(vrh, desc->len);
+	if (unlikely(len % sizeof(struct vring_desc))) {
+		vringh_bad("Strange indirect len %u", len);
 		return -EINVAL;
 	}
 
 	/* We will check this when we follow it! */
-	if (desc->flags & VRING_DESC_F_NEXT)
-		*up_next = desc->next;
+	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
+		*up_next = vringh16_to_cpu(vrh, desc->next);
 	else
 		*up_next = -2;
 	*descs = addr;
-	*desc_max = desc->len / sizeof(struct vring_desc);
+	*desc_max = len / sizeof(struct vring_desc);
 
 	/* Now, start at the first indirect. */
 	*i = 0;
@@ -287,22 +293,25 @@ __vringh_iov(struct vringh *vrh, u16 i,
 		if (unlikely(err))
 			goto fail;
 
-		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+		if (unlikely(desc.flags &
+			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
+			u64 a = vringh64_to_cpu(vrh, desc.addr);
+
 			/* Make sure it's OK, and get offset. */
-			len = desc.len;
-			if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+			len = vringh32_to_cpu(vrh, desc.len);
+			if (!rcheck(vrh, a, &len, &range, getrange)) {
 				err = -EINVAL;
 				goto fail;
 			}
 
-			if (unlikely(len != desc.len)) {
+			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
 				slow = true;
 				/* We need to save this range to use offset */
 				slowrange = range;
 			}
 
-			addr = (void *)(long)(desc.addr + range.offset);
-			err = move_to_indirect(&up_next, &i, addr, &desc,
+			addr = (void *)(long)(a + range.offset);
+			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
 					       &descs, &desc_max);
 			if (err)
 				goto fail;
@@ -315,7 +324,7 @@ __vringh_iov(struct vringh *vrh, u16 i,
 			goto fail;
 		}
 
-		if (desc.flags & VRING_DESC_F_WRITE)
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
 			iov = wiov;
 		else {
 			iov = riov;
@@ -336,12 +345,14 @@ __vringh_iov(struct vringh *vrh, u16 i,
 
 	again:
 		/* Make sure it's OK, and get offset. */
-		len = desc.len;
-		if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+		len = vringh32_to_cpu(vrh, desc.len);
+		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
+			    getrange)) {
 			err = -EINVAL;
 			goto fail;
 		}
-		addr = (void *)(unsigned long)(desc.addr + range.offset);
+		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
+					       range.offset);
 
 		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
 			err = resize_iovec(iov, gfp);
@@ -353,14 +364,16 @@ __vringh_iov(struct vringh *vrh, u16 i,
 		iov->iov[iov->used].iov_len = len;
 		iov->used++;
 
-		if (unlikely(len != desc.len)) {
-			desc.len -= len;
-			desc.addr += len;
+		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
+			desc.len = cpu_to_vringh32(vrh,
+				   vringh32_to_cpu(vrh, desc.len) - len);
+			desc.addr = cpu_to_vringh64(vrh,
+				    vringh64_to_cpu(vrh, desc.addr) + len);
 			goto again;
 		}
 
-		if (desc.flags & VRING_DESC_F_NEXT) {
-			i = desc.next;
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
+			i = vringh16_to_cpu(vrh, desc.next);
 		} else {
 			/* Just in case we need to finish traversing above. */
 			if (unlikely(up_next > 0)) {
@@ -387,7 +400,8 @@ fail:
 static inline int __vringh_complete(struct vringh *vrh,
 				    const struct vring_used_elem *used,
 				    unsigned int num_used,
-				    int (*putu16)(u16 *p, u16 val),
+				    int (*putu16)(const struct vringh *vrh,
+						  __virtio16 *p, u16 val),
 				    int (*putused)(struct vring_used_elem *dst,
 						   const struct vring_used_elem
 						   *src, unsigned num))
@@ -420,7 +434,7 @@ static inline int __vringh_complete(struct vringh *vrh,
 	/* Make sure buffer is written before we update index. */
 	virtio_wmb(vrh->weak_barriers);
 
-	err = putu16(&vrh->vring.used->idx, used_idx + num_used);
+	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
 	if (err) {
 		vringh_bad("Failed to update used index at %p",
 			   &vrh->vring.used->idx);
@@ -433,7 +447,9 @@ static inline int __vringh_complete(struct vringh *vrh,
 
 
 static inline int __vringh_need_notify(struct vringh *vrh,
-				       int (*getu16)(u16 *val, const u16 *p))
+				       int (*getu16)(const struct vringh *vrh,
+						     u16 *val,
+						     const __virtio16 *p))
 {
 	bool notify;
 	u16 used_event;
@@ -447,7 +463,7 @@ static inline int __vringh_need_notify(struct vringh *vrh,
 	/* Old-style, without event indices. */
 	if (!vrh->event_indices) {
 		u16 flags;
-		err = getu16(&flags, &vrh->vring.avail->flags);
+		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
 		if (err) {
 			vringh_bad("Failed to get flags at %p",
 				   &vrh->vring.avail->flags);
@@ -457,7 +473,7 @@ static inline int __vringh_need_notify(struct vringh *vrh,
 	}
 
 	/* Modern: we know when other side wants to know. */
-	err = getu16(&used_event, &vring_used_event(&vrh->vring));
+	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
 	if (err) {
 		vringh_bad("Failed to get used event idx at %p",
 			   &vring_used_event(&vrh->vring));
@@ -478,20 +494,22 @@ static inline int __vringh_need_notify(struct vringh *vrh,
 }
 
 static inline bool __vringh_notify_enable(struct vringh *vrh,
-					  int (*getu16)(u16 *val, const u16 *p),
-					  int (*putu16)(u16 *p, u16 val))
+					  int (*getu16)(const struct vringh *vrh,
+							u16 *val, const __virtio16 *p),
+					  int (*putu16)(const struct vringh *vrh,
+							__virtio16 *p, u16 val))
 {
 	u16 avail;
 
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, 0) != 0) {
+		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
 			vringh_bad("Clearing used flags %p",
 				   &vrh->vring.used->flags);
 			return true;
 		}
 	} else {
-		if (putu16(&vring_avail_event(&vrh->vring),
+		if (putu16(vrh, &vring_avail_event(&vrh->vring),
 			   vrh->last_avail_idx) != 0) {
 			vringh_bad("Updating avail event index %p",
 				   &vring_avail_event(&vrh->vring));
@@ -503,7 +521,7 @@ static inline bool __vringh_notify_enable(struct vringh *vrh,
 	 * sure it's written, then check again. */
 	virtio_mb(vrh->weak_barriers);
 
-	if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
+	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
 		vringh_bad("Failed to check avail idx at %p",
 			   &vrh->vring.avail->idx);
 		return true;
@@ -516,11 +534,13 @@ static inline bool __vringh_notify_enable(struct vringh *vrh,
 }
 
 static inline void __vringh_notify_disable(struct vringh *vrh,
-					   int (*putu16)(u16 *p, u16 val))
+					   int (*putu16)(const struct vringh *vrh,
+							 __virtio16 *p, u16 val))
 {
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
+		if (putu16(vrh, &vrh->vring.used->flags,
+			   VRING_USED_F_NO_NOTIFY)) {
 			vringh_bad("Setting used flags %p",
 				   &vrh->vring.used->flags);
 		}
@@ -528,14 +548,18 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
 }
 
 /* Userspace access helpers: in this case, addresses are really userspace. */
-static inline int getu16_user(u16 *val, const u16 *p)
+static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
 {
-	return get_user(*val, (__force u16 __user *)p);
+	__virtio16 v = 0;
+	int rc = get_user(v, (__force __virtio16 __user *)p);
+	*val = vringh16_to_cpu(vrh, v);
+	return rc;
 }
 
-static inline int putu16_user(u16 *p, u16 val)
+static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	return put_user(val, (__force u16 __user *)p);
+	__virtio16 v = cpu_to_vringh16(vrh, val);
+	return put_user(v, (__force __virtio16 __user *)p);
 }
 
 static inline int copydesc_user(void *dst, const void *src, size_t len)
@@ -577,7 +601,7 @@ static inline int xfer_to_user(void *dst, void *src, size_t len)
  * Returns an error if num is invalid: you should check pointers
  * yourself!
  */
-int vringh_init_user(struct vringh *vrh, u32 features,
+int vringh_init_user(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc __user *desc,
 		     struct vring_avail __user *avail,
@@ -589,6 +613,7 @@ int vringh_init_user(struct vringh *vrh, u32 features,
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -729,8 +754,8 @@ int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
 }
 EXPORT_SYMBOL(vringh_complete_user);
@@ -792,15 +817,16 @@ int vringh_need_notify_user(struct vringh *vrh)
 EXPORT_SYMBOL(vringh_need_notify_user);
 
 /* Kernelspace access helpers. */
-static inline int getu16_kern(u16 *val, const u16 *p)
+static inline int getu16_kern(const struct vringh *vrh,
+			      u16 *val, const __virtio16 *p)
 {
-	*val = ACCESS_ONCE(*p);
+	*val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
 	return 0;
 }
 
-static inline int putu16_kern(u16 *p, u16 val)
+static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	ACCESS_ONCE(*p) = val;
+	ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
 	return 0;
 }
 
@@ -836,7 +862,7 @@ static inline int xfer_kern(void *src, void *dst, size_t len)
  *
  * Returns an error if num is invalid.
  */
-int vringh_init_kern(struct vringh *vrh, u32 features,
+int vringh_init_kern(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc *desc,
 		     struct vring_avail *avail,
@@ -848,6 +874,7 @@ int vringh_init_kern(struct vringh *vrh, u32 features,
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -962,8 +989,8 @@ int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 
 	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
 }
diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index a623a4d..7e3a05f 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c
@@ -1630,7 +1630,7 @@ static int s3c_fb_resume(struct device *dev)
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int s3c_fb_runtime_suspend(struct device *dev)
 {
 	struct s3c_fb *sfb = dev_get_drvdata(dev);
diff --git a/drivers/video/fbdev/sh_mobile_meram.c b/drivers/video/fbdev/sh_mobile_meram.c
index 1d56108..baadfb2 100644
--- a/drivers/video/fbdev/sh_mobile_meram.c
+++ b/drivers/video/fbdev/sh_mobile_meram.c
@@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(sh_mobile_meram_cache_update);
  * Power management
  */
 
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 static int sh_mobile_meram_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -612,7 +612,7 @@ static int sh_mobile_meram_resume(struct device *dev)
 		meram_write_reg(priv->base, common_regs[i], priv->regs[i]);
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP || CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static UNIVERSAL_DEV_PM_OPS(sh_mobile_meram_dev_pm_ops,
 			    sh_mobile_meram_suspend,
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f226658..b9f70df 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -162,6 +162,27 @@ static void virtio_config_enable(struct virtio_device *dev)
 	spin_unlock_irq(&dev->config_lock);
 }
 
+/* Finalize features and, for VERSION_1, check the device accepted them. */
+static int virtio_finalize_features(struct virtio_device *dev)
+{
+	unsigned status;
+	int ret;
+
+	ret = dev->config->finalize_features(dev);
+	if (ret)
+		return ret;
+
+	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
+		return 0;
+
+	add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+	status = dev->config->get_status(dev);
+	if (status & VIRTIO_CONFIG_S_FEATURES_OK)
+		return 0;
+	dev_err(&dev->dev, "virtio: device refuses features: %x\n", status);
+	return -ENODEV;
+}
+
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
@@ -170,7 +191,6 @@ static int virtio_dev_probe(struct device *_d)
 	u64 device_features;
 	u64 driver_features;
 	u64 driver_features_legacy;
-	unsigned status;
 
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
@@ -208,21 +228,10 @@ static int virtio_dev_probe(struct device *_d)
 		if (device_features & (1ULL << i))
 			__virtio_set_bit(dev, i);
 
-	err = dev->config->finalize_features(dev);
+	err = virtio_finalize_features(dev);
 	if (err)
 		goto err;
 
-	if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
-		add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
-		status = dev->config->get_status(dev);
-		if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
-			dev_err(_d, "virtio: device refuses features: %x\n",
-			       status);
-			err = -ENODEV;
-			goto err;
-		}
-	}
-
 	err = drv->probe(dev);
 	if (err)
 		goto err;
@@ -372,7 +381,7 @@ int virtio_device_restore(struct virtio_device *dev)
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
-	ret = dev->config->finalize_features(dev);
+	ret = virtio_finalize_features(dev);
 	if (ret)
 		goto err;
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c9703d4..50c5f42 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/balloon_compaction.h>
+#include <linux/oom.h>
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -36,6 +37,12 @@
  */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
+#define OOM_VBALLOON_DEFAULT_PAGES 256
+#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
+
+static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
+module_param(oom_pages, int, S_IRUSR | S_IWUSR);
+MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
 struct virtio_balloon
 {
@@ -71,6 +78,9 @@ struct virtio_balloon
 	/* Memory statistics */
 	int need_stats_update;
 	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
+
+	/* To register callback in oom notifier call chain */
+	struct notifier_block nb;
 };
 
 static struct virtio_device_id id_table[] = {
@@ -168,8 +178,9 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
 	}
 }
 
-static void leak_balloon(struct virtio_balloon *vb, size_t num)
+static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
 {
+	unsigned num_freed_pages;
 	struct page *page;
 	struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
 
@@ -186,6 +197,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
 	}
 
+	num_freed_pages = vb->num_pfns;
 	/*
 	 * Note that if
 	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
@@ -195,6 +207,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 		tell_host(vb, vb->deflate_vq);
 	mutex_unlock(&vb->balloon_lock);
 	release_pages_by_pfn(vb->pfns, vb->num_pfns);
+	return num_freed_pages;
 }
 
 static inline void update_stat(struct virtio_balloon *vb, int idx,
@@ -287,6 +300,38 @@ static void update_balloon_size(struct virtio_balloon *vb)
 		      &actual);
 }
 
+/*
+ * virtballoon_oom_notify - OOM notifier: give balloon pages back
+ *			    (called from out_of_memory())
+ * @self : notifier block embedded in the virtio_balloon
+ * @dummy: not used
+ * @parm : page counter; the amount released here is added to it
+ *
+ * Processes should not be killed while the balloon still holds pages.
+ * When VIRTIO_BALLOON_F_DEFLATE_ON_OOM is set, leak up to oom_pages
+ * pages and update the balloon size; if anything was released the
+ * system returns and retries the allocation that triggered the OOM
+ * killer.  Without the feature bit the callback does nothing.
+ * Always returns NOTIFY_OK.
+ */
+static int virtballoon_oom_notify(struct notifier_block *self,
+				  unsigned long dummy, void *parm)
+{
+	struct virtio_balloon *vb =
+		container_of(self, struct virtio_balloon, nb);
+	unsigned long *freed = parm;
+	unsigned released;
+
+	/* Only deflate if the feature bit is present. */
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
+		released = leak_balloon(vb, oom_pages);
+		update_balloon_size(vb);
+		*freed += released;
+	}
+
+	return NOTIFY_OK;
+}
+
 static int balloon(void *_vballoon)
 {
 	struct virtio_balloon *vb = _vballoon;
@@ -443,6 +488,12 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vb;
 
+	vb->nb.notifier_call = virtballoon_oom_notify;
+	vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY;
+	err = register_oom_notifier(&vb->nb);
+	if (err < 0)
+		goto out_oom_notify;
+
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
@@ -452,6 +503,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	return 0;
 
 out_del_vqs:
+	unregister_oom_notifier(&vb->nb);
+out_oom_notify:
 	vdev->config->del_vqs(vdev);
 out_free_vb:
 	kfree(vb);
@@ -476,6 +529,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
 
+	unregister_oom_notifier(&vb->nb);
 	kthread_stop(vb->thread);
 	remove_common(vb);
 	kfree(vb);
@@ -515,6 +569,7 @@ static int virtballoon_restore(struct virtio_device *vdev)
 static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
+	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 953057d..2ef9529 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -458,7 +458,44 @@ static int virtio_pci_restore(struct device *dev)
 	return virtio_device_restore(&vp_dev->vdev);
 }
 
-const struct dev_pm_ops virtio_pci_pm_ops = {
+static const struct dev_pm_ops virtio_pci_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
 };
 #endif
+
+
+/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
+static const struct pci_device_id virtio_pci_id_table[] = {
+	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
+
+static int virtio_pci_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	return virtio_pci_legacy_probe(pci_dev, id);
+}
+
+static void virtio_pci_remove(struct pci_dev *pci_dev)
+{
+	virtio_pci_legacy_remove(pci_dev);
+}
+
+static struct pci_driver virtio_pci_driver = {
+	.name		= "virtio-pci",
+	.id_table	= virtio_pci_id_table,
+	.probe		= virtio_pci_probe,
+	.remove		= virtio_pci_remove,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm	= &virtio_pci_pm_ops,
+#endif
+};
+
+module_pci_driver(virtio_pci_driver);
+
+MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
+MODULE_DESCRIPTION("virtio-pci");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index d840dad..adddb64 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -27,7 +27,6 @@
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
-#define VIRTIO_PCI_NO_LEGACY
 #include <linux/virtio_pci.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
@@ -129,8 +128,8 @@ const char *vp_bus_name(struct virtio_device *vdev);
 int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
 void virtio_pci_release_dev(struct device *);
 
-#ifdef CONFIG_PM_SLEEP
-extern const struct dev_pm_ops virtio_pci_pm_ops;
-#endif
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id);
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev);
 
 #endif
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 2588252..6c76f0f 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -19,14 +19,6 @@
 
 #include "virtio_pci_common.h"
 
-/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
-static const struct pci_device_id virtio_pci_id_table[] = {
-	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
-
 /* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
@@ -220,7 +212,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
 };
 
 /* the PCI probing function */
-static int virtio_pci_probe(struct pci_dev *pci_dev,
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
 			    const struct pci_device_id *id)
 {
 	struct virtio_pci_device *vp_dev;
@@ -300,7 +292,7 @@ out:
 	return err;
 }
 
-static void virtio_pci_remove(struct pci_dev *pci_dev)
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
@@ -312,15 +304,3 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 	pci_disable_device(pci_dev);
 	kfree(vp_dev);
 }
-
-static struct pci_driver virtio_pci_driver = {
-	.name		= "virtio-pci",
-	.id_table	= virtio_pci_id_table,
-	.probe		= virtio_pci_probe,
-	.remove		= virtio_pci_remove,
-#ifdef CONFIG_PM_SLEEP
-	.driver.pm	= &virtio_pci_pm_ops,
-#endif
-};
-
-module_pci_driver(virtio_pci_driver);
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 65b84d8..d6add51 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -326,6 +326,52 @@ static void imx2_wdt_shutdown(struct platform_device *pdev)
 	}
 }
 
+#ifdef CONFIG_PM_SLEEP
+/* Suspend: set the maximum timeout, ping once, then disable the clock */
+static int imx2_wdt_suspend(struct device *dev)
+{
+	struct watchdog_device *wdog = dev_get_drvdata(dev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+
+	imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
+	imx2_wdt_ping(wdog);
+
+	/* Watchdog is not active but the IP block still runs: stop wdev->timer */
+	if (!watchdog_active(wdog) && imx2_wdt_is_running(wdev))
+		del_timer_sync(&wdev->timer);
+
+	clk_disable_unprepare(wdev->clk);
+
+	return 0;
+}
+
+/* Resume: re-enable the clock, then restart or re-arm the watchdog */
+static int imx2_wdt_resume(struct device *dev)
+{
+	struct watchdog_device *wdog = dev_get_drvdata(dev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+	int ret = clk_prepare_enable(wdev->clk);
+
+	if (ret)
+		return ret;
+
+	if (watchdog_active(wdog) && !imx2_wdt_is_running(wdev)) {
+		/* Resuming from deep sleep: the watchdog must be restarted. */
+		imx2_wdt_setup(wdog);
+		imx2_wdt_set_timeout(wdog, wdog->timeout);
+		imx2_wdt_ping(wdog);
+	} else if (imx2_wdt_is_running(wdev)) {
+		imx2_wdt_ping(wdog);
+		mod_timer(&wdev->timer, jiffies + wdog->timeout * HZ / 2);
+	}
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(imx2_wdt_pm_ops, imx2_wdt_suspend,
+			 imx2_wdt_resume);
+
 static const struct of_device_id imx2_wdt_dt_ids[] = {
 	{ .compatible = "fsl,imx21-wdt", },
 	{ /* sentinel */ }
@@ -337,6 +383,7 @@ static struct platform_driver imx2_wdt_driver = {
 	.shutdown	= imx2_wdt_shutdown,
 	.driver		= {
 		.name	= DRIVER_NAME,
+		.pm     = &imx2_wdt_pm_ops,
 		.of_match_table = imx2_wdt_dt_ids,
 	},
 };
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 50610a6..e999496e 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -606,7 +606,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	init_waitqueue_head(&tmr->tmr_wait);
 
 	transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
-		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
+		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG,
 		&pending_req->sense_buffer[0]);
 
 	rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d..97aff28 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -254,6 +254,7 @@ static char *scanarg(char *s, char del)
 				return NULL;
 		}
 	}
+	s[-1] ='\0';
 	return s;
 }
 
@@ -378,8 +379,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->magic)
+		if (!e->magic[0])
 			goto einval;
 		if (USE_DEBUG)
 			print_hex_dump_bytes(
@@ -391,8 +391,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->mask) {
+		if (!e->mask[0]) {
 			e->mask = NULL;
 			pr_debug("register:  mask[raw]: none\n");
 		} else if (USE_DEBUG)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd7..7e60741 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3481,8 +3481,8 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
 				   u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes);
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type);
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3096512..8c63419 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4121,12 +4121,6 @@ again:
 		if (ret)
 			break;
 
-		/* opt_discard */
-		if (btrfs_test_opt(root, DISCARD))
-			ret = btrfs_error_discard_extent(root, start,
-							 end + 1 - start,
-							 NULL);
-
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
 		cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6ae..a80b971 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1889,8 +1889,8 @@ static int btrfs_issue_discard(struct block_device *bdev,
 	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }
 
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-				u64 num_bytes, u64 *actual_bytes)
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes)
 {
 	int ret;
 	u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 	update_global_block_rsv(fs_info);
 }
 
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+			      const bool return_free_space)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 
 		if (start < cache->last_byte_to_unpin) {
 			len = min(len, cache->last_byte_to_unpin - start);
-			btrfs_add_free_space(cache, start, len);
+			if (return_free_space)
+				btrfs_add_free_space(cache, start, len);
 		}
 
 		start += len;
@@ -5815,7 +5817,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 						   end + 1 - start, NULL);
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
-		unpin_extent_range(root, start, end);
+		unpin_extent_range(root, start, end, true);
 		cond_resched();
 	}
 
@@ -8872,6 +8874,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 				       cache_node);
 		rb_erase(&block_group->cache_node,
 			 &info->block_group_cache_tree);
+		RB_CLEAR_NODE(&block_group->cache_node);
 		spin_unlock(&info->block_group_cache_lock);
 
 		down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 			spin_lock(&info->block_group_cache_lock);
 			rb_erase(&cache->cache_node,
 				 &info->block_group_cache_tree);
+			RB_CLEAR_NODE(&cache->cache_node);
 			spin_unlock(&info->block_group_cache_lock);
 			btrfs_put_block_group(cache);
 			goto error;
@@ -9271,6 +9275,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 		spin_lock(&root->fs_info->block_group_cache_lock);
 		rb_erase(&cache->cache_node,
 			 &root->fs_info->block_group_cache_tree);
+		RB_CLEAR_NODE(&cache->cache_node);
 		spin_unlock(&root->fs_info->block_group_cache_lock);
 		btrfs_put_block_group(cache);
 		return ret;
@@ -9690,13 +9695,7 @@ out:
 
 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-	return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes)
-{
-	return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+	return unpin_extent_range(root, start, end, false);
 }
 
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847b..d6c03f7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2966,8 +2966,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 
-	ret = btrfs_error_discard_extent(fs_info->extent_root,
-					 start, bytes, &trimmed);
+	ret = btrfs_discard_extent(fs_info->extent_root,
+				   start, bytes, &trimmed);
 	if (!ret)
 		*total_trimmed += trimmed;
 
@@ -3185,16 +3185,18 @@ out:
 
 		spin_unlock(&block_group->lock);
 
+		lock_chunks(block_group->fs_info->chunk_root);
 		em_tree = &block_group->fs_info->mapping_tree.map_tree;
 		write_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, block_group->key.objectid,
 					   1);
 		BUG_ON(!em); /* logic error, can't happen */
+		/*
+		 * remove_extent_mapping() will delete us from the pinned_chunks
+		 * list, which is protected by the chunk mutex.
+		 */
 		remove_extent_mapping(em_tree, em);
 		write_unlock(&em_tree->lock);
-
-		lock_chunks(block_group->fs_info->chunk_root);
-		list_del_init(&em->list);
 		unlock_chunks(block_group->fs_info->chunk_root);
 
 		/* once for us and once for the tree */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790..50c5a87 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name)
 	struct file *filp;
 
 	filp = filp_open(path_name, O_RDWR, 0);
-	if (!filp)
+	if (IS_ERR(filp))
 		return;
 	file_update_time(filp);
 	filp_close(filp, NULL);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bb..f5013d9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 	struct ceph_osd_client *osdc =
 		&ceph_inode_to_client(inode)->client->osdc;
 	int err = 0;
+	u64 off = page_offset(page);
 	u64 len = PAGE_CACHE_SIZE;
 
-	err = ceph_readpage_from_fscache(inode, page);
+	if (off >= i_size_read(inode)) {
+		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+		SetPageUptodate(page);
+		return 0;
+	}
 
+	/*
+	 * Uptodate inline data should have been added into page cache
+	 * while getting Fcr caps.
+	 */
+	if (ci->i_inline_version != CEPH_INLINE_NONE)
+		return -EINVAL;
+
+	err = ceph_readpage_from_fscache(inode, page);
 	if (err == 0)
 		goto out;
 
 	dout("readpage inode %p file %p page %p index %lu\n",
 	     inode, filp, page, page->index);
 	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
-				  (u64) page_offset(page), &len,
+				  off, &len,
 				  ci->i_truncate_seq, ci->i_truncate_size,
 				  &page, 1, 0);
 	if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 	     off, len);
 	vino = ceph_vino(inode);
 	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
-				    1, CEPH_OSD_OP_READ,
+				    0, 1, CEPH_OSD_OP_READ,
 				    CEPH_OSD_FLAG_READ, NULL,
 				    ci->i_truncate_seq, ci->i_truncate_size,
 				    false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	int rc = 0;
 	int max = 0;
 
+	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
+		return -EINVAL;
+
 	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
 					 &nr_pages);
 
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	int rc = 0;
 	unsigned wsize = 1 << inode->i_blkbits;
 	struct ceph_osd_request *req = NULL;
-	int do_sync;
+	int do_sync = 0;
 	u64 truncate_size, snap_size;
 	u32 truncate_seq;
 
@@ -750,7 +766,6 @@ retry:
 	last_snapc = snapc;
 
 	while (!done && index <= end) {
-		int num_ops = do_sync ? 2 : 1;
 		unsigned i;
 		int first;
 		pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
 				len = wsize;
 				req = ceph_osdc_new_request(&fsc->client->osdc,
 							&ci->i_layout, vino,
-							offset, &len, num_ops,
+							offset, &len, 0,
+							do_sync ? 2 : 1,
 							CEPH_OSD_OP_WRITE,
 							CEPH_OSD_FLAG_WRITE |
 							CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
 					break;
 				}
 
+				if (do_sync)
+					osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
 				req->r_callback = writepages_finish;
 				req->r_inode = inode;
 
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = file_inode(vma->vm_file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_info *fi = vma->vm_file->private_data;
+	struct page *pinned_page = NULL;
 	loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
 	int want, got, ret;
 
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		want = CEPH_CAP_FILE_CACHE;
 	while (1) {
 		got = 0;
-		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
+				    -1, &got, &pinned_page);
 		if (ret == 0)
 			break;
 		if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
 	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
 
-	ret = filemap_fault(vma, vmf);
+	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
+	    ci->i_inline_version == CEPH_INLINE_NONE)
+		ret = filemap_fault(vma, vmf);
+	else
+		ret = -EAGAIN;
 
 	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
 	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+	if (pinned_page)
+		page_cache_release(pinned_page);
 	ceph_put_cap_refs(ci, got);
 
+	if (ret != -EAGAIN)
+		return ret;
+
+	/* read inline data */
+	if (off >= PAGE_CACHE_SIZE) {
+		/* does not support inline data > PAGE_SIZE */
+		ret = VM_FAULT_SIGBUS;
+	} else {
+		int ret1;
+		struct address_space *mapping = inode->i_mapping;
+		struct page *page = find_or_create_page(mapping, 0,
+						mapping_gfp_mask(mapping) &
+						~__GFP_FS);
+		if (!page) {
+			ret = VM_FAULT_OOM;
+			goto out;
+		}
+		ret1 = __ceph_do_getattr(inode, page,
+					 CEPH_STAT_CAP_INLINE_DATA, true);
+		if (ret1 < 0 || off >= i_size_read(inode)) {
+			unlock_page(page);
+			page_cache_release(page);
+			ret = VM_FAULT_SIGBUS;
+			goto out;
+		}
+		if (ret1 < PAGE_CACHE_SIZE)
+			zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+		else
+			flush_dcache_page(page);
+		SetPageUptodate(page);
+		vmf->page = page;
+		ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
+	}
+out:
+	dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+	     inode, off, (size_t)PAGE_CACHE_SIZE, ret);
 	return ret;
 }
 
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	size_t len;
 	int want, got, ret;
 
+	if (ci->i_inline_version != CEPH_INLINE_NONE) {
+		struct page *locked_page = NULL;
+		if (off == 0) {
+			lock_page(page);
+			locked_page = page;
+		}
+		ret = ceph_uninline_data(vma->vm_file, locked_page);
+		if (locked_page)
+			unlock_page(locked_page);
+		if (ret < 0)
+			return VM_FAULT_SIGBUS;
+	}
+
 	if (off + PAGE_CACHE_SIZE <= size)
 		len = PAGE_CACHE_SIZE;
 	else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		want = CEPH_CAP_FILE_BUFFER;
 	while (1) {
 		got = 0;
-		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+				    &got, NULL);
 		if (ret == 0)
 			break;
 		if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 			ret = VM_FAULT_SIGBUS;
 	}
 out:
-	if (ret != VM_FAULT_LOCKED) {
+	if (ret != VM_FAULT_LOCKED)
 		unlock_page(page);
-	} else {
+	if (ret == VM_FAULT_LOCKED ||
+	    ci->i_inline_version != CEPH_INLINE_NONE) {
 		int dirty;
 		spin_lock(&ci->i_ceph_lock);
+		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
@@ -1315,6 +1394,178 @@ out:
 	return ret;
 }
 
+void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+			   char	*data, size_t len)
+{ /* Copy @len bytes of inline @data into @locked_page, or else into page 0 of the inode's mapping. */
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+
+	if (locked_page) {
+		page = locked_page;
+	} else {
+		if (i_size_read(inode) == 0) /* empty file: nothing to do */
+			return;
+		page = find_or_create_page(mapping, 0,
+					   mapping_gfp_mask(mapping) & ~__GFP_FS);
+		if (!page)
+			return;
+		if (PageUptodate(page)) { /* page 0 is already uptodate: leave it alone */
+			unlock_page(page);
+			page_cache_release(page);
+			return;
+		}
+	}
+
+	dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
+	     inode, ceph_vinop(inode), len, locked_page);
+
+	if (len > 0) {
+		void *kaddr = kmap_atomic(page);
+		memcpy(kaddr, data, len); /* NOTE(review): assumes len <= PAGE_CACHE_SIZE - confirm callers */
+		kunmap_atomic(kaddr);
+	}
+
+	if (page != locked_page) { /* page obtained here: mark uptodate, unlock, drop our ref */
+		if (len < PAGE_CACHE_SIZE)
+			zero_user_segment(page, len, PAGE_CACHE_SIZE);
+		else
+			flush_dcache_page(page);
+
+		SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+
+int ceph_uninline_data(struct file *filp, struct page *locked_page)
+{ /* Push the inode's inline data out via two OSD requests: a CREATE, then CMPXATTR + WRITE + SETXATTR. */
+	struct inode *inode = file_inode(filp);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_osd_request *req;
+	struct page *page = NULL;
+	u64 len, inline_version;
+	int err = 0;
+	bool from_pagecache = false;
+
+	spin_lock(&ci->i_ceph_lock);
+	inline_version = ci->i_inline_version;
+	spin_unlock(&ci->i_ceph_lock);
+
+	dout("uninline_data %p %llx.%llx inline_version %llu\n",
+	     inode, ceph_vinop(inode), inline_version);
+
+	if (inline_version == 1 || /* initial version, no data */
+	    inline_version == CEPH_INLINE_NONE)
+		goto out;
+
+	if (locked_page) {
+		page = locked_page;
+		WARN_ON(!PageUptodate(page)); /* a caller-supplied page is expected to be uptodate */
+	} else if (ceph_caps_issued(ci) &
+		   (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
+		page = find_get_page(inode->i_mapping, 0);
+		if (page) {
+			if (PageUptodate(page)) {
+				from_pagecache = true;
+				lock_page(page);
+			} else {
+				page_cache_release(page);
+				page = NULL;
+			}
+		}
+	}
+
+	if (page) {
+		len = i_size_read(inode);
+		if (len > PAGE_CACHE_SIZE)
+			len = PAGE_CACHE_SIZE;
+	} else {
+		page = __page_cache_alloc(GFP_NOFS); /* scratch page handed to getattr; freed at out: */
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = __ceph_do_getattr(inode, page,
+					CEPH_STAT_CAP_INLINE_DATA, true);
+		if (err < 0) {
+			/* no inline data */
+			if (err == -ENODATA)
+				err = 0;
+			goto out;
+		}
+		len = err; /* non-negative getattr return is used as the data length */
+	}
+
+	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+				    ceph_vino(inode), 0, &len, 0, 1,
+				    CEPH_OSD_OP_CREATE,
+				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+				    ci->i_snap_realm->cached_context,
+				    0, 0, false);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto out;
+	}
+
+	ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+	if (!err)
+		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+	ceph_osdc_put_request(req);
+	if (err < 0)
+		goto out;
+
+	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+				    ceph_vino(inode), 0, &len, 1, 3,
+				    CEPH_OSD_OP_WRITE,
+				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+				    ci->i_snap_realm->cached_context,
+				    ci->i_truncate_seq, ci->i_truncate_size,
+				    false);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto out;
+	}
+
+	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); /* op 1: the data write */
+
+	err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, /* op 0: compare "inline_version" */
+				    "inline_version", &inline_version,
+				    sizeof(inline_version),
+				    CEPH_OSD_CMPXATTR_OP_GT,
+				    CEPH_OSD_CMPXATTR_MODE_U64);
+	if (err)
+		goto out_put;
+
+	err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, /* op 2: store "inline_version" */
+				    "inline_version", &inline_version,
+				    sizeof(inline_version), 0, 0);
+	if (err)
+		goto out_put;
+
+	ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+	if (!err)
+		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+out_put:
+	ceph_osdc_put_request(req);
+	if (err == -ECANCELED) /* presumably the CMPXATTR guard failing; treated as success - TODO confirm */
+		err = 0;
+out:
+	if (page && page != locked_page) { /* never unlock or release the caller's locked_page */
+		if (from_pagecache) {
+			unlock_page(page);
+			page_cache_release(page);
+		} else
+			__free_pages(page, 0); /* scratch page from __page_cache_alloc() */
+	}
+
+	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
+	     inode, ceph_vinop(inode), inline_version, err);
+	return err;
+}
+
 static struct vm_operations_struct ceph_vmops = {
 	.fault		= ceph_filemap_fault,
 	.page_mkwrite	= ceph_page_mkwrite,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca66..b93c631 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
 			kuid_t uid, kgid_t gid, umode_t mode,
 			u64 xattr_version,
 			struct ceph_buffer *xattrs_buf,
-			u64 follows)
+			u64 follows, bool inline_data)
 {
 	struct ceph_mds_caps *fc;
 	struct ceph_msg *msg;
+	void *p;
+	size_t extra_len;
 
 	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
 	     " seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
 	     seq, issue_seq, mseq, follows, size, max_size,
 	     xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
 
-	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+	/* flock buffer size + inline version + inline data size */
+	extra_len = 4 + 8 + 4;
+	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
+			   GFP_NOFS, false);
 	if (!msg)
 		return -ENOMEM;
 
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
 	fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
 	fc->mode = cpu_to_le32(mode);
 
+	p = fc + 1;
+	/* flock buffer size */
+	ceph_encode_32(&p, 0);
+	/* inline version */
+	ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
+	/* inline data size */
+	ceph_encode_32(&p, 0);
+
 	fc->xattr_version = cpu_to_le64(xattr_version);
 	if (xattrs_buf) {
 		msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	u64 flush_tid = 0;
 	int i;
 	int ret;
+	bool inline_data;
 
 	held = cap->issued | cap->implemented;
 	revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 		xattr_version = ci->i_xattrs.version;
 	}
 
+	inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
 	spin_unlock(&ci->i_ceph_lock);
 
 	ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
 		op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
 		size, max_size, &mtime, &atime, time_warp_seq,
 		uid, gid, mode, xattr_version, xattr_blob,
-		follows);
+		follows, inline_data);
 	if (ret < 0) {
 		dout("error sending cap msg, must requeue %p\n", inode);
 		delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
 			     capsnap->time_warp_seq,
 			     capsnap->uid, capsnap->gid, capsnap->mode,
 			     capsnap->xattr_version, capsnap->xattr_blob,
-			     capsnap->follows);
+			     capsnap->follows, capsnap->inline_data);
 
 		next_follows = capsnap->follows + 1;
 		ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
  * requested from the MDS.
  */
 static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
-			    int *got, loff_t endoff, int *check_max, int *err)
+			    loff_t endoff, int *got, struct page **pinned_page,
+			    int *check_max, int *err)
 {
 	struct inode *inode = &ci->vfs_inode;
 	int ret = 0;
-	int have, implemented;
+	int have, implemented, _got = 0;
 	int file_wanted;
 
 	dout("get_cap_refs %p need %s want %s\n", inode,
 	     ceph_cap_string(need), ceph_cap_string(want));
+again:
 	spin_lock(&ci->i_ceph_lock);
 
 	/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 		     ceph_cap_string(need), ceph_cap_string(file_wanted));
 		*err = -EBADF;
 		ret = 1;
-		goto out;
+		goto out_unlock;
 	}
 
 	/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 				*check_max = 1;
 				ret = 1;
 			}
-			goto out;
+			goto out_unlock;
 		}
 		/*
 		 * If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 		 */
 		if (__ceph_have_pending_cap_snap(ci)) {
 			dout("get_cap_refs %p cap_snap_pending\n", inode);
-			goto out;
+			goto out_unlock;
 		}
 	}
 
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
 		     inode, ceph_cap_string(have), ceph_cap_string(not),
 		     ceph_cap_string(revoking));
 		if ((revoking & not) == 0) {
-			*got = need | (have & want);
-			__take_cap_refs(ci, *got);
+			_got = need | (have & want);
+			__take_cap_refs(ci, _got);
 			ret = 1;
 		}
 	} else {
 		dout("get_cap_refs %p have %s needed %s\n", inode,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
-out:
+out_unlock:
 	spin_unlock(&ci->i_ceph_lock);
+
+	if (ci->i_inline_version != CEPH_INLINE_NONE &&
+	    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+	    i_size_read(inode) > 0) {
+		int ret1;
+		struct page *page = find_get_page(inode->i_mapping, 0);
+		if (page) {
+			if (PageUptodate(page)) {
+				*pinned_page = page;
+				goto out;
+			}
+			page_cache_release(page);
+		}
+		/*
+		 * drop cap refs first because getattr while holding
+		 * caps refs can cause deadlock.
+		 */
+		ceph_put_cap_refs(ci, _got);
+		_got = 0;
+
+		/* getattr request will bring inline data into page cache */
+		ret1 = __ceph_do_getattr(inode, NULL,
+					 CEPH_STAT_CAP_INLINE_DATA, true);
+		if (ret1 >= 0) {
+			ret = 0;
+			goto again;
+		}
+		*err = ret1;
+		ret = 1;
+	}
+out:
 	dout("get_cap_refs %p ret %d got %s\n", inode,
-	     ret, ceph_cap_string(*got));
+	     ret, ceph_cap_string(_got));
+	*got = _got;
 	return ret;
 }
 
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
  * due to a small max_size, make sure we check_max_size (and possibly
  * ask the mds) so we don't get hung up indefinitely.
  */
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
-		  loff_t endoff)
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+		  loff_t endoff, int *got, struct page **pinned_page)
 {
 	int check_max, ret, err;
 
@@ -2179,8 +2229,8 @@ retry:
 	check_max = 0;
 	err = 0;
 	ret = wait_event_interruptible(ci->i_cap_wq,
-				       try_get_cap_refs(ci, need, want,
-							got, endoff,
+				       try_get_cap_refs(ci, need, want, endoff,
+							got, pinned_page,
 							&check_max, &err));
 	if (err)
 		ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
 static void handle_cap_grant(struct ceph_mds_client *mdsc,
 			     struct inode *inode, struct ceph_mds_caps *grant,
 			     void *snaptrace, int snaptrace_len,
+			     u64 inline_version,
+			     void *inline_data, int inline_len,
 			     struct ceph_buffer *xattr_buf,
 			     struct ceph_mds_session *session,
 			     struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 	bool queue_invalidate = false;
 	bool queue_revalidate = false;
 	bool deleted_inode = false;
+	bool fill_inline = false;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 	}
 	BUG_ON(cap->issued & ~cap->implemented);
 
+	if (inline_version > 0 && inline_version >= ci->i_inline_version) {
+		ci->i_inline_version = inline_version;
+		if (ci->i_inline_version != CEPH_INLINE_NONE &&
+		    (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+			fill_inline = true;
+	}
+
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 			wake = true;
 	}
 
+	if (fill_inline)
+		ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
+
 	if (queue_trunc) {
 		ceph_queue_vmtruncate(inode);
 		ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	u64 cap_id;
 	u64 size, max_size;
 	u64 tid;
+	u64 inline_version = 0;
+	void *inline_data = NULL;
+	u32  inline_len = 0;
 	void *snaptrace;
 	size_t snaptrace_len;
-	void *flock;
-	void *end;
-	u32 flock_len;
+	void *p, *end;
 
 	dout("handle_caps from mds%d\n", mds);
 
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 	snaptrace = h + 1;
 	snaptrace_len = le32_to_cpu(h->snap_trace_len);
+	p = snaptrace + snaptrace_len;
 
 	if (le16_to_cpu(msg->hdr.version) >= 2) {
-		void *p = snaptrace + snaptrace_len;
+		u32 flock_len;
 		ceph_decode_32_safe(&p, end, flock_len, bad);
 		if (p + flock_len > end)
 			goto bad;
-		flock = p;
-	} else {
-		flock = NULL;
-		flock_len = 0;
+		p += flock_len;
 	}
 
 	if (le16_to_cpu(msg->hdr.version) >= 3) {
 		if (op == CEPH_CAP_OP_IMPORT) {
-			void *p = flock + flock_len;
 			if (p + sizeof(*peer) > end)
 				goto bad;
 			peer = p;
+			p += sizeof(*peer);
 		} else if (op == CEPH_CAP_OP_EXPORT) {
 			/* recorded in unused fields */
 			peer = (void *)&h->size;
 		}
 	}
 
+	if (le16_to_cpu(msg->hdr.version) >= 4) {
+		ceph_decode_64_safe(&p, end, inline_version, bad);
+		ceph_decode_32_safe(&p, end, inline_len, bad);
+		if (p + inline_len > end)
+			goto bad;
+		inline_data = p;
+		p += inline_len;
+	}
+
 	/* lookup ino */
 	inode = ceph_find_inode(sb, vino);
 	ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		handle_cap_import(mdsc, inode, h, peer, session,
 				  &cap, &issued);
 		handle_cap_grant(mdsc, inode, h,  snaptrace, snaptrace_len,
+				 inline_version, inline_data, inline_len,
 				 msg->middle, session, cap, issued);
 		goto done_unlocked;
 	}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	case CEPH_CAP_OP_GRANT:
 		__ceph_caps_issued(ci, &issued);
 		issued |= __ceph_caps_dirty(ci);
-		handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
-				 session, cap, issued);
+		handle_cap_grant(mdsc, inode, h, NULL, 0,
+				 inline_version, inline_data, inline_len,
+				 msg->middle, session, cap, issued);
 		goto done_unlocked;
 
 	case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
 done:
 	mutex_unlock(&session->s_mutex);
 done_unlocked:
-	if (inode)
-		iput(inode);
+	iput(inode);
 	return;
 
 bad:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 681a853..c241603 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -183,7 +183,7 @@ more:
 	spin_unlock(&parent->d_lock);
 
 	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_dir_is_complete(dir)) {
+	if (!ceph_dir_is_complete_ordered(dir)) {
 		dout(" lost dir complete on %p; falling back to mds\n", dir);
 		dput(dentry);
 		err = -EAGAIN;
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 
 	/* always start with . and .. */
 	if (ctx->pos == 0) {
-		/* note dir version at start of readdir so we can tell
-		 * if any dentries get dropped */
-		fi->dir_release_count = atomic_read(&ci->i_release_count);
-
 		dout("readdir off 0 -> '.'\n");
 		if (!dir_emit(ctx, ".", 1, 
 			    ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	if ((ctx->pos == 2 || fi->dentry) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
-	    __ceph_dir_is_complete(ci) &&
+	    __ceph_dir_is_complete_ordered(ci) &&
 	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
 		u32 shared_gen = ci->i_shared_gen;
 		spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 
 	/* proceed with a normal readdir */
 
+	if (ctx->pos == 2) {
+		/* note dir version at start of readdir so we can tell
+		 * if any dentries get dropped */
+		fi->dir_release_count = atomic_read(&ci->i_release_count);
+		fi->dir_ordered_count = ci->i_ordered_count;
+	}
+
 more:
 	/* do we have the correct frag content buffered? */
 	if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -446,8 +449,12 @@ more:
 	 */
 	spin_lock(&ci->i_ceph_lock);
 	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
-		dout(" marking %p complete\n", inode);
-		__ceph_dir_set_complete(ci, fi->dir_release_count);
+		if (ci->i_ordered_count == fi->dir_ordered_count)
+			dout(" marking %p complete and ordered\n", inode);
+		else
+			dout(" marking %p complete\n", inode);
+		__ceph_dir_set_complete(ci, fi->dir_release_count,
+					fi->dir_ordered_count);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		acls.pagelist = NULL;
 	}
 	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry)
+	if (!err &&
+	    !req->r_reply_info.head->is_target &&
+	    !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
 	ceph_mdsc_put_request(req);
 out:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f8e357..ce74b39 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+enum { /* values stored through the int out-parameter of the sync read path */
+	CHECK_EOF = 1, /* the read may have bounced off EOF */
+	READ_INLINE = 2, /* the inode still has inline data (i_inline_version != CEPH_INLINE_NONE) */
+};
+
 /*
  * Read a range of bytes striped over one or more objects.  Iterate over
  * objects we stripe over.  (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
 		ret = read;
 		/* did we bounce off eof? */
 		if (pos + left > inode->i_size)
-			*checkeof = 1;
+			*checkeof = CHECK_EOF;
 	}
 
 	dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 		snapc = ci->i_snap_realm->cached_context;
 		vino = ceph_vino(inode);
 		req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-					    vino, pos, &len,
+					    vino, pos, &len, 0,
 					    2,/*include a 'startsync' command*/
 					    CEPH_OSD_OP_WRITE, flags, snapc,
 					    ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 			break;
 		}
 
+		osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
 		n = iov_iter_get_pages_alloc(from, &pages, len, &start);
 		if (unlikely(n < 0)) {
 			ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 		snapc = ci->i_snap_realm->cached_context;
 		vino = ceph_vino(inode);
 		req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-					    vino, pos, &len, 1,
+					    vino, pos, &len, 0, 1,
 					    CEPH_OSD_OP_WRITE, flags, snapc,
 					    ci->i_truncate_seq,
 					    ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	size_t len = iocb->ki_nbytes;
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct page *pinned_page = NULL;
 	ssize_t ret;
 	int want, got = 0;
-	int checkeof = 0, read = 0;
+	int retry_op = 0, read = 0;
 
 again:
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
 	else
 		want = CEPH_CAP_FILE_CACHE;
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
 	if (ret < 0)
 		return ret;
 
@@ -827,8 +835,12 @@ again:
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		/* hmm, this isn't really async... */
-		ret = ceph_sync_read(iocb, to, &checkeof);
+		if (ci->i_inline_version == CEPH_INLINE_NONE) {
+			/* hmm, this isn't really async... */
+			ret = ceph_sync_read(iocb, to, &retry_op);
+		} else {
+			retry_op = READ_INLINE;
+		}
 	} else {
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
 	}
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+	if (pinned_page) {
+		page_cache_release(pinned_page);
+		pinned_page = NULL;
+	}
 	ceph_put_cap_refs(ci, got);
+	if (retry_op && ret >= 0) {
+		int statret;
+		struct page *page = NULL;
+		loff_t i_size;
+		if (retry_op == READ_INLINE) {
+			page = __page_cache_alloc(GFP_NOFS);
+			if (!page)
+				return -ENOMEM;
+		}
 
-	if (checkeof && ret >= 0) {
-		int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+		statret = __ceph_do_getattr(inode, page,
+					    CEPH_STAT_CAP_INLINE_DATA, !!page);
+		if (statret < 0) {
+			if (page)	/* only allocated for READ_INLINE */
+				__free_page(page);
+			if (statret != -ENODATA)
+				return statret;
+			BUG_ON(retry_op != READ_INLINE);
+			goto again;	/* -ENODATA: retry the read from the top */
+		}
+
+		i_size = i_size_read(inode);
+		if (retry_op == READ_INLINE) {
+			/* does not support inline data > PAGE_SIZE */
+			if (i_size > PAGE_CACHE_SIZE) {
+				ret = -EIO;
+			} else if (iocb->ki_pos < i_size) {
+				loff_t end = min_t(loff_t, i_size,
+						   iocb->ki_pos + len);
+				if (statret < end)
+					zero_user_segment(page, statret, end);
+				ret = copy_page_to_iter(page,
+						iocb->ki_pos & ~PAGE_MASK,
+						end - iocb->ki_pos, to);
+				iocb->ki_pos += ret;
+			} else {
+				ret = 0;
+			}
+			__free_pages(page, 0);
+			return ret;
+		}
 
 		/* hit EOF or hole? */
-		if (statret == 0 && iocb->ki_pos < inode->i_size &&
+		if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
 			ret < len) {
 			dout("sync_read hit hole, ppos %lld < size %lld"
 			     ", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
 
 			read += ret;
 			len -= ret;
-			checkeof = 0;
+			retry_op = 0;
 			goto again;
 		}
 	}
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (err)
 		goto out;
 
+	if (ci->i_inline_version != CEPH_INLINE_NONE) {
+		err = ceph_uninline_data(file, NULL);
+		if (err < 0)
+			goto out;
+	}
+
 retry_snap:
 	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
 		err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
 	else
 		want = CEPH_CAP_FILE_BUFFER;
 	got = 0;
-	err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+	err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+			    &got, NULL);
 	if (err < 0)
 		goto out;
 
@@ -969,6 +1030,7 @@ retry_snap:
 	if (written >= 0) {
 		int dirty;
 		spin_lock(&ci->i_ceph_lock);
+		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 					ceph_vino(inode),
 					offset, length,
-					1, op,
+					0, 1, op,
 					CEPH_OSD_FLAG_WRITE |
 					CEPH_OSD_FLAG_ONDISK,
 					NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
 		goto unlock;
 	}
 
+	if (ci->i_inline_version != CEPH_INLINE_NONE) {
+		ret = ceph_uninline_data(file, NULL);
+		if (ret < 0)
+			goto unlock;
+	}
+
 	size = i_size_read(inode);
 	if (!(mode & FALLOC_FL_KEEP_SIZE))
 		endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
 	else
 		want = CEPH_CAP_FILE_BUFFER;
 
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
 	if (ret < 0)
 		goto unlock;
 
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
 
 	if (!ret) {
 		spin_lock(&ci->i_ceph_lock);
+		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a5593d5..f61a741 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ci->i_ceph_lock);
 
 	ci->i_version = 0;
+	ci->i_inline_version = 0;
 	ci->i_time_warp_seq = 0;
 	ci->i_ceph_flags = 0;
+	ci->i_ordered_count = 0;
 	atomic_set(&ci->i_release_count, 1);
 	atomic_set(&ci->i_complete_count, 0);
 	ci->i_symlink = NULL;
@@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
  * Populate an inode based on info from mds.  May be called on new or
  * existing inodes.
  */
-static int fill_inode(struct inode *inode,
+static int fill_inode(struct inode *inode, struct page *locked_page,
 		      struct ceph_mds_reply_info_in *iinfo,
 		      struct ceph_mds_reply_dirfrag *dirinfo,
 		      struct ceph_mds_session *session,
@@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode,
 	bool wake = false;
 	bool queue_trunc = false;
 	bool new_version = false;
+	bool fill_inline = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode,
 	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
 	    !__ceph_dir_is_complete(ci)) {
 		dout(" marking %p complete (empty)\n", inode);
-		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
+		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
+					ci->i_ordered_count);
 	}
 
 	/* were we issued a capability? */
@@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode,
 			   ceph_vinop(inode));
 		__ceph_get_fmode(ci, cap_fmode);
 	}
+
+	if (iinfo->inline_version > 0 &&
+	    iinfo->inline_version >= ci->i_inline_version) {
+		int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+		ci->i_inline_version = iinfo->inline_version;
+		if (ci->i_inline_version != CEPH_INLINE_NONE &&
+		    (locked_page ||
+		     (le32_to_cpu(info->cap.caps) & cache_caps)))
+			fill_inline = true;
+	}
+
 	spin_unlock(&ci->i_ceph_lock);
 
+	if (fill_inline)
+		ceph_fill_inline_data(inode, locked_page,
+				      iinfo->inline_data, iinfo->inline_len);
+
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
@@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		struct inode *dir = req->r_locked_dir;
 
 		if (dir) {
-			err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+			err = fill_inode(dir, NULL,
+					 &rinfo->diri, rinfo->dirfrag,
 					 session, req->r_request_started, -1,
 					 &req->r_caps_reservation);
 			if (err < 0)
@@ -1132,7 +1152,7 @@ retry_lookup:
 		}
 		req->r_target_inode = in;
 
-		err = fill_inode(in, &rinfo->targeti, NULL,
+		err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
 				session, req->r_request_started,
 				(!req->r_aborted && rinfo->head->result == 0) ?
 				req->r_fmode : -1,
@@ -1204,8 +1224,8 @@ retry_lookup:
 			ceph_invalidate_dentry_lease(dn);
 
 			/* d_move screws up sibling dentries' offsets */
-			ceph_dir_clear_complete(dir);
-			ceph_dir_clear_complete(olddir);
+			ceph_dir_clear_ordered(dir);
+			ceph_dir_clear_ordered(olddir);
 
 			dout("dn %p gets new offset %lld\n", req->r_old_dentry,
 			     ceph_dentry(req->r_old_dentry)->offset);
@@ -1217,6 +1237,7 @@ retry_lookup:
 		if (!rinfo->head->is_target) {
 			dout("fill_trace null dentry\n");
 			if (dn->d_inode) {
+				ceph_dir_clear_ordered(dir);
 				dout("d_delete %p\n", dn);
 				d_delete(dn);
 			} else {
@@ -1233,7 +1254,7 @@ retry_lookup:
 
 		/* attach proper inode */
 		if (!dn->d_inode) {
-			ceph_dir_clear_complete(dir);
+			ceph_dir_clear_ordered(dir);
 			ihold(in);
 			dn = splice_dentry(dn, in, &have_lease);
 			if (IS_ERR(dn)) {
@@ -1263,7 +1284,7 @@ retry_lookup:
 		BUG_ON(!dir);
 		BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
-		ceph_dir_clear_complete(dir);
+		ceph_dir_clear_ordered(dir);
 		ihold(in);
 		dn = splice_dentry(dn, in, NULL);
 		if (IS_ERR(dn)) {
@@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
 			dout("new_inode badness got %d\n", err);
 			continue;
 		}
-		rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+		rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
 				req->r_request_started, -1,
 				&req->r_caps_reservation);
 		if (rc < 0) {
@@ -1416,7 +1437,7 @@ retry_lookup:
 			}
 		}
 
-		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+		if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
 			       req->r_request_started, -1,
 			       &req->r_caps_reservation) < 0) {
 			pr_err("fill_inode badness on %p\n", in);
@@ -1899,7 +1920,8 @@ out_put:
  * Verify that we have a lease on the given mask.  If not,
  * do a getattr against an mds.
  */
-int ceph_do_getattr(struct inode *inode, int mask, bool force)
+int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+		      int mask, bool force)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
 		return 0;
 	}
 
-	dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
+	dout("do_getattr inode %p mask %s mode 0%o\n",
+	     inode, ceph_cap_string(mask), inode->i_mode);
 	if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
 		return 0;
 
@@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
 	ihold(inode);
 	req->r_num_caps = 1;
 	req->r_args.getattr.mask = cpu_to_le32(mask);
+	req->r_locked_page = locked_page;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	if (locked_page && err == 0) {
+		u64 inline_version = req->r_reply_info.targeti.inline_version;
+		if (inline_version == 0) {
+			/* the reply is supposed to contain inline data */
+			err = -EINVAL;
+		} else if (inline_version == CEPH_INLINE_NONE) {
+			err = -ENODATA;
+		} else {
+			err = req->r_reply_info.targeti.inline_len;
+		}
+	}
 	ceph_mdsc_put_request(req);
 	dout("do_getattr result=%d\n", err);
 	return err;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fbc39c4..c35c5c6 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -9,6 +9,8 @@
 #include <linux/ceph/pagelist.h>
 
 static u64 lock_secret;
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+                                         struct ceph_mds_request *req);
 
 static inline u64 secure_addr(void *addr)
 {
@@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 	u64 length = 0;
 	u64 owner;
 
+	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
+		wait = 0;
+
 	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 	req->r_args.filelock_change.length = cpu_to_le64(length);
 	req->r_args.filelock_change.wait = wait;
 
+	if (wait)
+		req->r_wait_for_completion = ceph_lock_wait_for_completion;
+
 	err = ceph_mdsc_do_request(mdsc, inode, req);
 
 	if (operation == CEPH_MDS_OP_GETFILELOCK) {
@@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 	return err;
 }
 
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+                                         struct ceph_mds_request *req)
+{
+	struct ceph_mds_request *intr_req;	/* unlock sent if the wait is interrupted */
+	struct inode *inode = req->r_inode;
+	int err, lock_type;
+	/* Wait hook installed by ceph_lock_message() for blocking SETFILELOCK. */
+	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
+	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
+		lock_type = CEPH_LOCK_FCNTL_INTR;
+	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
+		lock_type = CEPH_LOCK_FLOCK_INTR;
+	else
+		BUG_ON(1);
+	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
+	/* zero means the request completed; non-zero means we were interrupted */
+	err = wait_for_completion_interruptible(&req->r_completion);
+	if (!err)
+		return 0;
+
+	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
+	     req->r_tid);
+
+	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
+					    USE_AUTH_MDS);
+	if (IS_ERR(intr_req))
+		return PTR_ERR(intr_req);
+
+	intr_req->r_inode = inode;
+	ihold(inode);
+	intr_req->r_num_caps = 1;
+	/* same lock arguments, resent as an UNLOCK under the matching *_INTR rule */
+	intr_req->r_args.filelock_change = req->r_args.filelock_change;
+	intr_req->r_args.filelock_change.rule = lock_type;
+	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
+
+	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
+	ceph_mdsc_put_request(intr_req);
+
+	if (err && err != -ERESTARTSYS)
+		return err;
+	/* then wait, uninterruptibly this time, for the original request */
+	wait_for_completion(&req->r_completion);
+	return 0;
+}
+
 /**
  * Attempt to set an fcntl lock.
  * For now, this just goes away to the server. Later it may be more awesome.
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 				     err);
 			}
 		}
-
-	} else if (err == -ERESTARTSYS) {
-		dout("undoing lock\n");
-		ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-				  CEPH_LOCK_UNLOCK, 0, fl);
 	}
 	return err;
 }
@@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 					  file, CEPH_LOCK_UNLOCK, 0, fl);
 			dout("got %d on flock_lock_file_wait, undid lock", err);
 		}
-	} else if (err == -ERESTARTSYS) {
-		dout("undoing lock\n");
-		ceph_lock_message(CEPH_LOCK_FLOCK,
-				  CEPH_MDS_OP_SETFILELOCK,
-				  file, CEPH_LOCK_UNLOCK, 0, fl);
 	}
 	return err;
 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a92d3f5..d2171f4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end,
 	ceph_decode_need(p, end, info->xattr_len, bad);
 	info->xattr_data = *p;
 	*p += info->xattr_len;
+
+	if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
+		ceph_decode_64_safe(p, end, info->inline_version, bad);
+		ceph_decode_32_safe(p, end, info->inline_len, bad);
+		ceph_decode_need(p, end, info->inline_len, bad);
+		info->inline_data = *p;
+		*p += info->inline_len;
+	} else
+		info->inline_version = CEPH_INLINE_NONE;
+
 	return 0;
 bad:
 	return err;
@@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref)
 	}
 	if (req->r_locked_dir)
 		ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
-	if (req->r_target_inode)
-		iput(req->r_target_inode);
+	iput(req->r_target_inode);
 	if (req->r_dentry)
 		dput(req->r_dentry);
 	if (req->r_old_dentry)
@@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	/*
 	 * Serialize client metadata into waiting buffer space, using
 	 * the format that userspace expects for map<string, string>
+	 *
+	 * ClientSession messages with metadata are v2
 	 */
-	msg->hdr.version = 2;  /* ClientSession messages with metadata are v2 */
+	msg->hdr.version = cpu_to_le16(2);
+	msg->hdr.compat_version = cpu_to_le16(1);
 
 	/* The write pointer, following the session_head structure */
 	p = msg->front.iov_base + sizeof(*h);
@@ -1066,8 +1078,7 @@ out:
 	session->s_cap_iterator = NULL;
 	spin_unlock(&session->s_cap_lock);
 
-	if (last_inode)
-		iput(last_inode);
+	iput(last_inode);
 	if (old_cap)
 		ceph_put_cap(session->s_mdsc, old_cap);
 
@@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 		goto out_free2;
 	}
 
-	msg->hdr.version = 2;
+	msg->hdr.version = cpu_to_le16(2);
 	msg->hdr.tid = cpu_to_le64(req->r_tid);
 
 	head = msg->front.iov_base;
@@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
 			&req->r_completion, req->r_timeout);
 		if (err == 0)
 			err = -EIO;
+	} else if (req->r_wait_for_completion) {
+		err = req->r_wait_for_completion(mdsc, req);
 	} else {
 		err = wait_for_completion_killable(&req->r_completion);
 	}
@@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
 	return msg;
 }
 
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	/* sign msg with the auth handshake of the MDS session behind con */
+	struct ceph_mds_session *session = con->private;
+	return ceph_auth_sign_message(&session->s_auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	/* check msg's signature using the auth handshake of con's MDS session */
+	struct ceph_mds_session *session = con->private;
+	return ceph_auth_check_message_signature(&session->s_auth, msg);
+}
+
 static const struct ceph_connection_operations mds_con_ops = {
 	.get = con_get,
 	.put = con_put,
@@ -3753,6 +3780,8 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.invalidate_authorizer = invalidate_authorizer,
 	.peer_reset = peer_reset,
 	.alloc_msg = mds_alloc_msg,
+	.sign_message = sign_message,
+	.check_message_signature = check_message_signature,
 };
 
 /* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3288359..e2817d0 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in {
 	char *symlink;
 	u32 xattr_len;
 	char *xattr_data;
+	u64 inline_version;
+	u32 inline_len;
+	char *inline_data;
 };
 
 /*
@@ -166,6 +169,11 @@ struct ceph_mds_client;
  */
 typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
 					     struct ceph_mds_request *req);
+/*
+ * wait for request completion callback, called from ceph_mdsc_do_request()
+ */
+typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc,
+						 struct ceph_mds_request *req);
 
 /*
  * an in-flight mds request
@@ -215,6 +223,7 @@ struct ceph_mds_request {
 	int r_request_release_offset;
 	struct ceph_msg  *r_reply;
 	struct ceph_mds_reply_info_parsed r_reply_info;
+	struct page *r_locked_page;
 	int r_err;
 	bool r_aborted;
 
@@ -239,6 +248,7 @@ struct ceph_mds_request {
 	struct completion r_completion;
 	struct completion r_safe_completion;
 	ceph_mds_request_callback_t r_callback;
+	ceph_mds_request_wait_callback_t r_wait_for_completion;
 	struct list_head  r_unsafe_item;  /* per-session unsafe list item */
 	bool		  r_got_unsafe, r_got_safe, r_got_result;
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f01645a..ce35fbd 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
 	return 0;
 }
 
+
+static struct ceph_snap_context *empty_snapc;
+
 /*
  * build the snap context for a given realm.
  */
@@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 		return 0;
 	}
 
+	if (num == 0 && realm->seq == empty_snapc->seq) {
+		ceph_get_snap_context(empty_snapc);
+		snapc = empty_snapc;
+		goto done;
+	}
+
 	/* alloc new snap context */
 	err = -ENOMEM;
 	if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 	     realm->ino, realm, snapc, snapc->seq,
 	     (unsigned int) snapc->num_snaps);
 
-	if (realm->cached_context)
-		ceph_put_snap_context(realm->cached_context);
+done:
+	ceph_put_snap_context(realm->cached_context);
 	realm->cached_context = snapc;
 	return 0;
 
@@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 		   cap_snap.  lucky us. */
 		dout("queue_cap_snap %p already pending\n", inode);
 		kfree(capsnap);
+	} else if (ci->i_snap_realm->cached_context == empty_snapc) {
+		dout("queue_cap_snap %p empty snapc\n", inode);
+		kfree(capsnap);
 	} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
 			    CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
 		struct ceph_snap_context *snapc = ci->i_head_snapc;
@@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 			capsnap->xattr_version = 0;
 		}
 
+		capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
 		/* dirty page count moved from _head to this cap_snap;
 		   all subsequent writes page dirties occur _after_ this
 		   snapshot. */
@@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
 		if (!inode)
 			continue;
 		spin_unlock(&realm->inodes_with_caps_lock);
-		if (lastinode)
-			iput(lastinode);
+		iput(lastinode);
 		lastinode = inode;
 		ceph_queue_cap_snap(ci);
 		spin_lock(&realm->inodes_with_caps_lock);
 	}
 	spin_unlock(&realm->inodes_with_caps_lock);
-	if (lastinode)
-		iput(lastinode);
+	iput(lastinode);
 
 	list_for_each_entry(child, &realm->children, child_item) {
 		dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
@@ -928,5 +940,16 @@ out:
 	return;
 }
 
+int __init ceph_snap_init(void)
+{
+	empty_snapc = ceph_create_snap_context(0, GFP_NOFS);	/* shared by snap-less realms */
+	if (!empty_snapc)
+		return -ENOMEM;
+	empty_snapc->seq = 1;	/* build_snap_context() reuses it when realm->seq matches */
+	return 0;
+}
 
-
+void ceph_snap_exit(void)
+{
+	ceph_put_snap_context(empty_snapc);	/* release the context set up by ceph_snap_init() */
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f6e1237..50f06cd 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	struct ceph_fs_client *fsc;
 	const u64 supported_features =
 		CEPH_FEATURE_FLOCK |
-		CEPH_FEATURE_DIRLAYOUTHASH;
+		CEPH_FEATURE_DIRLAYOUTHASH |
+		CEPH_FEATURE_MDS_INLINE_DATA;
 	const u64 required_features = 0;
 	int page_count;
 	size_t size;
@@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = {
 };
 MODULE_ALIAS_FS("ceph");
 
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
 static int __init init_ceph(void)
 {
 	int ret = init_caches();
@@ -1028,15 +1026,20 @@ static int __init init_ceph(void)
 
 	ceph_flock_init();
 	ceph_xattr_init();
+	ret = ceph_snap_init();
+	if (ret)
+		goto out_xattr;
 	ret = register_filesystem(&ceph_fs_type);
 	if (ret)
-		goto out_icache;
+		goto out_snap;
 
 	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
 
 	return 0;
 
-out_icache:
+out_snap:
+	ceph_snap_exit();
+out_xattr:
 	ceph_xattr_exit();
 	destroy_caches();
 out:
@@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void)
 {
 	dout("exit_ceph\n");
 	unregister_filesystem(&ceph_fs_type);
+	ceph_snap_exit();
 	ceph_xattr_exit();
 	destroy_caches();
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b82f507..e1aa32d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -161,6 +161,7 @@ struct ceph_cap_snap {
 	u64 time_warp_seq;
 	int writing;   /* a sync write is still in progress */
 	int dirty_pages;     /* dirty pages awaiting writeback */
+	bool inline_data;
 };
 
 static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -253,9 +254,11 @@ struct ceph_inode_info {
 	spinlock_t i_ceph_lock;
 
 	u64 i_version;
+	u64 i_inline_version;
 	u32 i_time_warp_seq;
 
 	unsigned i_ceph_flags;
+	int i_ordered_count;
 	atomic_t i_release_count;
 	atomic_t i_complete_count;
 
@@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 /*
  * Ceph inode.
  */
-#define CEPH_I_NODELAY   4  /* do not delay cap release */
-#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
+#define CEPH_I_DIR_ORDERED	1  /* dentries in dir are ordered */
+#define CEPH_I_NODELAY		4  /* do not delay cap release */
+#define CEPH_I_FLUSH		8  /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH		16 /* do not flush dirty caps */
 
 static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
-					   int release_count)
+					   int release_count, int ordered_count)
 {
 	atomic_set(&ci->i_complete_count, release_count);
+	if (ci->i_ordered_count == ordered_count)
+		ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;	/* count unchanged since the caller sampled it */
+	else
+		ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
 }
 
 static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
 		atomic_read(&ci->i_release_count);
 }
 
+static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
+{
+	bool complete = __ceph_dir_is_complete(ci);	/* must be complete as well */
+	return complete && (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
+}
+
 static inline void ceph_dir_clear_complete(struct inode *inode)
 {
 	__ceph_dir_clear_complete(ceph_inode(inode));
 }
 
-static inline bool ceph_dir_is_complete(struct inode *inode)
+static inline void ceph_dir_clear_ordered(struct inode *inode)
 {
-	return __ceph_dir_is_complete(ceph_inode(inode));
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_ordered_count++;	/* ordered counts sampled before this no longer match */
+	ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
+	spin_unlock(&ci->i_ceph_lock);
 }
 
+static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
+{
+	struct ceph_inode_info *info = ceph_inode(inode);
+	bool complete_ordered;
+	spin_lock(&info->i_ceph_lock);	/* sample flags and counts under the lock */
+	complete_ordered = __ceph_dir_is_complete_ordered(info);
+	spin_unlock(&info->i_ceph_lock);
+	return complete_ordered;
+}
 
 /* find a specific frag @f */
 extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@@ -580,6 +607,7 @@ struct ceph_file_info {
 	char *last_name;       /* last entry in previous chunk */
 	struct dentry *dentry; /* next dentry (for dcache readdir) */
 	int dir_release_count;
+	int dir_ordered_count;
 
 	/* used for -o dirstat read() on directory thing */
 	char *dir_info;
@@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
 extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 				  struct ceph_cap_snap *capsnap);
 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+extern int ceph_snap_init(void);
+extern void ceph_snap_exit(void);
 
 /*
  * a cap_snap is "pending" if it is still awaiting an in-progress
@@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
 extern void ceph_queue_invalidate(struct inode *inode);
 extern void ceph_queue_writeback(struct inode *inode);
 
-extern int ceph_do_getattr(struct inode *inode, int mask, bool force);
+extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+			     int mask, bool force);
+static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
+{
+	return __ceph_do_getattr(inode, NULL, mask, force);	/* NULL: no locked page */
+}
 extern int ceph_permission(struct inode *inode, int mask);
 extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
 extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
 				      int mds, int drop, int unless);
 
 extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
-			 int *got, loff_t endoff);
+			 loff_t endoff, int *got, struct page **pinned_page);
 
 /* for counting open files by mode */
 static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
@@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags, umode_t mode,
 			    int *opened);
 extern int ceph_release(struct inode *inode, struct file *filp);
-
+extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+				  char *data, size_t len);
+int ceph_uninline_data(struct file *filp, struct page *locked_page);
 /* dir.c */
 extern const struct file_operations ceph_dir_fops;
 extern const struct inode_operations ceph_dir_iops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 678b0d2..5a492ca 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 	struct ceph_pagelist *pagelist = NULL;
 	int err;
 
-	if (value) {
+	if (size > 0) {
 		/* copy value into pagelist */
 		pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
 		if (!pagelist)
@@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 		err = ceph_pagelist_append(pagelist, value, size);
 		if (err)
 			goto out;
-	} else {
+	} else if (!value) {
 		flags |= CEPH_XATTR_REMOVE;
 	}
 
@@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_setxattr(dentry, name, value, size, flags);
 
+	if (size == 0)
+		value = "";  /* empty EA, do not remove */
+
 	return __ceph_setxattr(dentry, name, value, size, flags);
 }
 
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604..719e1ce 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
 			break;
 		case 2:
 			dst[dst_byte_offset++] |= (src_byte);
-			dst[dst_byte_offset] = 0;
 			current_bit_offset = 0;
 			break;
 		}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec..6f4e659 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 {
 	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct dentry *ecryptfs_dentry = file->f_path.dentry;
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
 	struct ecryptfs_file_info *file_info;
 
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-		ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
-	    && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
-		|| (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
-		|| (file->f_flags & O_APPEND))) {
-		printk(KERN_WARNING "Mount has encrypted view enabled; "
-		       "files may only be read\n");
-		rc = -EPERM;
-		goto out;
-	}
 	/* Released in ecryptfs_release or end of function if failure */
 	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e1..917bd5c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
 	(*size) = 0;
 	if (data[0] < 192) {
 		/* One-byte length */
-		(*size) = (unsigned char)data[0];
+		(*size) = data[0];
 		(*length_size) = 1;
 	} else if (data[0] < 224) {
 		/* Two-byte length */
-		(*size) = (((unsigned char)(data[0]) - 192) * 256);
-		(*size) += ((unsigned char)(data[1]) + 192);
+		(*size) = (data[0] - 192) * 256;
+		(*size) += data[1] + 192;
 		(*length_size) = 2;
 	} else if (data[0] == 255) {
 		/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd..d9eb84b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 {
 	struct super_block *s;
 	struct ecryptfs_sb_info *sbi;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct ecryptfs_dentry_info *root_info;
 	const char *err = "Getting sb failed";
 	struct inode *inode;
@@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		err = "Error parsing options";
 		goto out;
 	}
+	mount_crypt_stat = &sbi->mount_crypt_stat;
 
 	s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 	if (IS_ERR(s)) {
@@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 
 	/**
 	 * Set the POSIX ACL flag based on whether they're enabled in the lower
-	 * mount. Force a read-only eCryptfs mount if the lower mount is ro.
-	 * Allow a ro eCryptfs mount even when the lower mount is rw.
+	 * mount.
 	 */
 	s->s_flags = flags & ~MS_POSIXACL;
-	s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+	s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+	/*
+	 * Force a read-only eCryptfs mount when:
+	 *   1) The lower mount is ro
+	 *   2) The ecryptfs_encrypted_view mount option is specified
+	 */
+	if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+	    mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+		s->s_flags |= MS_RDONLY;
 
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15..370420b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,7 +267,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 	handle_t *handle;
 	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
-	unsigned int w_flags = 0;
 	unsigned int tmp_data_size, data_size, replaced_size;
 	int err2, jblocks, retries = 0;
 	int replaced_count = 0;
@@ -288,9 +287,6 @@ again:
 		return 0;
 	}
 
-	if (segment_eq(get_fs(), KERNEL_DS))
-		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
 	orig_blk_offset = orig_page_offset * blocks_per_page +
 		data_offset_in_page;
 
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 966ace8..28d0c7ab 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -415,7 +415,7 @@ err_unlock:
 err_region:
 	unregister_chrdev_region(devt, 1);
 err:
-	fuse_conn_kill(fc);
+	fuse_abort_conn(fc);
 	goto out;
 }
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca88731..ba11079 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 }
 EXPORT_SYMBOL_GPL(fuse_request_send);
 
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+{
+	struct fuse_req *req;
+	ssize_t ret;	/* reply error code, or out arg size if argvar is set */
+
+	req = fuse_get_req(fc, 0);	/* NOTE(review): 0 presumably = no pages */
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	/* Copy the caller's in/out argument descriptors, then send the request. */
+	req->in.h.opcode = args->in.h.opcode;
+	req->in.h.nodeid = args->in.h.nodeid;
+	req->in.numargs = args->in.numargs;
+	memcpy(req->in.args, args->in.args,
+	       args->in.numargs * sizeof(struct fuse_in_arg));
+	req->out.argvar = args->out.argvar;
+	req->out.numargs = args->out.numargs;
+	memcpy(req->out.args, args->out.args,
+	       args->out.numargs * sizeof(struct fuse_arg));
+	fuse_request_send(fc, req);
+	ret = req->out.h.error;	/* error field of the reply header */
+	if (!ret && args->out.argvar) {
+		BUG_ON(args->out.numargs != 1);	/* argvar needs exactly one out arg */
+		ret = req->out.args[0].size;	/* success: return that arg's size */
+	}
+	fuse_put_request(fc, req);	/* dropped on success and on error alike */
+
+	return ret;
+}
+
 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
 					    struct fuse_req *req)
 {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df562cc..252b8a5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry)
 	fuse_invalidate_entry_cache(entry);
 }
 
-static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
 			     u64 nodeid, struct qstr *name,
 			     struct fuse_entry_out *outarg)
 {
 	memset(outarg, 0, sizeof(struct fuse_entry_out));
-	req->in.h.opcode = FUSE_LOOKUP;
-	req->in.h.nodeid = nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = name->len + 1;
-	req->in.args[0].value = name->name;
-	req->out.numargs = 1;
+	args->in.h.opcode = FUSE_LOOKUP;
+	args->in.h.nodeid = nodeid;
+	args->in.numargs = 1;
+	args->in.args[0].size = name->len + 1;
+	args->in.args[0].value = name->name;
+	args->out.numargs = 1;
 	if (fc->minor < 9)
-		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+		args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 	else
-		req->out.args[0].size = sizeof(struct fuse_entry_out);
-	req->out.args[0].value = outarg;
+		args->out.args[0].size = sizeof(struct fuse_entry_out);
+	args->out.args[0].value = outarg;
 }
 
 u64 fuse_get_attr_version(struct fuse_conn *fc)
@@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 		goto invalid;
 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
 		 (flags & LOOKUP_REVAL)) {
-		int err;
 		struct fuse_entry_out outarg;
-		struct fuse_req *req;
+		FUSE_ARGS(args);
 		struct fuse_forget_link *forget;
 		u64 attr_version;
 
@@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 			goto out;
 
 		fc = get_fuse_conn(inode);
-		req = fuse_get_req_nopages(fc);
-		ret = PTR_ERR(req);
-		if (IS_ERR(req))
-			goto out;
 
 		forget = fuse_alloc_forget();
-		if (!forget) {
-			fuse_put_request(fc, req);
-			ret = -ENOMEM;
+		ret = -ENOMEM;
+		if (!forget)
 			goto out;
-		}
 
 		attr_version = fuse_get_attr_version(fc);
 
 		parent = dget_parent(entry);
-		fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+		fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
 				 &entry->d_name, &outarg);
-		fuse_request_send(fc, req);
+		ret = fuse_simple_request(fc, &args);
 		dput(parent);
-		err = req->out.h.error;
-		fuse_put_request(fc, req);
 		/* Zero nodeid is same as -ENOENT */
-		if (!err && !outarg.nodeid)
-			err = -ENOENT;
-		if (!err) {
+		if (!ret && !outarg.nodeid)
+			ret = -ENOENT;
+		if (!ret) {
 			fi = get_fuse_inode(inode);
 			if (outarg.nodeid != get_node_id(inode)) {
 				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
@@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 			spin_unlock(&fc->lock);
 		}
 		kfree(forget);
-		if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+		if (ret == -ENOMEM)
+			goto out;
+		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
 			goto invalid;
 
 		fuse_change_attributes(inode, &outarg.attr,
@@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 		     struct fuse_entry_out *outarg, struct inode **inode)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_forget_link *forget;
 	u64 attr_version;
 	int err;
@@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 	if (name->len > FUSE_NAME_MAX)
 		goto out;
 
-	req = fuse_get_req_nopages(fc);
-	err = PTR_ERR(req);
-	if (IS_ERR(req))
-		goto out;
 
 	forget = fuse_alloc_forget();
 	err = -ENOMEM;
-	if (!forget) {
-		fuse_put_request(fc, req);
+	if (!forget)
 		goto out;
-	}
 
 	attr_version = fuse_get_attr_version(fc);
 
-	fuse_lookup_init(fc, req, nodeid, name, outarg);
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	fuse_lookup_init(fc, &args, nodeid, name, outarg);
+	err = fuse_simple_request(fc, &args);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
 	if (err || !outarg->nodeid)
 		goto out_put_forget;
@@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	int err;
 	struct inode *inode;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_forget_link *forget;
 	struct fuse_create_in inarg;
 	struct fuse_open_out outopen;
@@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	if (!forget)
 		goto out_err;
 
-	req = fuse_get_req_nopages(fc);
-	err = PTR_ERR(req);
-	if (IS_ERR(req))
-		goto out_put_forget_req;
-
 	err = -ENOMEM;
 	ff = fuse_file_alloc(fc);
 	if (!ff)
-		goto out_put_request;
+		goto out_put_forget_req;
 
 	if (!fc->dont_mask)
 		mode &= ~current_umask();
@@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	inarg.flags = flags;
 	inarg.mode = mode;
 	inarg.umask = current_umask();
-	req->in.h.opcode = FUSE_CREATE;
-	req->in.h.nodeid = get_node_id(dir);
-	req->in.numargs = 2;
-	req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+	args.in.h.opcode = FUSE_CREATE;
+	args.in.h.nodeid = get_node_id(dir);
+	args.in.numargs = 2;
+	args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
 						sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = entry->d_name.len + 1;
-	req->in.args[1].value = entry->d_name.name;
-	req->out.numargs = 2;
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = entry->d_name.len + 1;
+	args.in.args[1].value = entry->d_name.name;
+	args.out.numargs = 2;
 	if (fc->minor < 9)
-		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+		args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 	else
-		req->out.args[0].size = sizeof(outentry);
-	req->out.args[0].value = &outentry;
-	req->out.args[1].size = sizeof(outopen);
-	req->out.args[1].value = &outopen;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
+		args.out.args[0].size = sizeof(outentry);
+	args.out.args[0].value = &outentry;
+	args.out.args[1].size = sizeof(outopen);
+	args.out.args[1].value = &outopen;
+	err = fuse_simple_request(fc, &args);
 	if (err)
 		goto out_free_ff;
 
@@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
 		goto out_free_ff;
 
-	fuse_put_request(fc, req);
 	ff->fh = outopen.fh;
 	ff->nodeid = outentry.nodeid;
 	ff->open_flags = outopen.open_flags;
@@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 
 out_free_ff:
 	fuse_file_free(ff);
-out_put_request:
-	fuse_put_request(fc, req);
 out_put_forget_req:
 	kfree(forget);
 out_err:
@@ -547,7 +523,7 @@ no_open:
 /*
  * Code shared between mknod, mkdir, symlink and link
  */
-static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 			    struct inode *dir, struct dentry *entry,
 			    umode_t mode)
 {
@@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
 	struct fuse_forget_link *forget;
 
 	forget = fuse_alloc_forget();
-	if (!forget) {
-		fuse_put_request(fc, req);
+	if (!forget)
 		return -ENOMEM;
-	}
 
 	memset(&outarg, 0, sizeof(outarg));
-	req->in.h.nodeid = get_node_id(dir);
-	req->out.numargs = 1;
+	args->in.h.nodeid = get_node_id(dir);
+	args->out.numargs = 1;
 	if (fc->minor < 9)
-		req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+		args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
 	else
-		req->out.args[0].size = sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+		args->out.args[0].size = sizeof(outarg);
+	args->out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, args);
 	if (err)
 		goto out_put_forget_req;
 
@@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 {
 	struct fuse_mknod_in inarg;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
 	if (!fc->dont_mask)
 		mode &= ~current_umask();
@@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 	inarg.mode = mode;
 	inarg.rdev = new_encode_dev(rdev);
 	inarg.umask = current_umask();
-	req->in.h.opcode = FUSE_MKNOD;
-	req->in.numargs = 2;
-	req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+	args.in.h.opcode = FUSE_MKNOD;
+	args.in.numargs = 2;
+	args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
 						sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = entry->d_name.len + 1;
-	req->in.args[1].value = entry->d_name.name;
-	return create_new_entry(fc, req, dir, entry, mode);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = entry->d_name.len + 1;
+	args.in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, &args, dir, entry, mode);
 }
 
 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
 {
 	struct fuse_mkdir_in inarg;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
 	if (!fc->dont_mask)
 		mode &= ~current_umask();
@@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mode = mode;
 	inarg.umask = current_umask();
-	req->in.h.opcode = FUSE_MKDIR;
-	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = entry->d_name.len + 1;
-	req->in.args[1].value = entry->d_name.name;
-	return create_new_entry(fc, req, dir, entry, S_IFDIR);
+	args.in.h.opcode = FUSE_MKDIR;
+	args.in.numargs = 2;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = entry->d_name.len + 1;
+	args.in.args[1].value = entry->d_name.name;
+	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
 }
 
 static int fuse_symlink(struct inode *dir, struct dentry *entry,
@@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
 {
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	unsigned len = strlen(link) + 1;
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
-	req->in.h.opcode = FUSE_SYMLINK;
-	req->in.numargs = 2;
-	req->in.args[0].size = entry->d_name.len + 1;
-	req->in.args[0].value = entry->d_name.name;
-	req->in.args[1].size = len;
-	req->in.args[1].value = link;
-	return create_new_entry(fc, req, dir, entry, S_IFLNK);
+	args.in.h.opcode = FUSE_SYMLINK;
+	args.in.numargs = 2;
+	args.in.args[0].size = entry->d_name.len + 1;
+	args.in.args[0].value = entry->d_name.name;
+	args.in.args[1].size = len;
+	args.in.args[1].value = link;
+	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
 }
 
 static inline void fuse_update_ctime(struct inode *inode)
@@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->in.h.opcode = FUSE_UNLINK;
-	req->in.h.nodeid = get_node_id(dir);
-	req->in.numargs = 1;
-	req->in.args[0].size = entry->d_name.len + 1;
-	req->in.args[0].value = entry->d_name.name;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	FUSE_ARGS(args);
+
+	args.in.h.opcode = FUSE_UNLINK;
+	args.in.h.nodeid = get_node_id(dir);
+	args.in.numargs = 1;
+	args.in.args[0].size = entry->d_name.len + 1;
+	args.in.args[0].value = entry->d_name.name;
+	err = fuse_simple_request(fc, &args);
 	if (!err) {
 		struct inode *inode = entry->d_inode;
 		struct fuse_inode *fi = get_fuse_inode(inode);
@@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->in.h.opcode = FUSE_RMDIR;
-	req->in.h.nodeid = get_node_id(dir);
-	req->in.numargs = 1;
-	req->in.args[0].size = entry->d_name.len + 1;
-	req->in.args[0].value = entry->d_name.name;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	FUSE_ARGS(args);
+
+	args.in.h.opcode = FUSE_RMDIR;
+	args.in.h.nodeid = get_node_id(dir);
+	args.in.numargs = 1;
+	args.in.args[0].size = entry->d_name.len + 1;
+	args.in.args[0].value = entry->d_name.name;
+	err = fuse_simple_request(fc, &args);
 	if (!err) {
 		clear_nlink(entry->d_inode);
 		fuse_invalidate_attr(dir);
@@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
 	int err;
 	struct fuse_rename2_in inarg;
 	struct fuse_conn *fc = get_fuse_conn(olddir);
-	struct fuse_req *req;
-
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
 	memset(&inarg, 0, argsize);
 	inarg.newdir = get_node_id(newdir);
 	inarg.flags = flags;
-	req->in.h.opcode = opcode;
-	req->in.h.nodeid = get_node_id(olddir);
-	req->in.numargs = 3;
-	req->in.args[0].size = argsize;
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = oldent->d_name.len + 1;
-	req->in.args[1].value = oldent->d_name.name;
-	req->in.args[2].size = newent->d_name.len + 1;
-	req->in.args[2].value = newent->d_name.name;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = opcode;
+	args.in.h.nodeid = get_node_id(olddir);
+	args.in.numargs = 3;
+	args.in.args[0].size = argsize;
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = oldent->d_name.len + 1;
+	args.in.args[1].value = oldent->d_name.name;
+	args.in.args[2].size = newent->d_name.len + 1;
+	args.in.args[2].value = newent->d_name.name;
+	err = fuse_simple_request(fc, &args);
 	if (!err) {
 		/* ctime changes */
 		fuse_invalidate_attr(oldent->d_inode);
@@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 	struct fuse_link_in inarg;
 	struct inode *inode = entry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.oldnodeid = get_node_id(inode);
-	req->in.h.opcode = FUSE_LINK;
-	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = newent->d_name.len + 1;
-	req->in.args[1].value = newent->d_name.name;
-	err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+	args.in.h.opcode = FUSE_LINK;
+	args.in.numargs = 2;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = newent->d_name.len + 1;
+	args.in.args[1].value = newent->d_name.name;
+	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
 	/* Contrary to "normal" filesystems it can happen that link
 	   makes two "logical" inodes point to the same "physical"
 	   inode.  We invalidate the attributes of the old one, so it
@@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
 	struct fuse_getattr_in inarg;
 	struct fuse_attr_out outarg;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	u64 attr_version;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	attr_version = fuse_get_attr_version(fc);
 
 	memset(&inarg, 0, sizeof(inarg));
@@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
 		inarg.getattr_flags |= FUSE_GETATTR_FH;
 		inarg.fh = ff->fh;
 	}
-	req->in.h.opcode = FUSE_GETATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->out.numargs = 1;
+	args.in.h.opcode = FUSE_GETATTR;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.out.numargs = 1;
 	if (fc->minor < 9)
-		req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+		args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
 	else
-		req->out.args[0].size = sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+		args.out.args[0].size = sizeof(outarg);
+	args.out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, &args);
 	if (!err) {
 		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
 			make_bad_inode(inode);
@@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
 static int fuse_access(struct inode *inode, int mask)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_access_in inarg;
 	int err;
 
@@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask)
 	if (fc->no_access)
 		return 0;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
-	req->in.h.opcode = FUSE_ACCESS;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = FUSE_ACCESS;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS) {
 		fc->no_access = 1;
 		err = 0;
@@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req = fuse_get_req_nopages(fc);
+	FUSE_ARGS(args);
 	char *link;
-
-	if (IS_ERR(req))
-		return ERR_CAST(req);
+	ssize_t ret;
 
 	link = (char *) __get_free_page(GFP_KERNEL);
-	if (!link) {
-		link = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-	req->in.h.opcode = FUSE_READLINK;
-	req->in.h.nodeid = get_node_id(inode);
-	req->out.argvar = 1;
-	req->out.numargs = 1;
-	req->out.args[0].size = PAGE_SIZE - 1;
-	req->out.args[0].value = link;
-	fuse_request_send(fc, req);
-	if (req->out.h.error) {
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+
+	args.in.h.opcode = FUSE_READLINK;
+	args.in.h.nodeid = get_node_id(inode);
+	args.out.argvar = 1;
+	args.out.numargs = 1;
+	args.out.args[0].size = PAGE_SIZE - 1;
+	args.out.args[0].value = link;
+	ret = fuse_simple_request(fc, &args);
+	if (ret < 0) {
 		free_page((unsigned long) link);
-		link = ERR_PTR(req->out.h.error);
-	} else
-		link[req->out.args[0].size] = '\0';
- out:
-	fuse_put_request(fc, req);
+		link = ERR_PTR(ret);
+	} else {
+		link[ret] = '\0';
+	}
 	fuse_invalidate_atime(inode);
 	return link;
 }
@@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode)
 	spin_unlock(&fc->lock);
 }
 
-static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
 			      struct inode *inode,
 			      struct fuse_setattr_in *inarg_p,
 			      struct fuse_attr_out *outarg_p)
 {
-	req->in.h.opcode = FUSE_SETATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(*inarg_p);
-	req->in.args[0].value = inarg_p;
-	req->out.numargs = 1;
+	args->in.h.opcode = FUSE_SETATTR;
+	args->in.h.nodeid = get_node_id(inode);
+	args->in.numargs = 1;
+	args->in.args[0].size = sizeof(*inarg_p);
+	args->in.args[0].value = inarg_p;
+	args->out.numargs = 1;
 	if (fc->minor < 9)
-		req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+		args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
 	else
-		req->out.args[0].size = sizeof(*outarg_p);
-	req->out.args[0].value = outarg_p;
+		args->out.args[0].size = sizeof(*outarg_p);
+	args->out.args[0].value = outarg_p;
 }
 
 /*
@@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
-	int err;
-
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
 
 	memset(&inarg, 0, sizeof(inarg));
 	memset(&outarg, 0, sizeof(outarg));
@@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
 		inarg.valid |= FATTR_FH;
 		inarg.fh = ff->fh;
 	}
-	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
 
-	return err;
+	return fuse_simple_request(fc, &args);
 }
 
 /*
@@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
 	bool is_truncate = false;
@@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 	if (attr->ia_valid & ATTR_SIZE)
 		is_truncate = true;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	if (is_truncate) {
 		fuse_set_nowrite(inode);
 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 		inarg.valid |= FATTR_LOCKOWNER;
 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
 	}
-	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+	err = fuse_simple_request(fc, &args);
 	if (err) {
 		if (err == -EINTR)
 			fuse_invalidate_attr(inode);
@@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
 {
 	struct inode *inode = entry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_setxattr_in inarg;
 	int err;
 
 	if (fc->no_setxattr)
 		return -EOPNOTSUPP;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
 	inarg.flags = flags;
-	req->in.h.opcode = FUSE_SETXATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 3;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = strlen(name) + 1;
-	req->in.args[1].value = name;
-	req->in.args[2].size = size;
-	req->in.args[2].value = value;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = FUSE_SETXATTR;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 3;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = strlen(name) + 1;
+	args.in.args[1].value = name;
+	args.in.args[2].size = size;
+	args.in.args[2].value = value;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS) {
 		fc->no_setxattr = 1;
 		err = -EOPNOTSUPP;
@@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
 {
 	struct inode *inode = entry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_getxattr_in inarg;
 	struct fuse_getxattr_out outarg;
 	ssize_t ret;
@@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
 	if (fc->no_getxattr)
 		return -EOPNOTSUPP;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
-	req->in.h.opcode = FUSE_GETXATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 2;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->in.args[1].size = strlen(name) + 1;
-	req->in.args[1].value = name;
+	args.in.h.opcode = FUSE_GETXATTR;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 2;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.in.args[1].size = strlen(name) + 1;
+	args.in.args[1].value = name;
 	/* This is really two different operations rolled into one */
-	req->out.numargs = 1;
+	args.out.numargs = 1;
 	if (size) {
-		req->out.argvar = 1;
-		req->out.args[0].size = size;
-		req->out.args[0].value = value;
+		args.out.argvar = 1;
+		args.out.args[0].size = size;
+		args.out.args[0].value = value;
 	} else {
-		req->out.args[0].size = sizeof(outarg);
-		req->out.args[0].value = &outarg;
+		args.out.args[0].size = sizeof(outarg);
+		args.out.args[0].value = &outarg;
 	}
-	fuse_request_send(fc, req);
-	ret = req->out.h.error;
-	if (!ret)
-		ret = size ? req->out.args[0].size : outarg.size;
-	else {
-		if (ret == -ENOSYS) {
-			fc->no_getxattr = 1;
-			ret = -EOPNOTSUPP;
-		}
+	ret = fuse_simple_request(fc, &args);
+	if (!ret && !size)
+		ret = outarg.size;
+	if (ret == -ENOSYS) {
+		fc->no_getxattr = 1;
+		ret = -EOPNOTSUPP;
 	}
-	fuse_put_request(fc, req);
 	return ret;
 }
 
@@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 {
 	struct inode *inode = entry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_getxattr_in inarg;
 	struct fuse_getxattr_out outarg;
 	ssize_t ret;
@@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 	if (fc->no_listxattr)
 		return -EOPNOTSUPP;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.size = size;
-	req->in.h.opcode = FUSE_LISTXATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
+	args.in.h.opcode = FUSE_LISTXATTR;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
 	/* This is really two different operations rolled into one */
-	req->out.numargs = 1;
+	args.out.numargs = 1;
 	if (size) {
-		req->out.argvar = 1;
-		req->out.args[0].size = size;
-		req->out.args[0].value = list;
+		args.out.argvar = 1;
+		args.out.args[0].size = size;
+		args.out.args[0].value = list;
 	} else {
-		req->out.args[0].size = sizeof(outarg);
-		req->out.args[0].value = &outarg;
+		args.out.args[0].size = sizeof(outarg);
+		args.out.args[0].value = &outarg;
 	}
-	fuse_request_send(fc, req);
-	ret = req->out.h.error;
-	if (!ret)
-		ret = size ? req->out.args[0].size : outarg.size;
-	else {
-		if (ret == -ENOSYS) {
-			fc->no_listxattr = 1;
-			ret = -EOPNOTSUPP;
-		}
+	ret = fuse_simple_request(fc, &args);
+	if (!ret && !size)
+		ret = outarg.size;
+	if (ret == -ENOSYS) {
+		fc->no_listxattr = 1;
+		ret = -EOPNOTSUPP;
 	}
-	fuse_put_request(fc, req);
 	return ret;
 }
 
@@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
 {
 	struct inode *inode = entry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	int err;
 
 	if (fc->no_removexattr)
 		return -EOPNOTSUPP;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->in.h.opcode = FUSE_REMOVEXATTR;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = strlen(name) + 1;
-	req->in.args[0].value = name;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = FUSE_REMOVEXATTR;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = strlen(name) + 1;
+	args.in.args[0].value = name;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS) {
 		fc->no_removexattr = 1;
 		err = -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bf50259..760b2c5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 			  int opcode, struct fuse_open_out *outargp)
 {
 	struct fuse_open_in inarg;
-	struct fuse_req *req;
-	int err;
-
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	FUSE_ARGS(args);
 
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
-	req->in.h.opcode = opcode;
-	req->in.h.nodeid = nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(*outargp);
-	req->out.args[0].value = outargp;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = opcode;
+	args.in.h.nodeid = nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.out.numargs = 1;
+	args.out.args[0].size = sizeof(*outargp);
+	args.out.args[0].value = outargp;
 
-	return err;
+	return fuse_simple_request(fc, &args);
 }
 
 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
 	return ff;
 }
 
-static void fuse_release_async(struct work_struct *work)
-{
-	struct fuse_req *req;
-	struct fuse_conn *fc;
-	struct path path;
-
-	req = container_of(work, struct fuse_req, misc.release.work);
-	path = req->misc.release.path;
-	fc = get_fuse_conn(path.dentry->d_inode);
-
-	fuse_put_request(fc, req);
-	path_put(&path);
-}
-
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	if (fc->destroy_req) {
-		/*
-		 * If this is a fuseblk mount, then it's possible that
-		 * releasing the path will result in releasing the
-		 * super block and sending the DESTROY request.  If
-		 * the server is single threaded, this would hang.
-		 * For this reason do the path_put() in a separate
-		 * thread.
-		 */
-		atomic_inc(&req->count);
-		INIT_WORK(&req->misc.release.work, fuse_release_async);
-		schedule_work(&req->misc.release.work);
-	} else {
-		path_put(&req->misc.release.path);
-	}
+	iput(req->misc.release.inode);
 }
 
 static void fuse_file_put(struct fuse_file *ff, bool sync)
@@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
 			 * implement 'open'
 			 */
 			req->background = 0;
-			path_put(&req->misc.release.path);
+			iput(req->misc.release.inode);
 			fuse_put_request(ff->fc, req);
 		} else if (sync) {
 			req->background = 0;
 			fuse_request_send(ff->fc, req);
-			path_put(&req->misc.release.path);
+			iput(req->misc.release.inode);
 			fuse_put_request(ff->fc, req);
 		} else {
 			req->end = fuse_release_end;
@@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode)
 		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
 						       (fl_owner_t) file);
 	}
-	/* Hold vfsmount and dentry until release is finished */
-	path_get(&file->f_path);
-	req->misc.release.path = file->f_path;
+	/* Hold inode until release is finished */
+	req->misc.release.inode = igrab(file_inode(file));
 
 	/*
 	 * Normally this will send the RELEASE request, however if
@@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
 	struct inode *inode = file->f_mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_fsync_in inarg;
 	int err;
 
@@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
 	if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
 		goto out;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.fh = ff->fh;
 	inarg.fsync_flags = datasync ? 1 : 0;
-	req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS) {
 		if (isdir)
 			fc->no_fsyncdir = 1;
@@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 	return 0;
 }
 
-static void fuse_lk_fill(struct fuse_req *req, struct file *file,
+static void fuse_lk_fill(struct fuse_args *args, struct file *file,
 			 const struct file_lock *fl, int opcode, pid_t pid,
-			 int flock)
+			 int flock, struct fuse_lk_in *inarg)
 {
 	struct inode *inode = file_inode(file);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_file *ff = file->private_data;
-	struct fuse_lk_in *arg = &req->misc.lk_in;
-
-	arg->fh = ff->fh;
-	arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
-	arg->lk.start = fl->fl_start;
-	arg->lk.end = fl->fl_end;
-	arg->lk.type = fl->fl_type;
-	arg->lk.pid = pid;
+
+	memset(inarg, 0, sizeof(*inarg));
+	inarg->fh = ff->fh;
+	inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+	inarg->lk.start = fl->fl_start;
+	inarg->lk.end = fl->fl_end;
+	inarg->lk.type = fl->fl_type;
+	inarg->lk.pid = pid;
 	if (flock)
-		arg->lk_flags |= FUSE_LK_FLOCK;
-	req->in.h.opcode = opcode;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(*arg);
-	req->in.args[0].value = arg;
+		inarg->lk_flags |= FUSE_LK_FLOCK;
+	args->in.h.opcode = opcode;
+	args->in.h.nodeid = get_node_id(inode);
+	args->in.numargs = 1;
+	args->in.args[0].size = sizeof(*inarg);
+	args->in.args[0].value = inarg;
 }
 
 static int fuse_getlk(struct file *file, struct file_lock *fl)
 {
 	struct inode *inode = file_inode(file);
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
+	struct fuse_lk_in inarg;
 	struct fuse_lk_out outarg;
 	int err;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
-	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
+	args.out.numargs = 1;
+	args.out.args[0].size = sizeof(outarg);
+	args.out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, &args);
 	if (!err)
 		err = convert_fuse_file_lock(&outarg.lk, fl);
 
@@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 {
 	struct inode *inode = file_inode(file);
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
+	struct fuse_lk_in inarg;
 	int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
 	pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
 	int err;
@@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 	if (fl->fl_flags & FL_CLOSE)
 		return 0;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
+	err = fuse_simple_request(fc, &args);
 
-	fuse_lk_fill(req, file, fl, opcode, pid, flock);
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
 	/* locking is restartable */
 	if (err == -EINTR)
 		err = -ERESTARTSYS;
-	fuse_put_request(fc, req);
+
 	return err;
 }
 
@@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
 {
 	struct inode *inode = mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_bmap_in inarg;
 	struct fuse_bmap_out outarg;
 	int err;
@@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
 	if (!inode->i_sb->s_bdev || fc->no_bmap)
 		return 0;
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return 0;
-
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.block = block;
 	inarg.blocksize = inode->i_sb->s_blocksize;
-	req->in.h.opcode = FUSE_BMAP;
-	req->in.h.nodeid = get_node_id(inode);
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = FUSE_BMAP;
+	args.in.h.nodeid = get_node_id(inode);
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.out.numargs = 1;
+	args.out.args[0].size = sizeof(outarg);
+	args.out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS)
 		fc->no_bmap = 1;
 
@@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 	struct fuse_conn *fc = ff->fc;
 	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
 	struct fuse_poll_out outarg;
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	int err;
 
 	if (fc->no_poll)
@@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
 		fuse_register_polled_file(fc, ff);
 	}
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return POLLERR;
-
-	req->in.h.opcode = FUSE_POLL;
-	req->in.h.nodeid = ff->nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
+	args.in.h.opcode = FUSE_POLL;
+	args.in.h.nodeid = ff->nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	args.out.numargs = 1;
+	args.out.args[0].size = sizeof(outarg);
+	args.out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, &args);
 
 	if (!err)
 		return outarg.revents;
@@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 				loff_t length)
 {
 	struct fuse_file *ff = file->private_data;
-	struct inode *inode = file->f_inode;
+	struct inode *inode = file_inode(file);
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = ff->fc;
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_fallocate_in inarg = {
 		.fh = ff->fh,
 		.offset = offset,
@@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
 	if (!(mode & FALLOC_FL_KEEP_SIZE))
 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-
-	req->in.h.opcode = FUSE_FALLOCATE;
-	req->in.h.nodeid = ff->nodeid;
-	req->in.numargs = 1;
-	req->in.args[0].size = sizeof(inarg);
-	req->in.args[0].value = &inarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
+	args.in.h.opcode = FUSE_FALLOCATE;
+	args.in.h.nodeid = ff->nodeid;
+	args.in.numargs = 1;
+	args.in.args[0].size = sizeof(inarg);
+	args.in.args[0].value = &inarg;
+	err = fuse_simple_request(fc, &args);
 	if (err == -ENOSYS) {
 		fc->no_fallocate = 1;
 		err = -EOPNOTSUPP;
 	}
-	fuse_put_request(fc, req);
-
 	if (err)
 		goto out;
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e47a6..e0fc672 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -213,7 +213,7 @@ struct fuse_out {
 	unsigned numargs;
 
 	/** Array of arguments */
-	struct fuse_arg args[3];
+	struct fuse_arg args[2];
 };
 
 /** FUSE page descriptor */
@@ -222,6 +222,25 @@ struct fuse_page_desc {
 	unsigned int offset;
 };
 
+struct fuse_args {
+	struct {
+		struct {
+			uint32_t opcode;
+			uint64_t nodeid;
+		} h;
+		unsigned numargs;
+		struct fuse_in_arg args[3];
+
+	} in;
+	struct {
+		unsigned argvar:1;
+		unsigned numargs;
+		struct fuse_arg args[2];
+	} out;
+};
+
+#define FUSE_ARGS(args) struct fuse_args args = {}
+
 /** The request state */
 enum fuse_req_state {
 	FUSE_REQ_INIT = 0,
@@ -305,11 +324,8 @@ struct fuse_req {
 	/** Data for asynchronous requests */
 	union {
 		struct {
-			union {
-				struct fuse_release_in in;
-				struct work_struct work;
-			};
-			struct path path;
+			struct fuse_release_in in;
+			struct inode *inode;
 		} release;
 		struct fuse_init_in init_in;
 		struct fuse_init_out init_out;
@@ -324,7 +340,6 @@ struct fuse_req {
 			struct fuse_req *next;
 		} write;
 		struct fuse_notify_retrieve_in retrieve_in;
-		struct fuse_lk_in lk_in;
 	} misc;
 
 	/** page vector */
@@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
 void __fuse_get_request(struct fuse_req *req);
 
 /**
- * Get a request, may fail with -ENOMEM,
- * useful for callers who doesn't use req->pages[]
- */
-static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
-{
-	return fuse_get_req(fc, 0);
-}
-
-/**
  * Gets a requests for a file operation, always succeeds
  */
 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
@@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
 
 /**
+ * Simple request sending that does request allocation and freeing
+ */
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+
+/**
  * Send a request in the background
  */
 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
@@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode);
  */
 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
 
-void fuse_conn_kill(struct fuse_conn *fc);
-
 /**
  * Initialize fuse_conn
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 03246cd..6749109 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc)
 		bdi_destroy(&fc->bdi);
 }
 
-void fuse_conn_kill(struct fuse_conn *fc)
-{
-	spin_lock(&fc->lock);
-	fc->connected = 0;
-	fc->blocked = 0;
-	fc->initialized = 1;
-	spin_unlock(&fc->lock);
-	/* Flush all readers on this fs */
-	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
-	wake_up_all(&fc->waitq);
-	wake_up_all(&fc->blocked_waitq);
-	wake_up_all(&fc->reserved_req_waitq);
-}
-EXPORT_SYMBOL_GPL(fuse_conn_kill);
-
 static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
 	fuse_send_destroy(fc);
 
-	fuse_conn_kill(fc);
+	fuse_abort_conn(fc);
 	mutex_lock(&fuse_mutex);
 	list_del(&fc->entry);
 	fuse_ctl_remove_conn(fc);
@@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
-	struct fuse_req *req;
+	FUSE_ARGS(args);
 	struct fuse_statfs_out outarg;
 	int err;
 
@@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 		return 0;
 	}
 
-	req = fuse_get_req_nopages(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
 	memset(&outarg, 0, sizeof(outarg));
-	req->in.numargs = 0;
-	req->in.h.opcode = FUSE_STATFS;
-	req->in.h.nodeid = get_node_id(dentry->d_inode);
-	req->out.numargs = 1;
-	req->out.args[0].size =
+	args.in.numargs = 0;
+	args.in.h.opcode = FUSE_STATFS;
+	args.in.h.nodeid = get_node_id(dentry->d_inode);
+	args.out.numargs = 1;
+	args.out.args[0].size =
 		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
-	req->out.args[0].value = &outarg;
-	fuse_request_send(fc, req);
-	err = req->out.h.error;
+	args.out.args[0].value = &outarg;
+	err = fuse_simple_request(fc, &args);
 	if (!err)
 		convert_fuse_statfs(buf, &outarg.st);
-	fuse_put_request(fc, req);
 	return err;
 }
 
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c6..7892e6f 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
 	return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
 }
 
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
-			   u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+		hfsplus_btree_key *key, u32 parent, struct qstr *str)
 {
-	int len;
+	int len, err;
 
 	key->cat.parent = cpu_to_be32(parent);
-	if (str) {
-		hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
-					str->name, str->len);
-		len = be16_to_cpu(key->cat.name.length);
-	} else {
-		key->cat.name.length = 0;
-		len = 0;
-	}
+	err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+			str->name, str->len);
+	if (unlikely(err < 0))
+		return err;
+
+	len = be16_to_cpu(key->cat.name.length);
 	key->key_len = cpu_to_be16(6 + 2 * len);
+	return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+			hfsplus_btree_key *key, u32 parent)
+{
+	key->cat.parent = cpu_to_be32(parent);
+	key->cat.name.length = 0;
+	key->key_len = cpu_to_be16(6);
 }
 
 static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
 				   hfsplus_cat_entry *entry, int type,
 				   u32 parentid, struct qstr *str)
 {
+	int err;
+
 	entry->type = cpu_to_be16(type);
 	entry->thread.reserved = 0;
 	entry->thread.parentID = cpu_to_be32(parentid);
-	hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+	err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
 				str->name, str->len);
+	if (unlikely(err < 0))
+		return err;
+
 	return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
 }
 
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
 	int err;
 	u16 type;
 
-	hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
 	err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
 	if (err)
 		return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
 	if (err)
 		return err;
 
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry,
 		S_ISDIR(inode->i_mode) ?
 			HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
 		dir->i_ino, str);
+	if (unlikely(entry_size < 0)) {
+		err = entry_size;
+		goto err2;
+	}
+
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
 	if (err)
 		goto err2;
 
-	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	if (unlikely(err))
+		goto err1;
+
 	entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
 	return 0;
 
 err1:
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
 		hfs_brec_remove(&fd);
 err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 	if (!str) {
 		int len;
 
-		hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+		hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 		err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 		if (err)
 			goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 			off + 2, len);
 		fd.search_key->key_len = cpu_to_be16(6 + len);
 	} else
-		hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+		err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	if (unlikely(err))
+		goto out;
 
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 	if (err)
 		goto out;
 
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
 	dst_fd = src_fd;
 
 	/* find the old dir entry and read the data */
-	hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+	err = hfsplus_cat_build_key(sb, src_fd.search_key,
+			src_dir->i_ino, src_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
 	type = be16_to_cpu(entry.type);
 
 	/* create new dir entry with the data from the old entry */
-	hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+	err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+			dst_dir->i_ino, dst_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
 	dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
 
 	/* finally remove the old entry */
-	hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+	err = hfsplus_cat_build_key(sb, src_fd.search_key,
+			src_dir->i_ino, src_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
 	src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
 
 	/* remove old thread entry */
-	hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
 		goto out;
 
 	/* create new thread entry */
-	hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
 		dst_dir->i_ino, dst_name);
+	if (unlikely(entry_size < 0)) {
+		err = entry_size;
+		goto out;
+	}
+
 	err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a326..435bea2 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	if (err)
 		return ERR_PTR(err);
-	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+	err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+			&dentry->d_name);
+	if (unlikely(err < 0))
+		goto fail;
 again:
 	err = hfs_brec_read(&fd, &entry, sizeof(entry));
 	if (err) {
@@ -97,9 +100,11 @@ again:
 					be32_to_cpu(entry.file.permissions.dev);
 				str.len = sprintf(name, "iNode%d", linkid);
 				str.name = name;
-				hfsplus_cat_build_key(sb, fd.search_key,
+				err = hfsplus_cat_build_key(sb, fd.search_key,
 					HFSPLUS_SB(sb)->hidden_dir->i_ino,
 					&str);
+				if (unlikely(err < 0))
+					goto fail;
 				goto again;
 			}
 		} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
 		err = -ENOMEM;
 		goto out;
 	}
-	hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059..b0441d6 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
 			     const hfsplus_btree_key *k2);
 int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
 			    const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
 			   u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+				     hfsplus_btree_key *key, u32 parent);
 void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
 int hfsplus_find_cat(struct super_block *sb, u32 cnid,
 		     struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024..593af2f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 	err = hfs_find_init(sbi->cat_tree, &fd);
 	if (err)
 		goto out_put_root;
-	hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+	err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+	if (unlikely(err < 0))
+		goto out_put_root;
 	if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
 		hfs_find_exit(&fd);
 		if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 386303d..dddbde4 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
 
 	dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
 
-	/* If a node has zero dsize, we only have to keep if it if it might be the
+	/* If a node has zero dsize, we only have to keep it if it might be the
 	   node with highest version -- i.e. the one which will end up as f->metadata.
 	   Note that such nodes won't be REF_UNCHECKED since there are no data to
 	   check anyway. */
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c522d09..bc53854 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
 /* Write out summary information - called from jffs2_do_reserve_space */
 
 int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
+	__must_hold(&c->erase_completion_block)
 {
 	int datasize, infosize, padsize;
 	struct jffs2_eraseblock *jeb;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 697390e..ddc9f96 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -448,27 +448,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
 	return pol;
 }
 
-static int kernfs_vma_migrate(struct vm_area_struct *vma,
-			      const nodemask_t *from, const nodemask_t *to,
-			      unsigned long flags)
-{
-	struct file *file = vma->vm_file;
-	struct kernfs_open_file *of = kernfs_of(file);
-	int ret;
-
-	if (!of->vm_ops)
-		return 0;
-
-	if (!kernfs_get_active(of->kn))
-		return 0;
-
-	ret = 0;
-	if (of->vm_ops->migrate)
-		ret = of->vm_ops->migrate(vma, from, to, flags);
-
-	kernfs_put_active(of->kn);
-	return ret;
-}
 #endif
 
 static const struct vm_operations_struct kernfs_vm_ops = {
@@ -479,7 +458,6 @@ static const struct vm_operations_struct kernfs_vm_ops = {
 #ifdef CONFIG_NUMA
 	.set_policy	= kernfs_vma_set_policy,
 	.get_policy	= kernfs_vma_get_policy,
-	.migrate	= kernfs_vma_migrate,
 #endif
 };
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 30df6e7..cd1e968 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	}
 
 	/* Don't allow unprivileged users to reveal what is under a mount */
-	if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
+	if ((flag & CL_UNPRIVILEGED) &&
+	    (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
 		mnt->mnt.mnt_flags |= MNT_LOCKED;
 
 	atomic_inc(&sb->s_active);
@@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how)
 	}
 	if (last) {
 		last->mnt_hash.next = unmounted.first;
+		if (unmounted.first)
+			unmounted.first->pprev = &last->mnt_hash.next;
 		unmounted.first = tmp_list.first;
 		unmounted.first->pprev = &unmounted.first;
 	}
@@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 		goto dput_and_out;
 	if (mnt->mnt.mnt_flags & MNT_LOCKED)
 		goto dput_and_out;
+	retval = -EPERM;
+	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+		goto dput_and_out;
 
 	retval = do_umount(mnt, flags);
 dput_and_out:
@@ -1606,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	if (IS_ERR(q))
 		return q;
 
-	q->mnt.mnt_flags &= ~MNT_LOCKED;
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
@@ -2097,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
 	    !(mnt_flags & MNT_NODEV)) {
-		return -EPERM;
+		/* Was the nodev implicitly added in mount? */
+		if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+		    !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+			mnt_flags |= MNT_NODEV;
+		} else {
+			return -EPERM;
+		}
 	}
 	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
 	    !(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* mount new_root on / */
 	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
+	/* A moved mount should not expire automatically */
+	list_del_init(&new_mnt->mnt_expire);
 	unlock_mount_hash();
 	chroot_fs_refs(&root, &new);
 	put_mountpoint(root_mp);
@@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void)
 
 	root.mnt = mnt;
 	root.dentry = mnt->mnt_root;
+	mnt->mnt_flags |= MNT_LOCKED;
 
 	set_fs_pwd(current->fs, &root);
 	set_fs_root(current->fs, &root);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a93bf98..fcae9ef 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 			     struct ocfs2_extent_tree *et,
 			     u32 cpos, u32 phys_cpos, u32 len, int flags,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc,
-			     u64 refcount_loc)
+			     u64 refcount_loc, bool refcount_tree_locked)
 {
 	int ret, credits = 0, extra_blocks = 0;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
 			 OCFS2_HAS_REFCOUNT_FL));
 
-		ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
-					       &ref_tree, NULL);
-		if (ret) {
-			mlog_errno(ret);
-			goto bail;
+		if (!refcount_tree_locked) {
+			ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+						       &ref_tree, NULL);
+			if (ret) {
+				mlog_errno(ret);
+				goto bail;
+			}
 		}
 
 		ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
 	u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
 	struct ocfs2_extent_tree et;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@ start:
 
 	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
 
+	if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+		status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+				&ref_tree, NULL);
+		if (status) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
 					  phys_cpos, trunc_len, flags, &dealloc,
-					  refcount_loc);
+					  refcount_loc, true);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -7147,6 +7159,8 @@ start:
 	goto start;
 
 bail:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c5..fb09b97 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 			     struct ocfs2_extent_tree *et,
 			     u32 cpos, u32 phys_cpos, u32 len, int flags,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc,
-			     u64 refcount_loc);
+			     u64 refcount_loc, bool refcount_tree_locked);
 
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
 			   struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d9f2229..46d93e9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
 	}
 }
 
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
 {
 	int i;
 
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
 		page_cache_release(wc->w_target_page);
 	}
 	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
 
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+	ocfs2_unlock_pages(wc);
 	brelse(wc->w_di_bh);
 	kfree(wc);
 }
@@ -2042,11 +2046,19 @@ out_write_size:
 	ocfs2_update_inode_fsync_trans(handle, inode, 1);
 	ocfs2_journal_dirty(handle, wc->w_di_bh);
 
+	/* Unlock pages before dealloc, which needs to acquire j_trans_barrier:
+	 * the journal commit thread holds that lock and asks for the page lock
+	 * when flushing the data, so keeping pages locked would deadlock.
+	 * Unlocking here preserves the unlock order.
+	 */
+	ocfs2_unlock_pages(wc);
+
 	ocfs2_commit_trans(osb, handle);
 
 	ocfs2_run_deallocs(osb, &wc->w_dealloc);
 
-	ocfs2_free_write_ctxt(wc);
+	brelse(wc->w_di_bh);
+	kfree(wc);
 
 	return copied;
 }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 79d56dc..319e786 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
 		p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
 
 		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
-					       &dealloc, 0);
+					       &dealloc, 0, false);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3689b35..a6944b2 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
 			res->inflight_assert_workers);
 }
 
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
-		struct dlm_lock_resource *res)
-{
-	spin_lock(&res->spinlock);
-	__dlm_lockres_grab_inflight_worker(dlm, res);
-	spin_unlock(&res->spinlock);
-}
-
 static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
 		struct dlm_lock_resource *res)
 {
@@ -1646,6 +1638,7 @@ send_response:
 		}
 		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
 			     dlm->node_num, res->lockname.len, res->lockname.name);
+		spin_lock(&res->spinlock);
 		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
 						 DLM_ASSERT_MASTER_MLE_CLEANUP);
 		if (ret < 0) {
@@ -1653,7 +1646,8 @@ send_response:
 			response = DLM_MASTER_RESP_ERROR;
 			dlm_lockres_put(res);
 		} else
-			dlm_lockres_grab_inflight_worker(dlm, res);
+			__dlm_lockres_grab_inflight_worker(dlm, res);
+		spin_unlock(&res->spinlock);
 	} else {
 		if (res)
 			dlm_lockres_put(res);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 69fb9f7..3950693 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
 
 		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
 					       phys_cpos, trunc_len, flags,
-					       &dealloc, refcount_loc);
+					       &dealloc, refcount_loc, false);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a..260ac8f 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
 	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
 	if (IS_ERR(child))
 		return PTR_ERR(child);
+	child->mnt.mnt_flags &= ~MNT_LOCKED;
 	mnt_set_mountpoint(m, mp, child);
 	last_dest = m;
 	last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda..3f3d7ae 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
 	.llseek		= seq_lseek,
 	.release	= proc_id_map_release,
 };
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+	struct user_namespace *ns = NULL;
+	struct task_struct *task;
+	int ret;
+
+	ret = -ESRCH;
+	task = get_proc_task(inode);
+	if (task) {
+		rcu_read_lock();
+		ns = get_user_ns(task_cred_xxx(task, user_ns));
+		rcu_read_unlock();
+		put_task_struct(task);
+	}
+	if (!ns)
+		goto err;
+
+	if (file->f_mode & FMODE_WRITE) {
+		ret = -EACCES;
+		if (!ns_capable(ns, CAP_SYS_ADMIN))
+			goto err_put_ns;
+	}
+
+	ret = single_open(file, &proc_setgroups_show, ns);
+	if (ret)
+		goto err_put_ns;
+
+	return 0;
+err_put_ns:
+	put_user_ns(ns);
+err:
+	return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	int ret = single_release(inode, file);
+	put_user_ns(ns);
+	return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+	.open		= proc_setgroups_open,
+	.write		= proc_setgroups_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= proc_setgroups_release,
+};
 #endif /* CONFIG_USER_NS */
 
 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	REG("timers",	  S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
 	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+	REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
 #endif
 };
 
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index aa1eee0..d3ebf2e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -12,6 +12,9 @@
 #include <linux/vmstat.h>
 #include <linux/atomic.h>
 #include <linux/vmalloc.h>
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#endif
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "internal.h"
@@ -138,6 +141,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		"AnonHugePages:  %8lu kB\n"
 #endif
+#ifdef CONFIG_CMA
+		"CmaTotal:       %8lu kB\n"
+		"CmaFree:        %8lu kB\n"
+#endif
 		,
 		K(i.totalram),
 		K(i.freeram),
@@ -187,12 +194,16 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		vmi.used >> 10,
 		vmi.largest_chunk >> 10
 #ifdef CONFIG_MEMORY_FAILURE
-		,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+		, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
 #endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+		, K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
 		   HPAGE_PMD_NR)
 #endif
+#ifdef CONFIG_CMA
+		, K(totalcma_pages)
+		, K(global_page_state(NR_FREE_CMA_PAGES))
+#endif
 		);
 
 	hugetlb_report_meminfo(m);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f..510413eb 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v)
 
 	/* sum again ? it could be updated? */
 	for_each_irq_nr(j)
-		seq_put_decimal_ull(p, ' ', kstat_irqs(j));
+		seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
 
 	seq_printf(p,
 		"\nctxt %llu\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca174..0f96f71 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -91,6 +91,7 @@ static void show_type(struct seq_file *m, struct super_block *sb)
 
 static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 {
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	int err = 0;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	}
 	seq_putc(m, ' ');
-	seq_path(m, &mnt_path, " \t\n\\");
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	if (err)
+		goto out;
 	seq_putc(m, ' ');
 	show_type(m, sb);
 	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-	struct path root = p->root;
 	int err = 0;
 
 	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 	seq_putc(m, ' ');
 
 	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
 	if (err)
 		goto out;
 
@@ -182,6 +185,7 @@ out:
 
 static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 {
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	struct super_block *sb = mnt_path.dentry->d_sb;
@@ -201,7 +205,10 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 
 	/* mount point */
 	seq_puts(m, " mounted on ");
-	seq_path(m, &mnt_path, " \t\n\\");
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	if (err)
+		goto out;
 	seq_putc(m, ' ');
 
 	/* file system type */
@@ -216,6 +223,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 	}
 
 	seq_putc(m, '\n');
+out:
 	return err;
 }
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 7581518e3..61e32ec 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -313,6 +313,7 @@ struct acpi_device_wakeup_flags {
 	u8 valid:1;		/* Can successfully enable wakeup? */
 	u8 run_wake:1;		/* Run-Wake GPE devices */
 	u8 notifier_present:1;  /* Wake-up notify handler has been installed */
+	u8 enabled:1;		/* Enabled for wakeup */
 };
 
 struct acpi_device_wakeup_context {
diff --git a/include/dt-bindings/thermal/tegra124-soctherm.h b/include/dt-bindings/thermal/tegra124-soctherm.h
new file mode 100644
index 0000000..85aaf66
--- /dev/null
+++ b/include/dt-bindings/thermal/tegra124-soctherm.h
@@ -0,0 +1,13 @@
+/*
+ * This header provides constants for binding nvidia,tegra124-soctherm.
+ */
+
+#ifndef _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H
+#define _DT_BINDINGS_THERMAL_TEGRA124_SOCTHERM_H
+
+#define TEGRA124_SOCTHERM_SENSOR_CPU 0
+#define TEGRA124_SOCTHERM_SENSOR_MEM 1
+#define TEGRA124_SOCTHERM_SENSOR_GPU 2
+#define TEGRA124_SOCTHERM_SENSOR_PLLX 3
+
+#endif
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ad9db60..b3f45a5 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -60,7 +60,8 @@ struct arch_timer_cpu {
 
 #ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_enable(struct kvm *kvm);
+void kvm_timer_init(struct kvm *kvm);
 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 			  const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void)
 	return 0;
 };
 
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-	return 0;
-}
-
+static inline void kvm_timer_enable(struct kvm *kvm) {}
+static inline void kvm_timer_init(struct kvm *kvm) {}
 static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 					const struct kvm_irq_level *irq) {}
 static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 206dcc3..ac4888d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -274,7 +274,7 @@ struct kvm_exit_mmio;
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
-#define vgic_initialized(k)	((k)->arch.vgic.ready)
+#define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
+#define vgic_ready(k)		((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
 		  const struct vgic_ops **ops,
@@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr,
 	return -ENXIO;
 }
 
-static inline int kvm_vgic_init(struct kvm *kvm)
+static inline int kvm_vgic_map_resources(struct kvm *kvm)
 {
 	return 0;
 }
@@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
 {
 	return true;
 }
+
+static inline bool vgic_ready(struct kvm *kvm)
+{
+	return true;
+}
 #endif
 
 #endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6bff83b..856d381 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -153,6 +153,7 @@ int acpi_unmap_lsapic(int cpu);
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base);
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base);
 void acpi_irq_stats_init(void);
 extern u32 acpi_irq_handled;
 extern u32 acpi_irq_not_handled;
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 5f33868..260d78b 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -13,6 +13,7 @@
 
 struct ceph_auth_client;
 struct ceph_authorizer;
+struct ceph_msg;
 
 struct ceph_auth_handshake {
 	struct ceph_authorizer *authorizer;
@@ -20,6 +21,10 @@ struct ceph_auth_handshake {
 	size_t authorizer_buf_len;
 	void *authorizer_reply_buf;
 	size_t authorizer_reply_buf_len;
+	int (*sign_message)(struct ceph_auth_handshake *auth,
+			    struct ceph_msg *msg);
+	int (*check_message_signature)(struct ceph_auth_handshake *auth,
+				       struct ceph_msg *msg);
 };
 
 struct ceph_auth_client_ops {
@@ -66,6 +71,11 @@ struct ceph_auth_client_ops {
 	void (*reset)(struct ceph_auth_client *ac);
 
 	void (*destroy)(struct ceph_auth_client *ac);
+
+	int (*sign_message)(struct ceph_auth_handshake *auth,
+			    struct ceph_msg *msg);
+	int (*check_message_signature)(struct ceph_auth_handshake *auth,
+				       struct ceph_msg *msg);
 };
 
 struct ceph_auth_client {
@@ -113,4 +123,20 @@ extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
 					    int peer_type);
 
+static inline int ceph_auth_sign_message(struct ceph_auth_handshake *auth,
+					 struct ceph_msg *msg)
+{
+	if (auth->sign_message)
+		return auth->sign_message(auth, msg);
+	return 0;
+}
+
+static inline
+int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
+				      struct ceph_msg *msg)
+{
+	if (auth->check_message_signature)
+		return auth->check_message_signature(auth, msg);
+	return 0;
+}
 #endif
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
index 07ad423..07ca15e 100644
--- a/include/linux/ceph/buffer.h
+++ b/include/linux/ceph/buffer.h
@@ -10,8 +10,7 @@
 /*
  * a simple reference counted buffer.
  *
- * use kmalloc for small sizes (<= one page), vmalloc for larger
- * sizes.
+ * use kmalloc for smaller sizes, vmalloc for larger sizes.
  */
 struct ceph_buffer {
 	struct kref kref;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index d12659c..71e05bb 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -84,6 +84,7 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
 	 CEPH_FEATURE_CRUSH_TUNABLES |		\
+	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
 	 CEPH_FEATURE_OSDHASHPSPOOL |		\
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 3c97d5e..c0dadaa 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -522,8 +522,11 @@ struct ceph_mds_reply_dirfrag {
 	__le32 dist[];
 } __attribute__ ((packed));
 
-#define CEPH_LOCK_FCNTL    1
-#define CEPH_LOCK_FLOCK    2
+#define CEPH_LOCK_FCNTL		1
+#define CEPH_LOCK_FLOCK		2
+#define CEPH_LOCK_FCNTL_INTR    3
+#define CEPH_LOCK_FLOCK_INTR    4
+
 
 #define CEPH_LOCK_SHARED   1
 #define CEPH_LOCK_EXCL     2
@@ -549,6 +552,7 @@ struct ceph_filelock {
 
 int ceph_flags_to_mode(int flags);
 
+#define CEPH_INLINE_NONE	((__u64)-1)
 
 /* capability bits */
 #define CEPH_CAP_PIN         1  /* no specific capabilities beyond the pin */
@@ -613,6 +617,8 @@ int ceph_flags_to_mode(int flags);
 				 CEPH_CAP_LINK_SHARED |	\
 				 CEPH_CAP_FILE_SHARED |	\
 				 CEPH_CAP_XATTR_SHARED)
+#define CEPH_STAT_CAP_INLINE_DATA (CEPH_CAP_FILE_SHARED | \
+				   CEPH_CAP_FILE_RD)
 
 #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED |			\
 			      CEPH_CAP_LINK_SHARED |			\
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 07bc359..8b11a79 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -29,6 +29,7 @@
 #define CEPH_OPT_NOSHARE          (1<<1) /* don't share client with other sbs */
 #define CEPH_OPT_MYIP             (1<<2) /* specified my ip */
 #define CEPH_OPT_NOCRC            (1<<3) /* no data crc on writes */
+#define CEPH_OPT_NOMSGAUTH	  (1<<4) /* not require cephx message signature */
 
 #define CEPH_OPT_DEFAULT   (0)
 
@@ -184,7 +185,6 @@ extern bool libceph_compatible(void *data);
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
 extern void *ceph_kvmalloc(size_t size, gfp_t flags);
-extern void ceph_kvfree(const void *ptr);
 
 extern struct ceph_options *ceph_parse_options(char *options,
 			      const char *dev_name, const char *dev_name_end,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 40ae58e..d9d396c 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -42,6 +42,10 @@ struct ceph_connection_operations {
 	struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
 					struct ceph_msg_header *hdr,
 					int *skip);
+	int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg);
+
+	int (*check_message_signature) (struct ceph_connection *con,
+					struct ceph_msg *msg);
 };
 
 /* use format string %s%d */
@@ -142,7 +146,10 @@ struct ceph_msg_data_cursor {
  */
 struct ceph_msg {
 	struct ceph_msg_header hdr;	/* header */
-	struct ceph_msg_footer footer;	/* footer */
+	union {
+		struct ceph_msg_footer footer;		/* footer */
+		struct ceph_msg_footer_old old_footer;	/* old format footer */
+	};
 	struct kvec front;              /* unaligned blobs of message */
 	struct ceph_buffer *middle;
 
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 3d94a73..1c18872 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -152,7 +152,8 @@ struct ceph_msg_header {
 			     receiver: mask against ~PAGE_MASK */
 
 	struct ceph_entity_name src;
-	__le32 reserved;
+	__le16 compat_version;
+	__le16 reserved;
 	__le32 crc;       /* header crc32c */
 } __attribute__ ((packed));
 
@@ -164,13 +165,21 @@ struct ceph_msg_header {
 /*
  * follows data payload
  */
+struct ceph_msg_footer_old {
+	__le32 front_crc, middle_crc, data_crc;
+	__u8 flags;
+} __attribute__ ((packed));
+
 struct ceph_msg_footer {
 	__le32 front_crc, middle_crc, data_crc;
+	/* sig holds the 64 bits of the digital signature for the message PLR */
+	__le64  sig;
 	__u8 flags;
 } __attribute__ ((packed));
 
 #define CEPH_MSG_FOOTER_COMPLETE  (1<<0)   /* msg wasn't aborted */
 #define CEPH_MSG_FOOTER_NOCRC     (1<<1)   /* no data crc */
+#define CEPH_MSG_FOOTER_SIGNED	  (1<<2)   /* msg was signed */
 
 
 #endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03aeb27..5d86416 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -87,6 +87,13 @@ struct ceph_osd_req_op {
 			struct ceph_osd_data osd_data;
 		} extent;
 		struct {
+			__le32 name_len;
+			__le32 value_len;
+			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
+			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
+			struct ceph_osd_data osd_data;
+		} xattr;
+		struct {
 			const char *class_name;
 			const char *method_name;
 			struct ceph_osd_data request_info;
@@ -295,6 +302,9 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
 					unsigned int which, u16 opcode,
 					const char *class, const char *method);
+extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
+				 u16 opcode, const char *name, const void *value,
+				 size_t size, u8 cmp_op, u8 cmp_mode);
 extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
 					unsigned int which, u16 opcode,
 					u64 cookie, u64 version, int flag);
@@ -318,7 +328,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      struct ceph_file_layout *layout,
 				      struct ceph_vino vino,
 				      u64 offset, u64 *len,
-				      int num_ops, int opcode, int flags,
+				      unsigned int which, int num_ops,
+				      int opcode, int flags,
 				      struct ceph_snap_context *snapc,
 				      u32 truncate_seq, u64 truncate_size,
 				      bool use_mempool);
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 5f871d8..13d71fe 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -1,8 +1,10 @@
 #ifndef __FS_CEPH_PAGELIST_H
 #define __FS_CEPH_PAGELIST_H
 
-#include <linux/list.h>
+#include <asm/byteorder.h>
 #include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/types.h>
 
 struct ceph_pagelist {
 	struct list_head head;
diff --git a/include/linux/clock_cooling.h b/include/linux/clock_cooling.h
new file mode 100644
index 0000000..4d1019d
--- /dev/null
+++ b/include/linux/clock_cooling.h
@@ -0,0 +1,65 @@
+/*
+ *  linux/include/linux/clock_cooling.h
+ *
+ *  Copyright (C) 2014 Eduardo Valentin <edubezval@gmail.com>
+ *
+ *  Copyright (C) 2013	Texas Instruments Inc.
+ *  Contact:  Eduardo Valentin <eduardo.valentin@ti.com>
+ *
+ *  Highly based on cpu_cooling.c.
+ *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
+ *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ */
+
+#ifndef __CLOCK_COOLING_H__
+#define __CLOCK_COOLING_H__
+
+#include <linux/of.h>
+#include <linux/thermal.h>
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_CLOCK_THERMAL
+/**
+ * clock_cooling_register - function to create clock cooling device.
+ * @dev: struct device pointer to the device used as clock cooling device.
+ * @clock_name: string containing the clock used as cooling mechanism.
+ */
+struct thermal_cooling_device *
+clock_cooling_register(struct device *dev, const char *clock_name);
+
+/**
+ * clock_cooling_unregister - function to remove clock cooling device.
+ * @cdev: thermal cooling device pointer.
+ */
+void clock_cooling_unregister(struct thermal_cooling_device *cdev);
+
+unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev,
+				      unsigned long freq);
+#else /* !CONFIG_CLOCK_THERMAL */
+static inline struct thermal_cooling_device *
+clock_cooling_register(struct device *dev, const char *clock_name)
+{
+	return NULL;
+}
+static inline
+void clock_cooling_unregister(struct thermal_cooling_device *cdev)
+{
+}
+static inline
+unsigned long clock_cooling_get_level(struct thermal_cooling_device *cdev,
+				      unsigned long freq)
+{
+	return THERMAL_CSTATE_INVALID;
+}
+#endif	/* CONFIG_CLOCK_THERMAL */
+
+#endif /* __CLOCK_COOLING_H__ */
diff --git a/include/linux/cma.h b/include/linux/cma.h
index a93438b..9384ba6 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -15,6 +15,7 @@
 
 struct cma;
 
+extern unsigned long totalcma_pages;
 extern phys_addr_t cma_get_base(struct cma *cma);
 extern unsigned long cma_get_size(struct cma *cma);
 
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b2d0820..2fb2ca2 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -68,6 +68,7 @@ extern void groups_free(struct group_info *);
 extern int set_current_groups(struct group_info *);
 extern void set_groups(struct cred *, struct group_info *);
 extern int groups_search(const struct group_info *, kgid_t);
+extern bool may_setgroups(void);
 
 /* access the groups "array" with this macro */
 #define GROUP_AT(gi, i) \
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index f1863dc..ce447f0 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -188,7 +188,7 @@ extern struct devfreq *devm_devfreq_add_device(struct device *dev,
 extern void devm_devfreq_remove_device(struct device *dev,
 				  struct devfreq *devfreq);
 
-/* Supposed to be called by PM_SLEEP/PM_RUNTIME callbacks */
+/* Supposed to be called by PM callbacks */
 extern int devfreq_suspend_device(struct devfreq *devfreq);
 extern int devfreq_resume_device(struct devfreq *devfreq);
 
diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h
index 84d60cb..bf0321e 100644
--- a/include/linux/fsl_ifc.h
+++ b/include/linux/fsl_ifc.h
@@ -29,7 +29,16 @@
 #include <linux/of_platform.h>
 #include <linux/interrupt.h>
 
-#define FSL_IFC_BANK_COUNT 4
+/*
+ * The actual number of banks implemented depends on the IFC version
+ *    - IFC version 1.0 implements 4 banks.
+ *    - IFC version 1.1 onward implements 8 banks.
+ */
+#define FSL_IFC_BANK_COUNT 8
+
+#define FSL_IFC_VERSION_MASK	0x0F0F0000
+#define FSL_IFC_VERSION_1_0_0	0x01000000
+#define FSL_IFC_VERSION_1_1_0	0x01010000
 
 /*
  * CSPR - Chip Select Property Register
@@ -776,23 +785,23 @@ struct fsl_ifc_regs {
 		__be32 cspr;
 		u32 res2;
 	} cspr_cs[FSL_IFC_BANK_COUNT];
-	u32 res3[0x19];
+	u32 res3[0xd];
 	struct {
 		__be32 amask;
 		u32 res4[0x2];
 	} amask_cs[FSL_IFC_BANK_COUNT];
-	u32 res5[0x18];
+	u32 res5[0xc];
 	struct {
 		__be32 csor;
 		__be32 csor_ext;
 		u32 res6;
 	} csor_cs[FSL_IFC_BANK_COUNT];
-	u32 res7[0x18];
+	u32 res7[0xc];
 	struct {
 		__be32 ftim[4];
 		u32 res8[0x8];
 	} ftim_cs[FSL_IFC_BANK_COUNT];
-	u32 res9[0x60];
+	u32 res9[0x30];
 	__be32 rb_stat;
 	u32 res10[0x2];
 	__be32 ifc_gcr;
@@ -827,6 +836,8 @@ struct fsl_ifc_ctrl {
 	int				nand_irq;
 	spinlock_t			lock;
 	void				*nand;
+	int				version;
+	int				banks;
 
 	u32 nand_stat;
 	wait_queue_head_t nand_wait;
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b9376cd..25a822f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -68,6 +68,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
  * Number of interrupts per specific IRQ source, since bootup
  */
 extern unsigned int kstat_irqs(unsigned int irq);
+extern unsigned int kstat_irqs_usr(unsigned int irq);
 
 /*
  * Number of interrupts per cpu, since bootup
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a6059bd..26f1060 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -43,6 +43,7 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
+#define KVM_MEMSLOT_INCOHERENT	(1UL << 17)
 
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
@@ -353,6 +354,8 @@ struct kvm_memslots {
 	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
 	/* The mapping table from slot id to the index in memslots[]. */
 	short id_to_index[KVM_MEM_SLOTS_NUM];
+	atomic_t lru_slot;
+	int used_slots;
 };
 
 struct kvm {
@@ -395,7 +398,6 @@ struct kvm {
 	 * Update side is protected by irq_lock.
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
-	struct hlist_head mask_notifier_list;
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	struct hlist_head irq_ack_notifier_list;
@@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+#else
+static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
@@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
-struct kvm_assigned_dev_kernel {
-	struct kvm_irq_ack_notifier ack_notifier;
-	struct list_head list;
-	int assigned_dev_id;
-	int host_segnr;
-	int host_busnr;
-	int host_devfn;
-	unsigned int entries_nr;
-	int host_irq;
-	bool host_irq_disabled;
-	bool pci_2_3;
-	struct msix_entry *host_msix_entries;
-	int guest_irq;
-	struct msix_entry *guest_msix_entries;
-	unsigned long irq_requested_type;
-	int irq_source_id;
-	int flags;
-	struct pci_dev *dev;
-	struct kvm *kvm;
-	spinlock_t intx_lock;
-	spinlock_t intx_mask_lock;
-	char irq_name[32];
-	struct pci_saved_state *pci_saved_state;
-};
-
-struct kvm_irq_mask_notifier {
-	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
-	int irq;
-	struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-				    struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-				      struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-			     bool mask);
-
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -770,12 +742,6 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-int kvm_assign_device(struct kvm *kvm,
-		      struct kvm_assigned_dev_kernel *assigned_dev);
-int kvm_deassign_device(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev);
 #else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
@@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
 					 struct kvm_memory_slot *slot)
 {
 }
-
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-	return 0;
-}
 #endif
 
 static inline void kvm_guest_enter(void)
@@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void)
 static inline struct kvm_memory_slot *
 search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 {
-	struct kvm_memory_slot *memslot;
+	int start = 0, end = slots->used_slots;
+	int slot = atomic_read(&slots->lru_slot);
+	struct kvm_memory_slot *memslots = slots->memslots;
+
+	if (gfn >= memslots[slot].base_gfn &&
+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
+		return &memslots[slot];
 
-	kvm_for_each_memslot(memslot, slots)
-		if (gfn >= memslot->base_gfn &&
-		      gfn < memslot->base_gfn + memslot->npages)
-			return memslot;
+	while (start < end) {
+		slot = start + (end - start) / 2;
+
+		if (gfn >= memslots[slot].base_gfn)
+			end = slot;
+		else
+			start = slot + 1;
+	}
+	/* start may end up == used_slots, which is past the last used slot */
+	if (start < slots->used_slots && gfn >= memslots[start].base_gfn &&
+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
+		atomic_set(&slots->lru_slot, start);
+		return &memslots[start];
+	}
 
 	return NULL;
 }
@@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
 
 #endif
 
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-				  unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-
-#else
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-						unsigned long arg)
-{
-	return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-
-#endif
-
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
 	set_bit(req, &vcpu->requests);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b606bb6..931da7e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -54,33 +54,6 @@ typedef u64            hfn_t;
 
 typedef hfn_t pfn_t;
 
-union kvm_ioapic_redirect_entry {
-	u64 bits;
-	struct {
-		u8 vector;
-		u8 delivery_mode:3;
-		u8 dest_mode:1;
-		u8 delivery_status:1;
-		u8 polarity:1;
-		u8 remote_irr:1;
-		u8 trig_mode:1;
-		u8 mask:1;
-		u8 reserve:7;
-		u8 reserved[4];
-		u8 dest_id;
-	} fields;
-};
-
-struct kvm_lapic_irq {
-	u32 vector;
-	u32 delivery_mode;
-	u32 dest_mode;
-	u32 level;
-	u32 trig_mode;
-	u32 shorthand;
-	u32 dest_id;
-};
-
 struct gfn_to_hva_cache {
 	u64 generation;
 	gpa_t gpa;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 01aad3e..fab9b32 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -36,9 +36,6 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
 
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
-extern int migrate_vmas(struct mm_struct *mm,
-		const nodemask_t *from, const nodemask_t *to,
-		unsigned long flags);
 extern void migrate_page_copy(struct page *newpage, struct page *page);
 extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
@@ -57,13 +54,6 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
 
-static inline int migrate_vmas(struct mm_struct *mm,
-		const nodemask_t *from, const nodemask_t *to,
-		unsigned long flags)
-{
-	return -ENOSYS;
-}
-
 static inline void migrate_page_copy(struct page *newpage,
 				     struct page *page) {}
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ea4f1c4..4e5bd81 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -120,6 +120,15 @@ enum {
 };
 
 enum {
+	MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
+};
+
+enum {
+	MLX5_PFAULT_SUBTYPE_WQE = 0,
+	MLX5_PFAULT_SUBTYPE_RDMA = 1,
+};
+
+enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
 	MLX5_PERM_REMOTE_READ	= 1 << 4,
@@ -180,6 +189,19 @@ enum {
 	MLX5_MKEY_MASK_FREE		= 1ull << 29,
 };
 
+enum {
+	MLX5_UMR_TRANSLATION_OFFSET_EN	= (1 << 4),
+
+	MLX5_UMR_CHECK_NOT_FREE		= (1 << 5),
+	MLX5_UMR_CHECK_FREE		= (2 << 5),
+
+	MLX5_UMR_INLINE			= (1 << 7),
+};
+
+#define MLX5_UMR_MTT_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP		   = 0x0,
 
@@ -206,6 +228,8 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_CMD		   = 0x0a,
 	MLX5_EVENT_TYPE_PAGE_REQUEST	   = 0xb,
+
+	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
 };
 
 enum {
@@ -225,6 +249,7 @@ enum {
 	MLX5_DEV_CAP_FLAG_APM		= 1LL << 17,
 	MLX5_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
 	MLX5_DEV_CAP_FLAG_BLOCK_MCAST	= 1LL << 23,
+	MLX5_DEV_CAP_FLAG_ON_DMND_PG	= 1LL << 24,
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
@@ -290,6 +315,8 @@ enum {
 enum {
 	HCA_CAP_OPMOD_GET_MAX	= 0,
 	HCA_CAP_OPMOD_GET_CUR	= 1,
+	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+	HCA_CAP_OPMOD_GET_ODP_CUR = 5
 };
 
 struct mlx5_inbox_hdr {
@@ -319,6 +346,23 @@ struct mlx5_cmd_query_adapter_mbox_out {
 	u8			vsd_psid[16];
 };
 
+enum mlx5_odp_transport_cap_bits {
+	MLX5_ODP_SUPPORT_SEND	 = 1 << 31,
+	MLX5_ODP_SUPPORT_RECV	 = 1 << 30,
+	MLX5_ODP_SUPPORT_WRITE	 = 1 << 29,
+	MLX5_ODP_SUPPORT_READ	 = 1 << 28,
+};
+
+struct mlx5_odp_caps {
+	char reserved[0x10];
+	struct {
+		__be32			rc_odp_caps;
+		__be32			uc_odp_caps;
+		__be32			ud_odp_caps;
+	} per_transport_caps;
+	char reserved2[0xe4];
+};
+
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
@@ -439,6 +483,27 @@ struct mlx5_eqe_page_req {
 	__be32		rsvd1[5];
 };
 
+struct mlx5_eqe_page_fault {
+	__be32 bytes_committed;
+	union {
+		struct {
+			u16     reserved1;
+			__be16  wqe_index;
+			u16	reserved2;
+			__be16  packet_length;
+			u8	reserved3[12];
+		} __packed wqe;
+		struct {
+			__be32  r_key;
+			u16	reserved1;
+			__be16  packet_length;
+			__be32  rdma_op_len;
+			__be64  rdma_va;
+		} __packed rdma;
+	} __packed;
+	__be32 flags_qpn;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -450,6 +515,7 @@ union ev_data {
 	struct mlx5_eqe_congestion	cong;
 	struct mlx5_eqe_stall_vl	stall_vl;
 	struct mlx5_eqe_page_req	req_pages;
+	struct mlx5_eqe_page_fault	page_fault;
 } __packed;
 
 struct mlx5_eqe {
@@ -776,6 +842,10 @@ struct mlx5_query_eq_mbox_out {
 	struct mlx5_eq_context	ctx;
 };
 
+enum {
+	MLX5_MKEY_STATUS_FREE = 1 << 6,
+};
+
 struct mlx5_mkey_seg {
 	/* This is a two bit field occupying bits 31-30.
 	 * bit 31 is always 0,
@@ -812,7 +882,7 @@ struct mlx5_query_special_ctxs_mbox_out {
 struct mlx5_create_mkey_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	__be32			input_mkey_index;
-	u8			rsvd0[4];
+	__be32			flags;
 	struct mlx5_mkey_seg	seg;
 	u8			rsvd1[16];
 	__be32			xlat_oct_act_size;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b1bf415..166d931 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -113,6 +113,13 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum mlx5_page_fault_resume_flags {
+	MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+	MLX5_PAGE_FAULT_RESUME_WRITE	 = 1 << 1,
+	MLX5_PAGE_FAULT_RESUME_RDMA	 = 1 << 2,
+	MLX5_PAGE_FAULT_RESUME_ERROR	 = 1 << 7,
+};
+
 enum dbg_rsc_type {
 	MLX5_DBG_RSC_QP,
 	MLX5_DBG_RSC_EQ,
@@ -467,7 +474,7 @@ struct mlx5_priv {
 	struct workqueue_struct *pg_wq;
 	struct rb_root		page_root;
 	int			fw_pages;
-	int			reg_pages;
+	atomic_t		reg_pages;
 	struct list_head	free_list;
 
 	struct mlx5_core_health health;
@@ -703,6 +710,9 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+			struct mlx5_odp_caps *odp_caps);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3fa075d..61f7a34 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,6 +50,9 @@
 #define MLX5_BSF_APPTAG_ESCAPE	0x1
 #define MLX5_BSF_APPREF_ESCAPE	0x2
 
+#define MLX5_QPN_BITS		24
+#define MLX5_QPN_MASK		((1 << MLX5_QPN_BITS) - 1)
+
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH		= 1 << 0,
 	MLX5_QP_OPTPAR_RRE			= 1 << 1,
@@ -189,6 +192,14 @@ struct mlx5_wqe_ctrl_seg {
 	__be32			imm;
 };
 
+#define MLX5_WQE_CTRL_DS_MASK 0x3f
+#define MLX5_WQE_CTRL_QPN_MASK 0xffffff00
+#define MLX5_WQE_CTRL_QPN_SHIFT 8
+#define MLX5_WQE_DS_UNITS 16
+#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
+#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
+#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+
 struct mlx5_wqe_xrc_seg {
 	__be32			xrc_srqn;
 	u8			rsvd[12];
@@ -292,6 +303,8 @@ struct mlx5_wqe_signature_seg {
 	u8	rsvd1[11];
 };
 
+#define MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK 0x3ff
+
 struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
 };
@@ -360,9 +373,46 @@ struct mlx5_stride_block_ctrl_seg {
 	__be16		num_entries;
 };
 
+enum mlx5_pagefault_flags {
+	MLX5_PFAULT_REQUESTOR = 1 << 0,
+	MLX5_PFAULT_WRITE     = 1 << 1,
+	MLX5_PFAULT_RDMA      = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+	u32			bytes_committed;
+	u8			event_subtype;
+	enum mlx5_pagefault_flags flags;
+	union {
+		/* Initiator or send message responder pagefault details. */
+		struct {
+			/* Received packet size, only valid for responders. */
+			u32	packet_size;
+			/*
+			 * WQE index. Refers to either the send queue or
+			 * receive queue, according to event_subtype.
+			 */
+			u16	wqe_index;
+		} wqe;
+		/* RDMA responder pagefault details */
+		struct {
+			u32	r_key;
+			/*
+			 * Received packet size, minimal size page fault
+			 * resolution required for forward progress.
+			 */
+			u32	packet_size;
+			u32	rdma_op_len;
+			u64	rdma_va;
+		} rdma;
+	};
+};
+
 struct mlx5_core_qp {
 	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
+	void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
@@ -530,6 +580,17 @@ static inline struct mlx5_core_mr *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u
 	return radix_tree_lookup(&dev->priv.mr_table.tree, key);
 }
 
+struct mlx5_page_fault_resume_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			flags_qpn;
+	u8			reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -549,6 +610,10 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 context, int error);
+#endif
 
 static inline const char *mlx5_qp_type_str(int type)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0a67b8..f80d019 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -286,8 +286,6 @@ struct vm_operations_struct {
 	 */
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
-	int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
-		const nodemask_t *to, unsigned long flags);
 #endif
 	/* called by sys_remap_file_pages() to populate non-linear mapping */
 	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/module.h b/include/linux/module.h
index 71f282a..ebfb0e1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -210,20 +210,6 @@ enum module_state {
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
-/**
- * struct module_ref - per cpu module reference counts
- * @incs: number of module get on this cpu
- * @decs: number of module put on this cpu
- *
- * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
- * put @incs/@decs in same cache line, with no extra memory cost,
- * since alloc_percpu() is fine grained.
- */
-struct module_ref {
-	unsigned long incs;
-	unsigned long decs;
-} __attribute((aligned(2 * sizeof(unsigned long))));
-
 struct module {
 	enum module_state state;
 
@@ -367,7 +353,7 @@ struct module {
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref __percpu *refptr;
+	atomic_t refcnt;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index e4d451e..3d4ea7e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -455,8 +455,21 @@ struct nand_hw_control {
  *		be provided if an hardware ECC is available
  * @calculate:	function for ECC calculation or readback from ECC hardware
  * @correct:	function for ECC correction, matching to ECC generator (sw/hw)
- * @read_page_raw:	function to read a raw page without ECC
- * @write_page_raw:	function to write a raw page without ECC
+ * @read_page_raw:	function to read a raw page without ECC. This function
+ *			should hide the specific layout used by the ECC
+ *			controller and always return contiguous in-band and
+ *			out-of-band data even if they're not stored
+ *			contiguously on the NAND chip (e.g.
+ *			NAND_ECC_HW_SYNDROME interleaves in-band and
+ *			out-of-band data).
+ * @write_page_raw:	function to write a raw page without ECC. This function
+ *			should hide the specific layout used by the ECC
+ *			controller and consider the passed data as contiguous
+ *			in-band and out-of-band data. ECC controller is
+ *			responsible for doing the appropriate transformations
+ *			to adapt to its specific layout (e.g.
+ *			NAND_ECC_HW_SYNDROME interleaves in-band and
+ *			out-of-band data).
  * @read_page:	function to read a page according to the ECC generator
  *		requirements; returns maximum number of bitflips corrected in
  *		any single ECC step, 0 if bitflips uncorrectable, -EIO hw error
@@ -723,6 +736,7 @@ struct nand_chip {
 #define NAND_MFR_EON		0x92
 #define NAND_MFR_SANDISK	0x45
 #define NAND_MFR_INTEL		0x89
+#define NAND_MFR_ATO		0x9b
 
 /* The maximum expected count of bytes in the NAND ID sequence */
 #define NAND_MAX_ID_LEN 8
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 046a0a2..63aeccf 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -116,6 +116,10 @@ enum spi_nor_ops {
 	SPI_NOR_OPS_UNLOCK,
 };
 
+enum spi_nor_option_flags {
+	SNOR_F_USE_FSR		= BIT(0),
+};
+
 /**
  * struct spi_nor - Structure for defining a the SPI NOR layer
  * @mtd:		point to a mtd_info structure
@@ -129,6 +133,7 @@ enum spi_nor_ops {
  * @program_opcode:	the program opcode
  * @flash_read:		the mode of the read
  * @sst_write_second:	used by the SST write operation
+ * @flags:		flag options for the current SPI-NOR (SNOR_F_*)
  * @cfg:		used by the read_xfer/write_xfer
  * @cmd_buf:		used by the write_reg
  * @prepare:		[OPTIONAL] do some preparations for the
@@ -139,9 +144,6 @@ enum spi_nor_ops {
  * @write_xfer:		[OPTIONAL] the writefundamental primitive
  * @read_reg:		[DRIVER-SPECIFIC] read out the register
  * @write_reg:		[DRIVER-SPECIFIC] write data to the register
- * @read_id:		[REPLACEABLE] read out the ID data, and find
- *			the proper spi_device_id
- * @wait_till_ready:	[REPLACEABLE] wait till the NOR becomes ready
  * @read:		[DRIVER-SPECIFIC] read data from the SPI NOR
  * @write:		[DRIVER-SPECIFIC] write data to the SPI NOR
  * @erase:		[DRIVER-SPECIFIC] erase a sector of the SPI NOR
@@ -160,6 +162,7 @@ struct spi_nor {
 	u8			program_opcode;
 	enum read_mode		flash_read;
 	bool			sst_write_second;
+	u32			flags;
 	struct spi_nor_xfer_cfg	cfg;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
 
@@ -172,8 +175,6 @@ struct spi_nor {
 	int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
 	int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
 			int write_enable);
-	const struct spi_device_id *(*read_id)(struct spi_nor *nor);
-	int (*wait_till_ready)(struct spi_nor *nor);
 
 	int (*read)(struct spi_nor *nor, loff_t from,
 			size_t len, size_t *retlen, u_char *read_buf);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 44a2769..360a966 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -349,6 +349,7 @@ struct pci_dev {
 	unsigned int	__aer_firmware_first:1;
 	unsigned int	broken_intx_masking:1;
 	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
+	unsigned int	irq_managed:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index f2ca1b4..7e75bfe 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -11,7 +11,7 @@ struct fixed_phy_status {
 
 struct device_node;
 
-#ifdef CONFIG_FIXED_PHY
+#if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
 extern struct phy_device *fixed_phy_register(unsigned int irq,
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 66a656e..8b59763 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -351,8 +351,6 @@ struct dev_pm_ops {
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
 #endif
 
-#define SET_PM_RUNTIME_PM_OPS	SET_RUNTIME_PM_OPS
-
 /*
  * Use this if you want to use the same suspend and resume callbacks for suspend
  * to RAM and hibernation.
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index ef90838..c611a02 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -29,10 +29,10 @@
 #include <linux/idr.h>
 #include <linux/device.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/thermal.h>
 
 #define THERMAL_TRIPS_NONE	-1
 #define THERMAL_MAX_TRIPS	12
-#define THERMAL_NAME_LENGTH	20
 
 /* invalid cooling state */
 #define THERMAL_CSTATE_INVALID -1UL
@@ -49,11 +49,6 @@
 #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off))
 #define MILLICELSIUS_TO_DECI_KELVIN(t) MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, 2732)
 
-/* Adding event notification support elements */
-#define THERMAL_GENL_FAMILY_NAME                "thermal_event"
-#define THERMAL_GENL_VERSION                    0x01
-#define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_grp"
-
 /* Default Thermal Governor */
 #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
 #define DEFAULT_THERMAL_GOVERNOR       "step_wise"
@@ -86,30 +81,6 @@ enum thermal_trend {
 	THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */
 };
 
-/* Events supported by Thermal Netlink */
-enum events {
-	THERMAL_AUX0,
-	THERMAL_AUX1,
-	THERMAL_CRITICAL,
-	THERMAL_DEV_FAULT,
-};
-
-/* attributes of thermal_genl_family */
-enum {
-	THERMAL_GENL_ATTR_UNSPEC,
-	THERMAL_GENL_ATTR_EVENT,
-	__THERMAL_GENL_ATTR_MAX,
-};
-#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
-
-/* commands supported by the thermal_genl_family */
-enum {
-	THERMAL_GENL_CMD_UNSPEC,
-	THERMAL_GENL_CMD_EVENT,
-	__THERMAL_GENL_CMD_MAX,
-};
-#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
-
 struct thermal_zone_device_ops {
 	int (*bind) (struct thermal_zone_device *,
 		     struct thermal_cooling_device *);
@@ -289,19 +260,49 @@ struct thermal_genl_event {
 	enum events event;
 };
 
+/**
+ * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
+ *
+ * Mandatory:
+ * @get_temp: a pointer to a function that reads the sensor temperature.
+ *
+ * Optional:
+ * @get_trend: a pointer to a function that reads the sensor temperature trend.
+ * @set_emul_temp: a pointer to a function that sets sensor emulated
+ *		   temperature.
+ */
+struct thermal_zone_of_device_ops {
+	int (*get_temp)(void *, long *);
+	int (*get_trend)(void *, long *);
+	int (*set_emul_temp)(void *, unsigned long);
+};
+
+/**
+ * struct thermal_trip - representation of a point in temperature domain
+ * @np: pointer to struct device_node that this trip point was created from
+ * @temperature: temperature value in milliCelsius
+ * @hysteresis: relative hysteresis in milliCelsius
+ * @type: trip point type
+ */
+
+struct thermal_trip {
+	struct device_node *np;
+	unsigned long int temperature;
+	unsigned long int hysteresis;
+	enum thermal_trip_type type;
+};
+
 /* Function declarations */
 #ifdef CONFIG_THERMAL_OF
 struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int id,
-				void *data, int (*get_temp)(void *, long *),
-				int (*get_trend)(void *, long *));
+thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
+				const struct thermal_zone_of_device_ops *ops);
 void thermal_zone_of_sensor_unregister(struct device *dev,
 				       struct thermal_zone_device *tz);
 #else
 static inline struct thermal_zone_device *
-thermal_zone_of_sensor_register(struct device *dev, int id,
-				void *data, int (*get_temp)(void *, long *),
-				int (*get_trend)(void *, long *))
+thermal_zone_of_sensor_register(struct device *dev, int id, void *data,
+				const struct thermal_zone_of_device_ops *ops)
 {
 	return NULL;
 }
diff --git a/include/linux/uio.h b/include/linux/uio.h
index a41e252..1c5e453 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -101,6 +101,11 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
+static inline bool iter_is_iovec(struct iov_iter *i)
+{
+	return !(i->type & (ITER_BVEC | ITER_KVEC));
+}
+
 /*
  * Cap the iov_iter by given limit; note that the second argument is
  * *not* the new size - it's upper limit for such.  Passing it a value
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4cf06c1..8297e5b 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -18,6 +18,10 @@ struct uid_gid_map {	/* 64 bytes -- 1 cache line */
 	} extent[UID_GID_MAP_MAX_EXTENTS];
 };
 
+#define USERNS_SETGROUPS_ALLOWED 1UL
+
+#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -28,6 +32,7 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	struct ns_common	ns;
+	unsigned long		flags;
 
 	/* Register of per-UID persistent keyrings for this namespace */
 #ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -64,6 +69,9 @@ extern const struct seq_operations proc_projid_seq_operations;
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
+extern int proc_setgroups_show(struct seq_file *m, void *v);
+extern bool userns_may_setgroups(const struct user_namespace *ns);
 #else
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -88,6 +96,10 @@ static inline void put_user_ns(struct user_namespace *ns)
 {
 }
 
+static inline bool userns_may_setgroups(const struct user_namespace *ns)
+{
+	return true;
+}
 #endif
 
 #endif /* _LINUX_USER_H */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index d09e093..28f0e65 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -81,7 +81,7 @@ void *virtqueue_get_used(struct virtqueue *vq);
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus
- * @failed: saved value for CONFIG_S_FAILED bit (for restore)
+ * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
  * @config_enabled: configuration change reporting enabled
  * @config_change_pending: configuration change reported while disabled
  * @config_lock: protects configuration change reporting
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7979f85..ca3ed78 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -19,6 +19,9 @@
  *	offset: the offset of the configuration field
  *	buf: the buffer to read the field value from.
  *	len: the length of the buffer
+ * @generation: config generation counter
+ *	vdev: the virtio_device
+ *	Returns the config generation counter
  * @get_status: read the status byte
  *	vdev: the virtio_device
  *	Returns the status byte
@@ -60,6 +63,7 @@ struct virtio_config_ops {
 		    void *buf, unsigned len);
 	void (*set)(struct virtio_device *vdev, unsigned offset,
 		    const void *buf, unsigned len);
+	u32 (*generation)(struct virtio_device *vdev);
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
@@ -301,11 +305,33 @@ static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
 	return ret;
 }
 
+/* Read @count fields, @bytes each. */
+static inline void __virtio_cread_many(struct virtio_device *vdev,
+				       unsigned int offset,
+				       void *buf, size_t count, size_t bytes)
+{
+	u32 old, gen = vdev->config->generation ?
+		vdev->config->generation(vdev) : 0;
+	int i;
+
+	do {
+		old = gen;
+
+		for (i = 0; i < count; i++)
+			vdev->config->get(vdev, offset + bytes * i,
+					  buf + i * bytes, bytes);
+
+		gen = vdev->config->generation ?
+			vdev->config->generation(vdev) : 0;
+	} while (gen != old);
+}
+
+
 static inline void virtio_cread_bytes(struct virtio_device *vdev,
 				      unsigned int offset,
 				      void *buf, size_t len)
 {
-	vdev->config->get(vdev, offset, buf, len);
+	__virtio_cread_many(vdev, offset, buf, len, 1);
 }
 
 static inline void virtio_cwrite8(struct virtio_device *vdev,
@@ -349,6 +375,7 @@ static inline u64 virtio_cread64(struct virtio_device *vdev,
 {
 	u64 ret;
-	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	/* Generation-checked read; retried if the config changed mid-read. */
+	__virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
 	return virtio64_to_cpu(vdev, (__force __virtio64)ret);
 }
 
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 749cde2..a3fa537 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -24,12 +24,16 @@
 #ifndef _LINUX_VRINGH_H
 #define _LINUX_VRINGH_H
 #include <uapi/linux/virtio_ring.h>
+#include <linux/virtio_byteorder.h>
 #include <linux/uio.h>
 #include <linux/slab.h>
 #include <asm/barrier.h>
 
 /* virtio_ring with information needed for host access. */
 struct vringh {
+	/* Everything is little endian */
+	bool little_endian;
+
 	/* Guest publishes used event idx (note: we always do). */
 	bool event_indices;
 
@@ -105,7 +109,7 @@ struct vringh_kiov {
 #define VRINGH_IOV_ALLOCATED 0x8000000
 
 /* Helpers for userspace vrings. */
-int vringh_init_user(struct vringh *vrh, u32 features,
+int vringh_init_user(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc __user *desc,
 		     struct vring_avail __user *avail,
@@ -167,7 +171,7 @@ bool vringh_notify_enable_user(struct vringh *vrh);
 void vringh_notify_disable_user(struct vringh *vrh);
 
 /* Helpers for kernelspace vrings. */
-int vringh_init_kern(struct vringh *vrh, u32 features,
+int vringh_init_kern(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc *desc,
 		     struct vring_avail *avail,
@@ -222,4 +226,33 @@ static inline void vringh_notify(struct vringh *vrh)
 		vrh->notify(vrh);
 }
 
+static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
+{
+	return __virtio16_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val)
+{
+	return __cpu_to_virtio16(vrh->little_endian, val);
+}
+
+static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val)
+{
+	return __virtio32_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val)
+{
+	return __cpu_to_virtio32(vrh->little_endian, val);
+}
+
+static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val)
+{
+	return __virtio64_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
+{
+	return __cpu_to_virtio64(vrh->little_endian, val);
+}
 #endif /* _LINUX_VRINGH_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a2bf41e..2d83cfd 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,11 +38,12 @@
 #include <linux/workqueue.h>
 
 struct ib_ucontext;
+struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
 	size_t			length;
-	int			offset;
+	unsigned long		address;
 	int			page_size;
 	int                     writable;
 	int                     hugetlb;
@@ -50,17 +51,43 @@ struct ib_umem {
 	struct pid             *pid;
 	struct mm_struct       *mm;
 	unsigned long		diff;
+	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int             nmap;
 	int             npages;
 };
 
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+	return umem->address & ((unsigned long)umem->page_size - 1);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+	return umem->address - ib_umem_offset(umem);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+	return PAGE_ALIGN(umem->address + umem->length);
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+	return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
+}
+
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -73,7 +100,10 @@ static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
 static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      		    size_t length) {
+	return -EINVAL;
+}
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 0000000..3da0b16
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+struct umem_odp_node {
+	u64 __subtree_last;
+	struct rb_node rb;
+};
+
+struct ib_umem_odp {
+	/*
+	 * An array of the pages included in the on-demand paging umem.
+	 * Indices of pages that are currently not mapped into the device will
+	 * contain NULL.
+	 */
+	struct page		**page_list;
+	/*
+	 * An array of the same size as page_list, with DMA addresses mapped
+	 * for the pages in page_list. The lower two bits designate
+	 * access permissions. See ODP_READ_ALLOWED_BIT and
+	 * ODP_WRITE_ALLOWED_BIT.
+	 */
+	dma_addr_t		*dma_list;
+	/*
+	 * The umem_mutex protects the page_list and dma_list fields of an ODP
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
+	 */
+	struct mutex		umem_mutex;
+	void			*private; /* for the HW driver to use. */
+
+	/* When false, use the notifier counter in the ucontext struct. */
+	bool mn_counters_active;
+	int notifiers_seq;
+	int notifiers_count;
+
+	/* A linked list of umems that don't have private mmu notifier
+	 * counters yet. */
+	struct list_head no_private_counters;
+	struct ib_umem		*umem;
+
+	/* Tree tracking */
+	struct umem_odp_node	interval_tree;
+
+	struct completion	notifier_completion;
+	int			dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+				 u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+			      void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+				  umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+					     u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+					    u64 start, u64 last);
+
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+					     unsigned long mmu_seq)
+{
+	/*
+	 * This code is strongly based on the KVM code from
+	 * mmu_notifier_retry. Should be called with
+	 * the relevant locks taken (item->odp_data->umem_mutex
+	 * and the ucontext umem_rwsem semaphore locked for read).
+	 */
+
+	/* Do not allow page faults while the new ib_umem hasn't seen a state
+	 * with zero notifiers yet, and doesn't have its own valid set of
+	 * private counters. */
+	if (!item->odp_data->mn_counters_active)
+		return 1;
+
+	if (unlikely(item->odp_data->notifiers_count))
+		return 1;
+	if (item->odp_data->notifiers_seq != mmu_seq)
+		return 1;
+	return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline int ib_umem_odp_get(struct ib_ucontext *context,
+				  struct ib_umem *umem)
+{
+	return -EINVAL;
+}
+
+static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011..0d74f1d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@
 #include <uapi/linux/if_ether.h>
 
 #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@ enum ib_device_cap_flags {
 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
+	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
+	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
 };
 
 enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@ enum ib_atomic_cap {
 	IB_ATOMIC_GLOB
 };
 
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 1 << 0,
+	IB_ODP_SUPPORT_RECV	= 1 << 1,
+	IB_ODP_SUPPORT_WRITE	= 1 << 2,
+	IB_ODP_SUPPORT_READ	= 1 << 3,
+	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+};
+
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t  rc_odp_caps;
+		uint32_t  uc_odp_caps;
+		uint32_t  ud_odp_caps;
+	} per_transport_caps;
+};
+
 struct ib_device_attr {
 	u64			fw_ver;
 	__be64			sys_image_guid;
@@ -186,6 +209,7 @@ struct ib_device_attr {
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
 	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
 };
 
 enum ib_mtu {
@@ -1073,7 +1097,8 @@ enum ib_access_flags {
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5)
+	IB_ZERO_BASED		= (1<<5),
+	IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
 struct ib_phys_buf {
@@ -1115,6 +1140,8 @@ struct ib_fmr_attr {
 	u8	page_shift;
 };
 
+struct ib_umem;
+
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct list_head	pd_list;
@@ -1127,6 +1154,24 @@ struct ib_ucontext {
 	struct list_head	xrcd_list;
 	struct list_head	rule_list;
 	int			closing;
+
+	struct pid             *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct rb_root      umem_tree;
+	/*
+	 * Protects the umem_tree rb-tree, as well as odp_mrs_count and
+	 * mmu notifiers registration.
+	 */
+	struct rw_semaphore	umem_rwsem;
+	void (*invalidate_range)(struct ib_umem *umem,
+				 unsigned long start, unsigned long end);
+
+	struct mmu_notifier	mn;
+	atomic_t		notifier_count;
+	/* A list of umems that don't have private mmu notifier counters yet. */
+	struct list_head	no_private_counters;
+	int                     odp_mrs_count;
+#endif
 };
 
 struct ib_uobject {
@@ -1662,7 +1707,10 @@ static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t
 
 static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
 {
-	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+	size_t copy_sz;
+
+	copy_sz = min_t(size_t, len, udata->outlen);
+	return copy_to_user(udata->outbuf, src, copy_sz) ? -EFAULT : 0;
 }
 
 /**
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 9d87a37..dae99d7 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -688,7 +688,6 @@ extern int sas_queuecommand(struct Scsi_Host * ,struct scsi_cmnd *);
 extern int sas_target_alloc(struct scsi_target *);
 extern int sas_slave_configure(struct scsi_device *);
 extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
-extern int sas_change_queue_type(struct scsi_device *, int qt);
 extern int sas_bios_param(struct scsi_device *,
 			  struct block_device *,
 			  sector_t capacity, int *hsc);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6364e23..3a4edd1 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -441,13 +441,13 @@ static inline int scsi_execute_req(struct scsi_device *sdev,
 extern void sdev_disable_disk_events(struct scsi_device *sdev);
 extern void sdev_enable_disk_events(struct scsi_device *sdev);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 extern int scsi_autopm_get_device(struct scsi_device *);
 extern void scsi_autopm_put_device(struct scsi_device *);
 #else
 static inline int scsi_autopm_get_device(struct scsi_device *d) { return 0; }
 static inline void scsi_autopm_put_device(struct scsi_device *d) {}
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
 {
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e939d2b..019e668 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -278,19 +278,6 @@ struct scsi_host_template {
 	int (* change_queue_depth)(struct scsi_device *, int);
 
 	/*
-	 * Fill in this function to allow the changing of tag types
-	 * (this also allows the enabling/disabling of tag command
-	 * queueing).  An error should only be returned if something
-	 * went wrong in the driver while trying to set the tag type.
-	 * If the driver doesn't support the requested tag type, then
-	 * it should set the closest type it does support without
-	 * returning an error.  Returns the actual tag type set.
-	 *
-	 * Status: OPTIONAL
-	 */
-	int (* change_queue_type)(struct scsi_device *, int);
-
-	/*
 	 * This function determines the BIOS parameters for a given
 	 * harddisk.  These tend to be numbers that are made up by
 	 * the host adapter.  Parameters:
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index fe4a702..9708b28 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -6,46 +6,10 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-#define MSG_SIMPLE_TAG	0x20
-#define MSG_HEAD_TAG	0x21
-#define MSG_ORDERED_TAG	0x22
-#define MSG_ACA_TAG	0x24	/* unsupported */
-
 #define SCSI_NO_TAG	(-1)    /* identify no tag in use */
 
 
 #ifdef CONFIG_BLOCK
-
-int scsi_change_queue_type(struct scsi_device *sdev, int tag_type);
-
-/**
- * scsi_get_tag_type - get the type of tag the device supports
- * @sdev:	the scsi device
- */
-static inline int scsi_get_tag_type(struct scsi_device *sdev)
-{
-	if (!sdev->tagged_supported)
-		return 0;
-	if (sdev->simple_tags)
-		return MSG_SIMPLE_TAG;
-	return 0;
-}
-
-static inline void scsi_set_tag_type(struct scsi_device *sdev, int tag)
-{
-	switch (tag) {
-	case MSG_ORDERED_TAG:
-	case MSG_SIMPLE_TAG:
-		sdev->simple_tags = 1;
-		break;
-	case 0:
-		/* fall through */
-	default:
-		sdev->simple_tags = 0;
-		break;
-	}
-}
-
 static inline struct scsi_cmnd *scsi_mq_find_tag(struct Scsi_Host *shost,
 						 int unique_tag)
 {
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 9adc1bc..430cfaf 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -5,6 +5,15 @@
 #define TRANSPORT_PLUGIN_VHBA_PDEV		2
 #define TRANSPORT_PLUGIN_VHBA_VDEV		3
 
+struct target_backend_cits {
+	struct config_item_type tb_dev_cit;
+	struct config_item_type tb_dev_attrib_cit;
+	struct config_item_type tb_dev_pr_cit;
+	struct config_item_type tb_dev_wwn_cit;
+	struct config_item_type tb_dev_alua_tg_pt_gps_cit;
+	struct config_item_type tb_dev_stat_cit;
+};
+
 struct se_subsystem_api {
 	struct list_head sub_api_list;
 
@@ -44,6 +53,8 @@ struct se_subsystem_api {
 	int (*init_prot)(struct se_device *);
 	int (*format_prot)(struct se_device *);
 	void (*free_prot)(struct se_device *);
+
+	struct target_backend_cits tb_cits;
 };
 
 struct sbc_ops {
@@ -96,4 +107,36 @@ sense_reason_t	transport_generic_map_mem_to_cmd(struct se_cmd *,
 
 void	array_free(void *array, int n);
 
+/* From target_core_configfs.c to setup default backend config_item_types */
+void	target_core_setup_sub_cits(struct se_subsystem_api *);
+
+/* attribute helpers from target_core_device.c for backend drivers */
+int	se_dev_set_max_unmap_lba_count(struct se_device *, u32);
+int	se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
+int	se_dev_set_unmap_granularity(struct se_device *, u32);
+int	se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
+int	se_dev_set_max_write_same_len(struct se_device *, u32);
+int	se_dev_set_emulate_model_alias(struct se_device *, int);
+int	se_dev_set_emulate_dpo(struct se_device *, int);
+int	se_dev_set_emulate_fua_write(struct se_device *, int);
+int	se_dev_set_emulate_fua_read(struct se_device *, int);
+int	se_dev_set_emulate_write_cache(struct se_device *, int);
+int	se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
+int	se_dev_set_emulate_tas(struct se_device *, int);
+int	se_dev_set_emulate_tpu(struct se_device *, int);
+int	se_dev_set_emulate_tpws(struct se_device *, int);
+int	se_dev_set_emulate_caw(struct se_device *, int);
+int	se_dev_set_emulate_3pc(struct se_device *, int);
+int	se_dev_set_pi_prot_type(struct se_device *, int);
+int	se_dev_set_pi_prot_format(struct se_device *, int);
+int	se_dev_set_enforce_pr_isids(struct se_device *, int);
+int	se_dev_set_force_pr_aptpl(struct se_device *, int);
+int	se_dev_set_is_nonrot(struct se_device *, int);
+int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
+int	se_dev_set_queue_depth(struct se_device *, u32);
+int	se_dev_set_max_sectors(struct se_device *, u32);
+int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
+int	se_dev_set_optimal_sectors(struct se_device *, u32);
+int	se_dev_set_block_size(struct se_device *, u32);
+
 #endif /* TARGET_CORE_BACKEND_H */
diff --git a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h
new file mode 100644
index 0000000..3247d75
--- /dev/null
+++ b/include/target/target_core_backend_configfs.h
@@ -0,0 +1,120 @@
+#ifndef TARGET_CORE_BACKEND_CONFIGFS_H
+#define TARGET_CORE_BACKEND_CONFIGFS_H
+
+#include <target/configfs_macros.h>
+
+#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name)				\
+static ssize_t _backend##_dev_show_attr_##_name(			\
+	struct se_dev_attrib *da,					\
+	char *page)							\
+{									\
+	return snprintf(page, PAGE_SIZE, "%u\n",			\
+			(u32)da->da_dev->dev_attrib._name);		\
+}
+
+#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name)			\
+static ssize_t _backend##_dev_store_attr_##_name(			\
+	struct se_dev_attrib *da,					\
+	const char *page,						\
+	size_t count)							\
+{									\
+	unsigned long val;						\
+	int ret;							\
+									\
+	ret = kstrtoul(page, 0, &val);					\
+	if (ret < 0) {							\
+		pr_err("kstrtoul() failed with ret: %d\n", ret);	\
+		return -EINVAL;						\
+	}								\
+	ret = se_dev_set_##_name(da->da_dev, (u32)val);			\
+									\
+	return (!ret) ? count : -EINVAL;				\
+}
+
+#define DEF_TB_DEV_ATTRIB(_backend, _name)				\
+DEF_TB_DEV_ATTRIB_SHOW(_backend, _name);				\
+DEF_TB_DEV_ATTRIB_STORE(_backend, _name);
+
+#define DEF_TB_DEV_ATTRIB_RO(_backend, name)				\
+DEF_TB_DEV_ATTRIB_SHOW(_backend, name);
+
+CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib);
+#define TB_DEV_ATTR(_backend, _name, _mode)				\
+static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \
+		__CONFIGFS_EATTR(_name, _mode,				\
+		_backend##_dev_show_attr_##_name,			\
+		_backend##_dev_store_attr_##_name);
+
+#define TB_DEV_ATTR_RO(_backend, _name)						\
+static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_backend##_dev_show_attr_##_name);
+
+/*
+ * Default list of target backend device attributes as defined by
+ * struct se_dev_attrib
+ */
+
+#define DEF_TB_DEFAULT_ATTRIBS(_backend)				\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_model_alias);		\
+	TB_DEV_ATTR(_backend, emulate_model_alias, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_dpo);			\
+	TB_DEV_ATTR(_backend, emulate_dpo, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_fua_write);			\
+	TB_DEV_ATTR(_backend, emulate_fua_write, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_fua_read);			\
+	TB_DEV_ATTR(_backend, emulate_fua_read, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_write_cache);		\
+	TB_DEV_ATTR(_backend, emulate_write_cache, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_ua_intlck_ctrl);		\
+	TB_DEV_ATTR(_backend, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tas);			\
+	TB_DEV_ATTR(_backend, emulate_tas, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tpu);			\
+	TB_DEV_ATTR(_backend, emulate_tpu, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tpws);			\
+	TB_DEV_ATTR(_backend, emulate_tpws, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_caw);			\
+	TB_DEV_ATTR(_backend, emulate_caw, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_3pc);			\
+	TB_DEV_ATTR(_backend, emulate_3pc, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, pi_prot_type);			\
+	TB_DEV_ATTR(_backend, pi_prot_type, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_pi_prot_type);		\
+	TB_DEV_ATTR_RO(_backend, hw_pi_prot_type);			\
+	DEF_TB_DEV_ATTRIB(_backend, pi_prot_format);			\
+	TB_DEV_ATTR(_backend, pi_prot_format, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, enforce_pr_isids);			\
+	TB_DEV_ATTR(_backend, enforce_pr_isids, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, is_nonrot);				\
+	TB_DEV_ATTR(_backend, is_nonrot, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_rest_reord);		\
+	TB_DEV_ATTR(_backend, emulate_rest_reord, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, force_pr_aptpl);			\
+	TB_DEV_ATTR(_backend, force_pr_aptpl, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_block_size);			\
+	TB_DEV_ATTR_RO(_backend, hw_block_size);			\
+	DEF_TB_DEV_ATTRIB(_backend, block_size);			\
+	TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors);			\
+	TB_DEV_ATTR_RO(_backend, hw_max_sectors);			\
+	DEF_TB_DEV_ATTRIB(_backend, fabric_max_sectors);		\
+	TB_DEV_ATTR(_backend, fabric_max_sectors, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, optimal_sectors);			\
+	TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth);			\
+	TB_DEV_ATTR_RO(_backend, hw_queue_depth);			\
+	DEF_TB_DEV_ATTRIB(_backend, queue_depth);			\
+	TB_DEV_ATTR(_backend, queue_depth, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, max_unmap_lba_count);		\
+	TB_DEV_ATTR(_backend, max_unmap_lba_count, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, max_unmap_block_desc_count);	\
+	TB_DEV_ATTR(_backend, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, unmap_granularity);			\
+	TB_DEV_ATTR(_backend, unmap_granularity, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, unmap_granularity_alignment);	\
+	TB_DEV_ATTR(_backend, unmap_granularity_alignment, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, max_write_same_len);		\
+	TB_DEV_ATTR(_backend, max_write_same_len, S_IRUGO | S_IWUSR);
+
+#endif /* TARGET_CORE_BACKEND_CONFIGFS_H */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 23c518a..397fb63 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -476,6 +476,12 @@ struct se_dif_v1_tuple {
 	__be32			ref_tag;
 };
 
+/* for sam_task_attr */
+#define TCM_SIMPLE_TAG	0x20
+#define TCM_HEAD_TAG	0x21
+#define TCM_ORDERED_TAG	0x22
+#define TCM_ACA_TAG	0x24
+
 struct se_cmd {
 	/* SAM response code being sent to initiator */
 	u8			scsi_status;
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 7c5cbfe..81c4c18 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_fast_assign(
 		__entry->ip	= ip;
-		__entry->refcnt	= __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs);
+		__entry->refcnt	= atomic_read(&mod->refcnt);
 		__assign_str(name, mod->name);
 	),
 
diff --git a/include/trace/events/target.h b/include/trace/events/target.h
index 4540344..04c3c6ef 100644
--- a/include/trace/events/target.h
+++ b/include/trace/events/target.h
@@ -109,10 +109,10 @@
 
 #define show_task_attribute_name(val)				\
 	__print_symbolic(val,					\
-		{ MSG_SIMPLE_TAG,	"SIMPLE"	},	\
-		{ MSG_HEAD_TAG,		"HEAD"		},	\
-		{ MSG_ORDERED_TAG,	"ORDERED"	},	\
-		{ MSG_ACA_TAG,		"ACA"		} )
+		{ TCM_SIMPLE_TAG,	"SIMPLE"	},	\
+		{ TCM_HEAD_TAG,		"HEAD"		},	\
+		{ TCM_ORDERED_TAG,	"ORDERED"	},	\
+		{ TCM_ACA_TAG,		"ACA"		} )
 
 #define show_scsi_status_name(val)				\
 	__print_symbolic(val,					\
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 259d31f..00b10002 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -387,6 +387,7 @@ header-y += tcp.h
 header-y += tcp_metrics.h
 header-y += telephony.h
 header-y += termios.h
+header-y += thermal.h
 header-y += time.h
 header-y += times.h
 header-y += timex.h
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 12e2668..d3475e1 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -371,7 +371,9 @@ enum {
 #define AUDIT_ARCH_PARISC	(EM_PARISC)
 #define AUDIT_ARCH_PARISC64	(EM_PARISC|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_PPC		(EM_PPC)
+/* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */
 #define AUDIT_ARCH_PPC64	(EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE	(EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_S390		(EM_S390)
 #define AUDIT_ARCH_S390X	(EM_S390|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SH		(EM_SH)
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 18b2403..50ae243 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -48,6 +48,8 @@
 #define TUNSETQUEUE  _IOW('T', 217, int)
 #define TUNSETIFINDEX	_IOW('T', 218, unsigned int)
 #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
@@ -57,7 +59,6 @@
 #define IFF_ONE_QUEUE	0x2000
 #define IFF_VNET_HDR	0x4000
 #define IFF_TUN_EXCL	0x8000
-#define IFF_VNET_LE	0x10000
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..a37fd12 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#define KVM_CAP_DEVICE_ASSIGNMENT 17
 #define KVM_CAP_IOMMU 18
-#ifdef __KVM_HAVE_MSI
-#define KVM_CAP_DEVICE_MSI 20
-#endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 #define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info {
 #endif
 #define KVM_CAP_IRQ_ROUTING 25
 #define KVM_CAP_IRQ_INJECT_STATUS 26
-#define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#ifdef __KVM_HAVE_MSIX
-#define KVM_CAP_DEVICE_MSIX 28
-#endif
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping {
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-/* IA64 stack access */
-#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
-#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 7dcfbe6..b483d19 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -6,10 +6,6 @@
 #include <linux/types.h>
 #include <linux/uio.h>
 
-#ifndef __packed
-#define __packed                        __attribute__((packed))
-#endif
-
 #define TCMU_VERSION "1.0"
 
 /*
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
new file mode 100644
index 0000000..ac55358
--- /dev/null
+++ b/include/uapi/linux/thermal.h
@@ -0,0 +1,35 @@
+#ifndef _UAPI_LINUX_THERMAL_H
+#define _UAPI_LINUX_THERMAL_H
+
+#define THERMAL_NAME_LENGTH	20
+
+/* Adding event notification support elements */
+#define THERMAL_GENL_FAMILY_NAME                "thermal_event"
+#define THERMAL_GENL_VERSION                    0x01
+#define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_grp"
+
+/* Events supported by Thermal Netlink */
+enum events {
+	THERMAL_AUX0,
+	THERMAL_AUX1,
+	THERMAL_CRITICAL,
+	THERMAL_DEV_FAULT,
+};
+
+/* attributes of thermal_genl_family */
+enum {
+	THERMAL_GENL_ATTR_UNSPEC,
+	THERMAL_GENL_ATTR_EVENT,
+	__THERMAL_GENL_ATTR_MAX,
+};
+#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
+
+/* commands supported by the thermal_genl_family */
+enum {
+	THERMAL_GENL_CMD_UNSPEC,
+	THERMAL_GENL_CMD_EVENT,
+	__THERMAL_GENL_CMD_MAX,
+};
+#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_THERMAL_H */
diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h
index 5a86d8e..26db206 100644
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h
@@ -31,9 +31,9 @@ struct v4l2_mbus_framefmt {
 	__u32			code;
 	__u32			field;
 	__u32			colorspace;
-	__u32			ycbcr_enc;
-	__u32			quantization;
-	__u32			reserved[5];
+	__u16			ycbcr_enc;
+	__u16			quantization;
+	__u32			reserved[6];
 };
 
 #ifndef __KERNEL__
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 5e26f61..be40f70 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -31,6 +31,7 @@
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
+#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index e5ec1ca..35b552c7 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -41,6 +41,8 @@
 
 #include <linux/virtio_config.h>
 
+#ifndef VIRTIO_PCI_NO_LEGACY
+
 /* A 32-bit r/o bitmask of the features supported by the host */
 #define VIRTIO_PCI_HOST_FEATURES	0
 
@@ -67,16 +69,11 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR			19
 
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG		0x2
-
 /* MSI-X registers: only enabled if MSI-X is enabled. */
 /* A 16-bit vector for configuration changes. */
 #define VIRTIO_MSI_CONFIG_VECTOR        20
 /* A 16-bit vector for selected queue notifications. */
 #define VIRTIO_MSI_QUEUE_VECTOR         22
-/* Vector value used to disable MSI for queue */
-#define VIRTIO_MSI_NO_VECTOR            0xffff
 
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
@@ -94,4 +91,12 @@
 /* The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. */
 #define VIRTIO_PCI_VRING_ALIGN		4096
+
+#endif /* VIRTIO_PCI_NO_LEGACY */
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG		0x2
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
 #endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 26daf55..4275b96 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -90,8 +90,9 @@ enum {
 };
 
 enum {
+	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-	IB_USER_VERBS_EX_CMD_DESTROY_FLOW
+	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
 
 /*
@@ -201,6 +202,32 @@ struct ib_uverbs_query_device_resp {
 	__u8  reserved[4];
 };
 
+enum {
+	IB_USER_VERBS_EX_QUERY_DEVICE_ODP =		1ULL << 0,
+};
+
+struct ib_uverbs_ex_query_device {
+	__u32 comp_mask;
+	__u32 reserved;
+};
+
+struct ib_uverbs_odp_caps {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+	struct ib_uverbs_query_device_resp base;
+	__u32 comp_mask;
+	__u32 reserved;
+	struct ib_uverbs_odp_caps odp_caps;
+};
+
 struct ib_uverbs_query_port {
 	__u64 response;
 	__u8  port_num;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 9b3565c..eb41008 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -395,8 +395,6 @@ retry:
 			case 0:
 				goto out;
 			case -EACCES:
-				flags |= MS_RDONLY;
-				goto retry;
 			case -EINVAL:
 				continue;
 		}
@@ -419,6 +417,10 @@ retry:
 #endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
+	if (!(flags & MS_RDONLY)) {
+		flags |= MS_RDONLY;
+		goto retry;
+	}
 
 	printk("List of all partitions:\n");
 	printk_all_partitions();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 113b837..4c1ee7f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7477,11 +7477,11 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	if (move_group) {
 		synchronize_rcu();
-		perf_install_in_context(ctx, group_leader, event->cpu);
+		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_install_in_context(ctx, sibling, event->cpu);
+			perf_install_in_context(ctx, sibling, sibling->cpu);
 			get_ctx(ctx);
 		}
 	}
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f..664411f 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -6,6 +6,7 @@
 #include <linux/slab.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /* init to 2 - one for init_task, one to ensure it is never freed */
@@ -213,6 +214,14 @@ out:
 	return i;
 }
 
+bool may_setgroups(void)
+{
+	struct user_namespace *user_ns = current_user_ns();
+
+	return ns_capable(user_ns, CAP_SETGID) &&
+		userns_may_setgroups(user_ns);
+}
+
 /*
  *	SMP: Our groups are copy-on-write. We can set them safely
  *	without another task interfering.
@@ -223,7 +232,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!ns_capable(current_user_ns(), CAP_SETGID))
+	if (!may_setgroups())
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d76..df553b0 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -78,8 +78,12 @@ extern void unmask_threaded_irq(struct irq_desc *desc);
 
 #ifdef CONFIG_SPARSE_IRQ
 static inline void irq_mark_irq(unsigned int irq) { }
+extern void irq_lock_sparse(void);
+extern void irq_unlock_sparse(void);
 #else
 extern void irq_mark_irq(unsigned int irq);
+static inline void irq_lock_sparse(void) { }
+static inline void irq_unlock_sparse(void) { }
 #endif
 
 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a1782f8..99793b9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -132,6 +132,16 @@ static void free_masks(struct irq_desc *desc)
 static inline void free_masks(struct irq_desc *desc) { }
 #endif
 
+void irq_lock_sparse(void)
+{
+	mutex_lock(&sparse_irq_lock);
+}
+
+void irq_unlock_sparse(void)
+{
+	mutex_unlock(&sparse_irq_lock);
+}
+
 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
 {
 	struct irq_desc *desc;
@@ -168,6 +178,12 @@ static void free_desc(unsigned int irq)
 
 	unregister_irq_proc(irq, desc);
 
+	/*
+	 * sparse_irq_lock also protects show_interrupts() and
+	 * kstat_irqs_usr(). Once we have deleted the descriptor from the
+	 * sparse tree we can free it. Access in proc will fail to
+	 * look up the descriptor.
+	 */
 	mutex_lock(&sparse_irq_lock);
 	delete_irq_desc(irq);
 	mutex_unlock(&sparse_irq_lock);
@@ -574,6 +590,15 @@ void kstat_incr_irq_this_cpu(unsigned int irq)
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 }
 
+/**
+ * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
+ * @irq:	The interrupt number
+ * @cpu:	The cpu number
+ *
+ * Returns the sum of interrupt counts on @cpu since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -582,6 +607,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 			*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
 }
 
+/**
+ * kstat_irqs - Get the statistics for an interrupt
+ * @irq:	The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
 unsigned int kstat_irqs(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -594,3 +627,22 @@ unsigned int kstat_irqs(unsigned int irq)
 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 	return sum;
 }
+
+/**
+ * kstat_irqs_usr - Get the statistics for an interrupt
+ * @irq:	The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. Contrary to kstat_irqs() this can be called from any
+ * preemptible context. It's protected against concurrent removal of
+ * an interrupt descriptor when sparse irqs are enabled.
+ */
+unsigned int kstat_irqs_usr(unsigned int irq)
+{
+	unsigned int sum;
+
+	irq_lock_sparse();
+	sum = kstat_irqs(irq);
+	irq_unlock_sparse();
+	return sum;
+}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ac1ba2f..9dc9bfd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -15,6 +15,23 @@
 
 #include "internals.h"
 
+/*
+ * Access rules:
+ *
+ * procfs protects read/write of /proc/irq/N/ files against a
+ * concurrent free of the interrupt descriptor. remove_proc_entry()
+ * immediately prevents new read/writes from happening and waits for
+ * already running read/write functions to complete.
+ *
+ * We remove the proc entries first and then delete the interrupt
+ * descriptor from the radix tree and free it. So it is guaranteed
+ * that irq_to_desc(N) is valid as long as the read/writes are
+ * permitted by procfs.
+ *
+ * The read from /proc/interrupts is a different problem because there
+ * is no protection. So the lookup and the access to irqdesc
+ * information must be protected by sparse_irq_lock.
+ */
 static struct proc_dir_entry *root_irq_dir;
 
 #ifdef CONFIG_SMP
@@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 	}
 
+	irq_lock_sparse();
 	desc = irq_to_desc(i);
 	if (!desc)
-		return 0;
+		goto outsparse;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
 	for_each_online_cpu(j)
@@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v)
 	seq_putc(p, '\n');
 out:
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+outsparse:
+	irq_unlock_sparse();
 	return 0;
 }
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index e52a873..3965511 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -42,7 +42,6 @@
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
 #include <linux/sched.h>
-#include <linux/stop_machine.h>
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
@@ -98,7 +97,7 @@
  * 1) List of modules (also safely readable with preempt_disable),
  * 2) module_use links,
  * 3) module_addr_min/module_addr_max.
- * (delete uses stop_machine/add uses RCU list operations). */
+ * (delete and add use RCU list operations). */
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
@@ -158,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
  * Protected by module_mutex. */
 static unsigned long module_addr_min = -1UL, module_addr_max = 0;
 
-int register_module_notifier(struct notifier_block * nb)
+int register_module_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_register(&module_notify_list, nb);
 }
 EXPORT_SYMBOL(register_module_notifier);
 
-int unregister_module_notifier(struct notifier_block * nb)
+int unregister_module_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_unregister(&module_notify_list, nb);
 }
@@ -628,18 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
 
 EXPORT_TRACEPOINT_SYMBOL(module_get);
 
+/* MODULE_REF_BASE is the base reference count held by the module loader. */
+#define MODULE_REF_BASE	1
+
 /* Init the unload section of the module. */
 static int module_unload_init(struct module *mod)
 {
-	mod->refptr = alloc_percpu(struct module_ref);
-	if (!mod->refptr)
-		return -ENOMEM;
+	/*
+	 * Initialize reference counter to MODULE_REF_BASE.
+	 * refcnt == 0 means module is going.
+	 */
+	atomic_set(&mod->refcnt, MODULE_REF_BASE);
 
 	INIT_LIST_HEAD(&mod->source_list);
 	INIT_LIST_HEAD(&mod->target_list);
 
 	/* Hold reference count during initialization. */
-	raw_cpu_write(mod->refptr->incs, 1);
+	atomic_inc(&mod->refcnt);
 
 	return 0;
 }
@@ -721,8 +725,6 @@ static void module_unload_free(struct module *mod)
 		kfree(use);
 	}
 	mutex_unlock(&module_mutex);
-
-	free_percpu(mod->refptr);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -740,60 +742,39 @@ static inline int try_force_unload(unsigned int flags)
 }
 #endif /* CONFIG_MODULE_FORCE_UNLOAD */
 
-struct stopref
+/* Try to release refcount of module, 0 means success. */
+static int try_release_module_ref(struct module *mod)
 {
-	struct module *mod;
-	int flags;
-	int *forced;
-};
+	int ret;
 
-/* Whole machine is stopped with interrupts off when this runs. */
-static int __try_stop_module(void *_sref)
-{
-	struct stopref *sref = _sref;
+	/* Try to decrement refcnt which we set at loading */
+	ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
+	BUG_ON(ret < 0);
+	if (ret)
+		/* Someone can put this right now, recover with checking */
+		ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
+
+	return ret;
+}
 
+static int try_stop_module(struct module *mod, int flags, int *forced)
+{
 	/* If it's not unused, quit unless we're forcing. */
-	if (module_refcount(sref->mod) != 0) {
-		if (!(*sref->forced = try_force_unload(sref->flags)))
+	if (try_release_module_ref(mod) != 0) {
+		*forced = try_force_unload(flags);
+		if (!(*forced))
 			return -EWOULDBLOCK;
 	}
 
 	/* Mark it as dying. */
-	sref->mod->state = MODULE_STATE_GOING;
-	return 0;
-}
-
-static int try_stop_module(struct module *mod, int flags, int *forced)
-{
-	struct stopref sref = { mod, flags, forced };
+	mod->state = MODULE_STATE_GOING;
 
-	return stop_machine(__try_stop_module, &sref, NULL);
+	return 0;
 }
 
 unsigned long module_refcount(struct module *mod)
 {
-	unsigned long incs = 0, decs = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		decs += per_cpu_ptr(mod->refptr, cpu)->decs;
-	/*
-	 * ensure the incs are added up after the decs.
-	 * module_put ensures incs are visible before decs with smp_wmb.
-	 *
-	 * This 2-count scheme avoids the situation where the refcount
-	 * for CPU0 is read, then CPU0 increments the module refcount,
-	 * then CPU1 drops that refcount, then the refcount for CPU1 is
-	 * read. We would record a decrement but not its corresponding
-	 * increment so we would see a low count (disaster).
-	 *
-	 * Rare situation? But module_refcount can be preempted, and we
-	 * might be tallying up 4096+ CPUs. So it is not impossible.
-	 */
-	smp_rmb();
-	for_each_possible_cpu(cpu)
-		incs += per_cpu_ptr(mod->refptr, cpu)->incs;
-	return incs - decs;
+	return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE;
 }
 EXPORT_SYMBOL(module_refcount);
 
@@ -877,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 
 	seq_printf(m, " %lu ", module_refcount(mod));
 
-	/* Always include a trailing , so userspace can differentiate
-           between this and the old multi-field proc format. */
+	/*
+	 * Always include a trailing , so userspace can differentiate
+	 * between this and the old multi-field proc format.
+	 */
 	list_for_each_entry(use, &mod->source_list, source_list) {
 		printed_something = 1;
 		seq_printf(m, "%s,", use->source->name);
@@ -886,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 
 	if (mod->init != NULL && mod->exit == NULL) {
 		printed_something = 1;
-		seq_printf(m, "[permanent],");
+		seq_puts(m, "[permanent],");
 	}
 
 	if (!printed_something)
-		seq_printf(m, "-");
+		seq_puts(m, "-");
 }
 
 void __symbol_put(const char *symbol)
@@ -935,7 +918,7 @@ void __module_get(struct module *module)
 {
 	if (module) {
 		preempt_disable();
-		__this_cpu_inc(module->refptr->incs);
+		atomic_inc(&module->refcnt);
 		trace_module_get(module, _RET_IP_);
 		preempt_enable();
 	}
@@ -948,11 +931,11 @@ bool try_module_get(struct module *module)
 
 	if (module) {
 		preempt_disable();
-
-		if (likely(module_is_live(module))) {
-			__this_cpu_inc(module->refptr->incs);
+		/* Note: here, we can fail to get a reference */
+		if (likely(module_is_live(module) &&
+			   atomic_inc_not_zero(&module->refcnt) != 0))
 			trace_module_get(module, _RET_IP_);
-		} else
+		else
 			ret = false;
 
 		preempt_enable();
@@ -963,11 +946,12 @@ EXPORT_SYMBOL(try_module_get);
 
 void module_put(struct module *module)
 {
+	int ret;
+
 	if (module) {
 		preempt_disable();
-		smp_wmb(); /* see comment in module_refcount */
-		__this_cpu_inc(module->refptr->decs);
-
+		ret = atomic_dec_if_positive(&module->refcnt);
+		WARN_ON(ret < 0);	/* Failed to put refcount */
 		trace_module_put(module, _RET_IP_);
 		preempt_enable();
 	}
@@ -978,7 +962,7 @@ EXPORT_SYMBOL(module_put);
 static inline void print_unload_info(struct seq_file *m, struct module *mod)
 {
 	/* We don't know the usage count, or what modules are using. */
-	seq_printf(m, " - -");
+	seq_puts(m, " - -");
 }
 
 static inline void module_unload_free(struct module *mod)
@@ -1131,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc,
 static int check_version(Elf_Shdr *sechdrs,
 			 unsigned int versindex,
 			 const char *symname,
-			 struct module *mod, 
+			 struct module *mod,
 			 const unsigned long *crc,
 			 const struct module *crc_owner)
 {
@@ -1165,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs,
 	return 0;
 
 bad_version:
-	printk("%s: disagrees about version of symbol %s\n",
+	pr_warn("%s: disagrees about version of symbol %s\n",
 	       mod->name, symname);
 	return 0;
 }
@@ -1200,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 static inline int check_version(Elf_Shdr *sechdrs,
 				unsigned int versindex,
 				const char *symname,
-				struct module *mod, 
+				struct module *mod,
 				const unsigned long *crc,
 				const struct module *crc_owner)
 {
@@ -1288,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect)
 	return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
 }
 
-struct module_sect_attr
-{
+struct module_sect_attr {
 	struct module_attribute mattr;
 	char *name;
 	unsigned long address;
 };
 
-struct module_sect_attrs
-{
+struct module_sect_attrs {
 	struct attribute_group grp;
 	unsigned int nsections;
 	struct module_sect_attr attrs[0];
@@ -1550,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod)
 		    (attr->test && attr->test(mod))) {
 			memcpy(temp_attr, attr, sizeof(*temp_attr));
 			sysfs_attr_init(&temp_attr->attr);
-			error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+			error = sysfs_create_file(&mod->mkobj.kobj,
+					&temp_attr->attr);
 			++temp_attr;
 		}
 	}
@@ -1566,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod)
 		/* pick a field to test for end of list */
 		if (!attr->attr.name)
 			break;
-		sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
+		sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
 		if (attr->free)
 			attr->free(mod);
 	}
@@ -1697,18 +1680,6 @@ static void mod_sysfs_teardown(struct module *mod)
 	mod_sysfs_fini(mod);
 }
 
-/*
- * unlink the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __unlink_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_del(&mod->list);
-	module_bug_cleanup(mod);
-	return 0;
-}
-
 #ifdef CONFIG_DEBUG_SET_MODULE_RONX
 /*
  * LKM RO/NX protection: protect module's text/ro-data
@@ -1860,7 +1831,12 @@ static void free_module(struct module *mod)
 
 	/* Now we can delete it from the lists */
 	mutex_lock(&module_mutex);
-	stop_machine(__unlink_module, mod, NULL);
+	/* Unlink carefully: kallsyms could be walking list. */
+	list_del_rcu(&mod->list);
+	/* Remove this module from bug list, this uses list_del_rcu */
+	module_bug_cleanup(mod);
+	/* Wait for RCU synchronizing before releasing mod->list and buglist. */
+	synchronize_rcu();
 	mutex_unlock(&module_mutex);
 
 	/* This may be NULL, but that's OK */
@@ -1955,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 			/* We compiled with -fno-common.  These are not
 			   supposed to happen.  */
 			pr_debug("Common symbol: %s\n", name);
-			printk("%s: please compile with -fno-common\n",
+			pr_warn("%s: please compile with -fno-common\n",
 			       mod->name);
 			ret = -ENOEXEC;
 			break;
@@ -2259,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
 }
 
 static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
-                           unsigned int shnum)
+			unsigned int shnum)
 {
 	const Elf_Shdr *sec;
 
@@ -2735,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 		 * This shouldn't happen with same compiler and binutils
 		 * building all parts of the module.
 		 */
-		printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
+		pr_warn("%s: has both .ctors and .init_array.\n",
 		       mod->name);
 		return -EINVAL;
 	}
@@ -3023,8 +2999,10 @@ static int do_init_module(struct module *mod)
 	if (mod->init != NULL)
 		ret = do_one_initcall(mod->init);
 	if (ret < 0) {
-		/* Init routine failed: abort.  Try to protect us from
-                   buggy refcounters. */
+		/*
+		 * Init routine failed: abort.  Try to protect us from
+		 * buggy refcounters.
+		 */
 		mod->state = MODULE_STATE_GOING;
 		synchronize_sched();
 		module_put(mod);
@@ -3202,7 +3180,7 @@ out:
 
 static int unknown_module_param_cb(char *param, char *val, const char *modname)
 {
-	/* Check for magic 'dyndbg' arg */ 
+	/* Check for magic 'dyndbg' arg */
 	int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
 	if (ret != 0)
 		pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
@@ -3352,6 +3330,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
 	wake_up_all(&module_wq);
+	/* Wait for RCU synchronizing before releasing mod->list. */
+	synchronize_rcu();
 	mutex_unlock(&module_mutex);
  free_module:
 	module_deallocate(mod, info);
@@ -3685,8 +3665,8 @@ static int m_show(struct seq_file *m, void *p)
 
 	/* Informative for users. */
 	seq_printf(m, " %s",
-		   mod->state == MODULE_STATE_GOING ? "Unloading":
-		   mod->state == MODULE_STATE_COMING ? "Loading":
+		   mod->state == MODULE_STATE_GOING ? "Unloading" :
+		   mod->state == MODULE_STATE_COMING ? "Loading" :
 		   "Live");
 	/* Used by oprofile and other similar tools. */
 	seq_printf(m, " 0x%pK", mod->module_core);
@@ -3695,7 +3675,7 @@ static int m_show(struct seq_file *m, void *p)
 	if (mod->taints)
 		seq_printf(m, " %s", module_flags(mod, buf));
 
-	seq_printf(m, "\n");
+	seq_puts(m, "\n");
 	return 0;
 }
 
diff --git a/kernel/params.c b/kernel/params.c
index db97b79..0af9b2c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -603,74 +603,67 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
 				     const struct kernel_param *kp,
 				     const char *name)
 {
-	struct module_param_attrs *new;
-	struct attribute **attrs;
-	int err, num;
+	struct module_param_attrs *new_mp;
+	struct attribute **new_attrs;
+	unsigned int i;
 
 	/* We don't bother calling this with invisible parameters. */
 	BUG_ON(!kp->perm);
 
 	if (!mk->mp) {
-		num = 0;
-		attrs = NULL;
-	} else {
-		num = mk->mp->num;
-		attrs = mk->mp->grp.attrs;
+		/* First allocation. */
+		mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL);
+		if (!mk->mp)
+			return -ENOMEM;
+		mk->mp->grp.name = "parameters";
+		/* NULL-terminated attribute array. */
+		mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]),
+					    GFP_KERNEL);
+		/* Caller will cleanup via free_module_param_attrs */
+		if (!mk->mp->grp.attrs)
+			return -ENOMEM;
 	}
 
-	/* Enlarge. */
-	new = krealloc(mk->mp,
-		       sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
-		       GFP_KERNEL);
-	if (!new) {
-		kfree(attrs);
-		err = -ENOMEM;
-		goto fail;
-	}
-	/* Despite looking like the typical realloc() bug, this is safe.
-	 * We *want* the old 'attrs' to be freed either way, and we'll store
-	 * the new one in the success case. */
-	attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
-	if (!attrs) {
-		err = -ENOMEM;
-		goto fail_free_new;
-	}
+	/* Enlarge allocations. */
+	new_mp = krealloc(mk->mp,
+			  sizeof(*mk->mp) +
+			  sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1),
+			  GFP_KERNEL);
+	if (!new_mp)
+		return -ENOMEM;
+	mk->mp = new_mp;
 
-	/* Sysfs wants everything zeroed. */
-	memset(new, 0, sizeof(*new));
-	memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
-	memset(&attrs[num], 0, sizeof(attrs[num]));
-	new->grp.name = "parameters";
-	new->grp.attrs = attrs;
+	/* Extra pointer for NULL terminator */
+	new_attrs = krealloc(mk->mp->grp.attrs,
+			     sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2),
+			     GFP_KERNEL);
+	if (!new_attrs)
+		return -ENOMEM;
+	mk->mp->grp.attrs = new_attrs;
 
 	/* Tack new one on the end. */
-	sysfs_attr_init(&new->attrs[num].mattr.attr);
-	new->attrs[num].param = kp;
-	new->attrs[num].mattr.show = param_attr_show;
-	new->attrs[num].mattr.store = param_attr_store;
-	new->attrs[num].mattr.attr.name = (char *)name;
-	new->attrs[num].mattr.attr.mode = kp->perm;
-	new->num = num+1;
+	sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr);
+	mk->mp->attrs[mk->mp->num].param = kp;
+	mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show;
+	/* Do not allow runtime DAC changes to make param writable. */
+	if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+		mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
+	mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
+	mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
+	mk->mp->num++;
 
 	/* Fix up all the pointers, since krealloc can move us */
-	for (num = 0; num < new->num; num++)
-		new->grp.attrs[num] = &new->attrs[num].mattr.attr;
-	new->grp.attrs[num] = NULL;
-
-	mk->mp = new;
+	for (i = 0; i < mk->mp->num; i++)
+		mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr;
+	mk->mp->grp.attrs[mk->mp->num] = NULL;
 	return 0;
-
-fail_free_new:
-	kfree(new);
-fail:
-	mk->mp = NULL;
-	return err;
 }
 
 #ifdef CONFIG_MODULES
 static void free_module_param_attrs(struct module_kobject *mk)
 {
-	kfree(mk->mp->grp.attrs);
+	if (mk->mp)
+		kfree(mk->mp->grp.attrs);
 	kfree(mk->mp);
 	mk->mp = NULL;
 }
@@ -695,8 +688,10 @@ int module_param_sysfs_setup(struct module *mod,
 		if (kparam[i].perm == 0)
 			continue;
 		err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
-		if (err)
+		if (err) {
+			free_module_param_attrs(&mod->mkobj);
 			return err;
+		}
 		params = true;
 	}
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6e7708c..48b28d3 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,7 +94,7 @@ config PM_STD_PARTITION
 config PM_SLEEP
 	def_bool y
 	depends on SUSPEND || HIBERNATE_CALLBACKS
-	select PM_RUNTIME
+	select PM
 
 config PM_SLEEP_SMP
 	def_bool y
@@ -130,23 +130,19 @@ config PM_WAKELOCKS_GC
 	depends on PM_WAKELOCKS
 	default y
 
-config PM_RUNTIME
-	bool "Run-time PM core functionality"
+config PM
+	bool "Device power management core functionality"
 	---help---
 	  Enable functionality allowing I/O devices to be put into energy-saving
-	  (low power) states at run time (or autosuspended) after a specified
-	  period of inactivity and woken up in response to a hardware-generated
+	  (low power) states, for example after a specified period of inactivity
+	  (autosuspended), and woken up in response to a hardware-generated
 	  wake-up event or a driver's request.
 
 	  Hardware support is generally required for this functionality to work
 	  and the bus type drivers of the buses the devices are on are
-	  responsible for the actual handling of the autosuspend requests and
+	  responsible for the actual handling of device suspend requests and
 	  wake-up events.
 
-config PM
-	def_bool y
-	depends on PM_SLEEP || PM_RUNTIME
-
 config PM_DEBUG
 	bool "Power Management Debug Support"
 	depends on PM
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4d54b75..1363d58 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -847,7 +847,6 @@ void tick_nohz_idle_enter(void)
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
 
 /**
  * tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -974,7 +973,6 @@ void tick_nohz_idle_exit(void)
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
 
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 67d6369..979ccde 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -55,7 +55,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
-ifeq ($(CONFIG_PM_RUNTIME),y)
+ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
 endif
 ifeq ($(CONFIG_TRACING),y)
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 602e5bb..d58cc4d 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!ns_capable(current_user_ns(), CAP_SETGID))
+	if (!may_setgroups())
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 69b800a..b069ccb 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -54,6 +54,7 @@ struct user_namespace init_user_ns = {
 #ifdef CONFIG_USER_NS
 	.ns.ops = &userns_operations,
 #endif
+	.flags = USERNS_INIT_FLAGS,
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	.persistent_keyring_register_sem =
 	__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 1491ad0..4109f83 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,6 +24,7 @@
 #include <linux/fs_struct.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
+static DEFINE_MUTEX(userns_state_mutex);
 
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
@@ -100,6 +101,11 @@ int create_user_ns(struct cred *new)
 	ns->owner = owner;
 	ns->group = group;
 
+	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
+	mutex_lock(&userns_state_mutex);
+	ns->flags = parent_ns->flags;
+	mutex_unlock(&userns_state_mutex);
+
 	set_cred_user_ns(new, ns);
 
 #ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -584,9 +590,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map,
 	return false;
 }
 
-
-static DEFINE_MUTEX(id_map_mutex);
-
 static ssize_t map_write(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos,
 			 int cap_setid,
@@ -603,7 +606,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	ssize_t ret = -EINVAL;
 
 	/*
-	 * The id_map_mutex serializes all writes to any given map.
+	 * The userns_state_mutex serializes all writes to any given map.
 	 *
 	 * Any map is only ever written once.
 	 *
@@ -621,7 +624,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	 * order and smp_rmb() is guaranteed that we don't have crazy
 	 * architectures returning stale data.
 	 */
-	mutex_lock(&id_map_mutex);
+	mutex_lock(&userns_state_mutex);
 
 	ret = -EPERM;
 	/* Only allow one successful write to the map */
@@ -641,7 +644,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (!page)
 		goto out;
 
-	/* Only allow <= page size writes at the beginning of the file */
+	/* Only allow < page size writes at the beginning of the file */
 	ret = -EINVAL;
 	if ((*ppos != 0) || (count >= PAGE_SIZE))
 		goto out;
@@ -751,7 +754,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	*ppos = count;
 	ret = count;
 out:
-	mutex_unlock(&id_map_mutex);
+	mutex_unlock(&userns_state_mutex);
 	if (page)
 		free_page(page);
 	return ret;
@@ -813,16 +816,21 @@ static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
-	/* Allow mapping to your own filesystem ids */
-	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+	const struct cred *cred = file->f_cred;
+	/* Don't allow mappings that would allow anything that wouldn't
+	 * be allowed without the establishment of unprivileged mappings.
+	 */
+	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
+	    uid_eq(ns->owner, cred->euid)) {
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
 			kuid_t uid = make_kuid(ns->parent, id);
-			if (uid_eq(uid, file->f_cred->fsuid))
+			if (uid_eq(uid, cred->euid))
 				return true;
 		} else if (cap_setid == CAP_SETGID) {
 			kgid_t gid = make_kgid(ns->parent, id);
-			if (gid_eq(gid, file->f_cred->fsgid))
+			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
+			    gid_eq(gid, cred->egid))
 				return true;
 		}
 	}
@@ -842,6 +850,100 @@ static bool new_idmap_permitted(const struct file *file,
 	return false;
 }
 
+int proc_setgroups_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+	/* Lockless snapshot of the flags; read exactly once. */
+	bool allowed = ACCESS_ONCE(ns->flags) & USERNS_SETGROUPS_ALLOWED;
+
+	/* Emit the same keywords that proc_setgroups_write() parses. */
+	seq_printf(seq, "%s\n", allowed ? "allow" : "deny");
+	return 0;
+}
+
+/* Accept "allow" or "deny"; a permitted "deny" clears the setgroups flag. */
+ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	char kbuf[8], *pos;
+	bool setgroups_allowed;
+
+	/* Only a single short write at offset zero is accepted. */
+	if ((*ppos != 0) || (count >= sizeof(kbuf)))
+		return -EINVAL;
+
+	/* Fetch the request and NUL-terminate it for parsing. */
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	kbuf[count] = '\0';
+	pos = kbuf;
+
+	/* The request must start with one of the two keywords. */
+	if (strncmp(pos, "allow", 5) == 0) {
+		pos += 5;
+		setgroups_allowed = true;
+	} else if (strncmp(pos, "deny", 4) == 0) {
+		pos += 4;
+		setgroups_allowed = false;
+	} else {
+		return -EINVAL;
+	}
+
+	/* Nothing but whitespace may follow the keyword. */
+	pos = skip_spaces(pos);
+	if (*pos != '\0')
+		return -EINVAL;
+
+	/*
+	 * The state checks and the flag update below are done with
+	 * userns_state_mutex held.
+	 */
+	mutex_lock(&userns_state_mutex);
+	if (setgroups_allowed) {
+		/*
+		 * Enabling setgroups after setgroups has been disabled
+		 * is not allowed.
+		 */
+		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
+			goto out_denied;
+	} else {
+		/*
+		 * Permanently disabling setgroups after setgroups has
+		 * been enabled by writing the gid_map is not allowed.
+		 */
+		if (ns->gid_map.nr_extents != 0)
+			goto out_denied;
+		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
+	}
+	mutex_unlock(&userns_state_mutex);
+
+	/* Report a successful write */
+	*ppos = count;
+	return count;
+
+out_denied:
+	mutex_unlock(&userns_state_mutex);
+	return -EPERM;
+}
+
+bool userns_may_setgroups(const struct user_namespace *ns)
+{
+	bool mapped, enabled;
+
+	mutex_lock(&userns_state_mutex);
+	/*
+	 * setgroups is only safe once a gid mapping exists for the
+	 * namespace, and only while it has not been disabled.
+	 */
+	mapped = ns->gid_map.nr_extents != 0;
+	enabled = (ns->flags & USERNS_SETGROUPS_ALLOWED) != 0;
+	mutex_unlock(&userns_state_mutex);
+
+	return mapped && enabled;
+}
+
 static inline struct user_namespace *to_user_ns(struct ns_common *ns)
 {
 	return container_of(ns, struct user_namespace, ns);
diff --git a/lib/bug.c b/lib/bug.c
index d1d7c78..0c3bd95 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list);
 static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 {
 	struct module *mod;
+	const struct bug_entry *bug = NULL;
 
-	list_for_each_entry(mod, &module_bug_list, bug_list) {
-		const struct bug_entry *bug = mod->bug_table;
+	rcu_read_lock();
+	list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
 		unsigned i;
 
+		bug = mod->bug_table;
 		for (i = 0; i < mod->num_bugs; ++i, ++bug)
 			if (bugaddr == bug_addr(bug))
-				return bug;
+				goto out;
 	}
-	return NULL;
+	bug = NULL;
+out:
+	rcu_read_unlock();
+
+	return bug;
 }
 
 void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
@@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	 * Strictly speaking this should have a spinlock to protect against
 	 * traversals, but since we only traverse on BUG()s, a spinlock
 	 * could potentially lead to deadlock and thus be counter-productive.
+	 * Thus, this uses RCU to safely manipulate the bug list, since BUG
+	 * must run in non-interruptive state.
 	 */
-	list_add(&mod->bug_list, &module_bug_list);
+	list_add_rcu(&mod->bug_list, &module_bug_list);
 }
 
 void module_bug_cleanup(struct module *mod)
 {
-	list_del(&mod->bug_list);
+	list_del_rcu(&mod->bug_list);
 }
 
 #else
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 5e25627..7de89f4 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
+#include <linux/cma.h>
 
 void show_mem(unsigned int filter)
 {
@@ -38,7 +39,12 @@ void show_mem(unsigned int filter)
 
 	printk("%lu pages RAM\n", total);
 	printk("%lu pages HighMem/MovableOnly\n", highmem);
+#ifdef CONFIG_CMA
+	printk("%lu pages reserved\n", (reserved - totalcma_pages));
+	printk("%lu pages cma reserved\n", totalcma_pages);
+#else
 	printk("%lu pages reserved\n", reserved);
+#endif
 #ifdef CONFIG_QUICKLIST
 	printk("%lu pages in pagetable cache\n",
 		quicklist_total_size());
diff --git a/mm/cma.c b/mm/cma.c
index f891762..a85ae28 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -337,6 +337,7 @@ int __init cma_declare_contiguous(phys_addr_t base,
 	if (ret)
 		goto err;
 
+	totalcma_pages += (size / PAGE_SIZE);
 	pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
 		&base);
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index e8905bc..bd8543c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2464,7 +2464,7 @@ ssize_t generic_perform_write(struct file *file,
 	/*
 	 * Copies from kernel address space cannot fail (NFSD is a big user).
 	 */
-	if (segment_eq(get_fs(), KERNEL_DS))
+	if (!iter_is_iovec(i))
 		flags |= AOP_FLAG_UNINTERRUPTIBLE;
 
 	do {
diff --git a/mm/memory.c b/mm/memory.c
index c3b9097..d8aebc5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -235,9 +235,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 
 static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	if (!tlb->end)
-		return;
-
 	tlb_flush(tlb);
 	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
@@ -259,6 +256,9 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 
 void tlb_flush_mmu(struct mmu_gather *tlb)
 {
+	if (!tlb->end)
+		return;
+
 	tlb_flush_mmu_tlbonly(tlb);
 	tlb_flush_mmu_free(tlb);
 }
@@ -2996,6 +2996,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (set_page_dirty(fault_page))
 		dirtied = 1;
+	/*
+	 * Take a local copy of the address_space - page.mapping may be zeroed
+	 * by truncate after unlock_page().   The address_space itself remains
+	 * pinned by vma->vm_file's reference.  We rely on unlock_page()'s
+	 * release semantics to prevent the compiler from undoing this copying.
+	 */
 	mapping = fault_page->mapping;
 	unlock_page(fault_page);
 	if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e58725a..0e0961b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -162,12 +162,6 @@ static const struct mempolicy_operations {
 			enum mpol_rebind_step step);
 } mpol_ops[MPOL_MAX];
 
-/* Check that the nodemask contains at least one populated zone */
-static int is_valid_nodemask(const nodemask_t *nodemask)
-{
-	return nodes_intersects(*nodemask, node_states[N_MEMORY]);
-}
-
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
 {
 	return pol->flags & MPOL_MODE_FLAGS;
@@ -202,7 +196,7 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
 
 static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
 {
-	if (!is_valid_nodemask(nodes))
+	if (nodes_empty(*nodes))
 		return -EINVAL;
 	pol->v.nodes = *nodes;
 	return 0;
@@ -234,7 +228,7 @@ static int mpol_set_nodemask(struct mempolicy *pol,
 		nodes = NULL;	/* explicit local allocation */
 	else {
 		if (pol->flags & MPOL_F_RELATIVE_NODES)
-			mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1);
+			mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
 		else
 			nodes_and(nsc->mask2, *nodes, nsc->mask1);
 
@@ -1047,10 +1041,6 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 
 	down_read(&mm->mmap_sem);
 
-	err = migrate_vmas(mm, from, to, flags);
-	if (err)
-		goto out;
-
 	/*
 	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 	 * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
@@ -1130,7 +1120,6 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 		if (err < 0)
 			break;
 	}
-out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;
diff --git a/mm/migrate.c b/mm/migrate.c
index b1d0212..344cdf6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1536,27 +1536,6 @@ out:
 	return err;
 }
 
-/*
- * Call migration functions in the vma_ops that may prepare
- * memory in a vm for migration. migration functions may perform
- * the migration for vmas that do not have an underlying page struct.
- */
-int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
-	const nodemask_t *from, unsigned long flags)
-{
- 	struct vm_area_struct *vma;
- 	int err = 0;
-
-	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
- 		if (vma->vm_ops && vma->vm_ops->migrate) {
- 			err = vma->vm_ops->migrate(vma, to, from, flags);
- 			if (err)
- 				break;
- 		}
- 	}
- 	return err;
-}
-
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Returns true if this is a safe migration target node for misplaced NUMA
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fa974d87..7633c50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -111,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
 /*
  * When calculating the number of globally allowed dirty pages, there
  * is a certain number of per-zone reserves that should not be
@@ -5586,7 +5587,7 @@ void __init mem_init_print_info(const char *str)
 
 	pr_info("Memory: %luK/%luK available "
 	       "(%luK kernel code, %luK rwdata, %luK rodata, "
-	       "%luK init, %luK bss, %luK reserved"
+	       "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
 #ifdef	CONFIG_HIGHMEM
 	       ", %luK highmem"
 #endif
@@ -5594,7 +5595,8 @@ void __init mem_init_print_info(const char *str)
 	       nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
 	       codesize >> 10, datasize >> 10, rosize >> 10,
 	       (init_data_size + init_code_size) >> 10, bss_size >> 10,
-	       (physpages - totalram_pages) << (PAGE_SHIFT-10),
+	       (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
+	       totalcma_pages << (PAGE_SHIFT-10),
 #ifdef	CONFIG_HIGHMEM
 	       totalhigh_pages << (PAGE_SHIFT-10),
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 185836b..73ba1df 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1536,7 +1536,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	 * holes of a sparse file, we actually need to allocate those pages,
 	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
 	 */
-	if (segment_eq(get_fs(), KERNEL_DS))
+	if (!iter_is_iovec(to))
 		sgp = SGP_DIRTY;
 
 	index = *ppos >> PAGE_CACHE_SHIFT;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 4d0a063..b724039 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -884,19 +884,6 @@ static struct notifier_block zs_cpu_nb = {
 	.notifier_call = zs_cpu_notifier
 };
 
-static void zs_unregister_cpu_notifier(void)
-{
-	int cpu;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
-	__unregister_cpu_notifier(&zs_cpu_nb);
-
-	cpu_notifier_register_done();
-}
-
 static int zs_register_cpu_notifier(void)
 {
 	int cpu, uninitialized_var(ret);
@@ -914,40 +901,28 @@ static int zs_register_cpu_notifier(void)
 	return notifier_to_errno(ret);
 }
 
-static void init_zs_size_classes(void)
+static void zs_unregister_cpu_notifier(void)
 {
-	int nr;
+	int cpu;
 
-	nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
-	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
-		nr += 1;
+	cpu_notifier_register_begin();
 
-	zs_size_classes = nr;
-}
+	for_each_online_cpu(cpu)
+		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
+	__unregister_cpu_notifier(&zs_cpu_nb);
 
-static void __exit zs_exit(void)
-{
-#ifdef CONFIG_ZPOOL
-	zpool_unregister_driver(&zs_zpool_driver);
-#endif
-	zs_unregister_cpu_notifier();
+	cpu_notifier_register_done();
 }
 
-static int __init zs_init(void)
+static void init_zs_size_classes(void)
 {
-	int ret = zs_register_cpu_notifier();
-
-	if (ret) {
-		zs_unregister_cpu_notifier();
-		return ret;
-	}
+	int nr;
 
-	init_zs_size_classes();
+	nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
+	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
+		nr += 1;
 
-#ifdef CONFIG_ZPOOL
-	zpool_register_driver(&zs_zpool_driver);
-#endif
-	return 0;
+	zs_size_classes = nr;
 }
 
 static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
@@ -967,113 +942,101 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
 	return true;
 }
 
+unsigned long zs_get_total_pages(struct zs_pool *pool)
+{
+	return atomic_long_read(&pool->pages_allocated);
+}
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
+
 /**
- * zs_create_pool - Creates an allocation pool to work from.
- * @flags: allocation flags used to allocate pool metadata
+ * zs_map_object - get address of allocated object from handle.
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
  *
- * This function must be called before anything when using
- * the zsmalloc allocator.
+ * Before using an object allocated from zs_malloc, it must be mapped using
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
  *
- * On success, a pointer to the newly created pool is returned,
- * otherwise NULL.
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
  */
-struct zs_pool *zs_create_pool(gfp_t flags)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+			enum zs_mapmode mm)
 {
-	int i;
-	struct zs_pool *pool;
-	struct size_class *prev_class = NULL;
+	struct page *page;
+	unsigned long obj_idx, off;
 
-	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-	if (!pool)
-		return NULL;
+	unsigned int class_idx;
+	enum fullness_group fg;
+	struct size_class *class;
+	struct mapping_area *area;
+	struct page *pages[2];
 
-	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
-			GFP_KERNEL);
-	if (!pool->size_class) {
-		kfree(pool);
-		return NULL;
-	}
+	BUG_ON(!handle);
 
 	/*
-	 * Iterate reversly, because, size of size_class that we want to use
-	 * for merging should be larger or equal to current size.
+	 * Because we use per-cpu mapping areas shared among the
+	 * pools/users, we can't allow mapping in interrupt context
+	 * because it can corrupt another user's mappings.
 	 */
-	for (i = zs_size_classes - 1; i >= 0; i--) {
-		int size;
-		int pages_per_zspage;
-		struct size_class *class;
-
-		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
-		if (size > ZS_MAX_ALLOC_SIZE)
-			size = ZS_MAX_ALLOC_SIZE;
-		pages_per_zspage = get_pages_per_zspage(size);
-
-		/*
-		 * size_class is used for normal zsmalloc operation such
-		 * as alloc/free for that size. Although it is natural that we
-		 * have one size_class for each size, there is a chance that we
-		 * can get more memory utilization if we use one size_class for
-		 * many different sizes whose size_class have same
-		 * characteristics. So, we makes size_class point to
-		 * previous size_class if possible.
-		 */
-		if (prev_class) {
-			if (can_merge(prev_class, size, pages_per_zspage)) {
-				pool->size_class[i] = prev_class;
-				continue;
-			}
-		}
-
-		class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
-		if (!class)
-			goto err;
+	BUG_ON(in_interrupt());
 
-		class->size = size;
-		class->index = i;
-		class->pages_per_zspage = pages_per_zspage;
-		spin_lock_init(&class->lock);
-		pool->size_class[i] = class;
+	obj_handle_to_location(handle, &page, &obj_idx);
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+	class = pool->size_class[class_idx];
+	off = obj_idx_to_offset(page, obj_idx, class->size);
 
-		prev_class = class;
+	area = &get_cpu_var(zs_map_area);
+	area->vm_mm = mm;
+	if (off + class->size <= PAGE_SIZE) {
+		/* this object is contained entirely within a page */
+		area->vm_addr = kmap_atomic(page);
+		return area->vm_addr + off;
 	}
 
-	pool->flags = flags;
-
-	return pool;
+	/* this object spans two pages */
+	pages[0] = page;
+	pages[1] = get_next_page(page);
+	BUG_ON(!pages[1]);
 
-err:
-	zs_destroy_pool(pool);
-	return NULL;
+	return __zs_map_object(area, pages, off, class->size);
 }
-EXPORT_SYMBOL_GPL(zs_create_pool);
+EXPORT_SYMBOL_GPL(zs_map_object);
 
-void zs_destroy_pool(struct zs_pool *pool)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
-	int i;
+	struct page *page;
+	unsigned long obj_idx, off;
 
-	for (i = 0; i < zs_size_classes; i++) {
-		int fg;
-		struct size_class *class = pool->size_class[i];
+	unsigned int class_idx;
+	enum fullness_group fg;
+	struct size_class *class;
+	struct mapping_area *area;
 
-		if (!class)
-			continue;
+	BUG_ON(!handle);
 
-		if (class->index != i)
-			continue;
+	obj_handle_to_location(handle, &page, &obj_idx);
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+	class = pool->size_class[class_idx];
+	off = obj_idx_to_offset(page, obj_idx, class->size);
 
-		for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
-			if (class->fullness_list[fg]) {
-				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
-					class->size, fg);
-			}
-		}
-		kfree(class);
-	}
+	area = this_cpu_ptr(&zs_map_area);
+	if (off + class->size <= PAGE_SIZE)
+		kunmap_atomic(area->vm_addr);
+	else {
+		struct page *pages[2];
 
-	kfree(pool->size_class);
-	kfree(pool);
+		pages[0] = page;
+		pages[1] = get_next_page(page);
+		BUG_ON(!pages[1]);
+
+		__zs_unmap_object(area, pages, off, class->size);
+	}
+	put_cpu_var(zs_map_area);
 }
-EXPORT_SYMBOL_GPL(zs_destroy_pool);
+EXPORT_SYMBOL_GPL(zs_unmap_object);
 
 /**
  * zs_malloc - Allocate block of given size from pool.
@@ -1176,100 +1139,137 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
 EXPORT_SYMBOL_GPL(zs_free);
 
 /**
- * zs_map_object - get address of allocated object from handle.
- * @pool: pool from which the object was allocated
- * @handle: handle returned from zs_malloc
- *
- * Before using an object allocated from zs_malloc, it must be mapped using
- * this function. When done with the object, it must be unmapped using
- * zs_unmap_object.
+ * zs_create_pool - Creates an allocation pool to work from.
+ * @flags: allocation flags used to allocate pool metadata
  *
- * Only one object can be mapped per cpu at a time. There is no protection
- * against nested mappings.
+ * This function must be called before anything when using
+ * the zsmalloc allocator.
  *
- * This function returns with preemption and page faults disabled.
+ * On success, a pointer to the newly created pool is returned,
+ * otherwise NULL.
  */
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
-			enum zs_mapmode mm)
+struct zs_pool *zs_create_pool(gfp_t flags)
 {
-	struct page *page;
-	unsigned long obj_idx, off;
+	int i;
+	struct zs_pool *pool;
+	struct size_class *prev_class = NULL;
 
-	unsigned int class_idx;
-	enum fullness_group fg;
-	struct size_class *class;
-	struct mapping_area *area;
-	struct page *pages[2];
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return NULL;
 
-	BUG_ON(!handle);
+	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+			GFP_KERNEL);
+	if (!pool->size_class) {
+		kfree(pool);
+		return NULL;
+	}
 
 	/*
-	 * Because we use per-cpu mapping areas shared among the
-	 * pools/users, we can't allow mapping in interrupt context
-	 * because it can corrupt another users mappings.
+	 * Iterate in reverse, because the size of the size_class we want to use
+	 * for merging should be larger than or equal to the current size.
 	 */
-	BUG_ON(in_interrupt());
+	for (i = zs_size_classes - 1; i >= 0; i--) {
+		int size;
+		int pages_per_zspage;
+		struct size_class *class;
 
-	obj_handle_to_location(handle, &page, &obj_idx);
-	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = pool->size_class[class_idx];
-	off = obj_idx_to_offset(page, obj_idx, class->size);
+		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
+		if (size > ZS_MAX_ALLOC_SIZE)
+			size = ZS_MAX_ALLOC_SIZE;
+		pages_per_zspage = get_pages_per_zspage(size);
 
-	area = &get_cpu_var(zs_map_area);
-	area->vm_mm = mm;
-	if (off + class->size <= PAGE_SIZE) {
-		/* this object is contained entirely within a page */
-		area->vm_addr = kmap_atomic(page);
-		return area->vm_addr + off;
+		/*
+		 * size_class is used for normal zsmalloc operation such
+		 * as alloc/free for that size. Although it is natural that we
+		 * have one size_class for each size, there is a chance that we
+		 * can get more memory utilization if we use one size_class for
+		 * many different sizes whose size_class have same
+		 * characteristics. So, we make size_class point to
+		 * previous size_class if possible.
+		 */
+		if (prev_class) {
+			if (can_merge(prev_class, size, pages_per_zspage)) {
+				pool->size_class[i] = prev_class;
+				continue;
+			}
+		}
+
+		class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
+		if (!class)
+			goto err;
+
+		class->size = size;
+		class->index = i;
+		class->pages_per_zspage = pages_per_zspage;
+		spin_lock_init(&class->lock);
+		pool->size_class[i] = class;
+
+		prev_class = class;
 	}
 
-	/* this object spans two pages */
-	pages[0] = page;
-	pages[1] = get_next_page(page);
-	BUG_ON(!pages[1]);
+	pool->flags = flags;
 
-	return __zs_map_object(area, pages, off, class->size);
+	return pool;
+
+err:
+	zs_destroy_pool(pool);
+	return NULL;
 }
-EXPORT_SYMBOL_GPL(zs_map_object);
+EXPORT_SYMBOL_GPL(zs_create_pool);
 
-void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+void zs_destroy_pool(struct zs_pool *pool)
 {
-	struct page *page;
-	unsigned long obj_idx, off;
+	int i;
 
-	unsigned int class_idx;
-	enum fullness_group fg;
-	struct size_class *class;
-	struct mapping_area *area;
+	for (i = 0; i < zs_size_classes; i++) {
+		int fg;
+		struct size_class *class = pool->size_class[i];
 
-	BUG_ON(!handle);
+		if (!class)
+			continue;
 
-	obj_handle_to_location(handle, &page, &obj_idx);
-	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = pool->size_class[class_idx];
-	off = obj_idx_to_offset(page, obj_idx, class->size);
+		if (class->index != i)
+			continue;
 
-	area = this_cpu_ptr(&zs_map_area);
-	if (off + class->size <= PAGE_SIZE)
-		kunmap_atomic(area->vm_addr);
-	else {
-		struct page *pages[2];
+		for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
+			if (class->fullness_list[fg]) {
+				pr_info("Freeing non-empty class with size %db, fullness group %d\n",
+					class->size, fg);
+			}
+		}
+		kfree(class);
+	}
 
-		pages[0] = page;
-		pages[1] = get_next_page(page);
-		BUG_ON(!pages[1]);
+	kfree(pool->size_class);
+	kfree(pool);
+}
+EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
-		__zs_unmap_object(area, pages, off, class->size);
+static int __init zs_init(void)
+{
+	int ret = zs_register_cpu_notifier();
+
+	if (ret) {
+		zs_unregister_cpu_notifier();
+		return ret;
 	}
-	put_cpu_var(zs_map_area);
+
+	init_zs_size_classes();
+
+#ifdef CONFIG_ZPOOL
+	zpool_register_driver(&zs_zpool_driver);
+#endif
+	return 0;
 }
-EXPORT_SYMBOL_GPL(zs_unmap_object);
 
-unsigned long zs_get_total_pages(struct zs_pool *pool)
+static void __exit zs_exit(void)
 {
-	return atomic_long_read(&pool->pages_allocated);
+#ifdef CONFIG_ZPOOL
+	zpool_unregister_driver(&zs_zpool_driver);
+#endif
+	zs_unregister_cpu_notifier();
 }
-EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 module_init(zs_init);
 module_exit(zs_exit);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 79d84b8..fe18825 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -661,7 +661,7 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
 	memset(&cp, 0, sizeof(cp));
 
 	/* Update random address, but set require_privacy to false so
-	 * that we never connect with an unresolvable address.
+	 * that we never connect with a non-resolvable address.
 	 */
 	if (hci_update_random_address(req, false, &own_addr_type))
 		return;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 93f92a0..5dcacf9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1373,8 +1373,6 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
 
 static void bredr_setup(struct hci_request *req)
 {
-	struct hci_dev *hdev = req->hdev;
-
 	__le16 param;
 	__u8 flt_type;
 
@@ -1403,14 +1401,6 @@ static void bredr_setup(struct hci_request *req)
 	/* Connection accept timeout ~20 secs */
 	param = cpu_to_le16(0x7d00);
 	hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
-	/* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2,
-	 * but it does not support page scan related HCI commands.
-	 */
-	if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) {
-		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
-		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
-	}
 }
 
 static void le_setup(struct hci_request *req)
@@ -1718,6 +1708,16 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
 	if (hdev->commands[5] & 0x10)
 		hci_setup_link_policy(req);
 
+	if (hdev->commands[8] & 0x01)
+		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+
+	/* Some older Broadcom based Bluetooth 1.2 controllers do not
+	 * support the Read Page Scan Type command. Check support for
+	 * this command in the bit mask of supported commands.
+	 */
+	if (hdev->commands[13] & 0x01)
+		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+
 	if (lmp_le_capable(hdev)) {
 		u8 events[8];
 
@@ -2634,6 +2634,12 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	drain_workqueue(hdev->workqueue);
 
 	hci_dev_lock(hdev);
+
+	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+		if (hdev->dev_type == HCI_BREDR)
+			mgmt_powered(hdev, 0);
+	}
+
 	hci_inquiry_cache_flush(hdev);
 	hci_pend_le_actions_clear(hdev);
 	hci_conn_hash_flush(hdev);
@@ -2681,14 +2687,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hdev->flags &= BIT(HCI_RAW);
 	hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
 
-	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
-		if (hdev->dev_type == HCI_BREDR) {
-			hci_dev_lock(hdev);
-			mgmt_powered(hdev, 0);
-			hci_dev_unlock(hdev);
-		}
-	}
-
 	/* Controller radio is available but is currently powered down */
 	hdev->amp_status = AMP_STATUS_POWERED_DOWN;
 
@@ -3083,7 +3081,9 @@ static void hci_power_on(struct work_struct *work)
 
 	err = hci_dev_do_open(hdev);
 	if (err < 0) {
+		hci_dev_lock(hdev);
 		mgmt_set_powered_failed(hdev, err);
+		hci_dev_unlock(hdev);
 		return;
 	}
 
@@ -3959,17 +3959,29 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 	}
 
 	/* In case of required privacy without resolvable private address,
-	 * use an unresolvable private address. This is useful for active
+	 * use a non-resolvable private address. This is useful for active
 	 * scanning and non-connectable advertising.
 	 */
 	if (require_privacy) {
-		bdaddr_t urpa;
+		bdaddr_t nrpa;
+
+		while (true) {
+			/* The non-resolvable private address is generated
+			 * from six random bytes with the two most significant
+			 * bits cleared.
+			 */
+			get_random_bytes(&nrpa, 6);
+			nrpa.b[5] &= 0x3f;
 
-		get_random_bytes(&urpa, 6);
-		urpa.b[5] &= 0x3f;	/* Clear two most significant bits */
+			/* The non-resolvable private address shall not be
+			 * equal to the public address.
+			 */
+			if (bacmp(&hdev->bdaddr, &nrpa))
+				break;
+		}
 
 		*own_addr_type = ADDR_LE_DEV_RANDOM;
-		set_random_addr(req, &urpa);
+		set_random_addr(req, &nrpa);
 		return 0;
 	}
 
@@ -5625,7 +5637,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 	u8 filter_policy;
 
 	/* Set require_privacy to false since no SCAN_REQ are send
-	 * during passive scanning. Not using an unresolvable address
+	 * during passive scanning. Not using a non-resolvable address
 	 * here is important so that peer devices using direct
 	 * advertising with our address will be correctly reported
 	 * by the controller.
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 322abbb..39a5c8a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -257,6 +257,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		__u8 param = *((__u8 *) sent);
 
@@ -268,6 +270,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (test_bit(HCI_MGMT, &hdev->dev_flags))
 		mgmt_auth_enable_complete(hdev, status);
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -443,6 +447,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		if (sent->mode)
 			hdev->features[1][0] |= LMP_HOST_SSP;
@@ -458,6 +464,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
 		else
 			clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
@@ -471,6 +479,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		if (sent->support)
 			hdev->features[1][0] |= LMP_HOST_SC;
@@ -486,6 +496,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
 		else
 			clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1135,6 +1147,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 	if (!cp)
 		return;
 
+	hci_dev_lock(hdev);
+
 	switch (cp->enable) {
 	case LE_SCAN_ENABLE:
 		set_bit(HCI_LE_SCAN, &hdev->dev_flags);
@@ -1184,6 +1198,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
 		break;
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
@@ -1278,6 +1294,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (sent->le) {
 		hdev->features[1][0] |= LMP_HOST_LE;
 		set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
@@ -1291,6 +1309,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
 		hdev->features[1][0] |= LMP_HOST_LE_BREDR;
 	else
 		hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a2b6dfa3..d04dc00 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6966,8 +6966,9 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 	    test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags))
 		conn->local_fixed_chan |= L2CAP_FC_A2MP;
 
-	if (bredr_sc_enabled(hcon->hdev) &&
-	    test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
+	if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) &&
+	    (bredr_sc_enabled(hcon->hdev) ||
+	     test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags)))
 		conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
 
 	mutex_init(&conn->ident_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7384f11..693ce8b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2199,12 +2199,14 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
 {
 	struct cmd_lookup match = { NULL, hdev };
 
+	hci_dev_lock(hdev);
+
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 
 		mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
 				     &mgmt_err);
-		return;
+		goto unlock;
 	}
 
 	mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
@@ -2222,17 +2224,16 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
 	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
 		struct hci_request req;
 
-		hci_dev_lock(hdev);
-
 		hci_req_init(&req, hdev);
 		update_adv_data(&req);
 		update_scan_rsp_data(&req);
 		hci_req_run(&req, NULL);
 
 		hci_update_background_scan(hdev);
-
-		hci_dev_unlock(hdev);
 	}
+
+unlock:
+	hci_dev_unlock(hdev);
 }
 
 static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
@@ -3114,14 +3115,13 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
 	conn->disconn_cfm_cb = NULL;
 
 	hci_conn_drop(conn);
-	hci_conn_put(conn);
-
-	mgmt_pending_remove(cmd);
 
 	/* The device is paired so there is no need to remove
 	 * its connection parameters anymore.
 	 */
 	clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
+
+	hci_conn_put(conn);
 }
 
 void mgmt_smp_complete(struct hci_conn *conn, bool complete)
@@ -3130,8 +3130,10 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete)
 	struct pending_cmd *cmd;
 
 	cmd = find_pairing(conn);
-	if (cmd)
+	if (cmd) {
 		cmd->cmd_complete(cmd, status);
+		mgmt_pending_remove(cmd);
+	}
 }
 
 static void pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3141,10 +3143,13 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status)
 	BT_DBG("status %u", status);
 
 	cmd = find_pairing(conn);
-	if (!cmd)
+	if (!cmd) {
 		BT_DBG("Unable to find a pending command");
-	else
-		cmd->cmd_complete(cmd, mgmt_status(status));
+		return;
+	}
+
+	cmd->cmd_complete(cmd, mgmt_status(status));
+	mgmt_pending_remove(cmd);
 }
 
 static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3157,10 +3162,13 @@ static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
 		return;
 
 	cmd = find_pairing(conn);
-	if (!cmd)
+	if (!cmd) {
 		BT_DBG("Unable to find a pending command");
-	else
-		cmd->cmd_complete(cmd, mgmt_status(status));
+		return;
+	}
+
+	cmd->cmd_complete(cmd, mgmt_status(status));
+	mgmt_pending_remove(cmd);
 }
 
 static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3274,8 +3282,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	cmd->user_data = hci_conn_get(conn);
 
 	if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) &&
-	    hci_conn_security(conn, sec_level, auth_type, true))
-		pairing_complete(cmd, 0);
+	    hci_conn_security(conn, sec_level, auth_type, true)) {
+		cmd->cmd_complete(cmd, 0);
+		mgmt_pending_remove(cmd);
+	}
 
 	err = 0;
 
@@ -3317,7 +3327,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto unlock;
 	}
 
-	pairing_complete(cmd, MGMT_STATUS_CANCELLED);
+	cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED);
+	mgmt_pending_remove(cmd);
 
 	err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0,
 			   addr, sizeof(*addr));
@@ -3791,7 +3802,7 @@ static bool trigger_discovery(struct hci_request *req, u8 *status)
 
 		/* All active scans will be done with either a resolvable
 		 * private address (when privacy feature has been enabled)
-		 * or unresolvable private address.
+		 * or non-resolvable private address.
 		 */
 		err = hci_update_random_address(req, true, &own_addr_type);
 		if (err < 0) {
@@ -4279,12 +4290,14 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
 {
 	struct cmd_lookup match = { NULL, hdev };
 
+	hci_dev_lock(hdev);
+
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 
 		mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
 				     cmd_status_rsp, &mgmt_err);
-		return;
+		goto unlock;
 	}
 
 	if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
@@ -4299,6 +4312,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
 
 	if (match.sk)
 		sock_put(match.sk);
+
+unlock:
+	hci_dev_unlock(hdev);
 }
 
 static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -6081,6 +6097,11 @@ static int powered_update_hci(struct hci_dev *hdev)
 		hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
 	}
 
+	if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
+		u8 sc = 0x01;
+		hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc);
+	}
+
 	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
 	    lmp_bredr_capable(hdev)) {
 		struct hci_cp_write_le_host_supported cp;
@@ -6130,8 +6151,7 @@ static int powered_update_hci(struct hci_dev *hdev)
 int mgmt_powered(struct hci_dev *hdev, u8 powered)
 {
 	struct cmd_lookup match = { NULL, hdev };
-	u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
-	u8 zero_cod[] = { 0, 0, 0 };
+	u8 status, zero_cod[] = { 0, 0, 0 };
 	int err;
 
 	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -6147,7 +6167,20 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
 	}
 
 	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
-	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status_not_powered);
+
+	/* If the power off is because of hdev unregistration let us
+	 * use the appropriate INVALID_INDEX status. Otherwise use
+	 * NOT_POWERED. We cover both scenarios here since later in
+	 * mgmt_index_removed() any hci_conn callbacks will have already
+	 * been triggered, potentially causing misleading DISCONNECTED
+	 * status responses.
+	 */
+	if (test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+		status = MGMT_STATUS_INVALID_INDEX;
+	else
+		status = MGMT_STATUS_NOT_POWERED;
+
+	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
 	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
 		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -6681,8 +6714,10 @@ void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status)
 	mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev),
 		    cmd ? cmd->sk : NULL);
 
-	if (cmd)
-		pairing_complete(cmd, status);
+	if (cmd) {
+		cmd->cmd_complete(cmd, status);
+		mgmt_pending_remove(cmd);
+	}
 }
 
 void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
@@ -7046,13 +7081,15 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		 * kept and checking possible scan response data
 		 * will be skipped.
 		 */
-		if (hdev->discovery.uuid_count > 0) {
+		if (hdev->discovery.uuid_count > 0)
 			match = eir_has_uuids(eir, eir_len,
 					      hdev->discovery.uuid_count,
 					      hdev->discovery.uuids);
-			if (!match)
-				return;
-		}
+		else
+			match = true;
+
+		if (!match && !scan_rsp_len)
+			return;
 
 		/* Copy EIR or advertising data into event */
 		memcpy(ev->eir, eir, eir_len);
@@ -7061,8 +7098,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		 * provided, results with empty EIR or advertising data
 		 * should be dropped since they do not match any UUID.
 		 */
-		if (hdev->discovery.uuid_count > 0)
+		if (hdev->discovery.uuid_count > 0 && !scan_rsp_len)
 			return;
+
+		match = false;
 	}
 
 	if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV))
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6a46252..b67749b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1673,7 +1673,8 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	/* SMP over BR/EDR requires special treatment */
 	if (conn->hcon->type == ACL_LINK) {
 		/* We must have a BR/EDR SC link */
-		if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags))
+		if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) &&
+		    !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags))
 			return SMP_CROSS_TRANSP_NOT_ALLOWED;
 
 		set_bit(SMP_FLAG_SC, &smp->flags);
@@ -2927,7 +2928,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 	tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0);
 	if (IS_ERR(tfm_aes)) {
 		BT_ERR("Unable to create crypto context");
-		return ERR_PTR(PTR_ERR(tfm_aes));
+		return ERR_CAST(tfm_aes);
 	}
 
 create_chan:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 7e38b72..1584581 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
 
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/messenger.h>
 
 #include "crypto.h"
 #include "auth_x.h"
@@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	dout("build_authorizer for %s %p\n",
 	     ceph_entity_type_name(th->service), au);
 
+	ceph_crypto_key_destroy(&au->session_key);
+	ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
+	if (ret)
+		return ret;
+
 	maxlen = sizeof(*msg_a) + sizeof(msg_b) +
 		ceph_x_encrypt_buflen(ticket_blob_len);
 	dout("  need len %d\n", maxlen);
@@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	}
 	if (!au->buf) {
 		au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
-		if (!au->buf)
+		if (!au->buf) {
+			ceph_crypto_key_destroy(&au->session_key);
 			return -ENOMEM;
+		}
 	}
 	au->service = th->service;
 	au->secret_id = th->secret_id;
@@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	get_random_bytes(&au->nonce, sizeof(au->nonce));
 	msg_b.struct_v = 1;
 	msg_b.nonce = cpu_to_le64(au->nonce);
-	ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
+	ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
 			     p, end - p);
 	if (ret < 0)
 		goto out_buf;
@@ -560,6 +568,8 @@ static int ceph_x_create_authorizer(
 	auth->authorizer_buf_len = au->buf->vec.iov_len;
 	auth->authorizer_reply_buf = au->reply_buf;
 	auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+	auth->sign_message = ac->ops->sign_message;
+	auth->check_message_signature = ac->ops->check_message_signature;
 
 	return 0;
 }
@@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 					  struct ceph_authorizer *a, size_t len)
 {
 	struct ceph_x_authorizer *au = (void *)a;
-	struct ceph_x_ticket_handler *th;
 	int ret = 0;
 	struct ceph_x_authorize_reply reply;
 	void *preply = &reply;
 	void *p = au->reply_buf;
 	void *end = p + sizeof(au->reply_buf);
 
-	th = get_ticket_handler(ac, au->service);
-	if (IS_ERR(th))
-		return PTR_ERR(th);
-	ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
+	ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply));
 	if (ret < 0)
 		return ret;
 	if (ret != sizeof(reply))
@@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
 {
 	struct ceph_x_authorizer *au = (void *)a;
 
+	ceph_crypto_key_destroy(&au->session_key);
 	ceph_buffer_put(au->buf);
 	kfree(au);
 }
@@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
 		memset(&th->validity, 0, sizeof(th->validity));
 }
 
+static int calcu_signature(struct ceph_x_authorizer *au,
+			   struct ceph_msg *msg, __le64 *sig)
+{
+	int ret;
+	char tmp_enc[40];
+	__le32 tmp[5] = {	/* every element must be little-endian */
+		cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc,
+		msg->footer.middle_crc, msg->footer.data_crc,
+	};
+	ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp),
+			     tmp_enc, sizeof(tmp_enc));
+	if (ret < 0)
+		return ret;
+	*sig = *(__le64*)(tmp_enc + 4);
+	return 0;
+}
+
+static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
+			       struct ceph_msg *msg)
+{
+	int ret;
+	if (!auth->authorizer)
+		return 0;
+	ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+			      msg, &msg->footer.sig);
+	if (ret < 0)
+		return ret;
+	msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED;
+	return 0;
+}
+
+static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
+					  struct ceph_msg *msg)
+{
+	__le64 sig_check;
+	int ret;
+
+	if (!auth->authorizer)
+		return 0;
+	ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+			      msg, &sig_check);
+	if (ret < 0)
+		return ret;
+	if (sig_check == msg->footer.sig)
+		return 0;
+	if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED)
+		dout("ceph_x_check_message_signature %p has signature %llx "
+		     "expect %llx\n", msg, msg->footer.sig, sig_check);
+	else
+		dout("ceph_x_check_message_signature %p sender did not set "
+		     "CEPH_MSG_FOOTER_SIGNED\n", msg);
+	return -EBADMSG;
+}
 
 static const struct ceph_auth_client_ops ceph_x_ops = {
 	.name = "x",
@@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
 	.invalidate_authorizer = ceph_x_invalidate_authorizer,
 	.reset =  ceph_x_reset,
 	.destroy = ceph_x_destroy,
+	.sign_message = ceph_x_sign_message,
+	.check_message_signature = ceph_x_check_message_signature,
 };
 
 
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 65ee720..e8b7c69 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
 
 
 struct ceph_x_authorizer {
+	struct ceph_crypto_key session_key;
 	struct ceph_buffer *buf;
 	unsigned int service;
 	u64 nonce;
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 621b5f6..add5f92 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -6,7 +6,7 @@
 
 #include <linux/ceph/buffer.h>
 #include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
+#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
 
 struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
 {
@@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref)
 	struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
 
 	dout("buffer_release %p\n", b);
-	ceph_kvfree(b->vec.iov_base);
+	kvfree(b->vec.iov_base);
 	kfree(b);
 }
 EXPORT_SYMBOL(ceph_buffer_release);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 58fbfe1..5d5ab67 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags)
 	return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
 }
 
-void ceph_kvfree(const void *ptr)
-{
-	if (is_vmalloc_addr(ptr))
-		vfree(ptr);
-	else
-		kfree(ptr);
-}
-
 
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
@@ -245,6 +237,8 @@ enum {
 	Opt_noshare,
 	Opt_crc,
 	Opt_nocrc,
+	Opt_cephx_require_signatures,
+	Opt_nocephx_require_signatures,
 };
 
 static match_table_t opt_tokens = {
@@ -263,6 +257,8 @@ static match_table_t opt_tokens = {
 	{Opt_noshare, "noshare"},
 	{Opt_crc, "crc"},
 	{Opt_nocrc, "nocrc"},
+	{Opt_cephx_require_signatures, "cephx_require_signatures"},
+	{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
 	{-1, NULL}
 };
 
@@ -461,6 +457,12 @@ ceph_parse_options(char *options, const char *dev_name,
 		case Opt_nocrc:
 			opt->flags |= CEPH_OPT_NOCRC;
 			break;
+		case Opt_cephx_require_signatures:
+			opt->flags &= ~CEPH_OPT_NOMSGAUTH;
+			break;
+		case Opt_nocephx_require_signatures:
+			opt->flags |= CEPH_OPT_NOMSGAUTH;
+			break;
 
 		default:
 			BUG_ON(token);
@@ -504,6 +506,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
 	init_waitqueue_head(&client->auth_wq);
 	client->auth_err = 0;
 
+	if (!ceph_test_opt(client, NOMSGAUTH))
+		required_features |= CEPH_FEATURE_MSG_AUTH;
+
 	client->extra_mon_dispatch = NULL;
 	client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
 		supported_features;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8d1653c..33a2f20 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1196,8 +1196,18 @@ static void prepare_write_message_footer(struct ceph_connection *con)
 	dout("prepare_write_message_footer %p\n", con);
 	con->out_kvec_is_msg = true;
 	con->out_kvec[v].iov_base = &m->footer;
-	con->out_kvec[v].iov_len = sizeof(m->footer);
-	con->out_kvec_bytes += sizeof(m->footer);
+	if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
+		if (con->ops->sign_message)
+			con->ops->sign_message(con, m);
+		else
+			m->footer.sig = 0;
+		con->out_kvec[v].iov_len = sizeof(m->footer);
+		con->out_kvec_bytes += sizeof(m->footer);
+	} else {
+		m->old_footer.flags = m->footer.flags;
+		con->out_kvec[v].iov_len = sizeof(m->old_footer);
+		con->out_kvec_bytes += sizeof(m->old_footer);
+	}
 	con->out_kvec_left++;
 	con->out_more = m->more_to_follow;
 	con->out_msg_done = true;
@@ -2249,6 +2259,7 @@ static int read_partial_message(struct ceph_connection *con)
 	int ret;
 	unsigned int front_len, middle_len, data_len;
 	bool do_datacrc = !con->msgr->nocrc;
+	bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
 	u64 seq;
 	u32 crc;
 
@@ -2361,12 +2372,21 @@ static int read_partial_message(struct ceph_connection *con)
 	}
 
 	/* footer */
-	size = sizeof (m->footer);
+	if (need_sign)
+		size = sizeof(m->footer);
+	else
+		size = sizeof(m->old_footer);
+
 	end += size;
 	ret = read_partial(con, end, size, &m->footer);
 	if (ret <= 0)
 		return ret;
 
+	if (!need_sign) {
+		m->footer.flags = m->old_footer.flags;
+		m->footer.sig = 0;
+	}
+
 	dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
 	     m, front_len, m->footer.front_crc, middle_len,
 	     m->footer.middle_crc, data_len, m->footer.data_crc);
@@ -2390,6 +2410,12 @@ static int read_partial_message(struct ceph_connection *con)
 		return -EBADMSG;
 	}
 
+	if (need_sign && con->ops->check_message_signature &&
+	    con->ops->check_message_signature(con, m)) {
+		pr_err("read_partial_message %p signature check failed\n", m);
+		return -EBADMSG;
+	}
+
 	return 1; /* done! */
 }
 
@@ -3288,7 +3314,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
 static void ceph_msg_free(struct ceph_msg *m)
 {
 	dout("%s %p\n", __func__, m);
-	ceph_kvfree(m->front.iov_base);
+	kvfree(m->front.iov_base);
 	kmem_cache_free(ceph_msg_cache, m);
 }
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6f16428..53299c7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
 		ceph_osd_data_release(&op->cls.request_data);
 		ceph_osd_data_release(&op->cls.response_data);
 		break;
+	case CEPH_OSD_OP_SETXATTR:
+	case CEPH_OSD_OP_CMPXATTR:
+		ceph_osd_data_release(&op->xattr.osd_data);
+		break;
 	default:
 		break;
 	}
@@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
 	size_t payload_len = 0;
 
 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
-	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
-	       opcode != CEPH_OSD_OP_TRUNCATE);
+	       opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE);
 
 	op->extent.offset = offset;
 	op->extent.length = length;
@@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
 
+int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
+			  u16 opcode, const char *name, const void *value,
+			  size_t size, u8 cmp_op, u8 cmp_mode)
+{
+	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+	struct ceph_pagelist *pagelist;
+	size_t payload_len;
+
+	BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
+
+	pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+	if (!pagelist)
+		return -ENOMEM;
+
+	ceph_pagelist_init(pagelist);
+
+	payload_len = strlen(name);
+	op->xattr.name_len = payload_len;
+	ceph_pagelist_append(pagelist, name, payload_len);
+
+	op->xattr.value_len = size;
+	ceph_pagelist_append(pagelist, value, size);
+	payload_len += size;
+
+	op->xattr.cmp_op = cmp_op;
+	op->xattr.cmp_mode = cmp_mode;
+
+	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
+	op->payload_len = payload_len;
+	return 0;
+}
+EXPORT_SYMBOL(osd_req_op_xattr_init);
+
 void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
 				unsigned int which, u16 opcode,
 				u64 cookie, u64 version, int flag)
@@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 	case CEPH_OSD_OP_READ:
 	case CEPH_OSD_OP_WRITE:
 	case CEPH_OSD_OP_ZERO:
-	case CEPH_OSD_OP_DELETE:
 	case CEPH_OSD_OP_TRUNCATE:
 		if (src->op == CEPH_OSD_OP_WRITE)
 			request_data_len = src->extent.length;
@@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 		dst->alloc_hint.expected_write_size =
 		    cpu_to_le64(src->alloc_hint.expected_write_size);
 		break;
+	case CEPH_OSD_OP_SETXATTR:
+	case CEPH_OSD_OP_CMPXATTR:
+		dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
+		dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
+		dst->xattr.cmp_op = src->xattr.cmp_op;
+		dst->xattr.cmp_mode = src->xattr.cmp_mode;
+		osd_data = &src->xattr.osd_data;
+		ceph_osdc_msg_data_add(req->r_request, osd_data);
+		request_data_len = osd_data->pagelist->length;
+		break;
+	case CEPH_OSD_OP_CREATE:
+	case CEPH_OSD_OP_DELETE:
+		break;
 	default:
 		pr_err("unsupported osd opcode %s\n",
 			ceph_osd_op_name(src->op));
@@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       struct ceph_file_layout *layout,
 					       struct ceph_vino vino,
-					       u64 off, u64 *plen, int num_ops,
+					       u64 off, u64 *plen,
+					       unsigned int which, int num_ops,
 					       int opcode, int flags,
 					       struct ceph_snap_context *snapc,
 					       u32 truncate_seq,
@@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	u64 objnum = 0;
 	u64 objoff = 0;
 	u64 objlen = 0;
-	u32 object_size;
-	u64 object_base;
 	int r;
 
 	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
-	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
-	       opcode != CEPH_OSD_OP_TRUNCATE);
+	       opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
+	       opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
 
 	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
 					GFP_NOFS);
@@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		return ERR_PTR(r);
 	}
 
-	object_size = le32_to_cpu(layout->fl_object_size);
-	object_base = off - objoff;
-	if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
-		if (truncate_size <= object_base) {
-			truncate_size = 0;
-		} else {
-			truncate_size -= object_base;
-			if (truncate_size > object_size)
-				truncate_size = object_size;
+	if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
+		osd_req_op_init(req, which, opcode);
+	} else {
+		u32 object_size = le32_to_cpu(layout->fl_object_size);
+		u64 object_base = off - objoff; /* off and objoff are u64 */
+		if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+			if (truncate_size <= object_base) {
+				truncate_size = 0;
+			} else {
+				truncate_size -= object_base;
+				if (truncate_size > object_size)
+					truncate_size = object_size;
+			}
 		}
+		osd_req_op_extent_init(req, which, opcode, objoff, objlen,
+				       truncate_size, truncate_seq);
 	}
 
-	osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
-				truncate_size, truncate_seq);
-
-	/*
-	 * A second op in the ops array means the caller wants to
-	 * also issue a include a 'startsync' command so that the
-	 * osd will flush data quickly.
-	 */
-	if (num_ops > 1)
-		osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
-
 	req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
 
 	snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
@@ -2626,7 +2668,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 
 	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
 	     vino.snap, off, *plen);
-	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, truncate_seq, truncate_size,
 				    false);
@@ -2669,7 +2711,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	int page_align = off & ~PAGE_MASK;
 
 	BUG_ON(vino.snap != CEPH_NOSNAP);	/* snapshots aren't writeable */
-	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
+	req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
 				    CEPH_OSD_OP_WRITE,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
 				    snapc, truncate_seq, truncate_size,
@@ -2920,6 +2962,20 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_auth_handshake *auth = &o->o_auth;
+	return ceph_auth_check_message_signature(auth, msg);
+}
+
 static const struct ceph_connection_operations osd_con_ops = {
 	.get = get_osd_con,
 	.put = put_osd_con,
@@ -2928,5 +2984,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.sign_message = sign_message,
+	.check_message_signature = check_message_signature,
 	.fault = osd_reset,
 };
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d06107d..9cf6fe9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2368,6 +2368,11 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
 		return err;
 	}
 
+	if (vid) {
+		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		return err;
+	}
+
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
 		err = dev_uc_add_excl(dev, addr);
 	else if (is_multicast_ether_addr(addr))
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index a457232..95e47c9 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -159,6 +159,15 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 	}
 }
 
+static void geneve_notify_del_rx_port(struct geneve_sock *gs)
+{
+	struct sock *sk = gs->sock->sk;
+	sa_family_t sa_family = sk->sk_family;
+
+	if (sa_family == AF_INET)
+		udp_del_offload(&gs->udp_offloads);
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -287,6 +296,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
 				    geneve_rcv_t *rcv, void *data,
 				    bool no_share, bool ipv6)
 {
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 
 	gs = geneve_socket_create(net, port, rcv, data, ipv6);
@@ -296,15 +306,15 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
 	if (no_share)	/* Return error if sharing is not allowed. */
 		return ERR_PTR(-EINVAL);
 
+	spin_lock(&gn->sock_lock);
 	gs = geneve_find_sock(net, port);
-	if (gs) {
-		if (gs->rcv == rcv)
-			atomic_inc(&gs->refcnt);
-		else
+	if (gs && ((gs->rcv != rcv) ||
+		   !atomic_add_unless(&gs->refcnt, 1, 0)))
 			gs = ERR_PTR(-EBUSY);
-	} else {
+	spin_unlock(&gn->sock_lock);
+
+	if (!gs)
 		gs = ERR_PTR(-EINVAL);
-	}
 
 	return gs;
 }
@@ -312,9 +322,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
 
 void geneve_sock_release(struct geneve_sock *gs)
 {
+	struct net *net = sock_net(gs->sock->sk);
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+
 	if (!atomic_dec_and_test(&gs->refcnt))
 		return;
 
+	spin_lock(&gn->sock_lock);
+	hlist_del_rcu(&gs->hlist);
+	geneve_notify_del_rx_port(gs);
+	spin_unlock(&gn->sock_lock);
+
 	queue_work(geneve_wq, &gs->del_work);
 }
 EXPORT_SYMBOL_GPL(geneve_sock_release);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ac84912..4f4bf5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tnl_params;
 
-	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		goto out;
-
 	if (dev->header_ops) {
 		/* Need space for new headers */
 		if (skb_cow_head(skb, dev->needed_headroom -
@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 		 * to gre header.
 		 */
 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		skb_reset_mac_header(skb);
 	} else {
 		if (skb_cow_head(skb, dev->needed_headroom))
 			goto free_skb;
@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 		tnl_params = &tunnel->parms.iph;
 	}
 
+	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		goto out;
+
 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 
 	return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 63e745a..d3e4479 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -514,6 +514,9 @@ const struct ip_tunnel_encap_ops __rcu *
 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 			    unsigned int num)
 {
+	if (num >= MAX_IPTUN_ENCAP_OPS)
+		return -ERANGE;
+
 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 			&iptun_encaps[num],
 			NULL, ops) ? 0 : -1;
@@ -525,6 +528,9 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
 {
 	int ret;
 
+	if (num >= MAX_IPTUN_ENCAP_OPS)
+		return -ERANGE;
+
 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 		       &iptun_encaps[num],
 		       ops, NULL) == ops) ? 0 : -1;
@@ -567,6 +573,9 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 	if (t->encap.type == TUNNEL_ENCAP_NONE)
 		return 0;
 
+	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+		return -EINVAL;
+
 	rcu_read_lock();
 	ops = rcu_dereference(iptun_encaps[t->encap.type]);
 	if (likely(ops && ops->build_header))
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5d6dae9..da1c12c 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1011,6 +1011,10 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
 
 	ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
 
+	ieee80211_recalc_smps_chanctx(local, new_ctx);
+	ieee80211_recalc_radar_chanctx(local, new_ctx);
+	ieee80211_recalc_chanctx_min_def(local, new_ctx);
+
 	if (changed)
 		ieee80211_bss_info_change_notify(sdata, changed);
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 434a91a..0bb7038 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -656,7 +656,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
 	int i;
 
 	mutex_lock(&local->key_mtx);
-	for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+	for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
 		key = key_mtx_dereference(local, sta->gtk[i]);
 		if (!key)
 			continue;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 75a9bf5..2c36c47 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -174,6 +174,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	if (!(ht_cap->cap_info &
 	      cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
 		ret = IEEE80211_STA_DISABLE_40MHZ;
+		vht_chandef = *chandef;
 		goto out;
 	}
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 49c23bd..683b10f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1761,14 +1761,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
-		goto out;
-
 	if (is_multicast_ether_addr(hdr->addr1)) {
 		rx->local->dot11MulticastReceivedFrameCount++;
-		goto out;
+		goto out_no_led;
 	}
 
+	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+		goto out;
+
 	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
 	if (skb_linearize(rx->skb))
@@ -1859,9 +1859,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	status->rx_flags |= IEEE80211_RX_FRAGMENTED;
 
  out:
+	ieee80211_led_rx(rx->local);
+ out_no_led:
 	if (rx->sta)
 		rx->sta->rx_packets++;
-	ieee80211_led_rx(rx->local);
 	return RX_CONTINUE;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ef5f77b..074cf3e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -525,14 +525,14 @@ out:
 	return err;
 }
 
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
 {
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 	struct page *p_start, *p_end;
 
 	/* First page is flushed through netlink_{get,set}_status */
 	p_start = pgvec_to_page(hdr + PAGE_SIZE);
-	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
 	while (p_start <= p_end) {
 		flush_dcache_page(p_start);
 		p_start++;
@@ -550,9 +550,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
 static void netlink_set_status(struct nl_mmap_hdr *hdr,
 			       enum nl_mmap_status status)
 {
+	smp_mb();
 	hdr->nm_status = status;
 	flush_dcache_page(pgvec_to_page(hdr));
-	smp_wmb();
 }
 
 static struct nl_mmap_hdr *
@@ -714,24 +714,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
 	struct nl_mmap_hdr *hdr;
 	struct sk_buff *skb;
 	unsigned int maxlen;
-	bool excl = true;
 	int err = 0, len = 0;
 
-	/* Netlink messages are validated by the receiver before processing.
-	 * In order to avoid userspace changing the contents of the message
-	 * after validation, the socket and the ring may only be used by a
-	 * single process, otherwise we fall back to copying.
-	 */
-	if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
-	    atomic_read(&nlk->mapped) > 1)
-		excl = false;
-
 	mutex_lock(&nlk->pg_vec_lock);
 
 	ring   = &nlk->tx_ring;
 	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
 
 	do {
+		unsigned int nm_len;
+
 		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
 		if (hdr == NULL) {
 			if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -739,35 +731,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
 				schedule();
 			continue;
 		}
-		if (hdr->nm_len > maxlen) {
+
+		nm_len = ACCESS_ONCE(hdr->nm_len);
+		if (nm_len > maxlen) {
 			err = -EINVAL;
 			goto out;
 		}
 
-		netlink_frame_flush_dcache(hdr);
+		netlink_frame_flush_dcache(hdr, nm_len);
 
-		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
-			skb = alloc_skb_head(GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			sock_hold(sk);
-			netlink_ring_setup_skb(skb, sk, ring, hdr);
-			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
-			__skb_put(skb, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-			atomic_inc(&ring->pending);
-		} else {
-			skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			__skb_put(skb, hdr->nm_len);
-			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		skb = alloc_skb(nm_len, GFP_KERNEL);
+		if (skb == NULL) {
+			err = -ENOBUFS;
+			goto out;
 		}
+		__skb_put(skb, nm_len);
+		memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+		netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
 
 		netlink_increment_head(ring);
 
@@ -813,7 +793,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
 	hdr->nm_pid	= NETLINK_CB(skb).creds.pid;
 	hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
 	hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-	netlink_frame_flush_dcache(hdr);
+	netlink_frame_flush_dcache(hdr, hdr->nm_len);
 	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
 
 	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
diff --git a/net/rds/message.c b/net/rds/message.c
index ff22022..5a21e6f 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -325,7 +325,8 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
 	copied = 0;
 
 	while (iov_iter_count(to) && copied < len) {
-		to_copy = min(iov_iter_count(to), sg->length - vec_off);
+		to_copy = min_t(unsigned long, iov_iter_count(to),
+				sg->length - vec_off);
 		to_copy = min_t(unsigned long, to_copy, len - copied);
 
 		rds_stats_add(s_copy_to_user, to_copy);
diff --git a/net/socket.c b/net/socket.c
index 70bbde6..a2c33a4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -372,7 +372,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	path.mnt = mntget(sock_mnt);
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
-	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
 	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 85506f1d..7aaf741 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -603,7 +603,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 {
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
-	u32 width, control_freq;
+	u32 width, control_freq, cap;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
 		return false;
@@ -643,7 +643,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 			return false;
 		break;
 	case NL80211_CHAN_WIDTH_80P80:
-		if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
 	case NL80211_CHAN_WIDTH_80:
 		if (!vht_cap->vht_supported)
@@ -654,7 +655,9 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 	case NL80211_CHAN_WIDTH_160:
 		if (!vht_cap->vht_supported)
 			return false;
-		if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ))
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+		    cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
 		prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
 		width = 160;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a17d6bc..7ca4b51 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6002,7 +6002,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
 		}
 
 		/* there was no other matchset, so the RSSI one is alone */
-		if (i == 0)
+		if (i == 0 && n_match_sets)
 			request->match_sets[0].rssi_thold = default_match_rssi;
 
 		request->min_rssi_thold = INT_MAX;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 47be616..7b83098 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1549,9 +1549,15 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
 		ret = cfg80211_reg_can_beacon(wiphy,
 					      &wdev->chandef, wdev->iftype);
 		break;
+	case NL80211_IFTYPE_ADHOC:
+		if (!wdev->ssid_len)
+			goto out;
+
+		ret = cfg80211_reg_can_beacon(wiphy,
+					      &wdev->chandef, wdev->iftype);
+		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
-	case NL80211_IFTYPE_ADHOC:
 		if (!wdev->current_bss ||
 		    !wdev->current_bss->pub.channel)
 			goto out;
@@ -1907,7 +1913,7 @@ static enum reg_request_treatment
 reg_process_hint_driver(struct wiphy *wiphy,
 			struct regulatory_request *driver_request)
 {
-	const struct ieee80211_regdomain *regd;
+	const struct ieee80211_regdomain *regd, *tmp;
 	enum reg_request_treatment treatment;
 
 	treatment = __reg_process_hint_driver(driver_request);
@@ -1927,7 +1933,10 @@ reg_process_hint_driver(struct wiphy *wiphy,
 			reg_free_request(driver_request);
 			return REG_REQ_IGNORE;
 		}
+
+		tmp = get_wiphy_regdom(wiphy);
 		rcu_assign_pointer(wiphy->regd, regd);
+		rcu_free_regdom(tmp);
 	}
 
 
@@ -1986,11 +1995,8 @@ __reg_process_hint_country_ie(struct wiphy *wiphy,
 			return REG_REQ_IGNORE;
 		return REG_REQ_ALREADY_SET;
 	}
-	/*
-	 * Two consecutive Country IE hints on the same wiphy.
-	 * This should be picked up early by the driver/stack
-	 */
-	if (WARN_ON(regdom_changes(country_ie_request->alpha2)))
+
+	if (regdom_changes(country_ie_request->alpha2))
 		return REG_REQ_OK;
 	return REG_REQ_ALREADY_SET;
 }
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 5374b1b..edd2794 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -185,6 +185,18 @@ modbuiltin := -f $(srctree)/scripts/Makefile.modbuiltin obj
 # $(Q)$(MAKE) $(dtbinst)=dir
 dtbinst := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.dtbinst obj
 
+###
+# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=
+# Usage:
+# $(Q)$(MAKE) $(clean)=dir
+clean := -f $(srctree)/scripts/Makefile.clean obj
+
+###
+# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.headersinst obj=
+# Usage:
+# $(Q)$(MAKE) $(hdr-inst)=dir
+hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
+
 # Prefix -I with $(srctree) if it is not an absolute path.
 # skip if -I has no parameter
 addtree = $(if $(patsubst -I%,%,$(1)), \
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index b1c668d..1bca180 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -7,10 +7,7 @@ src := $(obj)
 PHONY := __clean
 __clean:
 
-# Shorthand for $(Q)$(MAKE) scripts/Makefile.clean obj=dir
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
+include scripts/Kbuild.include
 
 # The filename Kbuild has precedence over Makefile
 kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
@@ -91,11 +88,6 @@ PHONY += $(subdir-ymn)
 $(subdir-ymn):
 	$(Q)$(MAKE) $(clean)=$@
 
-# If quiet is set, only print short version of command
-
-cmd = @$(if $($(quiet)cmd_$(1)),echo '  $($(quiet)cmd_$(1))' &&) $(cmd_$(1))
-
-
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable se we can use it in if_changed and friends.
 
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 8ccf830..1106d6c 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -122,7 +122,6 @@ $(check-file): scripts/headers_check.pl $(output-files) FORCE
 endif
 
 # Recursion
-hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
 .PHONY: $(subdirs)
 $(subdirs):
 	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci
index 556456c..3b7eec2 100644
--- a/scripts/coccinelle/misc/bugon.cocci
+++ b/scripts/coccinelle/misc/bugon.cocci
@@ -8,7 +8,7 @@
 // Confidence: High
 // Copyright: (C) 2014 Himangi Saraogi.  GPLv2.
 // Comments:
-// Options: --no-includes, --include-headers
+// Options: --no-includes --include-headers
 
 virtual patch
 virtual context
diff --git a/scripts/headers.sh b/scripts/headers.sh
index 95ece06..d4dc4de 100755
--- a/scripts/headers.sh
+++ b/scripts/headers.sh
@@ -19,8 +19,6 @@ for arch in ${archs}; do
 	case ${arch} in
 	um)        # no userspace export
 		;;
-	cris)      # headers export are known broken
-		;;
 	*)
 		if [ -d ${srctree}/arch/${arch} ]; then
 			do_command $1 ${arch}
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 14cea74..4dd3755 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -330,10 +330,10 @@ static void set_subtitle(void)
 	list_for_each_entry(sp, &trail, entries) {
 		if (sp->text) {
 			if (pos) {
-				pos->next = xcalloc(sizeof(*pos), 1);
+				pos->next = xcalloc(1, sizeof(*pos));
 				pos = pos->next;
 			} else {
-				subtitles = pos = xcalloc(sizeof(*pos), 1);
+				subtitles = pos = xcalloc(1, sizeof(*pos));
 			}
 			pos->text = sp->text;
 		}
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index a26cc5d..72c9dba 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -548,7 +548,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop,
 {
 	int i, j;
 	struct menu *submenu[8], *menu, *location = NULL;
-	struct jump_key *jump;
+	struct jump_key *jump = NULL;
 
 	str_printf(r, _("Prompt: %s\n"), _(prop->text));
 	menu = prop->menu->parent;
@@ -586,7 +586,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop,
 		str_printf(r, _("  Location:\n"));
 		for (j = 4; --i >= 0; j += 2) {
 			menu = submenu[i];
-			if (head && location && menu == location)
+			if (jump && menu == location)
 				jump->offset = strlen(r->s);
 			str_printf(r, "%*c-> %s", j, ' ',
 				   _(menu_get_prompt(menu)));
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 1395760..d9ab94b 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -117,6 +117,7 @@ echo 'mv vmlinux.bz2 $RPM_BUILD_ROOT'"/boot/vmlinux-$KERNELRELEASE.bz2"
 echo 'mv vmlinux.orig vmlinux'
 echo "%endif"
 
+if ! $PREBUILT; then
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/{build,source}"
 echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE"
 echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\""
@@ -124,6 +125,7 @@ echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNEL
 echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE build"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE source"
+fi
 
 echo ""
 echo "%clean"
@@ -151,9 +153,11 @@ echo "%files headers"
 echo '%defattr (-, root, root)'
 echo "/usr/include"
 echo ""
+if ! $PREBUILT; then
 echo "%files devel"
 echo '%defattr (-, root, root)'
 echo "/usr/src/kernels/$KERNELRELEASE"
 echo "/lib/modules/$KERNELRELEASE/build"
 echo "/lib/modules/$KERNELRELEASE/source"
 echo ""
+fi
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index b80a93e..57515bc 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -10,7 +10,7 @@ config IMA
 	select CRYPTO_HASH_INFO
 	select TCG_TPM if HAS_IOMEM && !UML
 	select TCG_TIS if TCG_TPM && X86
-	select TCG_IBMVTPM if TCG_TPM && PPC64
+	select TCG_IBMVTPM if TCG_TPM && PPC_PSERIES
 	help
 	  The Trusted Computing Group(TCG) runtime Integrity
 	  Measurement Architecture(IMA) maintains a list of hash
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index db9675d..7bed4ad 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -1017,10 +1017,13 @@ static int __init init_encrypted(void)
 	ret = encrypted_shash_alloc();
 	if (ret < 0)
 		return ret;
+	ret = aes_get_sizes();
+	if (ret < 0)
+		goto out;
 	ret = register_key_type(&key_type_encrypted);
 	if (ret < 0)
 		goto out;
-	return aes_get_sizes();
+	return 0;
 out:
 	encrypted_shash_release();
 	return ret;
diff --git a/security/keys/key.c b/security/keys/key.c
index e17ba6a..aee2ec5 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -276,12 +276,10 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 	if (!key)
 		goto no_memory_2;
 
-	if (desc) {
-		key->index_key.desc_len = desclen;
-		key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
-		if (!key->description)
-			goto no_memory_3;
-	}
+	key->index_key.desc_len = desclen;
+	key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
+	if (!key->description)
+		goto no_memory_3;
 
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c
index 9bc556b..67ade07 100644
--- a/sound/firewire/oxfw/oxfw-pcm.c
+++ b/sound/firewire/oxfw/oxfw-pcm.c
@@ -19,7 +19,7 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params,
 		.min = UINT_MAX, .max = 0, .integer = 1
 	};
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, err;
+	int i, err;
 
 	for (i = 0; i < SND_OXFW_STREAM_FORMAT_ENTRIES; i++) {
 		if (formats[i] == NULL)
@@ -47,7 +47,7 @@ static int hw_rule_channels(struct snd_pcm_hw_params *params,
 	const struct snd_interval *r =
 		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, j, err;
+	int i, j, err;
 	unsigned int count, list[SND_OXFW_STREAM_FORMAT_ENTRIES] = {0};
 
 	count = 0;
@@ -80,7 +80,7 @@ static int hw_rule_channels(struct snd_pcm_hw_params *params,
 static void limit_channels_and_rates(struct snd_pcm_hardware *hw, u8 **formats)
 {
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, err;
+	int i, err;
 
 	hw->channels_min = UINT_MAX;
 	hw->channels_max = 0;
diff --git a/sound/firewire/oxfw/oxfw-proc.c b/sound/firewire/oxfw/oxfw-proc.c
index 604808e..8ba4f9f 100644
--- a/sound/firewire/oxfw/oxfw-proc.c
+++ b/sound/firewire/oxfw/oxfw-proc.c
@@ -15,7 +15,7 @@ static void proc_read_formation(struct snd_info_entry *entry,
 	struct snd_oxfw_stream_formation formation, curr;
 	u8 *format;
 	char flag;
-	unsigned int i, err;
+	int i, err;
 
 	/* Show input. */
 	err = snd_oxfw_stream_get_current_formation(oxfw,
diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c
index b77cf80..bda845a 100644
--- a/sound/firewire/oxfw/oxfw-stream.c
+++ b/sound/firewire/oxfw/oxfw-stream.c
@@ -61,7 +61,8 @@ static int set_stream_format(struct snd_oxfw *oxfw, struct amdtp_stream *s,
 	u8 **formats;
 	struct snd_oxfw_stream_formation formation;
 	enum avc_general_plug_dir dir;
-	unsigned int i, err, len;
+	unsigned int len;
+	int i, err;
 
 	if (s == &oxfw->tx_stream) {
 		formats = oxfw->tx_stream_formats;
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index cf1d0b5..60e5cad 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -43,7 +43,7 @@ static bool detect_loud_models(struct fw_unit *unit)
 	err = fw_csr_string(unit->directory, CSR_MODEL,
 			    model, sizeof(model));
 	if (err < 0)
-		return err;
+		return false;
 
 	for (i = 0; i < ARRAY_SIZE(models); i++) {
 		if (strcmp(models[i], model) == 0)
diff --git a/sound/pci/asihpi/hpi_internal.h b/sound/pci/asihpi/hpi_internal.h
index 48380ce..aeea679 100644
--- a/sound/pci/asihpi/hpi_internal.h
+++ b/sound/pci/asihpi/hpi_internal.h
@@ -1367,9 +1367,9 @@ struct hpi_control_cache_single {
 struct hpi_control_cache_pad {
 	struct hpi_control_cache_info i;
 	u32 field_valid_flags;
-	u8 c_channel[8];
-	u8 c_artist[40];
-	u8 c_title[40];
+	u8 c_channel[40];
+	u8 c_artist[100];
+	u8 c_title[100];
 	u8 c_comment[200];
 	u32 pTY;
 	u32 pI;
diff --git a/sound/pci/asihpi/hpi_version.h b/sound/pci/asihpi/hpi_version.h
index e9146e5..6623ab1 100644
--- a/sound/pci/asihpi/hpi_version.h
+++ b/sound/pci/asihpi/hpi_version.h
@@ -11,13 +11,13 @@ Production releases have even minor version.
 /* Use single digits for versions less that 10 to avoid octal. */
 /* *** HPI_VER is the only edit required to update version *** */
 /** HPI version */
-#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 10, 1)
+#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 14, 3)
 
 /** HPI version string in dotted decimal format */
-#define HPI_VER_STRING "4.10.01"
+#define HPI_VER_STRING "4.14.03"
 
 /** Library version as documented in hpi-api-versions.txt */
-#define HPI_LIB_VER  HPI_VERSION_CONSTRUCTOR(10, 2, 0)
+#define HPI_LIB_VER  HPI_VERSION_CONSTRUCTOR(10, 4, 0)
 
 /** Construct hpi version number from major, minor, release numbers */
 #define HPI_VERSION_CONSTRUCTOR(maj, min, r) ((maj << 16) + (min << 8) + r)
diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c
index ac91637..3603c24 100644
--- a/sound/pci/asihpi/hpidspcd.c
+++ b/sound/pci/asihpi/hpidspcd.c
@@ -1,8 +1,9 @@
-/***********************************************************************/
-/**
+/***********************************************************************
 
     AudioScience HPI driver
-    Copyright (C) 1997-2011  AudioScience Inc. <support@audioscience.com>
+    Functions for reading DSP code using hotplug firmware loader
+
+    Copyright (C) 1997-2014  AudioScience Inc. <support@audioscience.com>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of version 2 of the GNU General Public License as
@@ -17,11 +18,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-\file
-Functions for reading DSP code using
-hotplug firmware loader from individual dsp code files
-*/
-/***********************************************************************/
+***********************************************************************/
 #define SOURCEFILE_NAME "hpidspcd.c"
 #include "hpidspcd.h"
 #include "hpidebug.h"
@@ -68,17 +65,18 @@ short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code,
 		goto error2;
 	}
 
-	if ((header.version >> 9) != (HPI_VER >> 9)) {
-		/* Consider even and subsequent odd minor versions to be compatible */
-		dev_err(&dev->dev, "Incompatible firmware version DSP image %X != Driver %X\n",
+	if (HPI_VER_MAJOR(header.version) != HPI_VER_MAJOR(HPI_VER)) {
+		/* Major version change probably means Host-DSP protocol change */
+		dev_err(&dev->dev,
+			"Incompatible firmware version DSP image %X != Driver %X\n",
 			header.version, HPI_VER);
 		goto error2;
 	}
 
 	if (header.version != HPI_VER) {
-		dev_info(&dev->dev,
-			 "Firmware: release version mismatch  DSP image %X != Driver %X\n",
-			 header.version, HPI_VER);
+		dev_warn(&dev->dev,
+			"Firmware version mismatch: DSP image %X != Driver %X\n",
+			header.version, HPI_VER);
 	}
 
 	HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name);
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 8337645..8276a74 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1676,7 +1676,7 @@ irqreturn_t azx_interrupt(int irq, void *dev_id)
 	u8 sd_status;
 	int i;
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	if (chip->driver_caps & AZX_DCAPS_PM_RUNTIME)
 		if (!pm_runtime_active(chip->card->dev))
 			return IRQ_NONE;
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 63b69f7..b680b4e 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3218,12 +3218,13 @@ static int create_input_ctls(struct hda_codec *codec)
 	}
 
 	/* add stereo mix when explicitly enabled via hint */
-	if (mixer && spec->add_stereo_mix_input &&
-	    snd_hda_get_bool_hint(codec, "add_stereo_mix_input") > 0) {
+	if (mixer && spec->add_stereo_mix_input == HDA_HINT_STEREO_MIX_ENABLE) {
 		err = parse_capture_source(codec, mixer, CFG_IDX_MIX, num_adcs,
 					   "Stereo Mix", 0);
 		if (err < 0)
 			return err;
+		else
+			spec->suppress_auto_mic = 1;
 	}
 
 	return 0;
@@ -4542,9 +4543,8 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec,
 
 	/* add stereo mix if available and not enabled yet */
 	if (!spec->auto_mic && spec->mixer_nid &&
-	    spec->add_stereo_mix_input &&
-	    spec->input_mux.num_items > 1 &&
-	    snd_hda_get_bool_hint(codec, "add_stereo_mix_input") < 0) {
+	    spec->add_stereo_mix_input == HDA_HINT_STEREO_MIX_AUTO &&
+	    spec->input_mux.num_items > 1) {
 		err = parse_capture_source(codec, spec->mixer_nid,
 					   CFG_IDX_MIX, spec->num_all_adcs,
 					   "Stereo Mix", 0);
diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
index 61dd515..3d85266 100644
--- a/sound/pci/hda/hda_generic.h
+++ b/sound/pci/hda/hda_generic.h
@@ -222,7 +222,7 @@ struct hda_gen_spec {
 	unsigned int vmaster_mute_enum:1; /* add vmaster mute mode enum */
 	unsigned int indep_hp:1; /* independent HP supported */
 	unsigned int prefer_hp_amp:1; /* enable HP amp for speaker if any */
-	unsigned int add_stereo_mix_input:1; /* add aamix as a capture src */
+	unsigned int add_stereo_mix_input:2; /* add aamix as a capture src */
 	unsigned int add_jack_modes:1; /* add i/o jack mode enum ctls */
 	unsigned int power_down_unused:1; /* power down unused widgets */
 	unsigned int dac_min_mute:1; /* minimal = mute for DACs */
@@ -291,6 +291,13 @@ struct hda_gen_spec {
 				    struct hda_jack_callback *cb);
 };
 
+/* values for add_stereo_mix_input flag */
+enum {
+	HDA_HINT_STEREO_MIX_DISABLE,	/* No stereo mix input */
+	HDA_HINT_STEREO_MIX_ENABLE,	/* Add stereo mix input */
+	HDA_HINT_STEREO_MIX_AUTO,	/* Add only if auto-mic is disabled */
+};
+
 int snd_hda_gen_spec_init(struct hda_gen_spec *spec);
 
 int snd_hda_gen_init(struct hda_codec *codec);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 5ac0d39..2bf0b56 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -872,7 +872,7 @@ static int azx_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int azx_runtime_suspend(struct device *dev)
 {
 	struct snd_card *card = dev_get_drvdata(dev);
@@ -970,9 +970,6 @@ static int azx_runtime_idle(struct device *dev)
 	return 0;
 }
 
-#endif /* CONFIG_PM_RUNTIME */
-
-#ifdef CONFIG_PM
 static const struct dev_pm_ops azx_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume)
 	SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle)
diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index bef7215..ccc962a 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c
@@ -468,7 +468,7 @@ int snd_hda_get_bool_hint(struct hda_codec *codec, const char *key)
 EXPORT_SYMBOL_GPL(snd_hda_get_bool_hint);
 
 /**
- * snd_hda_get_bool_hint - Get a boolean hint value
+ * snd_hda_get_int_hint - Get an integer hint value
  * @codec: the HDA codec
  * @key: the hint key string
  * @valp: pointer to store a value
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index c81b715..a9d78e2 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -195,7 +195,8 @@ static int ad198x_parse_auto_config(struct hda_codec *codec, bool indep_hp)
 	codec->no_sticky_stream = 1;
 
 	spec->gen.indep_hp = indep_hp;
-	spec->gen.add_stereo_mix_input = 1;
+	if (!spec->gen.add_stereo_mix_input)
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 
 	err = snd_hda_parse_pin_defcfg(codec, cfg, NULL, 0);
 	if (err < 0)
@@ -256,6 +257,18 @@ static void ad1986a_fixup_eapd(struct hda_codec *codec,
 	}
 }
 
+/* enable stereo-mix input for avoiding regression on KDE (bko#88251) */
+static void ad1986a_fixup_eapd_mix_in(struct hda_codec *codec,
+				      const struct hda_fixup *fix, int action)
+{
+	struct ad198x_spec *spec = codec->spec;
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		ad1986a_fixup_eapd(codec, fix, action);
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_ENABLE;
+	}
+}
+
 enum {
 	AD1986A_FIXUP_INV_JACK_DETECT,
 	AD1986A_FIXUP_ULTRA,
@@ -264,6 +277,8 @@ enum {
 	AD1986A_FIXUP_LAPTOP,
 	AD1986A_FIXUP_LAPTOP_IMIC,
 	AD1986A_FIXUP_EAPD,
+	AD1986A_FIXUP_EAPD_MIX_IN,
+	AD1986A_FIXUP_EASYNOTE,
 };
 
 static const struct hda_fixup ad1986a_fixups[] = {
@@ -328,6 +343,30 @@ static const struct hda_fixup ad1986a_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = ad1986a_fixup_eapd,
 	},
+	[AD1986A_FIXUP_EAPD_MIX_IN] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = ad1986a_fixup_eapd_mix_in,
+	},
+	[AD1986A_FIXUP_EASYNOTE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1a, 0x0421402f }, /* headphone */
+			{ 0x1b, 0x90170110 }, /* speaker */
+			{ 0x1c, 0x411111f0 }, /* N/A */
+			{ 0x1d, 0x90a70130 }, /* int mic */
+			{ 0x1e, 0x411111f0 }, /* N/A */
+			{ 0x1f, 0x04a19040 }, /* mic */
+			{ 0x20, 0x411111f0 }, /* N/A */
+			{ 0x21, 0x411111f0 }, /* N/A */
+			{ 0x22, 0x411111f0 }, /* N/A */
+			{ 0x23, 0x411111f0 }, /* N/A */
+			{ 0x24, 0x411111f0 }, /* N/A */
+			{ 0x25, 0x411111f0 }, /* N/A */
+			{}
+		},
+		.chained = true,
+		.chain_id = AD1986A_FIXUP_EAPD_MIX_IN,
+	},
 };
 
 static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
@@ -341,6 +380,7 @@ static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_FIXUP_LAPTOP),
 	SND_PCI_QUIRK_MASK(0x144d, 0xff00, 0xc000, "Samsung", AD1986A_FIXUP_SAMSUNG),
 	SND_PCI_QUIRK(0x144d, 0xc027, "Samsung Q1", AD1986A_FIXUP_ULTRA),
+	SND_PCI_QUIRK(0x1631, 0xc022, "PackardBell EasyNote MX65", AD1986A_FIXUP_EASYNOTE),
 	SND_PCI_QUIRK(0x17aa, 0x2066, "Lenovo N100", AD1986A_FIXUP_INV_JACK_DETECT),
 	SND_PCI_QUIRK(0x17aa, 0x1011, "Lenovo M55", AD1986A_FIXUP_3STACK),
 	SND_PCI_QUIRK(0x17aa, 0x1017, "Lenovo A60", AD1986A_FIXUP_3STACK),
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e9ebc7b..fd3ed18 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -855,14 +855,14 @@ static int patch_conexant_auto(struct hda_codec *codec)
 	case 0x14f15045:
 		codec->single_adc_amp = 1;
 		spec->gen.mixer_nid = 0x17;
-		spec->gen.add_stereo_mix_input = 1;
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 		snd_hda_pick_fixup(codec, cxt5045_fixup_models,
 				   cxt5045_fixups, cxt_fixups);
 		break;
 	case 0x14f15047:
 		codec->pin_amp_workaround = 1;
 		spec->gen.mixer_nid = 0x19;
-		spec->gen.add_stereo_mix_input = 1;
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 		snd_hda_pick_fixup(codec, cxt5047_fixup_models,
 				   cxt5047_fixups, cxt_fixups);
 		break;
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 9dc9cf8..5f13d2d 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -47,7 +47,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 
 #define is_haswell(codec)  ((codec)->vendor_id == 0x80862807)
 #define is_broadwell(codec)    ((codec)->vendor_id == 0x80862808)
-#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec))
+#define is_skylake(codec) ((codec)->vendor_id == 0x80862809)
+#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
+					|| is_skylake(codec))
 
 #define is_valleyview(codec) ((codec)->vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->vendor_id == 0x80862883)
@@ -3365,6 +3367,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = {
 { .id = 0x80862806, .name = "PantherPoint HDMI", .patch = patch_generic_hdmi },
 { .id = 0x80862807, .name = "Haswell HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862808, .name = "Broadwell HDMI",	.patch = patch_generic_hdmi },
+{ .id = 0x80862809, .name = "Skylake HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862880, .name = "CedarTrail HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862882, .name = "Valleyview2 HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862883, .name = "Braswell HDMI",	.patch = patch_generic_hdmi },
@@ -3425,6 +3428,7 @@ MODULE_ALIAS("snd-hda-codec-id:80862805");
 MODULE_ALIAS("snd-hda-codec-id:80862806");
 MODULE_ALIAS("snd-hda-codec-id:80862807");
 MODULE_ALIAS("snd-hda-codec-id:80862808");
+MODULE_ALIAS("snd-hda-codec-id:80862809");
 MODULE_ALIAS("snd-hda-codec-id:80862880");
 MODULE_ALIAS("snd-hda-codec-id:80862882");
 MODULE_ALIAS("snd-hda-codec-id:80862883");
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a722067..65f1f4e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -321,10 +321,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 		break;
 	case 0x10ec0233:
 	case 0x10ec0255:
+	case 0x10ec0256:
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
 	case 0x10ec0288:
+	case 0x10ec0298:
 		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
 		break;
 	case 0x10ec0285:
@@ -2659,7 +2661,9 @@ enum {
 	ALC269_TYPE_ALC284,
 	ALC269_TYPE_ALC285,
 	ALC269_TYPE_ALC286,
+	ALC269_TYPE_ALC298,
 	ALC269_TYPE_ALC255,
+	ALC269_TYPE_ALC256,
 };
 
 /*
@@ -2686,7 +2690,9 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 	case ALC269_TYPE_ALC282:
 	case ALC269_TYPE_ALC283:
 	case ALC269_TYPE_ALC286:
+	case ALC269_TYPE_ALC298:
 	case ALC269_TYPE_ALC255:
+	case ALC269_TYPE_ALC256:
 		ssids = alc269_ssids;
 		break;
 	default:
@@ -4829,6 +4835,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5417,9 +5424,15 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->codec_variant = ALC269_TYPE_ALC286;
 		spec->shutup = alc286_shutup;
 		break;
+	case 0x10ec0298:
+		spec->codec_variant = ALC269_TYPE_ALC298;
+		break;
 	case 0x10ec0255:
 		spec->codec_variant = ALC269_TYPE_ALC255;
 		break;
+	case 0x10ec0256:
+		spec->codec_variant = ALC269_TYPE_ALC256;
+		break;
 	}
 
 	if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6341,6 +6354,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
 	{ .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 },
+	{ .id = 0x10ec0256, .name = "ALC256", .patch = patch_alc269 },
 	{ .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 },
 	{ .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 },
 	{ .id = 0x10ec0267, .name = "ALC267", .patch = patch_alc268 },
@@ -6360,6 +6374,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
 	{ .id = 0x10ec0290, .name = "ALC290", .patch = patch_alc269 },
 	{ .id = 0x10ec0292, .name = "ALC292", .patch = patch_alc269 },
 	{ .id = 0x10ec0293, .name = "ALC293", .patch = patch_alc269 },
+	{ .id = 0x10ec0298, .name = "ALC298", .patch = patch_alc269 },
 	{ .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660",
 	  .patch = patch_alc861 },
 	{ .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd },
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 6c206b6..3de6d3d 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -137,7 +137,7 @@ static struct via_spec *via_new_spec(struct hda_codec *codec)
 	spec->gen.indep_hp = 1;
 	spec->gen.keep_eapd_on = 1;
 	spec->gen.pcm_playback_hook = via_playback_pcm_hook;
-	spec->gen.add_stereo_mix_input = 1;
+	spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 	return spec;
 }
 
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index b1cc2a4..99ff35e 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -267,7 +267,7 @@ static void atmel_ssc_shutdown(struct snd_pcm_substream *substream,
 	if (!ssc_p->dir_mask) {
 		if (ssc_p->initialized) {
 			/* Shutdown the SSC clock. */
-			pr_debug("atmel_ssc_dau: Stopping clock\n");
+			pr_debug("atmel_ssc_dai: Stopping clock\n");
 			clk_disable(ssc_p->ssc->clk);
 
 			free_irq(ssc_p->ssc->irq, ssc_p);
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 883c577..8349f98 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -520,6 +520,8 @@ config SND_SOC_RT5670
 
 config SND_SOC_RT5677
 	tristate
+	select REGMAP_I2C
+	select REGMAP_IRQ
 
 config SND_SOC_RT5677_SPI
 	tristate
diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c
index c125925..ec55c590 100644
--- a/sound/soc/codecs/cs35l32.c
+++ b/sound/soc/codecs/cs35l32.c
@@ -550,7 +550,7 @@ static int cs35l32_i2c_remove(struct i2c_client *i2c_client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int cs35l32_runtime_suspend(struct device *dev)
 {
 	struct cs35l32_private *cs35l32 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/cs42xx8.c b/sound/soc/codecs/cs42xx8.c
index 02b1520..670ebfe 100644
--- a/sound/soc/codecs/cs42xx8.c
+++ b/sound/soc/codecs/cs42xx8.c
@@ -537,7 +537,7 @@ err_enable:
 }
 EXPORT_SYMBOL_GPL(cs42xx8_probe);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int cs42xx8_runtime_resume(struct device *dev)
 {
 	struct cs42xx8_priv *cs42xx8 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index 151f718..b112b1c 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -2611,7 +2611,7 @@ static int max98090_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int max98090_runtime_resume(struct device *dev)
 {
 	struct max98090_priv *max98090 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/pcm512x-i2c.c b/sound/soc/codecs/pcm512x-i2c.c
index 4d62230..d0547fa 100644
--- a/sound/soc/codecs/pcm512x-i2c.c
+++ b/sound/soc/codecs/pcm512x-i2c.c
@@ -24,8 +24,13 @@ static int pcm512x_i2c_probe(struct i2c_client *i2c,
 			     const struct i2c_device_id *id)
 {
 	struct regmap *regmap;
+	struct regmap_config config = pcm512x_regmap;
 
-	regmap = devm_regmap_init_i2c(i2c, &pcm512x_regmap);
+	/* msb needs to be set to enable auto-increment of addresses */
+	config.read_flag_mask = 0x80;
+	config.write_flag_mask = 0x80;
+
+	regmap = devm_regmap_init_i2c(i2c, &config);
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 
diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c
index 0c8aefa..e5f2fb8 100644
--- a/sound/soc/codecs/pcm512x.c
+++ b/sound/soc/codecs/pcm512x.c
@@ -517,7 +517,7 @@ void pcm512x_remove(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pcm512x_remove);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int pcm512x_suspend(struct device *dev)
 {
 	struct pcm512x_priv *pcm512x = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index a7789a8..27141e2 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -2209,6 +2209,10 @@ static int rt5645_jack_detect(struct snd_soc_codec *codec)
 	int gpio_state, jack_type = 0;
 	unsigned int val;
 
+	if (!gpio_is_valid(rt5645->pdata.hp_det_gpio)) {
+		dev_err(codec->dev, "invalid gpio\n");
+		return -EINVAL;
+	}
 	gpio_state = gpio_get_value(rt5645->pdata.hp_det_gpio);
 
 	dev_dbg(codec->dev, "gpio = %d(%d)\n", rt5645->pdata.hp_det_gpio,
diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c
index b505212..ae23acd 100644
--- a/sound/soc/codecs/tas2552.c
+++ b/sound/soc/codecs/tas2552.c
@@ -115,7 +115,7 @@ static const struct snd_soc_dapm_route tas2552_audio_map[] = {
 	{"ClassD", NULL, "PLL"},
 };
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static void tas2552_sw_shutdown(struct tas2552_data *tas_data, int sw_shutdown)
 {
 	u8 cfg1_reg;
@@ -264,7 +264,7 @@ static int tas2552_mute(struct snd_soc_dai *dai, int mute)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int tas2552_runtime_suspend(struct device *dev)
 {
 	struct tas2552_data *tas2552 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c
index cdea9d9..1559984 100644
--- a/sound/soc/codecs/wm2200.c
+++ b/sound/soc/codecs/wm2200.c
@@ -2440,7 +2440,7 @@ static int wm2200_i2c_remove(struct i2c_client *i2c)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm2200_runtime_suspend(struct device *dev)
 {
 	struct wm2200_priv *wm2200 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c
index a01ad62..b80970d 100644
--- a/sound/soc/codecs/wm5100.c
+++ b/sound/soc/codecs/wm5100.c
@@ -2664,7 +2664,7 @@ static int wm5100_i2c_remove(struct i2c_client *i2c)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm5100_runtime_suspend(struct device *dev)
 {
 	struct wm5100_priv *wm5100 = dev_get_drvdata(dev);
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 1534d88..d32d554 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -3785,7 +3785,7 @@ static int wm8962_i2c_remove(struct i2c_client *client)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm8962_runtime_resume(struct device *dev)
 {
 	struct wm8962_priv *wm8962 = dev_get_drvdata(dev);
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 9deabdd..026a801 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -928,7 +928,7 @@ static int fsl_asrc_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int fsl_asrc_runtime_resume(struct device *dev)
 {
 	struct fsl_asrc *asrc_priv = dev_get_drvdata(dev);
@@ -954,7 +954,7 @@ static int fsl_asrc_runtime_suspend(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int fsl_asrc_suspend(struct device *dev)
diff --git a/sound/soc/intel/sst-haswell-pcm.c b/sound/soc/intel/sst-haswell-pcm.c
index b8a782c..6195252 100644
--- a/sound/soc/intel/sst-haswell-pcm.c
+++ b/sound/soc/intel/sst-haswell-pcm.c
@@ -998,7 +998,7 @@ static int hsw_pcm_dev_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int hsw_pcm_runtime_idle(struct device *dev)
 {
@@ -1057,7 +1057,7 @@ static int hsw_pcm_runtime_resume(struct device *dev)
 #define hsw_pcm_runtime_resume		NULL
 #endif
 
-#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 
 static void hsw_pcm_complete(struct device *dev)
 {
diff --git a/sound/soc/intel/sst/sst_acpi.c b/sound/soc/intel/sst/sst_acpi.c
index 31124aa..3abc29e 100644
--- a/sound/soc/intel/sst/sst_acpi.c
+++ b/sound/soc/intel/sst/sst_acpi.c
@@ -43,7 +43,7 @@
 #include "sst.h"
 
 struct sst_machines {
-	char codec_id[32];
+	char *codec_id;
 	char board[32];
 	char machine[32];
 	void (*machine_quirk)(void);
@@ -277,16 +277,16 @@ int sst_acpi_probe(struct platform_device *pdev)
 	dev_dbg(dev, "ACPI device id: %x\n", dev_id);
 
 	plat_dev = platform_device_register_data(dev, mach->pdata->platform, -1, NULL, 0);
-	if (plat_dev == NULL) {
+	if (IS_ERR(plat_dev)) {
 		dev_err(dev, "Failed to create machine device: %s\n", mach->pdata->platform);
-		return -ENODEV;
+		return PTR_ERR(plat_dev);
 	}
 
 	/* Create platform device for sst machine driver */
 	mdev = platform_device_register_data(dev, mach->machine, -1, NULL, 0);
-	if (mdev == NULL) {
+	if (IS_ERR(mdev)) {
 		dev_err(dev, "Failed to create machine device: %s\n", mach->machine);
-		return -ENODEV;
+		return PTR_ERR(mdev);
 	}
 
 	ret = sst_alloc_drv_context(&ctx, dev, dev_id);
diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 95340ba..b5a80c5 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c
@@ -1135,7 +1135,7 @@ static inline const struct samsung_i2s_dai_data *samsung_i2s_get_driver_data(
 				platform_get_device_id(pdev)->driver_data;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int i2s_runtime_suspend(struct device *dev)
 {
 	struct i2s_dai *i2s = dev_get_drvdata(dev);
@@ -1153,7 +1153,7 @@ static int i2s_runtime_resume(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static int samsung_i2s_probe(struct platform_device *pdev)
 {
@@ -1261,6 +1261,8 @@ static int samsung_i2s_probe(struct platform_device *pdev)
 			ret = -ENOMEM;
 			goto err;
 		}
+
+		sec_dai->variant_regs = pri_dai->variant_regs;
 		sec_dai->dma_playback.dma_addr = regs_base + I2STXDS;
 		sec_dai->dma_playback.ch_name = "tx-sec";
 
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 1994d41..b703cb3 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -333,8 +333,11 @@ static struct usbmix_name_map gamecom780_map[] = {
 	{}
 };
 
-static const struct usbmix_name_map kef_x300a_map[] = {
-	{ 10, NULL }, /* firmware locks up (?) when we try to access this FU */
+/* some (all?) SCMS USB3318 devices are affected by a firmware lock up
+ * when anything attempts to access FU 10 (control)
+ */
+static const struct usbmix_name_map scms_usb3318_map[] = {
+	{ 10, NULL },
 	{ 0 }
 };
 
@@ -434,8 +437,14 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = {
 		.map = ebox44_map,
 	},
 	{
+		/* KEF X300A */
 		.id = USB_ID(0x27ac, 0x1000),
-		.map = kef_x300a_map,
+		.map = scms_usb3318_map,
+	},
+	{
+		/* Arcam rPAC */
+		.id = USB_ID(0x25c4, 0x0003),
+		.map = scms_usb3318_map,
 	},
 	{ 0 } /* terminator */
 };
diff --git a/sound/usb/mixer_scarlett.c b/sound/usb/mixer_scarlett.c
index 9109652..7438e7c 100644
--- a/sound/usb/mixer_scarlett.c
+++ b/sound/usb/mixer_scarlett.c
@@ -655,7 +655,7 @@ static struct scarlett_device_info s6i6_info = {
 		.names = NULL
 	},
 
-	.num_controls = 0,
+	.num_controls = 9,
 	.controls = {
 		{ .num = 0, .type = SCARLETT_OUTPUTS, .name = "Monitor" },
 		{ .num = 1, .type = SCARLETT_OUTPUTS, .name = "Headphone" },
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4dbfb3d..a739841 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1245,8 +1245,9 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 
 	/* XMOS based USB DACs */
 	switch (chip->usb_id) {
-	/* iFi Audio micro/nano iDSD */
-	case USB_ID(0x20b1, 0x3008):
+	case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
+	case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
+	case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
new file mode 100644
index 0000000..6eedba1
--- /dev/null
+++ b/tools/include/asm-generic/bitops.h
@@ -0,0 +1,27 @@
+#ifndef __TOOLS_ASM_GENERIC_BITOPS_H
+#define __TOOLS_ASM_GENERIC_BITOPS_H
+
+/*
+ * tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed
+ * some functions.
+ *
+ * For the benefit of those who are trying to port Linux to another
+ * architecture, here are some C-language equivalents.  You should
+ * recode these in the native assembly language, if at all possible.
+ *
+ * C language equivalents written by Theodore Ts'o, 9/26/92
+ */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <asm-generic/bitops/atomic.h>
+
+#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h
new file mode 100644
index 0000000..c941750
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__ffs.h
@@ -0,0 +1,43 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+
+#include <asm/types.h>
+
+/**
+ * __ffs - index of the least significant set bit in a word
+ * @word: value to scan; callers must make sure it is non-zero
+ *
+ * Narrows the search by halves; there is no meaningful result for 0.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+	int bit = 0;
+
+#if __BITS_PER_LONG == 64
+	if (!(word & 0xffffffff)) {
+		word >>= 32;
+		bit += 32;
+	}
+#endif
+	if (!(word & 0xffff)) {
+		word >>= 16;
+		bit += 16;
+	}
+	if (!(word & 0xff)) {
+		word >>= 8;
+		bit += 8;
+	}
+	if (!(word & 0xf)) {
+		word >>= 4;
+		bit += 4;
+	}
+	if (!(word & 0x3)) {
+		word >>= 2;
+		bit += 2;
+	}
+	if (!(word & 0x1))
+		bit++;
+	return bit;
+}
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
new file mode 100644
index 0000000..2218b9a
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/__fls.h>
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
new file mode 100644
index 0000000..4bccd7c3
--- /dev/null
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -0,0 +1,35 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+
+#include <asm/types.h>
+
+/*
+ * Bitmap helpers written as plain C expressions on one unsigned long.
+ * Despite the header name, nothing in here is atomic.
+ */
+
+/* Set bit @nr in the bitmap starting at @addr. */
+static inline void set_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = 1UL << (nr % __BITS_PER_LONG);
+
+	addr[nr / __BITS_PER_LONG] |= mask;
+}
+
+/* Clear bit @nr in the bitmap starting at @addr. */
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+	unsigned long mask = 1UL << (nr % __BITS_PER_LONG);
+
+	addr[nr / __BITS_PER_LONG] &= ~mask;
+}
+
+/* Return 1 if bit @nr of the bitmap at @addr is set, 0 otherwise. */
+static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+	unsigned long mask = 1UL << (nr % __BITS_PER_LONG);
+
+	return (addr[nr / __BITS_PER_LONG] & mask) != 0;
+}
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
new file mode 100644
index 0000000..31f5154
--- /dev/null
+++ b/tools/include/asm-generic/bitops/find.h
@@ -0,0 +1,33 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
+		size, unsigned long offset);
+#endif
+
+#ifndef find_first_bit
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_first_bit(const unsigned long *addr,
+				    unsigned long size);
+
+#endif /* find_first_bit */
+
+#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
new file mode 100644
index 0000000..dbf711a
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/fls.h>
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
new file mode 100644
index 0000000..980b1f6
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/fls64.h>
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
new file mode 100644
index 0000000..26005a15
--- /dev/null
+++ b/tools/include/linux/bitops.h
@@ -0,0 +1,53 @@
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#define _TOOLS_LINUX_BITOPS_H_
+
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <asm/hweight.h>
+
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+#define BITS_PER_LONG __WORDSIZE
+
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE		8
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE)
+
+/*
+ * Include this here because some architectures need generic_ffs/fls in
+ * scope
+ *
+ * XXX: this needs to be asm/bitops.h, when we get to per arch optimizations
+ */
+#include <asm-generic/bitops.h>
+
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size));		\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* hweight of @w, using the 32- or 64-bit helper that matches sizeof(long). */
+static inline unsigned long hweight_long(unsigned long w)
+{
+	return sizeof(w) != 4 ? hweight64(w) : hweight32(w);
+}
+
+/* fls of @l, using fls() or fls64() depending on sizeof(long). */
+static inline unsigned fls_long(unsigned long l)
+{
+	return sizeof(l) == 4 ? fls(l) : fls64(l);
+}
+
+#endif
diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
new file mode 100644
index 0000000..4144666
--- /dev/null
+++ b/tools/include/linux/log2.h
@@ -0,0 +1,185 @@
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _TOOLS_LINUX_LOG2_H
+#define _TOOLS_LINUX_LOG2_H
+
+/*
+ * deal with unrepresentable constant logarithms
+ */
+extern __attribute__((const, noreturn))
+int ____ilog2_NaN(void);
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ *   more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+	return fls(n) - 1;
+}
+
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+	return fls64(n) - 1;
+}
+
+/*
+ * Report whether @n has exactly one bit set, i.e. is a power of two.
+ * Zero has no bits set and is therefore rejected.
+ */
+
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+	return n && !(n & (n - 1));
+}
+
+/*
+ * round up to nearest power of two
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+	return 1UL << fls_long(n - 1);
+}
+
+/*
+ * round down to nearest power of two
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+	return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n: parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ *   the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(n) < 1 ? ____ilog2_NaN() :	\
+		(n) & (1ULL << 63) ? 63 :	\
+		(n) & (1ULL << 62) ? 62 :	\
+		(n) & (1ULL << 61) ? 61 :	\
+		(n) & (1ULL << 60) ? 60 :	\
+		(n) & (1ULL << 59) ? 59 :	\
+		(n) & (1ULL << 58) ? 58 :	\
+		(n) & (1ULL << 57) ? 57 :	\
+		(n) & (1ULL << 56) ? 56 :	\
+		(n) & (1ULL << 55) ? 55 :	\
+		(n) & (1ULL << 54) ? 54 :	\
+		(n) & (1ULL << 53) ? 53 :	\
+		(n) & (1ULL << 52) ? 52 :	\
+		(n) & (1ULL << 51) ? 51 :	\
+		(n) & (1ULL << 50) ? 50 :	\
+		(n) & (1ULL << 49) ? 49 :	\
+		(n) & (1ULL << 48) ? 48 :	\
+		(n) & (1ULL << 47) ? 47 :	\
+		(n) & (1ULL << 46) ? 46 :	\
+		(n) & (1ULL << 45) ? 45 :	\
+		(n) & (1ULL << 44) ? 44 :	\
+		(n) & (1ULL << 43) ? 43 :	\
+		(n) & (1ULL << 42) ? 42 :	\
+		(n) & (1ULL << 41) ? 41 :	\
+		(n) & (1ULL << 40) ? 40 :	\
+		(n) & (1ULL << 39) ? 39 :	\
+		(n) & (1ULL << 38) ? 38 :	\
+		(n) & (1ULL << 37) ? 37 :	\
+		(n) & (1ULL << 36) ? 36 :	\
+		(n) & (1ULL << 35) ? 35 :	\
+		(n) & (1ULL << 34) ? 34 :	\
+		(n) & (1ULL << 33) ? 33 :	\
+		(n) & (1ULL << 32) ? 32 :	\
+		(n) & (1ULL << 31) ? 31 :	\
+		(n) & (1ULL << 30) ? 30 :	\
+		(n) & (1ULL << 29) ? 29 :	\
+		(n) & (1ULL << 28) ? 28 :	\
+		(n) & (1ULL << 27) ? 27 :	\
+		(n) & (1ULL << 26) ? 26 :	\
+		(n) & (1ULL << 25) ? 25 :	\
+		(n) & (1ULL << 24) ? 24 :	\
+		(n) & (1ULL << 23) ? 23 :	\
+		(n) & (1ULL << 22) ? 22 :	\
+		(n) & (1ULL << 21) ? 21 :	\
+		(n) & (1ULL << 20) ? 20 :	\
+		(n) & (1ULL << 19) ? 19 :	\
+		(n) & (1ULL << 18) ? 18 :	\
+		(n) & (1ULL << 17) ? 17 :	\
+		(n) & (1ULL << 16) ? 16 :	\
+		(n) & (1ULL << 15) ? 15 :	\
+		(n) & (1ULL << 14) ? 14 :	\
+		(n) & (1ULL << 13) ? 13 :	\
+		(n) & (1ULL << 12) ? 12 :	\
+		(n) & (1ULL << 11) ? 11 :	\
+		(n) & (1ULL << 10) ? 10 :	\
+		(n) & (1ULL <<  9) ?  9 :	\
+		(n) & (1ULL <<  8) ?  8 :	\
+		(n) & (1ULL <<  7) ?  7 :	\
+		(n) & (1ULL <<  6) ?  6 :	\
+		(n) & (1ULL <<  5) ?  5 :	\
+		(n) & (1ULL <<  4) ?  4 :	\
+		(n) & (1ULL <<  3) ?  3 :	\
+		(n) & (1ULL <<  2) ?  2 :	\
+		(n) & (1ULL <<  1) ?  1 :	\
+		(n) & (1ULL <<  0) ?  0 :	\
+		____ilog2_NaN()			\
+				   ) :		\
+	(sizeof(n) <= 4) ?			\
+	__ilog2_u32(n) :			\
+	__ilog2_u64(n)				\
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: value to round; expanded more than once, so avoid side effects
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 1) ? 1 :		\
+		(1UL << (ilog2((n) - 1) + 1))	\
+				   ) :		\
+	__roundup_pow_of_two(n)			\
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(1UL << ilog2(n))) :		\
+	__rounddown_pow_of_two(n)		\
+ )
+
+#endif /* _TOOLS_LINUX_LOG2_H */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index c1b49c3..65d9be3 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -7,6 +7,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "debugfs.h"
 #include "fs.h"
@@ -163,3 +167,53 @@ const char *name##__mountpoint(void)	\
 
 FS__MOUNTPOINT(sysfs,  FS__SYSFS);
 FS__MOUNTPOINT(procfs, FS__PROCFS);
+
+/*
+ * Read the leading decimal integer from @filename into *@value.
+ *
+ * At most sizeof(line) - 1 bytes are read and the buffer is NUL-terminated
+ * before atoi() parses it, so parsing never runs past the data read().
+ * Returns 0 on success, -1 if the file cannot be opened or yields no data.
+ */
+int filename__read_int(const char *filename, int *value)
+{
+	char line[64];
+	int fd = open(filename, O_RDONLY), err = -1;
+	ssize_t n;
+
+	if (fd < 0)
+		return -1;
+
+	n = read(fd, line, sizeof(line) - 1);
+	if (n > 0) {
+		line[n] = '\0';
+		*value = atoi(line);
+		err = 0;
+	}
+
+	close(fd);
+	return err;
+}
+
+/*
+ * Read the integer stored in "<procfs mountpoint>/sys/<@sysctl>" into
+ * *@value.
+ *
+ * Returns 0 on success, -1 if procfs__mountpoint() returns NULL, the path
+ * does not fit in PATH_MAX, or the file cannot be read.
+ */
+int sysctl__read_int(const char *sysctl, int *value)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+	int len;
+
+	if (!procfs)
+		return -1;
+
+	len = snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);
+	if (len < 0 || (size_t)len >= sizeof(path))
+		return -1;	/* a truncated path would name another file */
+
+	return filename__read_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index cb70495..6caa2bb 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -11,4 +11,7 @@
 
 const char *sysfs__mountpoint(void);
 const char *procfs__mountpoint(void);
+
+int filename__read_int(const char *filename, int *value);
+int sysctl__read_int(const char *sysctl, int *value);
 #endif /* __API_FS__ */
diff --git a/tools/lib/util/find_next_bit.c b/tools/lib/util/find_next_bit.c
new file mode 100644
index 0000000..41b44f6
--- /dev/null
+++ b/tools/lib/util/find_next_bit.c
@@ -0,0 +1,89 @@
+/* find_next_bit.c: fallback find next bit implementation
+ *
+ * Copied from lib/find_next_bit.c to tools/lib/util/find_next_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit at or after @offset; returns @size if there is none.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;		/* bits left, counted from *p onward */
+	offset %= BITS_PER_LONG;
+	if (offset) {		/* unaligned start: mask the first word */
+		tmp = *(p++);
+		tmp &= (~0UL << offset);	/* drop bits below offset */
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {	/* whole words */
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;		/* trailing partial word */
+
+found_first:
+	tmp &= (~0UL >> (BITS_PER_LONG - size));	/* drop bits past end */
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + __ffs(tmp);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region; returns @size if there is none.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;	/* bit index of *p's bit 0 */
+	unsigned long tmp;
+
+	while (size & ~(BITS_PER_LONG-1)) {	/* whole words */
+		if ((tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+
+	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));	/* partial word */
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found:
+	return result + __ffs(tmp);
+}
+#endif
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index d240bb2..1e8e400 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -18,6 +18,10 @@ OPTIONS
 	  --debug verbose   # sets verbose = 1
 	  --debug verbose=2 # sets verbose = 2
 
+--buildid-dir::
+	Set up the buildid cache directory. This option takes priority
+	over the buildid.dir config file option.
+
 DESCRIPTION
 -----------
 Performance counters for Linux are a new kernel-based subsystem
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 344c4d3..83e2887 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -4,17 +4,31 @@ tools/lib/traceevent
 tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
+tools/lib/util/find_next_bit.c
 tools/include/asm/bug.h
+tools/include/asm-generic/bitops/atomic.h
+tools/include/asm-generic/bitops/__ffs.h
+tools/include/asm-generic/bitops/__fls.h
+tools/include/asm-generic/bitops/find.h
+tools/include/asm-generic/bitops/fls64.h
+tools/include/asm-generic/bitops/fls.h
+tools/include/asm-generic/bitops.h
+tools/include/linux/bitops.h
 tools/include/linux/compiler.h
-tools/include/linux/hash.h
 tools/include/linux/export.h
+tools/include/linux/hash.h
+tools/include/linux/log2.h
 tools/include/linux/types.h
+include/asm-generic/bitops/fls64.h
+include/asm-generic/bitops/__fls.h
+include/asm-generic/bitops/fls.h
 include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
+lib/find_next_bit.c
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 478efa9..67a03a82 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -231,8 +231,16 @@ LIB_H += ../../include/uapi/linux/const.h
 LIB_H += ../include/linux/hash.h
 LIB_H += ../../include/linux/stringify.h
 LIB_H += util/include/linux/bitmap.h
-LIB_H += util/include/linux/bitops.h
+LIB_H += ../include/linux/bitops.h
+LIB_H += ../include/asm-generic/bitops/atomic.h
+LIB_H += ../include/asm-generic/bitops/find.h
+LIB_H += ../include/asm-generic/bitops/fls64.h
+LIB_H += ../include/asm-generic/bitops/fls.h
+LIB_H += ../include/asm-generic/bitops/__ffs.h
+LIB_H += ../include/asm-generic/bitops/__fls.h
+LIB_H += ../include/asm-generic/bitops.h
 LIB_H += ../include/linux/compiler.h
+LIB_H += ../include/linux/log2.h
 LIB_H += util/include/linux/const.h
 LIB_H += util/include/linux/ctype.h
 LIB_H += util/include/linux/kernel.h
@@ -335,6 +343,7 @@ LIB_OBJS += $(OUTPUT)util/event.o
 LIB_OBJS += $(OUTPUT)util/evlist.o
 LIB_OBJS += $(OUTPUT)util/evsel.o
 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/find_next_bit.o
 LIB_OBJS += $(OUTPUT)util/help.o
 LIB_OBJS += $(OUTPUT)util/kallsyms.o
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -458,7 +467,6 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
 endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
@@ -735,6 +743,9 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
 $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
 $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
 
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 2465141..6c14afe 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -13,6 +13,7 @@
 #include "../util/cloexec.h"
 #include "bench.h"
 #include "mem-memcpy-arch.h"
+#include "mem-memset-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -48,20 +49,24 @@ static const struct option options[] = {
 };
 
 typedef void *(*memcpy_t)(void *, const void *, size_t);
+typedef void *(*memset_t)(void *, int, size_t);
 
 struct routine {
 	const char *name;
 	const char *desc;
-	memcpy_t fn;
+	union {
+		memcpy_t memcpy;
+		memset_t memset;
+	} fn;
 };
 
-struct routine routines[] = {
-	{ "default",
-	  "Default memcpy() provided by glibc",
-	  memcpy },
+struct routine memcpy_routines[] = {
+	{ .name = "default",
+	  .desc = "Default memcpy() provided by glibc",
+	  .fn.memcpy = memcpy },
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
-#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
 #include "mem-memcpy-x86-64-asm-def.h"
 #undef MEMCPY_FN
 
@@ -69,7 +74,7 @@ struct routine routines[] = {
 
 	{ NULL,
 	  NULL,
-	  NULL   }
+	  {NULL}   }
 };
 
 static const char * const bench_mem_memcpy_usage[] = {
@@ -110,63 +115,6 @@ static double timeval2double(struct timeval *ts)
 		(double)ts->tv_usec / (double)1000000;
 }
 
-static void alloc_mem(void **dst, void **src, size_t length)
-{
-	*dst = zalloc(length);
-	if (!*dst)
-		die("memory allocation failed - maybe length is too large?\n");
-
-	*src = zalloc(length);
-	if (!*src)
-		die("memory allocation failed - maybe length is too large?\n");
-	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
-	memset(*src, 0, length);
-}
-
-static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
-{
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	void *src = NULL, *dst = NULL;
-	int i;
-
-	alloc_mem(&src, &dst, len);
-
-	if (prefault)
-		fn(dst, src, len);
-
-	cycle_start = get_cycle();
-	for (i = 0; i < iterations; ++i)
-		fn(dst, src, len);
-	cycle_end = get_cycle();
-
-	free(src);
-	free(dst);
-	return cycle_end - cycle_start;
-}
-
-static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
-{
-	struct timeval tv_start, tv_end, tv_diff;
-	void *src = NULL, *dst = NULL;
-	int i;
-
-	alloc_mem(&src, &dst, len);
-
-	if (prefault)
-		fn(dst, src, len);
-
-	BUG_ON(gettimeofday(&tv_start, NULL));
-	for (i = 0; i < iterations; ++i)
-		fn(dst, src, len);
-	BUG_ON(gettimeofday(&tv_end, NULL));
-
-	timersub(&tv_end, &tv_start, &tv_diff);
-
-	free(src);
-	free(dst);
-	return (double)((double)len / timeval2double(&tv_diff));
-}
-
 #define pf (no_prefault ? 0 : 1)
 
 #define print_bps(x) do {					\
@@ -180,16 +128,25 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 			printf(" %14lf GB/Sec", x / K / K / K); \
 	} while (0)
 
-int bench_mem_memcpy(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
+struct bench_mem_info {
+	const struct routine *routines;
+	u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
+	double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
+	const char *const *usage;
+};
+
+static int bench_mem_common(int argc, const char **argv,
+		     const char *prefix __maybe_unused,
+		     struct bench_mem_info *info)
 {
 	int i;
 	size_t len;
+	double totallen;
 	double result_bps[2];
 	u64 result_cycle[2];
 
 	argc = parse_options(argc, argv, options,
-			     bench_mem_memcpy_usage, 0);
+			     info->usage, 0);
 
 	if (no_prefault && only_prefault) {
 		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
@@ -200,6 +157,7 @@ int bench_mem_memcpy(int argc, const char **argv,
 		init_cycle();
 
 	len = (size_t)perf_atoll((char *)length_str);
+	totallen = (double)len * iterations;
 
 	result_cycle[0] = result_cycle[1] = 0ULL;
 	result_bps[0] = result_bps[1] = 0.0;
@@ -213,16 +171,16 @@ int bench_mem_memcpy(int argc, const char **argv,
 	if (only_prefault && no_prefault)
 		only_prefault = no_prefault = false;
 
-	for (i = 0; routines[i].name; i++) {
-		if (!strcmp(routines[i].name, routine))
+	for (i = 0; info->routines[i].name; i++) {
+		if (!strcmp(info->routines[i].name, routine))
 			break;
 	}
-	if (!routines[i].name) {
+	if (!info->routines[i].name) {
 		printf("Unknown routine:%s\n", routine);
 		printf("Available routines...\n");
-		for (i = 0; routines[i].name; i++) {
+		for (i = 0; info->routines[i].name; i++) {
 			printf("\t%s ... %s\n",
-			       routines[i].name, routines[i].desc);
+			       info->routines[i].name, info->routines[i].desc);
 		}
 		return 1;
 	}
@@ -234,25 +192,25 @@ int bench_mem_memcpy(int argc, const char **argv,
 		/* show both of results */
 		if (use_cycle) {
 			result_cycle[0] =
-				do_memcpy_cycle(routines[i].fn, len, false);
+				info->do_cycle(&info->routines[i], len, false);
 			result_cycle[1] =
-				do_memcpy_cycle(routines[i].fn, len, true);
+				info->do_cycle(&info->routines[i], len, true);
 		} else {
 			result_bps[0] =
-				do_memcpy_gettimeofday(routines[i].fn,
+				info->do_gettimeofday(&info->routines[i],
 						len, false);
 			result_bps[1] =
-				do_memcpy_gettimeofday(routines[i].fn,
+				info->do_gettimeofday(&info->routines[i],
 						len, true);
 		}
 	} else {
 		if (use_cycle) {
 			result_cycle[pf] =
-				do_memcpy_cycle(routines[i].fn,
+				info->do_cycle(&info->routines[i],
 						len, only_prefault);
 		} else {
 			result_bps[pf] =
-				do_memcpy_gettimeofday(routines[i].fn,
+				info->do_gettimeofday(&info->routines[i],
 						len, only_prefault);
 		}
 	}
@@ -263,10 +221,10 @@ int bench_mem_memcpy(int argc, const char **argv,
 			if (use_cycle) {
 				printf(" %14lf Cycle/Byte\n",
 					(double)result_cycle[0]
-					/ (double)len);
+					/ totallen);
 				printf(" %14lf Cycle/Byte (with prefault)\n",
 					(double)result_cycle[1]
-					/ (double)len);
+					/ totallen);
 			} else {
 				print_bps(result_bps[0]);
 				printf("\n");
@@ -277,7 +235,7 @@ int bench_mem_memcpy(int argc, const char **argv,
 			if (use_cycle) {
 				printf(" %14lf Cycle/Byte",
 					(double)result_cycle[pf]
-					/ (double)len);
+					/ totallen);
 			} else
 				print_bps(result_bps[pf]);
 
@@ -288,8 +246,8 @@ int bench_mem_memcpy(int argc, const char **argv,
 		if (!only_prefault && !no_prefault) {
 			if (use_cycle) {
 				printf("%lf %lf\n",
-					(double)result_cycle[0] / (double)len,
-					(double)result_cycle[1] / (double)len);
+					(double)result_cycle[0] / totallen,
+					(double)result_cycle[1] / totallen);
 			} else {
 				printf("%lf %lf\n",
 					result_bps[0], result_bps[1]);
@@ -297,7 +255,7 @@ int bench_mem_memcpy(int argc, const char **argv,
 		} else {
 			if (use_cycle) {
 				printf("%lf\n", (double)result_cycle[pf]
-					/ (double)len);
+					/ totallen);
 			} else
 				printf("%lf\n", result_bps[pf]);
 		}
@@ -310,3 +268,191 @@ int bench_mem_memcpy(int argc, const char **argv,
 
 	return 0;
 }
+
+/*
+ * Allocate the destination and source buffers used by the memcpy() runs.
+ * die()s if either allocation fails.
+ */
+static void memcpy_alloc_mem(void **dst, void **src, size_t length)
+{
+	*dst = zalloc(length);
+	if (!*dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	*src = zalloc(length);
+	if (!*src)
+		die("memory allocation failed - maybe length is too large?\n");
+	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
+	memset(*src, 0, length);
+}
+
+/*
+ * Call r->fn.memcpy 'iterations' times on 'len' bytes and return the
+ * difference between the get_cycle() readings taken around the loop.
+ * When 'prefault' is set, one extra untimed copy is made first.
+ */
+static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	void *src = NULL, *dst = NULL;
+	memcpy_t fn = r->fn.memcpy;
+	int i;
+
+	/* The helper takes (dst, src); only src gets the extra memset(). */
+	memcpy_alloc_mem(&dst, &src, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	cycle_start = get_cycle();
+	for (i = 0; i < iterations; ++i)
+		fn(dst, src, len);
+	cycle_end = get_cycle();
+
+	free(src);
+	free(dst);
+	return cycle_end - cycle_start;
+}
+
+/*
+ * Same run as do_memcpy_cycle(), but timed with gettimeofday().  Returns the
+ * total bytes copied (len * iterations) divided by the elapsed seconds.
+ */
+static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
+				     bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	memcpy_t fn = r->fn.memcpy;
+	void *src = NULL, *dst = NULL;
+	int i;
+
+	/* The helper takes (dst, src); only src gets the extra memset(). */
+	memcpy_alloc_mem(&dst, &src, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < iterations; ++i)
+		fn(dst, src, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(src);
+	free(dst);
+	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
+}
+
+/* 'perf bench mem memcpy': run the shared driver with the memcpy() callbacks. */
+int bench_mem_memcpy(int argc, const char **argv,
+		     const char *prefix __maybe_unused)
+{
+	struct bench_mem_info info = {
+		.routines = memcpy_routines,
+		.do_cycle = do_memcpy_cycle,
+		.do_gettimeofday = do_memcpy_gettimeofday,
+		.usage = bench_mem_memcpy_usage,
+	};
+
+	return bench_mem_common(argc, argv, prefix, &info);
+}
+
+/* Allocate the buffer filled by the memset() runs; die()s on failure. */
+static void memset_alloc_mem(void **dst, size_t length)
+{
+	*dst = zalloc(length);
+	if (!*dst)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+/*
+ * Call r->fn.memset 'iterations' times on 'len' bytes and return the
+ * difference between the get_cycle() readings taken around the loop.
+ * When 'prefault' is set, one extra untimed fill with -1 is made first.
+ */
+static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
+	int i;
+
+	memset_alloc_mem(&dst, len);
+
+	if (prefault)
+		fn(dst, -1, len);
+
+	cycle_start = get_cycle();
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);	/* fill value is the iteration index */
+	cycle_end = get_cycle();
+
+	free(dst);
+	return cycle_end - cycle_start;
+}
+
+/*
+ * Same run as do_memset_cycle(), but timed with gettimeofday().  Returns the
+ * total bytes set (len * iterations) divided by the elapsed seconds.
+ */
+static double do_memset_gettimeofday(const struct routine *r, size_t len,
+				     bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
+	int i;
+
+	memset_alloc_mem(&dst, len);
+
+	if (prefault)
+		fn(dst, -1, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);	/* fill value is the iteration index */
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(dst);
+	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
+}
+
+static const char * const bench_mem_memset_usage[] = {
+	"perf bench mem memset <options>",
+	NULL
+};
+
+/* memset() implementations selectable by name; ends with an all-NULL entry. */
+static const struct routine memset_routines[] = {
+	{ .name = "default",
+	  .desc = "Default memset() provided by glibc",
+	  .fn.memset = memset },
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+	{ .name = NULL,
+	  .desc = NULL,
+	  .fn.memset = NULL   }
+};
+
+/* 'perf bench mem memset': run the shared driver with the memset() callbacks. */
+int bench_mem_memset(int argc, const char **argv,
+		     const char *prefix __maybe_unused)
+{
+	struct bench_mem_info info = {
+		.routines = memset_routines,
+		.do_cycle = do_memset_cycle,
+		.do_gettimeofday = do_memset_gettimeofday,
+		.usage = bench_mem_memset_usage,
+	};
+
+	return bench_mem_common(argc, argv, prefix, &info);
+}
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
deleted file mode 100644
index 75fc3e6..0000000
--- a/tools/perf/bench/mem-memset.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * mem-memset.c
- *
- * memset: Simple memory set in various ways
- *
- * Trivial clone of mem-memcpy.c.
- */
-
-#include "../perf.h"
-#include "../util/util.h"
-#include "../util/parse-options.h"
-#include "../util/header.h"
-#include "../util/cloexec.h"
-#include "bench.h"
-#include "mem-memset-arch.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <errno.h>
-
-#define K 1024
-
-static const char	*length_str	= "1MB";
-static const char	*routine	= "default";
-static int		iterations	= 1;
-static bool		use_cycle;
-static int		cycle_fd;
-static bool		only_prefault;
-static bool		no_prefault;
-
-static const struct option options[] = {
-	OPT_STRING('l', "length", &length_str, "1MB",
-		    "Specify length of memory to set. "
-		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
-	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to set"),
-	OPT_INTEGER('i', "iterations", &iterations,
-		    "repeat memset() invocation this number of times"),
-	OPT_BOOLEAN('c', "cycle", &use_cycle,
-		    "Use cycles event instead of gettimeofday() for measuring"),
-	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
-		    "Show only the result with page faults before memset()"),
-	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
-		    "Show only the result without page faults before memset()"),
-	OPT_END()
-};
-
-typedef void *(*memset_t)(void *, int, size_t);
-
-struct routine {
-	const char *name;
-	const char *desc;
-	memset_t fn;
-};
-
-static const struct routine routines[] = {
-	{ "default",
-	  "Default memset() provided by glibc",
-	  memset },
-#ifdef HAVE_ARCH_X86_64_SUPPORT
-
-#define MEMSET_FN(fn, name, desc) { name, desc, fn },
-#include "mem-memset-x86-64-asm-def.h"
-#undef MEMSET_FN
-
-#endif
-
-	{ NULL,
-	  NULL,
-	  NULL   }
-};
-
-static const char * const bench_mem_memset_usage[] = {
-	"perf bench mem memset <options>",
-	NULL
-};
-
-static struct perf_event_attr cycle_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES
-};
-
-static void init_cycle(void)
-{
-	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
-				       perf_event_open_cloexec_flag());
-
-	if (cycle_fd < 0 && errno == ENOSYS)
-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-	else
-		BUG_ON(cycle_fd < 0);
-}
-
-static u64 get_cycle(void)
-{
-	int ret;
-	u64 clk;
-
-	ret = read(cycle_fd, &clk, sizeof(u64));
-	BUG_ON(ret != sizeof(u64));
-
-	return clk;
-}
-
-static double timeval2double(struct timeval *ts)
-{
-	return (double)ts->tv_sec +
-		(double)ts->tv_usec / (double)1000000;
-}
-
-static void alloc_mem(void **dst, size_t length)
-{
-	*dst = zalloc(length);
-	if (!*dst)
-		die("memory allocation failed - maybe length is too large?\n");
-}
-
-static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
-{
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	void *dst = NULL;
-	int i;
-
-	alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	cycle_start = get_cycle();
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	cycle_end = get_cycle();
-
-	free(dst);
-	return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
-{
-	struct timeval tv_start, tv_end, tv_diff;
-	void *dst = NULL;
-	int i;
-
-	alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	BUG_ON(gettimeofday(&tv_start, NULL));
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	BUG_ON(gettimeofday(&tv_end, NULL));
-
-	timersub(&tv_end, &tv_start, &tv_diff);
-
-	free(dst);
-	return (double)((double)len / timeval2double(&tv_diff));
-}
-
-#define pf (no_prefault ? 0 : 1)
-
-#define print_bps(x) do {					\
-		if (x < K)					\
-			printf(" %14lf B/Sec", x);		\
-		else if (x < K * K)				\
-			printf(" %14lfd KB/Sec", x / K);	\
-		else if (x < K * K * K)				\
-			printf(" %14lf MB/Sec", x / K / K);	\
-		else						\
-			printf(" %14lf GB/Sec", x / K / K / K); \
-	} while (0)
-
-int bench_mem_memset(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
-{
-	int i;
-	size_t len;
-	double result_bps[2];
-	u64 result_cycle[2];
-
-	argc = parse_options(argc, argv, options,
-			     bench_mem_memset_usage, 0);
-
-	if (no_prefault && only_prefault) {
-		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-		return 1;
-	}
-
-	if (use_cycle)
-		init_cycle();
-
-	len = (size_t)perf_atoll((char *)length_str);
-
-	result_cycle[0] = result_cycle[1] = 0ULL;
-	result_bps[0] = result_bps[1] = 0.0;
-
-	if ((s64)len <= 0) {
-		fprintf(stderr, "Invalid length:%s\n", length_str);
-		return 1;
-	}
-
-	/* same to without specifying either of prefault and no-prefault */
-	if (only_prefault && no_prefault)
-		only_prefault = no_prefault = false;
-
-	for (i = 0; routines[i].name; i++) {
-		if (!strcmp(routines[i].name, routine))
-			break;
-	}
-	if (!routines[i].name) {
-		printf("Unknown routine:%s\n", routine);
-		printf("Available routines...\n");
-		for (i = 0; routines[i].name; i++) {
-			printf("\t%s ... %s\n",
-			       routines[i].name, routines[i].desc);
-		}
-		return 1;
-	}
-
-	if (bench_format == BENCH_FORMAT_DEFAULT)
-		printf("# Copying %s Bytes ...\n\n", length_str);
-
-	if (!only_prefault && !no_prefault) {
-		/* show both of results */
-		if (use_cycle) {
-			result_cycle[0] =
-				do_memset_cycle(routines[i].fn, len, false);
-			result_cycle[1] =
-				do_memset_cycle(routines[i].fn, len, true);
-		} else {
-			result_bps[0] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, false);
-			result_bps[1] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, true);
-		}
-	} else {
-		if (use_cycle) {
-			result_cycle[pf] =
-				do_memset_cycle(routines[i].fn,
-						len, only_prefault);
-		} else {
-			result_bps[pf] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, only_prefault);
-		}
-	}
-
-	switch (bench_format) {
-	case BENCH_FORMAT_DEFAULT:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte\n",
-					(double)result_cycle[0]
-					/ (double)len);
-				printf(" %14lf Cycle/Byte (with prefault)\n ",
-					(double)result_cycle[1]
-					/ (double)len);
-			} else {
-				print_bps(result_bps[0]);
-				printf("\n");
-				print_bps(result_bps[1]);
-				printf(" (with prefault)\n");
-			}
-		} else {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte",
-					(double)result_cycle[pf]
-					/ (double)len);
-			} else
-				print_bps(result_bps[pf]);
-
-			printf("%s\n", only_prefault ? " (with prefault)" : "");
-		}
-		break;
-	case BENCH_FORMAT_SIMPLE:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf("%lf %lf\n",
-					(double)result_cycle[0] / (double)len,
-					(double)result_cycle[1] / (double)len);
-			} else {
-				printf("%lf %lf\n",
-					result_bps[0], result_bps[1]);
-			}
-		} else {
-			if (use_cycle) {
-				printf("%lf\n", (double)result_cycle[pf]
-					/ (double)len);
-			} else
-				printf("%lf\n", result_bps[pf]);
-		}
-		break;
-	default:
-		/* reaching this means there's some disaster: */
-		die("unknown format: %d\n", bench_format);
-		break;
-	}
-
-	return 0;
-}
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 7038575..77d5cae 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -285,12 +285,11 @@ int cmd_buildid_cache(int argc, const char **argv,
 	struct str_node *pos;
 	int ret = 0;
 	bool force = false;
-	char debugdir[PATH_MAX];
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
 		   *missing_filename = NULL,
 		   *update_name_list_str = NULL,
-		   *kcore_filename;
+		   *kcore_filename = NULL;
 	char sbuf[STRERR_BUFSIZE];
 
 	struct perf_data_file file = {
@@ -335,13 +334,11 @@ int cmd_buildid_cache(int argc, const char **argv,
 
 	setup_pager();
 
-	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
-
 	if (add_name_list_str) {
 		list = strlist__new(true, add_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__add_file(pos->s, debugdir)) {
+				if (build_id_cache__add_file(pos->s, buildid_dir)) {
 					if (errno == EEXIST) {
 						pr_debug("%s already in the cache\n",
 							 pos->s);
@@ -359,7 +356,7 @@ int cmd_buildid_cache(int argc, const char **argv,
 		list = strlist__new(true, remove_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__remove_file(pos->s, debugdir)) {
+				if (build_id_cache__remove_file(pos->s, buildid_dir)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -380,7 +377,7 @@ int cmd_buildid_cache(int argc, const char **argv,
 		list = strlist__new(true, update_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__update_file(pos->s, debugdir)) {
+				if (build_id_cache__update_file(pos->s, buildid_dir)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -395,7 +392,7 @@ int cmd_buildid_cache(int argc, const char **argv,
 	}
 
 	if (kcore_filename &&
-	    build_id_cache__add_kcore(kcore_filename, debugdir, force))
+	    build_id_cache__add_kcore(kcore_filename, buildid_dir, force))
 		pr_warning("Couldn't add %s\n", kcore_filename);
 
 out:
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 3c0f3d4..0894a81 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1293,7 +1293,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		OPT_UINTEGER('d', "display", &kvm->display_time,
 			"time in seconds between display updates"),
 		OPT_STRING(0, "event", &kvm->report_event, "report event",
-			"event for reporting: vmexit, mmio, ioport"),
+			"event for reporting: "
+			"vmexit, mmio (x86 only), ioport (x86 only)"),
 		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
 			"vcpu id to report"),
 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 83a4835..badfabc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2045,7 +2045,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	unsigned long before;
 	const bool forks = argc > 0;
 	bool draining = false;
-	char sbuf[STRERR_BUFSIZE];
 
 	trace->live = true;
 
@@ -2106,11 +2105,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_error_open;
 
 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
-	if (err < 0) {
-		fprintf(trace->output, "Couldn't mmap the events: %s\n",
-			strerror_r(errno, sbuf, sizeof(sbuf)));
-		goto out_delete_evlist;
-	}
+	if (err < 0)
+		goto out_error_mmap;
 
 	perf_evlist__enable(evlist);
 
@@ -2210,6 +2206,10 @@ out_error_tp:
 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
 	goto out_error;
 
+out_error_mmap:
+	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+	goto out_error;
+
 out_error_open:
 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 
@@ -2485,7 +2485,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.user_freq     = UINT_MAX,
 			.user_interval = ULLONG_MAX,
 			.no_buffering  = true,
-			.mmap_pages    = 1024,
+			.mmap_pages    = UINT_MAX,
 		},
 		.output = stdout,
 		.show_comm = true,
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 452a847..3700a7f 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -200,6 +200,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 				*envchanged = 1;
 			(*argv)++;
 			(*argc)--;
+		} else if (!strcmp(cmd, "--buildid-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --buildid-dir.\n");
+				usage(perf_usage_string);
+			}
+			set_buildid_dir((*argv)[1]);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
 		} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
 			perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
 			fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
@@ -499,7 +509,7 @@ int main(int argc, const char **argv)
 	}
 	if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-		set_buildid_dir();
+		set_buildid_dir(NULL);
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
@@ -514,7 +524,7 @@ int main(int argc, const char **argv)
 	argc--;
 	handle_options(&argv, &argc, NULL);
 	commit_pager_choice();
-	set_buildid_dir();
+	set_buildid_dir(NULL);
 
 	if (argc > 0) {
 		if (!prefixcmp(argv[0], "--"))
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index f710b92..d3095da 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0|1
-size=96
+size=104
 config=0
 sample_period=4000
 sample_type=263
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index dc3ada2..872ed7e 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0
-size=96
+size=104
 config=0
 sample_period=0
 sample_type=0
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 502daff..e6bb04b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1252,7 +1252,7 @@ static int hists__browser_title(struct hists *hists,
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s', Event count (approx.): %lu",
+			   "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
 			   nr_samples, unit, ev_name, nr_events);
 
 
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 2af1837..dc0d095 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -162,8 +162,8 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
 		return ret;
 
 	nr_members = evsel->nr_members;
-	fields_a = calloc(sizeof(*fields_a), nr_members);
-	fields_b = calloc(sizeof(*fields_b), nr_members);
+	fields_a = calloc(nr_members, sizeof(*fields_a));
+	fields_b = calloc(nr_members, sizeof(*fields_b));
 
 	if (!fields_a || !fields_b)
 		goto out;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e8d79e5..0c72680 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -410,21 +410,18 @@ int perf_session__cache_build_ids(struct perf_session *session)
 {
 	struct rb_node *nd;
 	int ret;
-	char debugdir[PATH_MAX];
 
 	if (no_buildid_cache)
 		return 0;
 
-	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
-
-	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	ret = machine__cache_build_ids(&session->machines.host, debugdir);
+	ret = machine__cache_build_ids(&session->machines.host, buildid_dir);
 
 	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret |= machine__cache_build_ids(pos, debugdir);
+		ret |= machine__cache_build_ids(pos, buildid_dir);
 	}
 	return ret ? -1 : 0;
 }
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index cf524a3..64b377e 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -77,7 +77,7 @@ int parse_callchain_record_opt(const char *arg)
 				ret = 0;
 			} else
 				pr_err("callchain: No more arguments "
-				       "needed for -g fp\n");
+				       "needed for --call-graph fp\n");
 			break;
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 57ff826..e18f653 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -522,7 +522,7 @@ static int buildid_dir_command_config(const char *var, const char *value,
 	const char *v;
 
 	/* same dir for all commands */
-	if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
+	if (!strcmp(var, "buildid.dir")) {
 		v = perf_config_dirname(var, value);
 		if (!v)
 			return -1;
@@ -539,12 +539,14 @@ static void check_buildid_dir_config(void)
 	perf_config(buildid_dir_command_config, &c);
 }
 
-void set_buildid_dir(void)
+void set_buildid_dir(const char *dir)
 {
-	buildid_dir[0] = '\0';
+	if (dir)
+		scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
 
 	/* try config file */
-	check_buildid_dir_config();
+	if (buildid_dir[0] == '\0')
+		check_buildid_dir_config();
 
 	/* default to $HOME/.debug */
 	if (buildid_dir[0] == '\0') {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index cfbe2b9..cbab1fb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -8,6 +8,7 @@
  */
 #include "util.h"
 #include <api/fs/debugfs.h>
+#include <api/fs/fs.h>
 #include <poll.h>
 #include "cpumap.h"
 #include "thread_map.h"
@@ -24,6 +25,7 @@
 
 #include <linux/bitops.h>
 #include <linux/hash.h>
+#include <linux/log2.h>
 
 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
@@ -892,10 +894,24 @@ out_unmap:
 
 static size_t perf_evlist__mmap_size(unsigned long pages)
 {
-	/* 512 kiB: default amount of unprivileged mlocked memory */
-	if (pages == UINT_MAX)
-		pages = (512 * 1024) / page_size;
-	else if (!is_power_of_2(pages))
+	if (pages == UINT_MAX) {
+		int max;
+
+		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+			/*
+			 * Fall back to a value that used to be the default; it is
+			 * odd that we can't read the sysctl value, but let's not
+			 * die yet...
+			 */
+			max = 512;
+		} else {
+			max -= (page_size / 1024);
+		}
+
+		pages = (max * 1024) / page_size;
+		if (!is_power_of_2(pages))
+			pages = rounddown_pow_of_two(pages);
+	} else if (!is_power_of_2(pages))
 		return 0;
 
 	return (pages + 1) * page_size;
@@ -932,7 +948,7 @@ static long parse_pages_arg(const char *str, unsigned long min,
 		/* leave number of pages at 0 */
 	} else if (!is_power_of_2(pages)) {
 		/* round pages up to next power of 2 */
-		pages = next_pow2_l(pages);
+		pages = roundup_pow_of_two(pages);
 		if (!pages)
 			return -EINVAL;
 		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
@@ -1483,6 +1499,46 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
 	return 0;
 }
 
+/*
+ * Format a message for a failed perf_evlist__mmap() into buf (at most size
+ * bytes).  For EPERM the message adds hints built from
+ * /proc/sys/kernel/perf_event_mlock_kb and evlist->mmap_len; any other error
+ * just gets its strerror_r() text.  Always returns 0.
+ */
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
+	/* Both are reported to the user as kB, despite the "pages" names. */
+	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
+
+	switch (err) {
+	case EPERM:
+		/* Don't print an uninitialized value if the sysctl can't be read. */
+		if (sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user) < 0)
+			pages_max_per_user = 0;
+		printed += scnprintf(buf + printed, size - printed,
+				     "Error:\t%s.\n"
+				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+				     "Hint:\tTried using %d kB.\n",
+				     emsg, pages_max_per_user, pages_attempted);
+
+		if (pages_attempted >= pages_max_per_user) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+					     pages_max_per_user + pages_attempted);
+		}
+
+		printed += scnprintf(buf + printed, size - printed,
+				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
+		break;
+	default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
 void perf_evlist__to_front(struct perf_evlist *evlist,
 			   struct perf_evsel *move_evsel)
 {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 649b0c5..0ba93f6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -185,6 +185,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
 
 int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
 
 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
 {
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
deleted file mode 100644
index c329416..0000000
--- a/tools/perf/util/include/linux/bitops.h
+++ /dev/null
@@ -1,162 +0,0 @@
-#ifndef _PERF_LINUX_BITOPS_H_
-#define _PERF_LINUX_BITOPS_H_
-
-#include <linux/kernel.h>
-#include <linux/compiler.h>
-#include <asm/hweight.h>
-
-#ifndef __WORDSIZE
-#define __WORDSIZE (__SIZEOF_LONG__ * 8)
-#endif
-
-#define BITS_PER_LONG __WORDSIZE
-#define BITS_PER_BYTE           8
-#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-#define BITS_TO_U64(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
-#define BITS_TO_U32(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
-#define BITS_TO_BYTES(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE)
-#define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)
-#define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_LONG))
-
-#define for_each_set_bit(bit, addr, size) \
-	for ((bit) = find_first_bit((addr), (size));		\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_from(bit, addr, size) \
-	for ((bit) = find_next_bit((addr), (size), (bit));	\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-static inline void set_bit(int nr, unsigned long *addr)
-{
-	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
-}
-
-static inline void clear_bit(int nr, unsigned long *addr)
-{
-	addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
-}
-
-static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
-{
-	return ((1UL << (nr % BITS_PER_LONG)) &
-		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
-}
-
-static inline unsigned long hweight_long(unsigned long w)
-{
-	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
-}
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static __always_inline unsigned long __ffs(unsigned long word)
-{
-	int num = 0;
-
-#if BITS_PER_LONG == 64
-	if ((word & 0xffffffff) == 0) {
-		num += 32;
-		word >>= 32;
-	}
-#endif
-	if ((word & 0xffff) == 0) {
-		num += 16;
-		word >>= 16;
-	}
-	if ((word & 0xff) == 0) {
-		num += 8;
-		word >>= 8;
-	}
-	if ((word & 0xf) == 0) {
-		num += 4;
-		word >>= 4;
-	}
-	if ((word & 0x3) == 0) {
-		num += 2;
-		word >>= 2;
-	}
-	if ((word & 0x1) == 0)
-		num += 1;
-	return num;
-}
-
-typedef const unsigned long __attribute__((__may_alias__)) long_alias_t;
-
-/*
- * Find the first set bit in a memory region.
- */
-static inline unsigned long
-find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	long_alias_t *p = (long_alias_t *) addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
-}
-
-/*
- * Find the next set bit in a memory region.
- */
-static inline unsigned long
-find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-
-#endif
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 15dd0a9..94de3e4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1385,19 +1385,46 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 static int add_callchain_ip(struct thread *thread,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
-			    int cpumode,
+			    bool branch_history,
 			    u64 ip)
 {
 	struct addr_location al;
 
 	al.filtered = 0;
 	al.sym = NULL;
-	if (cpumode == -1)
+	if (branch_history)
 		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
 						   ip, &al);
-	else
+	else {
+		u8 cpumode = PERF_RECORD_MISC_USER;
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				break;
+			case PERF_CONTEXT_KERNEL:
+				cpumode = PERF_RECORD_MISC_KERNEL;
+				break;
+			case PERF_CONTEXT_USER:
+				cpumode = PERF_RECORD_MISC_USER;
+				break;
+			default:
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/*
+				 * It seems the callchain is corrupted.
+				 * Discard all.
+				 */
+				callchain_cursor_reset(&callchain_cursor);
+				return 1;
+			}
+			return 0;
+		}
 		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 				   ip, &al);
+	}
+
 	if (al.sym != NULL) {
 		if (sort__has_parent && !*parent &&
 		    symbol__match_regex(al.sym, &parent_regex))
@@ -1480,11 +1507,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 					     struct addr_location *root_al,
 					     int max_stack)
 {
-	u8 cpumode = PERF_RECORD_MISC_USER;
 	int chain_nr = min(max_stack, (int)chain->nr);
-	int i;
-	int j;
-	int err;
+	int i, j, err;
 	int skip_idx = -1;
 	int first_call = 0;
 
@@ -1542,10 +1566,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 
 		for (i = 0; i < nr; i++) {
 			err = add_callchain_ip(thread, parent, root_al,
-					       -1, be[i].to);
+					       true, be[i].to);
 			if (!err)
 				err = add_callchain_ip(thread, parent, root_al,
-						       -1, be[i].from);
+						       true, be[i].from);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -1574,36 +1598,10 @@ check_calls:
 #endif
 		ip = chain->ips[j];
 
-		if (ip >= PERF_CONTEXT_MAX) {
-			switch (ip) {
-			case PERF_CONTEXT_HV:
-				cpumode = PERF_RECORD_MISC_HYPERVISOR;
-				break;
-			case PERF_CONTEXT_KERNEL:
-				cpumode = PERF_RECORD_MISC_KERNEL;
-				break;
-			case PERF_CONTEXT_USER:
-				cpumode = PERF_RECORD_MISC_USER;
-				break;
-			default:
-				pr_debug("invalid callchain context: "
-					 "%"PRId64"\n", (s64) ip);
-				/*
-				 * It seems the callchain is corrupted.
-				 * Discard all.
-				 */
-				callchain_cursor_reset(&callchain_cursor);
-				return 0;
-			}
-			continue;
-		}
+		err = add_callchain_ip(thread, parent, root_al, false, ip);
 
-		err = add_callchain_ip(thread, parent, root_al,
-				       cpumode, ip);
-		if (err == -EINVAL)
-			break;
 		if (err)
-			return err;
+			return (err < 0) ? err : 0;
 	}
 
 	return 0;
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index cf69325..8acd0df 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -137,16 +137,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
 
 static int get_max_rate(unsigned int *rate)
 {
-	char path[PATH_MAX];
-	const char *procfs = procfs__mountpoint();
-
-	if (!procfs)
-		return -1;
-
-	snprintf(path, PATH_MAX,
-		 "%s/sys/kernel/perf_event_max_sample_rate", procfs);
-
-	return filename__read_int(path, (int *) rate);
+	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
 }
 
 static int record_opts__config_freq(struct record_opts *opts)
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index e73b6a5..c93fb0c 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -20,7 +20,7 @@
 
 struct a2l_data {
 	const char 	*input;
-	unsigned long 	addr;
+	u64	 	addr;
 
 	bool 		found;
 	const char 	*filename;
@@ -147,7 +147,7 @@ static void addr2line_cleanup(struct a2l_data *a2l)
 	free(a2l);
 }
 
-static int addr2line(const char *dso_name, unsigned long addr,
+static int addr2line(const char *dso_name, u64 addr,
 		     char **file, unsigned int *line, struct dso *dso)
 {
 	int ret = 0;
@@ -193,7 +193,7 @@ void dso__free_a2l(struct dso *dso)
 
 #else /* HAVE_LIBBFD_SUPPORT */
 
-static int addr2line(const char *dso_name, unsigned long addr,
+static int addr2line(const char *dso_name, u64 addr,
 		     char **file, unsigned int *line_nr,
 		     struct dso *dso __maybe_unused)
 {
@@ -252,7 +252,7 @@ void dso__free_a2l(struct dso *dso __maybe_unused)
  */
 #define A2L_FAIL_LIMIT 123
 
-char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym)
 {
 	char *file = NULL;
@@ -293,10 +293,10 @@ out:
 		dso__free_a2l(dso);
 	}
 	if (sym) {
-		if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "",
+		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
 					addr - sym->start) < 0)
 			return SRCLINE_UNKNOWN;
-	} else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
+	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
 		return SRCLINE_UNKNOWN;
 	return srcline;
 }
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index fa585c6..d7efb03 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -129,6 +129,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
 
 		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
 			void *tmp;
+			long offset;
 
 			if (need_swap) {
 				phdr->p_type = bswap_32(phdr->p_type);
@@ -140,12 +141,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
 				continue;
 
 			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
 			tmp = realloc(buf, buf_size);
 			if (tmp == NULL)
 				goto out_free;
 
 			buf = tmp;
-			fseek(fp, phdr->p_offset, SEEK_SET);
+			fseek(fp, offset, SEEK_SET);
 			if (fread(buf, buf_size, 1, fp) != 1)
 				goto out_free;
 
@@ -178,6 +180,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
 
 		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
 			void *tmp;
+			long offset;
 
 			if (need_swap) {
 				phdr->p_type = bswap_32(phdr->p_type);
@@ -189,12 +192,13 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
 				continue;
 
 			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
 			tmp = realloc(buf, buf_size);
 			if (tmp == NULL)
 				goto out_free;
 
 			buf = tmp;
-			fseek(fp, phdr->p_offset, SEEK_SET);
+			fseek(fp, offset, SEEK_SET);
 			if (fread(buf, buf_size, 1, fp) != 1)
 				goto out_free;
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d5eab3f3..b86744f 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -442,23 +442,6 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
 	return (unsigned long) -1;
 }
 
-int filename__read_int(const char *filename, int *value)
-{
-	char line[64];
-	int fd = open(filename, O_RDONLY), err = -1;
-
-	if (fd < 0)
-		return -1;
-
-	if (read(fd, line, sizeof(line)) > 0) {
-		*value = atoi(line);
-		err = 0;
-	}
-
-	close(fd);
-	return err;
-}
-
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
 	size_t size = 0, alloc_size = 0;
@@ -523,16 +506,9 @@ const char *get_filename_for_perf_kvm(void)
 
 int perf_event_paranoid(void)
 {
-	char path[PATH_MAX];
-	const char *procfs = procfs__mountpoint();
 	int value;
 
-	if (!procfs)
-		return INT_MAX;
-
-	scnprintf(path, PATH_MAX, "%s/sys/kernel/perf_event_paranoid", procfs);
-
-	if (filename__read_int(path, &value))
+	if (sysctl__read_int("kernel/perf_event_paranoid", &value))
 		return INT_MAX;
 
 	return value;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 419bee0..027a515 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -153,7 +153,7 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)))
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
 
 extern int prefixcmp(const char *str, const char *prefix);
-extern void set_buildid_dir(void);
+extern void set_buildid_dir(const char *dir);
 
 static inline const char *skip_prefix(const char *str, const char *prefix)
 {
@@ -269,35 +269,6 @@ void event_attr_init(struct perf_event_attr *attr);
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
-/*
- *  Determine whether some value is a power of two, where zero is
- * *not* considered a power of two.
- */
-
-static inline __attribute__((const))
-bool is_power_of_2(unsigned long n)
-{
-	return (n != 0 && ((n & (n - 1)) == 0));
-}
-
-static inline unsigned next_pow2(unsigned x)
-{
-	if (!x)
-		return 1;
-	return 1ULL << (32 - __builtin_clz(x - 1));
-}
-
-static inline unsigned long next_pow2_l(unsigned long x)
-{
-#if BITS_PER_LONG == 64
-	if (x <= (1UL << 31))
-		return next_pow2(x);
-	return (unsigned long)next_pow2(x >> 32) << 32;
-#else
-	return next_pow2(x);
-#endif
-}
-
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
@@ -339,11 +310,10 @@ static inline int path__join3(char *bf, size_t size,
 struct dso;
 struct symbol;
 
-char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym);
 void free_srcline(char *srcline);
 
-int filename__read_int(const char *filename, int *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 458d69b..75e66de 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -22,13 +22,13 @@
 
 static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 {
-	int idlestates, idlestate;
+	unsigned int idlestates, idlestate;
 	char *tmp;
 
 	printf(_ ("Analyzing CPU %d:\n"), cpu);
 
 	idlestates = sysfs_get_idlestate_count(cpu);
-	if (idlestates < 1) {
+	if (idlestates == 0) {
 		printf(_("CPU %u: No idle states\n"), cpu);
 		return;
 	}
@@ -100,10 +100,10 @@ static void cpuidle_general_output(void)
 static void proc_cpuidle_cpu_output(unsigned int cpu)
 {
 	long max_allowed_cstate = 2000000000;
-	int cstate, cstates;
+	unsigned int cstate, cstates;
 
 	cstates = sysfs_get_idlestate_count(cpu);
-	if (cstates < 1) {
+	if (cstates == 0) {
 		printf(_("CPU %u: No C-states info\n"), cpu);
 		return;
 	}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b3831f4..4e51122 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,22 +1,23 @@
 TARGETS = breakpoints
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
+TARGETS += exec
+TARGETS += firmware
+TARGETS += ftrace
 TARGETS += kcmp
 TARGETS += memfd
 TARGETS += memory-hotplug
-TARGETS += mqueue
 TARGETS += mount
+TARGETS += mqueue
 TARGETS += net
+TARGETS += powerpc
 TARGETS += ptrace
+TARGETS += size
+TARGETS += sysctl
 TARGETS += timers
-TARGETS += vm
-TARGETS += powerpc
 TARGETS += user
-TARGETS += sysctl
-TARGETS += firmware
-TARGETS += ftrace
-TARGETS += exec
-TARGETS += size
+TARGETS += vm
+#Please keep the TARGETS list alphabetically sorted
 
 TARGETS_HOTPLUG = cpu-hotplug
 TARGETS_HOTPLUG += memory-hotplug
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 1b3ff2f..5177850 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -32,11 +34,14 @@
 # define CLONE_NEWPID 0x20000000
 #endif
 
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
 #ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
+# define MS_RELATIME (1 << 21)
 #endif
 #ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
+# define MS_STRICTATIME (1 << 24)
 #endif
 
 static void die(char *fmt, ...)
@@ -48,17 +53,14 @@ static void die(char *fmt, ...)
 	exit(EXIT_FAILURE);
 }
 
-static void write_file(char *filename, char *fmt, ...)
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
 	char buf[4096];
 	int fd;
 	ssize_t written;
 	int buf_len;
-	va_list ap;
 
-	va_start(ap, fmt);
 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-	va_end(ap);
 	if (buf_len < 0) {
 		die("vsnprintf failed: %s\n",
 		    strerror(errno));
@@ -69,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...)
 
 	fd = open(filename, O_WRONLY);
 	if (fd < 0) {
+		if ((errno == ENOENT) && enoent_ok)
+			return;
 		die("open of %s failed: %s\n",
 		    filename, strerror(errno));
 	}
@@ -87,6 +91,65 @@ static void write_file(char *filename, char *fmt, ...)
 	}
 }
 
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(true, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(false, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static int read_mnt_flags(const char *path)
+{
+	int ret;
+	struct statvfs stat;
+	int mnt_flags;
+
+	ret = statvfs(path, &stat);
+	if (ret != 0) {
+		die("statvfs of %s failed: %s\n",
+			path, strerror(errno));
+	}
+	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+			ST_SYNCHRONOUS | ST_MANDLOCK)) {
+		die("Unrecognized mount flags\n");
+	}
+	mnt_flags = 0;
+	if (stat.f_flag & ST_RDONLY)
+		mnt_flags |= MS_RDONLY;
+	if (stat.f_flag & ST_NOSUID)
+		mnt_flags |= MS_NOSUID;
+	if (stat.f_flag & ST_NODEV)
+		mnt_flags |= MS_NODEV;
+	if (stat.f_flag & ST_NOEXEC)
+		mnt_flags |= MS_NOEXEC;
+	if (stat.f_flag & ST_NOATIME)
+		mnt_flags |= MS_NOATIME;
+	if (stat.f_flag & ST_NODIRATIME)
+		mnt_flags |= MS_NODIRATIME;
+	if (stat.f_flag & ST_RELATIME)
+		mnt_flags |= MS_RELATIME;
+	if (stat.f_flag & ST_SYNCHRONOUS)
+		mnt_flags |= MS_SYNCHRONOUS;
+	if (stat.f_flag & ST_MANDLOCK)
+		mnt_flags |= ST_MANDLOCK;
+
+	return mnt_flags;
+}
+
 static void create_and_enter_userns(void)
 {
 	uid_t uid;
@@ -100,13 +163,10 @@ static void create_and_enter_userns(void)
 			strerror(errno));
 	}
 
+	maybe_write_file("/proc/self/setgroups", "deny");
 	write_file("/proc/self/uid_map", "0 %d 1", uid);
 	write_file("/proc/self/gid_map", "0 %d 1", gid);
 
-	if (setgroups(0, NULL) != 0) {
-		die("setgroups failed: %s\n",
-			strerror(errno));
-	}
 	if (setgid(0) != 0) {
 		die ("setgid(0) failed %s\n",
 			strerror(errno));
@@ -118,7 +178,8 @@ static void create_and_enter_userns(void)
 }
 
 static
-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+			 int mount_flags, int remount_flags, int invalid_flags)
 {
 	pid_t child;
 
@@ -151,9 +212,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 			strerror(errno));
 	}
 
-	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
-		die("mount of /tmp failed: %s\n",
-			strerror(errno));
+	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+		die("mount of %s with options '%s' on /tmp failed: %s\n",
+		    fstype,
+		    mount_options? mount_options : "",
+		    strerror(errno));
 	}
 
 	create_and_enter_userns();
@@ -181,62 +244,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
 
 static bool test_unpriv_remount_simple(int mount_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, 0);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
 }
 
 static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
 {
-	return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+				   invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+	pid_t child;
+	int ret;
+	const char *orig_path = "/dev";
+	const char *dest_path = "/tmp";
+	int orig_mnt_flags, remount_mnt_flags;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+			strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+				strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+				child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	orig_mnt_flags = read_mnt_flags(orig_path);
+
+	create_and_enter_userns();
+	ret = unshare(CLONE_NEWNS);
+	if (ret != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+	if (ret != 0) {
+		die("recursive bind mount of %s onto %s failed: %s\n",
+			orig_path, dest_path, strerror(errno));
+	}
+
+	ret = mount(dest_path, dest_path, "none",
+		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+	if (ret != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	remount_mnt_flags = read_mnt_flags(dest_path);
+	if (orig_mnt_flags != remount_mnt_flags) {
+		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+			dest_path, orig_path);
+	}
+	exit(EXIT_SUCCESS);
 }
 
 int main(int argc, char **argv)
 {
-	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_RDONLY)) {
 		die("MS_RDONLY malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NODEV)) {
+	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
 		die("MS_NODEV malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOSUID)) {
 		die("MS_NOSUID malfunctions\n");
 	}
-	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
+	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
 		die("MS_NOEXEC malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME,
+				       MS_NOATIME))
 	{
 		die("MS_RELATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME,
+				       MS_NOATIME))
 	{
 		die("MS_STRICTATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+				       MS_NOATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
-				       MS_STRICTATIME|MS_NODEV))
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+				       MS_STRICTATIME))
 	{
-		die("MS_RELATIME malfunctions\n");
+		die("MS_NOATIME|MS_DIRATIME malfunctions\n");
 	}
-	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
-				 MS_NOATIME|MS_NODEV))
+	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
 	{
 		die("Default atime malfunctions\n");
 	}
+	if (!test_priv_mount_unpriv_remount()) {
+		die("Mount flags unexpectedly changed after remount\n");
+	}
 	return EXIT_SUCCESS;
 }
diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
index dfe4548..1c12536 100644
--- a/tools/thermal/tmon/sysfs.c
+++ b/tools/thermal/tmon/sysfs.c
@@ -446,7 +446,7 @@ int probe_thermal_sysfs(void)
 		return -1;
 	}
 
-	ptdata.tzi = calloc(sizeof(struct tz_info), ptdata.max_tz_instance+1);
+	ptdata.tzi = calloc(ptdata.max_tz_instance+1, sizeof(struct tz_info));
 	if (!ptdata.tzi) {
 		fprintf(stderr, "Err: allocate tz_info\n");
 		return -1;
@@ -454,8 +454,8 @@ int probe_thermal_sysfs(void)
 
 	/* we still show thermal zone information if there is no cdev */
 	if (ptdata.nr_cooling_dev) {
-		ptdata.cdi = calloc(sizeof(struct cdev_info),
-				ptdata.max_cdev_instance + 1);
+		ptdata.cdi = calloc(ptdata.max_cdev_instance + 1,
+				sizeof(struct cdev_info));
 		if (!ptdata.cdi) {
 			free(ptdata.tzi);
 			fprintf(stderr, "Err: allocate cdev_info\n");
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 9325f46..505ad51 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -3,7 +3,7 @@ test: virtio_test vringh_test
 virtio_test: virtio_ring.o virtio_test.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
 
-CFLAGS += -g -O2 -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
 vpath %.c ../../drivers/virtio ../../drivers/vhost
 mod:
 	${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 8eb6421..a3e0701 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -6,6 +6,7 @@
 /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
 #define list_add_tail(a, b) do {} while (0)
 #define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
 /* end of stubs */
 
 struct virtio_device {
diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h
new file mode 100644
index 0000000..9de9e6a
--- /dev/null
+++ b/tools/virtio/linux/virtio_byteorder.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H
+#define _LINUX_VIRTIO_BYTEORDER_STUB_H
+
+#include <asm/byteorder.h>
+#include "../../include/linux/byteorder/generic.h"
+#include "../../include/linux/virtio_byteorder.h"
+
+#endif
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 83b27e8..806d683 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -1,6 +1,72 @@
-#define VIRTIO_TRANSPORT_F_START	28
-#define VIRTIO_TRANSPORT_F_END		32
+#include <linux/virtio_byteorder.h>
+#include <linux/virtio.h>
+#include <uapi/linux/virtio_config.h>
+
+/*
+ * __virtio_test_bit - helper to test feature bits. For use by transports.
+ *                     Devices should normally use virtio_has_feature,
+ *                     which includes more checks.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool __virtio_test_bit(const struct virtio_device *vdev,
+				     unsigned int fbit)
+{
+	return vdev->features & (1ULL << fbit);
+}
+
+/**
+ * __virtio_set_bit - helper to set feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_set_bit(struct virtio_device *vdev,
+				    unsigned int fbit)
+{
+	vdev->features |= (1ULL << fbit);
+}
+
+/**
+ * __virtio_clear_bit - helper to clear feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_clear_bit(struct virtio_device *vdev,
+				      unsigned int fbit)
+{
+	vdev->features &= ~(1ULL << fbit);
+}
 
 #define virtio_has_feature(dev, feature) \
 	(__virtio_test_bit((dev), feature))
 
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+	return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+	return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+	return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+	return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+	return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+	return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h
new file mode 100644
index 0000000..e7a1096
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_types.h
@@ -0,0 +1 @@
+#include "../../include/uapi/linux/virtio_types.h"
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index db3437c..e044589 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -11,6 +11,7 @@
 #include <sys/types.h>
 #include <fcntl.h>
 #include <stdbool.h>
+#include <linux/virtio_types.h>
 #include <linux/vhost.h>
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
@@ -227,6 +228,14 @@ const struct option longopts[] = {
 		.val = 'i',
 	},
 	{
+		.name = "virtio-1",
+		.val = '1',
+	},
+	{
+		.name = "no-virtio-1",
+		.val = '0',
+	},
+	{
 		.name = "delayed-interrupt",
 		.val = 'D',
 	},
@@ -243,6 +252,7 @@ static void help(void)
 	fprintf(stderr, "Usage: virtio_test [--help]"
 		" [--no-indirect]"
 		" [--no-event-idx]"
+		" [--no-virtio-1]"
 		" [--delayed-interrupt]"
 		"\n");
 }
@@ -251,7 +261,7 @@ int main(int argc, char **argv)
 {
 	struct vdev_info dev;
 	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-		(1ULL << VIRTIO_RING_F_EVENT_IDX);
+		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
 	int o;
 	bool delayed = false;
 
@@ -272,6 +282,9 @@ int main(int argc, char **argv)
 		case 'i':
 			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
 			break;
+		case '0':
+			features &= ~(1ULL << VIRTIO_F_VERSION_1);
+			break;
 		case 'D':
 			delayed = true;
 			break;
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 9d4b1bc..5f94f51 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c
@@ -7,6 +7,7 @@
 #include <linux/virtio.h>
 #include <linux/vringh.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/uaccess.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -131,7 +132,7 @@ static inline int vringh_get_head(struct vringh *vrh, u16 *head)
 	return 1;
 }
 
-static int parallel_test(unsigned long features,
+static int parallel_test(u64 features,
 			 bool (*getrange)(struct vringh *vrh,
 					  u64 addr, struct vringh_range *r),
 			 bool fast_vringh)
@@ -456,6 +457,8 @@ int main(int argc, char *argv[])
 			__virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
 		else if (strcmp(argv[1], "--eventidx") == 0)
 			__virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+		else if (strcmp(argv[1], "--virtio-1") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
 		else if (strcmp(argv[1], "--slow-range") == 0)
 			getrange = getrange_slow;
 		else if (strcmp(argv[1], "--fast-vringh") == 0)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819..1c0772b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 
 static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
+	int ret;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    timer->irq->irq,
-			    timer->irq->level);
+	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+				  timer->irq->irq,
+				  timer->irq->level);
+	WARN_ON(ret);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 	timer_disarm(timer);
 }
 
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
 {
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+	if (kvm->arch.timer.enabled)
+		return;
+
+	/*
+	 * There is a potential race here between VCPUs starting for the first
+	 * time, which may be enabling the timer multiple times.  That doesn't
+	 * hurt though, because we're just setting a variable to the same
+	 * variable that it already was.  The important thing is that all
+	 * VCPUs have the enabled variable set, before entering the guest, if
+	 * the arch timers are enabled.
+	 */
+	if (timecounter && wqueue)
 		kvm->arch.timer.enabled = 1;
-	}
+}
 
-	return 0;
+void kvm_timer_init(struct kvm *kvm)
+{
+	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 }
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index aacdb59..03affc7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -91,6 +91,7 @@
 #define ACCESS_WRITE_VALUE	(3 << 1)
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
+static int vgic_init(struct kvm *kvm);
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	}
 }
 
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				  unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 			vgic_dist_irq_clear_level(vcpu, irq_num);
 			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
 				vgic_dist_irq_clear_pending(vcpu, irq_num);
-		} else {
-			vgic_dist_irq_clear_pending(vcpu, irq_num);
 		}
+
+		ret = false;
+		goto out;
 	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret;
+	return ret ? cpuid : -EINVAL;
 }
 
 /**
@@ -1692,11 +1694,26 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (likely(vgic_initialized(kvm)) &&
-	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
+	int ret = 0;
+	int vcpu_id;
 
-	return 0;
+	if (unlikely(!vgic_initialized(kvm))) {
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+
+		if (ret)
+			goto out;
+	}
+
+	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+	if (vcpu_id >= 0) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+	}
+
+out:
+	return ret;
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 
 	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	for (i = 0; i < dist->nr_irqs; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
+	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
 
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 */
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
-	vgic_enable(vcpu);
+	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
 	dist->irq_pending_on_cpu = NULL;
+	dist->nr_cpus = 0;
 }
 
 /*
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
 	int nr_cpus, nr_irqs;
-	int ret, i;
+	int ret, i, vcpu_id;
 
-	if (dist->nr_cpus)	/* Already allocated */
+	if (vgic_initialized(kvm))
 		return 0;
 
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
 	if (ret)
 		goto out;
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
 		if (ret) {
 			kvm_err("VGIC: Failed to allocate vcpu memory\n");
 			break;
 		}
-	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+		for (i = 0; i < dist->nr_irqs; i++) {
+			if (i < VGIC_NR_PPIS)
+				vgic_bitmap_set_irq_val(&dist->irq_enabled,
+							vcpu->vcpu_id, i, 1);
+			if (i < VGIC_NR_PRIVATE_IRQS)
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_EDGE);
+		}
+
+		vgic_enable(vcpu);
+	}
 
 out:
 	if (ret)
@@ -1878,25 +1883,23 @@ out:
 }
 
 /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
  *
  * Map the virtual CPU interface into the VM before running any VCPUs.  We
  * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space.  Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
  */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu;
-	int ret = 0, i;
+	int ret = 0;
 
 	if (!irqchip_in_kernel(kvm))
 		return 0;
 
 	mutex_lock(&kvm->lock);
 
-	if (vgic_initialized(kvm))
+	if (vgic_ready(kvm))
 		goto out;
 
 	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	ret = vgic_init_maps(kvm);
+	/*
+	 * Initialize the vgic if this hasn't already been done on demand by
+	 * accessing the vgic state from userspace.
+	 */
+	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to allocate maps\n");
 		goto out;
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
-
 	kvm->arch.vgic.ready = true;
 out:
 	if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
-	ret = vgic_init_maps(dev->kvm);
+	ret = vgic_init(dev->kvm);
 	if (ret)
 		goto out;
 
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		mutex_lock(&dev->kvm->lock);
 
-		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
 			ret = -EBUSY;
 		else
 			dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b0fb390..148b239 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,9 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
 	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 #endif
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3cee7b1..f528343 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return -EINTR;
-	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-		/* The thread running this VCPU changed. */
-		struct pid *oldpid = vcpu->pid;
-		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-		rcu_assign_pointer(vcpu->pid, newpid);
-		if (oldpid)
-			synchronize_rcu();
-		put_pid(oldpid);
-	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err_no_disable;
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }
 
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-	struct kvm_memory_slot *s1, *s2;
-
-	s1 = (struct kvm_memory_slot *)slot1;
-	s2 = (struct kvm_memory_slot *)slot2;
-
-	if (s1->npages < s2->npages)
-		return 1;
-	if (s1->npages > s2->npages)
-		return -1;
-
-	return 0;
-}
-
 /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert memslot and re-sort memslots based on their GFN,
+ * so binary search could be used to lookup GFN.
+ * Sorting algorithm takes advantage of having initially
+ * sorted array and known changed memslot position.
  */
-static void sort_memslots(struct kvm_memslots *slots)
-{
-	int i;
-
-	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-	      sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[slots->memslots[i].id] = i;
-}
-
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *new)
 {
-	if (new) {
-		int id = new->id;
-		struct kvm_memory_slot *old = id_to_memslot(slots, id);
-		unsigned long npages = old->npages;
+	int id = new->id;
+	int i = slots->id_to_index[id];
+	struct kvm_memory_slot *mslots = slots->memslots;
 
-		*old = *new;
-		if (new->npages != npages)
-			sort_memslots(slots);
+	WARN_ON(mslots[i].id != id);
+	if (!new->npages) {
+		new->base_gfn = 0;
+		if (mslots[i].npages)
+			slots->used_slots--;
+	} else {
+		if (!mslots[i].npages)
+			slots->used_slots++;
 	}
+
+	while (i < KVM_MEM_SLOTS_NUM - 1 &&
+	       new->base_gfn <= mslots[i + 1].base_gfn) {
+		if (!mslots[i + 1].npages)
+			break;
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i++;
+	}
+	while (i > 0 &&
+	       new->base_gfn > mslots[i - 1].base_gfn) {
+		mslots[i] = mslots[i - 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i--;
+	}
+
+	mslots[i] = *new;
+	slots->id_to_index[mslots[i].id] = i;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-		struct kvm_memslots *slots, struct kvm_memory_slot *new)
+		struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	WARN_ON(old_memslots->generation & 1);
 	slots->generation = old_memslots->generation + 1;
 
-	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem)
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 	}
 
+	slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+			GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
 		slot = id_to_memslot(slots, mem->slot);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
-		old_memslots = install_new_memslots(kvm, slots, NULL);
+		old_memslots = install_new_memslots(kvm, slots);
 
 		/* slot was deleted or moved, clear iommu mapping */
 		kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		 * 	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/*
+		 * We can re-use the old_memslots from above, the only difference
+		 * from the currently installed memslots is the invalid flag.  This
+		 * will get overwritten by update_memslots anyway.
+		 */
 		slots = old_memslots;
 	}
 
@@ -893,26 +885,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (r)
 		goto out_slots;
 
-	r = -ENOMEM;
-	/*
-	 * We can re-use the old_memslots from above, the only difference
-	 * from the currently installed memslots is the invalid flag.  This
-	 * will get overwritten by update_memslots anyway.
-	 */
-	if (!slots) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
-	}
-
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (change == KVM_MR_DELETE) {
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
 
-	old_memslots = install_new_memslots(kvm, slots, &new);
+	update_memslots(slots, &new);
+	old_memslots = install_new_memslots(kvm, slots);
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
@@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 	rcu_read_unlock();
 	if (!task)
 		return ret;
-	if (task->flags & PF_VCPU) {
-		put_task_struct(task);
-		return ret;
-	}
 	ret = yield_to(task, 1);
 	put_task_struct(task);
 
@@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 		if (arg)
 			goto out;
+		if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+			/* The thread running this VCPU changed. */
+			struct pid *oldpid = vcpu->pid;
+			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+			rcu_assign_pointer(vcpu->pid, newpid);
+			if (oldpid)
+				synchronize_rcu();
+			put_pid(oldpid);
+		}
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
@@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-		if (r == -ENOTTY)
-			r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;