From 5070437cd99511f69ae561f2ab417142a47a85ec Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 21 Oct 2010 17:55:01 +0200
Subject: power_supply: Add gpio charger driver

This patch adds a simple driver for chargers indicating their online
status through a GPIO pin.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/power/gpio-charger.h | 41 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 include/linux/power/gpio-charger.h

(limited to 'include')

diff --git a/include/linux/power/gpio-charger.h b/include/linux/power/gpio-charger.h
new file mode 100644
index 0000000..de1dfe0
--- /dev/null
+++ b/include/linux/power/gpio-charger.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under  the terms of the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LINUX_POWER_GPIO_CHARGER_H__
+#define __LINUX_POWER_GPIO_CHARGER_H__
+
+#include <linux/power_supply.h>
+#include <linux/types.h>
+
+/**
+ * struct gpio_charger_platform_data - platform_data for gpio_charger devices
+ * @name:		Name for the chargers power_supply device
+ * @type:		Type of the charger
+ * @gpio:		GPIO which is used to indicate the chargers status
+ * @gpio_active_low:	Should be set to 1 if the GPIO is active low otherwise 0
+ * @supplied_to:	Array of battery names to which this chargers supplies power
+ * @num_supplicants:	Number of entries in the supplied_to array
+ */
+struct gpio_charger_platform_data {
+	const char *name;
+	enum power_supply_type type;
+
+	int gpio;
+	int gpio_active_low;
+
+	char **supplied_to;
+	size_t num_supplicants;
+};
+
+#endif
-- 
cgit v1.1


From 5b275ce27077d6463ca28c9671dce7c2c1f622e2 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 11 Nov 2010 15:27:29 +0000
Subject: thermal: make ops constant

And while touching that function definition do something about the disaster
of formatting there.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/thermal.h | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 1de8b9eb..0662690 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -77,7 +77,7 @@ struct thermal_cooling_device {
 	char type[THERMAL_NAME_LENGTH];
 	struct device device;
 	void *devdata;
-	struct thermal_cooling_device_ops *ops;
+	const struct thermal_cooling_device_ops *ops;
 	struct list_head node;
 };
 
@@ -114,7 +114,7 @@ struct thermal_zone_device {
 	int last_temperature;
 	bool passive;
 	unsigned int forced_passive;
-	struct thermal_zone_device_ops *ops;
+	const struct thermal_zone_device_ops *ops;
 	struct list_head cooling_devices;
 	struct idr idr;
 	struct mutex lock;	/* protect cooling devices list */
@@ -129,11 +129,8 @@ struct thermal_zone_device {
 };
 
 struct thermal_zone_device *thermal_zone_device_register(char *, int, void *,
-							 struct
-							 thermal_zone_device_ops
-							 *, int tc1, int tc2,
-							 int passive_freq,
-							 int polling_freq);
+		const struct thermal_zone_device_ops *, int tc1, int tc2,
+		int passive_freq, int polling_freq);
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,
@@ -142,9 +139,7 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
 				       struct thermal_cooling_device *);
 void thermal_zone_device_update(struct thermal_zone_device *);
 struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
-							       struct
-							       thermal_cooling_device_ops
-							       *);
+		const struct thermal_cooling_device_ops *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 
 #endif /* __THERMAL_H__ */
-- 
cgit v1.1


From 3e3198f1adda8e0fbd499bde806781237d6c841f Mon Sep 17 00:00:00 2001
From: Roman Tereshonkov <roman.tereshonkov@nokia.com>
Date: Wed, 3 Nov 2010 12:55:19 +0200
Subject: mtd: onenand: add option and variable for cache program feature

A new option is added for cache program feature. A new variable
ongoing is introduced for onenand_chip to handle the multi-command
cache program operation.

Signed-off-by: Roman Tereshonkov <roman.tereshonkov@nokia.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/onenand.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 0c8815b..6da3fe3 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -137,6 +137,14 @@ struct onenand_chip {
 	void			*bbm;
 
 	void			*priv;
+
+	/*
+	 * Shows that the current operation is composed
+	 * of sequence of commands. For example, cache program.
+	 * Such command status OnGo bit is checked at the end of
+	 * sequence.
+	 */
+	unsigned int		ongoing;
 };
 
 /*
@@ -171,6 +179,9 @@ struct onenand_chip {
 #define ONENAND_IS_2PLANE(this)			(0)
 #endif
 
+#define ONENAND_IS_CACHE_PROGRAM(this)					\
+	(this->options & ONENAND_HAS_CACHE_PROGRAM)
+
 /* Check byte access in OneNAND */
 #define ONENAND_CHECK_BYTE_ACCESS(addr)		(addr & 0x1)
 
@@ -181,6 +192,7 @@ struct onenand_chip {
 #define ONENAND_HAS_UNLOCK_ALL		(0x0002)
 #define ONENAND_HAS_2PLANE		(0x0004)
 #define ONENAND_HAS_4KB_PAGE		(0x0008)
+#define ONENAND_HAS_CACHE_PROGRAM	(0x0010)
 #define ONENAND_SKIP_UNLOCK_CHECK	(0x0100)
 #define ONENAND_PAGEBUF_ALLOC		(0x1000)
 #define ONENAND_OOBBUF_ALLOC		(0x2000)
-- 
cgit v1.1


From 65ab220c30bc2120eaa39753cadec68616e3f906 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Mon, 8 Nov 2010 11:17:54 +0530
Subject: mtd: fsmc.h: including linux/io.h for definition of readl

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/fsmc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index 5d25567..e210b87 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -16,6 +16,7 @@
 #ifndef __MTD_FSMC_H
 #define __MTD_FSMC_H
 
+#include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/physmap.h>
 #include <linux/types.h>
-- 
cgit v1.1


From cc31822250236ec173bb2aa149ebe2ba35405db2 Mon Sep 17 00:00:00 2001
From: Guillaume LECERF <glecerf@gmail.com>
Date: Wed, 17 Nov 2010 12:35:50 +0100
Subject: mtd: cfi_fixup: remove unused 'param' parameter

The 'param' parameter has never been used since its introduction, so
simply remove it.

Signed-off-by: Guillaume LECERF <glecerf@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/cfi.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 4dd0c2c..a9baee6 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -527,8 +527,7 @@ struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t s
 struct cfi_fixup {
 	uint16_t mfr;
 	uint16_t id;
-	void (*fixup)(struct mtd_info *mtd, void* param);
-	void* param;
+	void (*fixup)(struct mtd_info *mtd);
 };
 
 #define CFI_MFR_ANY		0xFFFF
-- 
cgit v1.1


From 1534b8b09757190ce6e97fa97f9ad77c49082cd8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 18 Nov 2010 15:02:21 -0800
Subject: mtd: fix nand kernel-doc warnings

Warning(include/linux/mtd/nand.h:543): No description found for parameter 'badblockbits'
Warning(drivers/mtd/nand/nand_bbt.c:1101): No description found for parameter 'mtd'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc:	David Woodhouse <dwmw2@infradead.org>
Cc:	linux-mtd@lists.infradead.org
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 63e17d0..1f489b2 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -448,6 +448,8 @@ struct nand_buffers {
  *			See the defines for further explanation.
  * @badblockpos:	[INTERN] position of the bad block marker in the oob
  *			area.
+ * @badblockbits:	[INTERN] number of bits to left-shift the bad block
+ *			number
  * @cellinfo:		[INTERN] MLC/multichip data from chip ident
  * @numchips:		[INTERN] number of physical chips
  * @chipsize:		[INTERN] the size of one chip for multichip arrays
-- 
cgit v1.1


From a7e93dcd9aacb3ef4acfcc4310577f3ae0741821 Mon Sep 17 00:00:00 2001
From: Roman Tereshonkov <roman.tereshonkov@nokia.com>
Date: Tue, 23 Nov 2010 14:17:17 +0200
Subject: mtd: fix master device identification for mtd repartition

Function mtd_has_master renamed as mtd_is_partition to follow the function logic.
The patch fixes the problem of checking the right mtd device for partition creation.
To delete partition checking is not needed here so as it is done in mtd_del_partition.
By master we consider the mtd device which does not belong to any partition.

Signed-off-by: Roman Tereshonkov <roman.tereshonkov@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/partitions.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 2b54316..4a0a8ba 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -89,7 +89,7 @@ static inline int mtd_has_cmdlinepart(void) { return 1; }
 static inline int mtd_has_cmdlinepart(void) { return 0; }
 #endif
 
-int mtd_is_master(struct mtd_info *mtd);
+int mtd_is_partition(struct mtd_info *mtd);
 int mtd_add_partition(struct mtd_info *master, char *name,
 		      long long offset, long long length);
 int mtd_del_partition(struct mtd_info *master, int partno);
-- 
cgit v1.1


From 593cd8711221c9661dbf9beb2fb42fecca03e693 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 29 Nov 2010 13:52:19 +0100
Subject: mtd: FSMC NAND use the PrimeCell identifier macros

The FSMC actually has a standard ARM PrimeCell ID register, and
the "revision" part of that register contains the thing that
the code is looking at. Reuse the infrastructure from the AMBA
bus abstraction and rid local defines.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/fsmc.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index e210b87..96e8e67 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -115,25 +115,6 @@ struct fsmc_regs {
 #define FSMC_THOLD_4		(4 << 16)
 #define FSMC_THIZ_1		(1 << 24)
 
-/* peripid2 register definitions */
-#define FSMC_REVISION_MSK	(0xf)
-#define FSMC_REVISION_SHFT	(0x4)
-
-#define FSMC_VER1		1
-#define FSMC_VER2		2
-#define FSMC_VER3		3
-#define FSMC_VER4		4
-#define FSMC_VER5		5
-#define FSMC_VER6		6
-#define FSMC_VER7		7
-#define FSMC_VER8		8
-
-static inline uint32_t get_fsmc_version(struct fsmc_regs *regs)
-{
-	return (readl(&regs->peripid2) >> FSMC_REVISION_SHFT) &
-				FSMC_REVISION_MSK;
-}
-
 /*
  * There are 13 bytes of ecc for every 512 byte block in FSMC version 8
  * and it has to be read consecutively and immediately after the 512
-- 
cgit v1.1


From b5602e86432aaf0cc90dd207bf74e3a2bfb5078b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Mon, 29 Nov 2010 13:52:27 +0100
Subject: mtd: FSMC NAND fix obvious speling errors

Fix spelling in the interface file.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/fsmc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index 96e8e67..6987995 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -28,7 +28,7 @@
 
 /*
  * The placement of the Command Latch Enable (CLE) and
- * Address Latch Enable (ALE) is twised around in the
+ * Address Latch Enable (ALE) is twisted around in the
  * SPEAR310 implementation.
  */
 #if defined(CONFIG_MACH_SPEAR310)
@@ -63,7 +63,7 @@ struct fsmc_nor_bank_regs {
 
 /* ctrl_tim register definitions */
 
-struct fsms_nand_bank_regs {
+struct fsmc_nand_bank_regs {
 	uint32_t pc;
 	uint32_t sts;
 	uint32_t comm;
@@ -79,7 +79,7 @@ struct fsms_nand_bank_regs {
 struct fsmc_regs {
 	struct fsmc_nor_bank_regs nor_bank_regs[FSMC_MAX_NOR_BANKS];
 	uint8_t reserved_1[0x40 - 0x20];
-	struct fsms_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS];
+	struct fsmc_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS];
 	uint8_t reserved_2[0xfe0 - 0xc0];
 	uint32_t peripid0;			/* 0xfe0 */
 	uint32_t peripid1;			/* 0xfe4 */
-- 
cgit v1.1


From c9aa308fd5c373faeda588cfb02b04f116904613 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 7 Dec 2010 10:22:29 +0800
Subject: Add CPER PCIe error section structure and constants definition

On some machine, PCIe error is reported via APEI (ACPI Platform Error
Interface).  The error data is passed from firmware to Linux via CPER
PCIe error section structure.

This patch adds CPER PCIe error section structure and constants
definition.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/cper.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 82 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/cper.h b/include/linux/cper.h
index bf972f8..3104aaf 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -39,10 +39,12 @@
  * Severity difinition for error_severity in struct cper_record_header
  * and section_severity in struct cper_section_descriptor
  */
-#define CPER_SEV_RECOVERABLE			0x0
-#define CPER_SEV_FATAL				0x1
-#define CPER_SEV_CORRECTED			0x2
-#define CPER_SEV_INFORMATIONAL			0x3
+enum {
+	CPER_SEV_RECOVERABLE,
+	CPER_SEV_FATAL,
+	CPER_SEV_CORRECTED,
+	CPER_SEV_INFORMATIONAL,
+};
 
 /*
  * Validation bits difinition for validation_bits in struct
@@ -201,6 +203,47 @@
 	UUID_LE(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F,	\
 		0xDF, 0xAA, 0x84, 0xEC)
 
+#define CPER_PROC_VALID_TYPE			0x0001
+#define CPER_PROC_VALID_ISA			0x0002
+#define CPER_PROC_VALID_ERROR_TYPE		0x0004
+#define CPER_PROC_VALID_OPERATION		0x0008
+#define CPER_PROC_VALID_FLAGS			0x0010
+#define CPER_PROC_VALID_LEVEL			0x0020
+#define CPER_PROC_VALID_VERSION			0x0040
+#define CPER_PROC_VALID_BRAND_INFO		0x0080
+#define CPER_PROC_VALID_ID			0x0100
+#define CPER_PROC_VALID_TARGET_ADDRESS		0x0200
+#define CPER_PROC_VALID_REQUESTOR_ID		0x0400
+#define CPER_PROC_VALID_RESPONDER_ID		0x0800
+#define CPER_PROC_VALID_IP			0x1000
+
+#define CPER_MEM_VALID_ERROR_STATUS		0x0001
+#define CPER_MEM_VALID_PHYSICAL_ADDRESS		0x0002
+#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK	0x0004
+#define CPER_MEM_VALID_NODE			0x0008
+#define CPER_MEM_VALID_CARD			0x0010
+#define CPER_MEM_VALID_MODULE			0x0020
+#define CPER_MEM_VALID_BANK			0x0040
+#define CPER_MEM_VALID_DEVICE			0x0080
+#define CPER_MEM_VALID_ROW			0x0100
+#define CPER_MEM_VALID_COLUMN			0x0200
+#define CPER_MEM_VALID_BIT_POSITION		0x0400
+#define CPER_MEM_VALID_REQUESTOR_ID		0x0800
+#define CPER_MEM_VALID_RESPONDER_ID		0x1000
+#define CPER_MEM_VALID_TARGET_ID		0x2000
+#define CPER_MEM_VALID_ERROR_TYPE		0x4000
+
+#define CPER_PCIE_VALID_PORT_TYPE		0x0001
+#define CPER_PCIE_VALID_VERSION			0x0002
+#define CPER_PCIE_VALID_COMMAND_STATUS		0x0004
+#define CPER_PCIE_VALID_DEVICE_ID		0x0008
+#define CPER_PCIE_VALID_SERIAL_NUMBER		0x0010
+#define CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS	0x0020
+#define CPER_PCIE_VALID_CAPABILITY		0x0040
+#define CPER_PCIE_VALID_AER_INFO		0x0080
+
+#define CPER_PCIE_SLOT_SHIFT			3
+
 /*
  * All tables and structs must be byte-packed to match CPER
  * specification, since the tables are provided by the system BIOS
@@ -306,6 +349,41 @@ struct cper_sec_mem_err {
 	__u8	error_type;
 };
 
+struct cper_sec_pcie {
+	__u64		validation_bits;
+	__u32		port_type;
+	struct {
+		__u8	minor;
+		__u8	major;
+		__u8	reserved[2];
+	}		version;
+	__u16		command;
+	__u16		status;
+	__u32		reserved;
+	struct {
+		__u16	vendor_id;
+		__u16	device_id;
+		__u8	class_code[3];
+		__u8	function;
+		__u8	device;
+		__u16	segment;
+		__u8	bus;
+		__u8	secondary_bus;
+		__u16	slot;
+		__u8	reserved;
+	}		device_id;
+	struct {
+		__u32	lower;
+		__u32	upper;
+	}		serial_number;
+	struct {
+		__u16	secondary_status;
+		__u16	control;
+	}		bridge;
+	__u8	capability[60];
+	__u8	aer_info[96];
+};
+
 /* Reset to default packing */
 #pragma pack()
 
-- 
cgit v1.1


From 16f4232ce4d6855361b4eb56262f4a202295c978 Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Wed, 8 Dec 2010 10:10:16 +0800
Subject: IPMI: Add one interface to get more info of low-level IPMI device

The IPMI smi_watcher will be used to catch the IPMI interface as they
come or go.  In order to communicate with the correct IPMI device, it
should be confirmed whether it is what we wanted especially on the
system with multiple IPMI devices. But the new_smi callback function
of smi_watcher provides very limited info(only the interface number
and dev pointer) and there is no detailed info about the low level
interface. For example: which mechansim registers the IPMI
interface(ACPI, PCI, DMI and so on).

This is to add one interface that can get more info of low-level IPMI
device. For example: the ACPI device handle will be returned for the
pnp_acpi IPMI device.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/ipmi.h     | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/ipmi_smi.h |  8 ++++++++
 2 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 65aae34..045f2f2 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -454,6 +454,44 @@ unsigned int ipmi_addr_length(int addr_type);
 /* Validate that the given IPMI address is valid. */
 int ipmi_validate_addr(struct ipmi_addr *addr, int len);
 
+/*
+ * How did the IPMI driver find out about the device?
+ */
+enum ipmi_addr_src {
+	SI_INVALID = 0, SI_HOTMOD, SI_HARDCODED, SI_SPMI, SI_ACPI, SI_SMBIOS,
+	SI_PCI,	SI_DEVICETREE, SI_DEFAULT
+};
+
+union ipmi_smi_info_union {
+	/*
+	 * the acpi_info element is defined for the SI_ACPI
+	 * address type
+	 */
+	struct {
+		void *acpi_handle;
+	} acpi_info;
+};
+
+struct ipmi_smi_info {
+	enum ipmi_addr_src addr_src;
+
+	/*
+	 * Base device for the interface.  Don't forget to put this when
+	 * you are done.
+	 */
+	struct device *dev;
+
+	/*
+	 * The addr_info provides more detailed info for some IPMI
+	 * devices, depending on the addr_src.  Currently only SI_ACPI
+	 * info is provided.
+	 */
+	union ipmi_smi_info_union addr_info;
+};
+
+/* This is to get the private info of ipmi_smi_t */
+extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data);
+
 #endif /* __KERNEL__ */
 
 
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index 4b48318..906590a 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -39,6 +39,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
+#include <linux/ipmi.h>
 
 /* This files describes the interface for IPMI system management interface
    drivers to bind into the IPMI message handler. */
@@ -86,6 +87,13 @@ struct ipmi_smi_handlers {
 	int (*start_processing)(void       *send_info,
 				ipmi_smi_t new_intf);
 
+	/*
+	 * Get the detailed private info of the low level interface and store
+	 * it into the structure of ipmi_smi_data. For example: the
+	 * ACPI device handle will be returned for the pnp_acpi IPMI device.
+	 */
+	int (*get_smi_info)(void *send_info, struct ipmi_smi_info *data);
+
 	/* Called to enqueue an SMI message to be sent.  This
 	   operation is not allowed to fail.  If an error occurs, it
 	   should report back the error in a received message.  It may
-- 
cgit v1.1


From 7c96aef75949a56ec427fc6a2522dace2af33605 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 15 Nov 2010 11:27:01 +1100
Subject: sunrpc: remove xpt_pool

The xpt_pool field is only used for reporting BUGs.
And it isn't used correctly.

In particular, when it is cleared in svc_xprt_received before
XPT_BUSY is cleared, there is no guarantee that either the
compiler or the CPU might not re-order to two assignments, just
setting xpt_pool to NULL after XPT_BUSY is cleared.

If a different cpu were running svc_xprt_enqueue at this moment,
it might see XPT_BUSY clear and then xpt_pool non-NULL, and
so BUG.

This could be fixed by calling
  smp_mb__before_clear_bit()
before the clear_bit.  However as xpt_pool isn't really used,
it seems safest to simply remove xpt_pool.

Another alternate would be to change the clear_bit to
clear_bit_unlock, and the test_and_set_bit to test_and_set_bit_lock.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index aea0d438..c8f81da 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -63,7 +63,6 @@ struct svc_xprt {
 #define XPT_LISTENER	11		/* listening endpoint */
 #define XPT_CACHE_AUTH	12		/* cache auth info */
 
-	struct svc_pool		*xpt_pool;	/* current pool iff queued */
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
 	struct mutex		xpt_mutex;	/* to serialize sending data */
-- 
cgit v1.1


From 04f4ad16b231abbfde34c762697ad035a3af0b5f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 16 Dec 2010 09:51:13 -0500
Subject: nfsd4: implement secinfo_no_name

Implementation of this operation is mandatory for NFSv4.1.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfs4.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 4925b22..26afa30 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -136,6 +136,9 @@
 #define SEQ4_STATUS_CB_PATH_DOWN_SESSION	0x00000200
 #define SEQ4_STATUS_BACKCHANNEL_FAULT		0x00000400
 
+#define NFS4_SECINFO_STYLE4_CURRENT_FH	0
+#define NFS4_SECINFO_STYLE4_PARENT	1
+
 #define NFS4_MAX_UINT64	(~(u64)0)
 
 /* An NFS4 sessions server must support at least NFS4_MAX_OPS operations.
-- 
cgit v1.1


From c66ae9bb4dcaac78cc5e30d0ce7ff2bf3dcb48d9 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Mon, 13 Dec 2010 12:26:21 +0200
Subject: s3c_adc_battery: Add gpio_inverted field to pdata

Add support for inverted gpio_charge_finished values.
This change is necessary for H1940 support.

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/s3c_adc_battery.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h
index dbce22f..fbe58b7 100644
--- a/include/linux/s3c_adc_battery.h
+++ b/include/linux/s3c_adc_battery.h
@@ -14,6 +14,7 @@ struct s3c_adc_bat_pdata {
 	void (*disable_charger)(void);
 
 	int gpio_charge_finished;
+	int gpio_inverted;
 
 	const struct s3c_adc_bat_thresh *lut_noac;
 	unsigned int lut_noac_cnt;
-- 
cgit v1.1


From 00aaaef9a51a1a25c5d6d52ce510772f149a0eb0 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 11 Nov 2010 15:46:54 +0800
Subject: PCI: MSI: Move MSI-X entry definition to pci_regs.h

Then it can be used by others.

Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index af83076..b21d33e 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -309,6 +309,13 @@
 #define PCI_MSIX_PBA		8
 #define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
 
+/* MSI-X entry's format */
+#define PCI_MSIX_ENTRY_SIZE		16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR	0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR	4
+#define  PCI_MSIX_ENTRY_DATA		8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL	12
+
 /* CompactPCI Hotswap Register */
 
 #define PCI_CHSWP_CSR		2	/* Control and Status Register */
-- 
cgit v1.1


From 8d805286968811223cca002134ba3d81244d5313 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 11 Nov 2010 15:46:55 +0800
Subject: PCI: Add mask bit definition for MSI-X table

Then we can use it instead of magic number 1.

Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b21d33e..d4f2c80 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -315,6 +315,7 @@
 #define  PCI_MSIX_ENTRY_UPPER_ADDR	4
 #define  PCI_MSIX_ENTRY_DATA		8
 #define  PCI_MSIX_ENTRY_VECTOR_CTRL	12
+#define   PCI_MSIX_ENTRY_CTRL_MASKBIT	1
 
 /* CompactPCI Hotswap Register */
 
-- 
cgit v1.1


From 2f671e2dbff6eb5ef4e2600adbec550c13b8fe72 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Mon, 6 Dec 2010 14:00:56 -0500
Subject: PCI: Disable ASPM if BIOS asks us to

We currently refuse to touch the ASPM registers if the BIOS tells us that
ASPM isn't supported. This can cause problems if the BIOS has (for any
reason) enabled ASPM on some devices anyway. Change the code such that we
explicitly clear ASPM if the FADT indicates that ASPM isn't supported,
and make sure we tidy up appropriately on device removal in order to deal
with the hotplug case. If ASPM is disabled because the BIOS doesn't hand
over control then we won't touch the registers.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-aspm.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 91ba0b3..ce681051 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
+extern void pcie_clear_aspm(void);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -41,7 +42,9 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-
+static inline void pcie_clear_aspm(void)
+{
+}
 static inline void pcie_no_aspm(void)
 {
 }
-- 
cgit v1.1


From 1d3c16a818e992c199844954d95c17fd7ce6cbba Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@exar.com>
Date: Tue, 30 Nov 2010 17:43:26 -0600
Subject: PCI: make pci_restore_state return void

pci_restore_state only ever returns 0, thus there is no benefit in
having it return any value.  Also, a large majority of the callers do
not check the return code of pci_restore_state.  Make the
pci_restore_state a void return and avoid the overhead.

Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7454408..63cbadc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -806,7 +806,7 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size);
 
 /* Power management related routines */
 int pci_save_state(struct pci_dev *dev);
-int pci_restore_state(struct pci_dev *dev);
+void pci_restore_state(struct pci_dev *dev);
 int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state);
 int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
@@ -1168,10 +1168,8 @@ static inline int pci_save_state(struct pci_dev *dev)
 	return 0;
 }
 
-static inline int pci_restore_state(struct pci_dev *dev)
-{
-	return 0;
-}
+static inline void pci_restore_state(struct pci_dev *dev)
+{ }
 
 static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 {
-- 
cgit v1.1


From 9b444b36fee16d2aaae9cc91ce594ecb15d922a9 Mon Sep 17 00:00:00 2001
From: Seth Heasley <seth.heasley@intel.com>
Date: Wed, 17 Nov 2010 12:12:08 -0700
Subject: x86/PCI: irq and pci_ids patch for Intel Patsburg

This patch adds an additional LPC Controller DeviceID for the Intel
Patsburg PCH.

Signed-off-by: Seth Heasley <seth.heasley@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_ids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cb845c16..d830106 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2468,7 +2468,8 @@
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN	0x1c41
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX	0x1c5f
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS	0x1d22
-#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC	0x1d40
+#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0	0x1d40
+#define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1	0x1d41
 #define PCI_DEVICE_ID_INTEL_82801AA_0	0x2410
 #define PCI_DEVICE_ID_INTEL_82801AA_1	0x2411
 #define PCI_DEVICE_ID_INTEL_82801AA_3	0x2413
-- 
cgit v1.1


From fe31e69740eddc7316071ed5165fed6703c8cd12 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 19 Dec 2010 15:57:16 +0100
Subject: PCI/PCIe: Clear Root PME Status bits early during system resume

I noticed that PCI Express PMEs don't work on my Toshiba Portege R500
after the system has been woken up from a sleep state by a PME
(through Wake-on-LAN).  After some investigation it turned out that
the BIOS didn't clear the Root PME Status bit in the root port that
received the wakeup PME and since the Requester ID was also set in
the port's Root Status register, any subsequent PMEs didn't trigger
interrupts.

This problem can be avoided by clearing the Root PME Status bits in
all PCI Express root ports during early resume.  For this purpose,
add an early resume routine to the PCIe port driver and make this
driver be always registered, even if pci_ports_disable is set (in
which case the driver's only function is to provide the early
resume callback).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index d4f2c80..5b7e6b1 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -504,6 +504,8 @@
 #define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP		30	/* Root Capabilities */
 #define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_RTSTA_PME	0x10000 /* PME status */
+#define PCI_EXP_RTSTA_PENDING	0x20000 /* PME pending */
 #define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
-- 
cgit v1.1


From bdd5f05d91e8ae68075b812ce244c918d3d752cd Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 4 Jan 2011 13:31:45 -0500
Subject: SUNRPC: Remove more code when NFSD_DEPRECATED is not configured

Signed-off-by: NeilBrown <neilb@suse.de>
[bfields@redhat.com: moved svcauth_unix_purge outside ifdef's.]
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 6950c98..c2aebe8 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -255,10 +255,13 @@ static inline time_t get_expiry(char **bpp)
 	return rv - boot.tv_sec;
 }
 
+#ifdef CONFIG_NFSD_DEPRECATED
 static inline void sunrpc_invalidate(struct cache_head *h,
 				     struct cache_detail *detail)
 {
 	h->expiry_time = seconds_since_boot() - 1;
 	detail->nextcheck = seconds_since_boot();
 }
+#endif /* CONFIG_NFSD_DEPRECATED */
+
 #endif /*  _LINUX_SUNRPC_CACHE_H_ */
-- 
cgit v1.1


From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 2 Jan 2011 21:56:36 -0500
Subject: svcrpc: simpler request dropping

Currently we use -EAGAIN returns to determine when to drop a deferred
request.  On its own, that is error-prone, as it makes us treat -EAGAIN
returns from other functions specially to prevent inadvertent dropping.

So, use a flag on the request instead.

Returning an error on request deferral is still required, to prevent
further processing, but we no longer need worry that an error return on
its own could result in a drop.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5a3085b..d45c482 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -269,6 +269,7 @@ struct svc_rqst {
 	struct cache_req	rq_chandle;	/* handle passed to caches for 
 						 * request delaying 
 						 */
+	bool			rq_dropme;
 	/* Catering to nfsd */
 	struct auth_domain *	rq_client;	/* RPC peer info */
 	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
-- 
cgit v1.1


From c45821d263a8a5109d69a9e8942b8d65bcd5f31a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 31 Oct 2010 00:04:44 -0400
Subject: locks: eliminate fl_mylease callback

The nfs server only supports read delegations for now, so we don't care
how conflicts are determined.  All we care is that unlocks are
recognized as matching the leases they are meant to remove.  After the
last patch, a comparison of struct files will work for that purpose.  So
we no longer need this callback.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 090f0ea..73ce69f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1059,7 +1059,6 @@ struct lock_manager_operations {
 	int (*fl_grant)(struct file_lock *, struct file_lock *, int);
 	void (*fl_release_private)(struct file_lock *);
 	void (*fl_break)(struct file_lock *);
-	int (*fl_mylease)(struct file_lock *, struct file_lock *);
 	int (*fl_change)(struct file_lock **, int);
 };
 
-- 
cgit v1.1


From 2ca72e17e5acb1052c35c9faba609c2289ce7a92 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 4 Jan 2011 17:37:15 -0500
Subject: nfsd4: move idmap and acl header files into fs/nfsd

These are internal nfsd interfaces.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfs4_acl.h   | 61 -------------------------------------------
 include/linux/nfsd_idmap.h | 64 ----------------------------------------------
 2 files changed, 125 deletions(-)
 delete mode 100644 include/linux/nfs4_acl.h
 delete mode 100644 include/linux/nfsd_idmap.h

(limited to 'include')

diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h
deleted file mode 100644
index c9c05a7..0000000
--- a/include/linux/nfs4_acl.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  include/linux/nfs4_acl.c
- *
- *  Common NFSv4 ACL handling definitions.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LINUX_NFS4_ACL_H
-#define LINUX_NFS4_ACL_H
-
-#include <linux/posix_acl.h>
-
-/* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to
- * fit in a page: */
-#define NFS4_ACL_MAX 170
-
-struct nfs4_acl *nfs4_acl_new(int);
-int nfs4_acl_get_whotype(char *, u32);
-int nfs4_acl_write_who(int who, char *p);
-int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
-		                        uid_t who, u32 mask);
-
-#define NFS4_ACL_TYPE_DEFAULT	0x01
-#define NFS4_ACL_DIR		0x02
-#define NFS4_ACL_OWNER		0x04
-
-struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *,
-				struct posix_acl *, unsigned int flags);
-int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **,
-				struct posix_acl **, unsigned int flags);
-
-#endif /* LINUX_NFS4_ACL_H */
diff --git a/include/linux/nfsd_idmap.h b/include/linux/nfsd_idmap.h
deleted file mode 100644
index d4a2ac1..0000000
--- a/include/linux/nfsd_idmap.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- *  include/linux/nfsd_idmap.h
- *
- *  Mapping of UID to name and vice versa.
- *
- *  Copyright (c) 2002, 2003 The Regents of the University of
- *  Michigan.  All rights reserved.
-> *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LINUX_NFSD_IDMAP_H
-#define LINUX_NFSD_IDMAP_H
-
-#include <linux/in.h>
-#include <linux/sunrpc/svc.h>
-
-/* XXX from linux/nfs_idmap.h */
-#define IDMAP_NAMESZ 128
-
-#ifdef CONFIG_NFSD_V4
-int nfsd_idmap_init(void);
-void nfsd_idmap_shutdown(void);
-#else
-static inline int nfsd_idmap_init(void)
-{
-	return 0;
-}
-static inline void nfsd_idmap_shutdown(void)
-{
-}
-#endif
-
-int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);
-int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *);
-int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *);
-int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *);
-
-#endif /* LINUX_NFSD_IDMAP_H */
-- 
cgit v1.1


From 91aa5fadb831e7b6ea473a526a6b49c6dc4819ce Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:31:04 +0000
Subject: ARM: PL08x: fix atomic_t usage and tx_submit() return value range

The last_issued variable uses an atomic type, which is only
incremented inside a protected region, and then read.  Everywhere else
only reads the value, so it isn't using atomic_t correctly, and it
doesn't even need to.  Moreover, the DMA engine code provides us with
a variable for this already - chan.cookie.  Use chan.cookie instead.

Also, avoid negative dma_cookie_t values - negative returns from
tx_submit() mean failure, yet in reality we always succeed.  Restart
from cookie 1, just like other DMA engine drivers do.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 521a0f8..4ae62b4 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -174,7 +174,6 @@ struct pl08x_dma_chan {
 	struct pl08x_channel_data *cd;
 	dma_addr_t runtime_addr;
 	enum dma_data_direction	runtime_direction;
-	atomic_t last_issued;
 	dma_cookie_t lc;
 	struct list_head desc_list;
 	struct pl08x_txd *at;
-- 
cgit v1.1


From 7cb72ad959b16ac594118977b7954a7d2ec7a052 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:35:28 +0000
Subject: ARM: PL08x: avoid 'void *' struct fields when we can type them
 properly

Avoid using 'void *' struct fields when the structs are not defined
in linux/amba/pl08x.h - instead, forward declare the struct names, and
use these instead.  This ensures we have proper typechecking.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 4ae62b4..3ecc20f 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -22,6 +22,9 @@
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 
+struct pl08x_lli;
+struct pl08x_driver_data;
+
 /**
  * struct pl08x_channel_data - data structure to pass info between
  * platform and PL08x driver regarding channel configuration
@@ -179,7 +182,7 @@ struct pl08x_dma_chan {
 	struct pl08x_txd *at;
 	unsigned long lockflags;
 	spinlock_t lock;
-	void *host;
+	struct pl08x_driver_data *host;
 	enum pl08x_dma_chan_state state;
 	bool slave;
 	struct pl08x_txd *waiting;
-- 
cgit v1.1


From cace658572ba5d1075f3891e823130a66f3e330f Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:37:31 +0000
Subject: ARM: PL08x: use 'size_t' for lengths

Use size_t for variables denoting lengths throughout, and use the 'z'
qualifier for printing the value.  For safety, add a BUG_ON() in
pl08x_fill_lli_for_desc() to catch the remainder potentially becoming
negative.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 3ecc20f..2c834ed 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -77,7 +77,7 @@ struct pl08x_bus_data {
 	dma_addr_t addr;
 	u8 maxwidth;
 	u8 buswidth;
-	u32 fill_bytes;
+	size_t fill_bytes;
 };
 
 /**
@@ -113,7 +113,7 @@ struct pl08x_txd {
 	enum dma_data_direction	direction;
 	struct pl08x_bus_data srcbus;
 	struct pl08x_bus_data dstbus;
-	int len;
+	size_t len;
 	dma_addr_t llis_bus;
 	void *llis_va;
 	struct pl08x_channel_data *cd;
-- 
cgit v1.1


From 19524d77ec34faf58d313ba34fb755ef6e159216 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:39:13 +0000
Subject: ARM: PL08x: avoid duplicating registers in txd and phychan structures

As we now have all the code accessing the phychan {csrc,cdst,clli,cctl,
ccfg} members in one function, there's no point storing the data into
the struct.  Get rid of the struct members.  Re-order the register dump
in the dev_dbg() to reflect the order we write the registers to the DMA
device.

The txd {csrc,cdst,clli,cctl} values are duplicates of the lli[0]
values, so there's no point duplicating these either.  Program the DMAC
registers directly from the lli[0] values.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 2c834ed..29d9745 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -95,11 +95,6 @@ struct pl08x_phy_chan {
 	spinlock_t lock;
 	int signal;
 	struct pl08x_dma_chan *serving;
-	u32 csrc;
-	u32 cdst;
-	u32 clli;
-	u32 cctl;
-	u32 ccfg;
 };
 
 /**
@@ -118,14 +113,6 @@ struct pl08x_txd {
 	void *llis_va;
 	struct pl08x_channel_data *cd;
 	bool active;
-	/*
-	 * Settings to be put into the physical channel when we
-	 * trigger this txd
-	 */
-	u32 csrc;
-	u32 cdst;
-	u32 clli;
-	u32 cctl;
 };
 
 /**
-- 
cgit v1.1


From 4983a04fd2562986360b646b378f267308bc22c0 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:39:33 +0000
Subject: ARM: PL08x: move ccfg into txd structure

The ccfg register is used to configure the channel parameters - the type
and direction of transfer, the flow control signal and IRQ mask enables.
The type and direction of transfer is known in the relevent prep_*
function where a txd is created.  The IRQ mask enables are always set,
and the flow control signals are always set when we start processing a
txd according to phychan->signal.

If we store the ccfg value in the txd structure, we can avoid modifying
platform data - and even having it in platform data at all.

So, remove it from platform data too.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 29d9745..8e74cb1 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -58,7 +58,6 @@ struct pl08x_channel_data {
 	int max_signal;
 	u32 muxval;
 	u32 cctl;
-	u32 ccfg;
 	dma_addr_t addr;
 	bool circular_buffer;
 	bool single;
@@ -113,6 +112,11 @@ struct pl08x_txd {
 	void *llis_va;
 	struct pl08x_channel_data *cd;
 	bool active;
+	/*
+	 * Settings to be put into the physical channel when we
+	 * trigger this txd.  Other registers are in llis_va[0].
+	 */
+	u32 ccfg;
 };
 
 /**
-- 
cgit v1.1


From 70b5ed6b6d72cd8b1a3d4b7b878a0dd132bec7ba Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:40:13 +0000
Subject: ARM: PL08x: move default cctl into txd structure

Rather than modifying platform data while preparing a transfer, copy
the cctl value into the txd structure and modify the value there.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 8e74cb1..8d90830 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -110,8 +110,9 @@ struct pl08x_txd {
 	size_t len;
 	dma_addr_t llis_bus;
 	void *llis_va;
-	struct pl08x_channel_data *cd;
 	bool active;
+	/* Default cctl value for LLIs */
+	u32 cctl;
 	/*
 	 * Settings to be put into the physical channel when we
 	 * trigger this txd.  Other registers are in llis_va[0].
-- 
cgit v1.1


From 30749cb4a40f02a199640011e5ab5c5f60b8482e Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:41:13 +0000
Subject: ARM: PL08x: allow AHB master port selection to be configured

Platforms need to be able to control which AHB master interface is used,
as each AHB master interface may be asymetric.  Allow the interfaces
used for fetching LLIs, memory, and each peripheral to be configured
individually.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 8d90830..f858651 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -25,6 +25,12 @@
 struct pl08x_lli;
 struct pl08x_driver_data;
 
+/* Bitmasks for selecting AHB ports for DMA transfers */
+enum {
+	PL08X_AHB1 = (1 << 0),
+	PL08X_AHB2 = (1 << 1)
+};
+
 /**
  * struct pl08x_channel_data - data structure to pass info between
  * platform and PL08x driver regarding channel configuration
@@ -51,6 +57,8 @@ struct pl08x_driver_data;
  * round round round)
  * @single: the device connected to this channel will request single
  * DMA transfers, not bursts. (Bursts are default.)
+ * @periph_buses: the device connected to this channel is accessible via
+ * these buses (use PL08X_AHB1 | PL08X_AHB2).
  */
 struct pl08x_channel_data {
 	char *bus_id;
@@ -61,6 +69,7 @@ struct pl08x_channel_data {
 	dma_addr_t addr;
 	bool circular_buffer;
 	bool single;
+	u8 periph_buses;
 };
 
 /**
@@ -193,8 +202,8 @@ struct pl08x_dma_chan {
  * less than zero, else it returns the allocated signal number
  * @put_signal: indicate to the platform that this physical signal is not
  * running any DMA transfer and multiplexing can be recycled
- * @bus_bit_lli: Bit[0] of the address indicated which AHB bus master the
- * LLI addresses are on 0/1 Master 1/2.
+ * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2
+ * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
  */
 struct pl08x_platform_data {
 	struct pl08x_channel_data *slave_channels;
@@ -202,6 +211,8 @@ struct pl08x_platform_data {
 	struct pl08x_channel_data memcpy_channel;
 	int (*get_signal)(struct pl08x_dma_chan *);
 	void (*put_signal)(struct pl08x_dma_chan *);
+	u8 lli_buses;
+	u8 mem_buses;
 };
 
 #ifdef CONFIG_AMBA_PL08X
-- 
cgit v1.1


From d7244e9a27a3da27d62aabf560ee828d7991493e Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:43:35 +0000
Subject: ARM: PL08x: shrink srcbus/dstbus in txd structure

We only need to store the dma address.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index f858651..4e9e718 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -114,8 +114,8 @@ struct pl08x_txd {
 	struct dma_async_tx_descriptor tx;
 	struct list_head node;
 	enum dma_data_direction	direction;
-	struct pl08x_bus_data srcbus;
-	struct pl08x_bus_data dstbus;
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
 	size_t len;
 	dma_addr_t llis_bus;
 	void *llis_va;
-- 
cgit v1.1


From 15c17232fbd1f7687c740c3c26f9e7f337bd9e36 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:44:36 +0000
Subject: ARM: PL08x: rename 'desc_list' as 'pend_list'

This 'desc_list' is actually a list of pending descriptors, so name
it after its function (pending list) rather than what it contains
(descriptors).

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 4e9e718..08a9024 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -158,7 +158,7 @@ enum pl08x_dma_chan_state {
  * @runtime_direction: current direction of this channel according to
  * runtime config
  * @lc: last completed transaction on this channel
- * @desc_list: queued transactions pending on this channel
+ * @pend_list: queued transactions pending on this channel
  * @at: active transaction on this channel
  * @lockflags: sometimes we let a lock last between two function calls,
  * especially prep/submit, and then we need to store the IRQ flags
@@ -179,7 +179,7 @@ struct pl08x_dma_chan {
 	dma_addr_t runtime_addr;
 	enum dma_data_direction	runtime_direction;
 	dma_cookie_t lc;
-	struct list_head desc_list;
+	struct list_head pend_list;
 	struct pl08x_txd *at;
 	unsigned long lockflags;
 	spinlock_t lock;
-- 
cgit v1.1


From 8087aacda040bdbf84940712d132ce80c30b9d5d Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:45:17 +0000
Subject: ARM: PL08x: introduce 'phychan_hold' to hold on to physical channels

Introduce 'phychan_hold' to hold on to physical DMA channels while we're
preparing a new descriptor for it.  This will be incremented when we
allocate a physical channel and set the MUX registers during the
preparation of the TXD, and will only be decremented when the TXD is
submitted.

This prevents the physical channel being given up before the new TXD
is placed on the queue.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 08a9024..95b76ea 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -151,6 +151,8 @@ enum pl08x_dma_chan_state {
  * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
  * @chan: wrappped abstract channel
  * @phychan: the physical channel utilized by this channel, if there is one
+ * @phychan_hold: if non-zero, hold on to the physical channel even if we
+ *  have no pending entries
  * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
  * @name: name of channel
  * @cd: channel platform data
@@ -173,6 +175,7 @@ enum pl08x_dma_chan_state {
 struct pl08x_dma_chan {
 	struct dma_chan chan;
 	struct pl08x_phy_chan *phychan;
+	int phychan_hold;
 	struct tasklet_struct tasklet;
 	char *name;
 	struct pl08x_channel_data *cd;
-- 
cgit v1.1


From c370e594efe2993620d24d41a78f325102e99d1c Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Mon, 3 Jan 2011 22:45:37 +0000
Subject: ARM: PL08x: fix locking between prepare function and submit function

The PL08x driver holds on to the channel lock with interrupts disabled
between the prepare and the subsequent submit API functions.  This
means that the locking state when the prepare function returns is
dependent on whether it suceeeds or not.

It did this to ensure that the physical channel wasn't released, and
as it used to add the descriptor onto the pending list at prepare time
rather than submit time.

Now that we have reorganized the code to remove those reasons, we can
now safely release the spinlock at the end of preparation and reacquire
it in our submit function.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 95b76ea..933b4ed 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -162,9 +162,6 @@ enum pl08x_dma_chan_state {
  * @lc: last completed transaction on this channel
  * @pend_list: queued transactions pending on this channel
  * @at: active transaction on this channel
- * @lockflags: sometimes we let a lock last between two function calls,
- * especially prep/submit, and then we need to store the IRQ flags
- * in the channel state, here
  * @lock: a lock for this channel data
  * @host: a pointer to the host (internal use)
  * @state: whether the channel is idle, paused, running etc
@@ -184,7 +181,6 @@ struct pl08x_dma_chan {
 	dma_cookie_t lc;
 	struct list_head pend_list;
 	struct pl08x_txd *at;
-	unsigned long lockflags;
 	spinlock_t lock;
 	struct pl08x_driver_data *host;
 	enum pl08x_dma_chan_state state;
-- 
cgit v1.1


From cf24dc85ff29a41abd8e73730e5feb22b2666bd3 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Fri, 19 Feb 2010 15:39:52 +0100
Subject: mtd: OneNAND: add enable / disable methods to onenand_chip

Add enable / disable methods called from get_device() / release_device().
These can be used, for example, to allow the driver to prevent the voltage
regulator from being put to sleep while OneNAND is in use.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/onenand.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 6da3fe3..ae418e4 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -118,6 +118,8 @@ struct onenand_chip {
 	int (*chip_probe)(struct mtd_info *mtd);
 	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
 	int (*scan_bbt)(struct mtd_info *mtd);
+	int (*enable)(struct mtd_info *mtd);
+	int (*disable)(struct mtd_info *mtd);
 
 	struct completion	complete;
 	int			irq;
-- 
cgit v1.1


From 0e4ca7e5101e7f4054452b8d71c535eec64a187b Mon Sep 17 00:00:00 2001
From: Anatolij Gustschin <agust@denx.de>
Date: Thu, 16 Dec 2010 23:42:14 +0100
Subject: mtd: add writebufsize field to mtd_info struct

This field will be used to indicate the write buffer size
of the MTD device. UBI will set it's minimal I/O unit size
(min_io_size) to the indicated write buffer size. By this
change we intend to fix failed recovery of UBIFS partitions
we currently observe on NOR flash when mounting the partition
after unclean unmount.

Currently the min_io_size is set to mtd->writesize (which is 1
byte for NOR flash). But flash programming is often done from
prepared write buffer containing multiple bytes and is performed
in one programming operation which could be interrupted by a power
cut or a system reset causing corrupted (partially written) areas
in a flash sector. Knowing the size of potentially corrupted areas
UBIFS scanning and recovery algorithms are able to perform
successful recovery.

In case of NOR flash minimal I/O size must be equal to the
maximal size of the write buffer used by embedded flash
programming algorithm. In case of NAND flash mtd->writebufsize
should be equivalent to mtd->writesize.

The subsequent patches will add mtd->writebufsize initialization
where needed.

Signed-off-by: Anatolij Gustschin <agust@denx.de>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index fe8d77e..9d5306b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -144,6 +144,17 @@ struct mtd_info {
 	 */
 	uint32_t writesize;
 
+	/*
+	 * Size of the write buffer used by the MTD. MTD devices having a write
+	 * buffer can write multiple writesize chunks at a time. E.g. while
+	 * writing 4 * writesize bytes to a device with 2 * writesize bytes
+	 * buffer the MTD driver can (but doesn't have to) do 2 writesize
+	 * operations, but not 4. Currently, all NANDs have writebufsize
+	 * equivalent to writesize (NAND page size). Some NOR flashes do have
+	 * writebufsize greater than writesize.
+	 */
+	uint32_t writebufsize;
+
 	uint32_t oobsize;   // Amount of OOB data per block (e.g. 16)
 	uint32_t oobavail;  // Available OOB bytes per block
 
-- 
cgit v1.1


From 26fcaf60fe3861409eb4c455c5c0d0f00f599b08 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 7 Jan 2011 01:42:31 +0100
Subject: PM: Fix oops in suspend/hibernate code related to failing ioremap()

When ioremap() fails (which might happen for some reason), we nicely
oops in suspend_nvs_save() due to NULL dereference by memcpy() in there.
Fail gracefully instead.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/suspend.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2669751..acb7d91 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -262,7 +262,7 @@ static inline bool system_entering_hibernation(void) { return false; }
 extern int suspend_nvs_register(unsigned long start, unsigned long size);
 extern int suspend_nvs_alloc(void);
 extern void suspend_nvs_free(void);
-extern void suspend_nvs_save(void);
+extern int suspend_nvs_save(void);
 extern void suspend_nvs_restore(void);
 #else /* CONFIG_SUSPEND_NVS */
 static inline int suspend_nvs_register(unsigned long a, unsigned long b)
@@ -271,7 +271,7 @@ static inline int suspend_nvs_register(unsigned long a, unsigned long b)
 }
 static inline int suspend_nvs_alloc(void) { return 0; }
 static inline void suspend_nvs_free(void) {}
-static inline void suspend_nvs_save(void) {}
+static inline int suspend_nvs_save(void) {}
 static inline void suspend_nvs_restore(void) {}
 #endif /* CONFIG_SUSPEND_NVS */
 
-- 
cgit v1.1


From 976513dbfc1547c7b1822566923058655f0c32fd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 7 Jan 2011 01:43:44 +0100
Subject: PM / ACPI: Move NVS saving and restoring code to drivers/acpi

The saving of the ACPI NVS area during hibernation and suspend and
restoring it during the subsequent resume is entirely specific to
ACPI, so move it to drivers/acpi and drop the CONFIG_SUSPEND_NVS
configuration option which is redundant.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h    |  9 +++++++++
 include/linux/suspend.h | 17 -----------------
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 67c91b4..fa7ed6a98 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -254,6 +254,15 @@ void __init acpi_old_suspend_ordering(void);
 void __init acpi_nvs_nosave(void);
 #endif /* CONFIG_PM_SLEEP */
 
+#ifdef CONFIG_ACPI_SLEEP
+int suspend_nvs_register(unsigned long start, unsigned long size);
+#else
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+#endif
+
 struct acpi_osc_context {
 	char *uuid_str; /* uuid string */
 	int rev;
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index acb7d91..0e288e3 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -258,23 +258,6 @@ static inline int hibernate(void) { return -ENOSYS; }
 static inline bool system_entering_hibernation(void) { return false; }
 #endif /* CONFIG_HIBERNATION */
 
-#ifdef CONFIG_SUSPEND_NVS
-extern int suspend_nvs_register(unsigned long start, unsigned long size);
-extern int suspend_nvs_alloc(void);
-extern void suspend_nvs_free(void);
-extern int suspend_nvs_save(void);
-extern void suspend_nvs_restore(void);
-#else /* CONFIG_SUSPEND_NVS */
-static inline int suspend_nvs_register(unsigned long a, unsigned long b)
-{
-	return 0;
-}
-static inline int suspend_nvs_alloc(void) { return 0; }
-static inline void suspend_nvs_free(void) {}
-static inline int suspend_nvs_save(void) {}
-static inline void suspend_nvs_restore(void) {}
-#endif /* CONFIG_SUSPEND_NVS */
-
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
 void restore_processor_state(void);
-- 
cgit v1.1


From 7fa69baf29de8c77a6b32c054df2abb8f11f8aa4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 6 Jan 2011 23:35:10 +0100
Subject: ACPI / PM: Drop special ACPI wakeup flags

Drop special ACPI wakeup flags, wakeup.state.enabled and
wakeup.flags.always_enabled, that aren't necessary any more after
we've started to use standard device wakeup flags for handling ACPI
wakeup devices.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 359ef11..f2499f5 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -242,20 +242,14 @@ struct acpi_device_perf {
 struct acpi_device_wakeup_flags {
 	u8 valid:1;		/* Can successfully enable wakeup? */
 	u8 run_wake:1;		/* Run-Wake GPE devices */
-	u8 always_enabled:1;	/* Run-wake devices that are always enabled */
 	u8 notifier_present:1;  /* Wake-up notify handler has been installed */
 };
 
-struct acpi_device_wakeup_state {
-	u8 enabled:1;
-};
-
 struct acpi_device_wakeup {
 	acpi_handle gpe_device;
 	u64 gpe_number;
 	u64 sleep_state;
 	struct acpi_handle_list resources;
-	struct acpi_device_wakeup_state state;
 	struct acpi_device_wakeup_flags flags;
 	int prepare_count;
 	int run_wake_count;
-- 
cgit v1.1


From 5a344a505093dd65f82f338ffdb7208321b3630e Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Mon, 10 Jan 2011 16:35:45 +0800
Subject: ACPI: Reevaluate whether the T-state is supported or not after cpu is
 online/offline

After one CPU is offlined, it is unnecessary to switch T-state for it.
So it will be better that the throttling is disabled after the cpu
is offline.
At the same time after one cpu is online, we should check whether
the T-state is supported and then set the corresponding T-state
flag.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/processor.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 1b62102..55192ac 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -324,6 +324,12 @@ int acpi_processor_tstate_has_changed(struct acpi_processor *pr);
 int acpi_processor_get_throttling_info(struct acpi_processor *pr);
 extern int acpi_processor_set_throttling(struct acpi_processor *pr,
 					 int state, bool force);
+/*
+ * Reevaluate whether the T-state is invalid after one cpu is
+ * onlined/offlined. In such case the flags.throttling will be updated.
+ */
+extern void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
+			unsigned long action);
 extern const struct file_operations acpi_processor_throttling_fops;
 extern void acpi_processor_throttling_init(void);
 /* in processor_idle.c */
-- 
cgit v1.1


From 4976b4eb9d083f035aa97afec560c7e1c16c6afd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 4 Jan 2011 13:02:32 +0100
Subject: mac80211: add remain-on-channel docs

Add documentation for the new callbacks that I
forgot in the patch adding the callbacks.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5b3fd5a..3ce8e1f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1753,6 +1753,16 @@ enum ieee80211_ampdu_mlme_action {
  *	(also see nl80211.h @NL80211_ATTR_WIPHY_ANTENNA_TX).
  *
  * @get_antenna: Get current antenna configuration from device (tx_ant, rx_ant).
+ *
+ * @remain_on_channel: Starts an off-channel period on the given channel, must
+ *	call back to ieee80211_ready_on_channel() when on that channel. Note
+ *	that normal channel traffic is not stopped as this is intended for hw
+ *	offload. Frames to transmit on the off-channel channel are transmitted
+ *	normally except for the %IEEE80211_TX_CTL_TX_OFFCHAN flag. When the
+ *	duration (which will always be non-zero) expires, the driver must call
+ *	ieee80211_remain_on_channel_expired(). This callback may sleep.
+ * @cancel_remain_on_channel: Requests that an ongoing off-channel period is
+ *	aborted before it expires. This callback may sleep.
  */
 struct ieee80211_ops {
 	int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb);
-- 
cgit v1.1


From 610dbc980f7ad886313278ce946287f24b44cf55 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 6 Jan 2011 22:36:44 +0100
Subject: mac80211: add missing docs for off-chan TX flag

The flag is IEEE80211_TX_CTL_TX_OFFCHAN and I had
added that in a previous patch but forgotten docs.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3ce8e1f..62c0ce2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -337,6 +337,10 @@ struct ieee80211_bss_conf {
  * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame
  * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this
  *	frame and selects the maximum number of streams that it can use.
+ * @IEEE80211_TX_CTL_TX_OFFCHAN: Marks this packet to be transmitted on
+ *	the off-channel channel when a remain-on-channel offload is done
+ *	in hardware -- normal packets still flow and are expected to be
+ *	handled properly by the device.
  *
  * Note: If you have to add new flags to the enumeration, then don't
  *	 forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary.
-- 
cgit v1.1


From f52555a4b2d229079155e6642ec09afa63d10cab Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 6 Jan 2011 22:36:45 +0100
Subject: cfg80211: add mesh join/leave callback docs

When I made the patch to add mesh join/leave I
didn't pay attention to docs because it was a
proof of concept, and then when we actually did
merge it I forgot -- add docs now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bcc9f44..1322695 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1103,6 +1103,8 @@ struct cfg80211_pmksa {
  * @change_mpath: change a given mesh path
  * @get_mpath: get a mesh path for the given parameters
  * @dump_mpath: dump mesh path callback -- resume dump at index @idx
+ * @join_mesh: join the mesh network with the specified parameters
+ * @leave_mesh: leave the current mesh network
  *
  * @get_mesh_config: Get the current mesh configuration
  *
-- 
cgit v1.1


From 45007fd590c6b099cec5d36ea7056b00f8b4916a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 6 Jan 2011 22:36:46 +0100
Subject: nl80211: add/fix mesh docs

Some mesh attribute/command docs are missing or
have errors in the name so they don't match, fix
all of them.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2b89b71..821ffb9 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -148,6 +148,10 @@
  * @NL80211_CMD_SET_MPATH:  Set mesh path attributes for mesh path to
  * 	destination %NL80211_ATTR_MAC on the interface identified by
  * 	%NL80211_ATTR_IFINDEX.
+ * @NL80211_CMD_NEW_MPATH: Create a new mesh path for the destination given by
+ *	%NL80211_ATTR_MAC via %NL80211_ATTR_MPATH_NEXT_HOP.
+ * @NL80211_CMD_DEL_MPATH: Delete a mesh path to the destination given by
+ *	%NL80211_ATTR_MAC.
  * @NL80211_CMD_NEW_PATH: Add a mesh path with given attributes to the
  *	the interface identified by %NL80211_ATTR_IFINDEX.
  * @NL80211_CMD_DEL_PATH: Remove a mesh path identified by %NL80211_ATTR_MAC
@@ -612,7 +616,7 @@ enum nl80211_commands {
  *	consisting of a nested array.
  *
  * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes).
- * @NL80211_ATTR_PLINK_ACTION: action to perform on the mesh peer link.
+ * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link.
  * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path.
  * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path
  * 	info given for %NL80211_CMD_GET_MPATH, nested attribute described at
@@ -879,7 +883,9 @@ enum nl80211_commands {
  *	See &enum nl80211_key_default_types.
  *
  * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters.  These cannot be
- * changed once the mesh is active.
+ *	changed once the mesh is active.
+ * @NL80211_ATTR_MESH_CONFIG: Mesh configuration parameters, a nested attribute
+ *	containing attributes from &enum nl80211_meshconf_params.
  *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -1225,8 +1231,6 @@ enum nl80211_rate_info {
  * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs)
  * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station)
  * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station)
- * @__NL80211_STA_INFO_AFTER_LAST: internal
- * @NL80211_STA_INFO_MAX: highest possible station info attribute
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
@@ -1236,6 +1240,11 @@ enum nl80211_rate_info {
  * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station)
  * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station)
  * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
+ * @NL80211_STA_INFO_LLID: the station's mesh LLID
+ * @NL80211_STA_INFO_PLID: the station's mesh PLID
+ * @NL80211_STA_INFO_PLINK_STATE: peer link state for the station
+ * @__NL80211_STA_INFO_AFTER_LAST: internal
+ * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
 enum nl80211_sta_info {
 	__NL80211_STA_INFO_INVALID,
@@ -1626,7 +1635,7 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs)
  * that it takes for an HWMP information element to propagate across the mesh
  *
- * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not
+ * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not
  *
  * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
  * source mesh point for path selection elements.
@@ -1678,6 +1687,7 @@ enum nl80211_meshconf_params {
  * element that vendors will use to identify the path selection methods and
  * metrics in use.
  *
+ * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number
  * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
  */
 enum nl80211_mesh_setup_params {
-- 
cgit v1.1


From e159489baa717dbae70f9903770a6a4990865887 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 9 Jan 2011 23:32:15 +0100
Subject: workqueue: relax lockdep annotation on flush_work()

Currently, the lockdep annotation in flush_work() requires exclusive
access on the workqueue the target work is queued on and triggers
warning if a work is trying to flush another work on the same
workqueue; however, this is no longer true as workqueues can now
execute multiple works concurrently.

This patch adds lock_map_acquire_read() and make process_one_work()
hold read access to the workqueue while executing a work and
start_flush_work() check for write access if concurrnecy level is one
or the workqueue has a rescuer (as only one execution resource - the
rescuer - is guaranteed to be available under memory pressure), and
read access if higher.

This better represents what's going on and removes spurious lockdep
warnings which are triggered by fake dependency chain created through
flush_work().

* Peter pointed out that flushing another work from a WQ_MEM_RECLAIM
  wq breaks forward progress guarantee under memory pressure.
  Condition check accordingly updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Tested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@kernel.org
---
 include/linux/lockdep.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 71c09b26..9f19430 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -522,12 +522,15 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define lock_map_acquire(l)		lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_)
+#  define lock_map_acquire_read(l)	lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_)
 # else
 #  define lock_map_acquire(l)		lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_)
+#  define lock_map_acquire_read(l)	lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_)
 # endif
 # define lock_map_release(l)			lock_release(l, 1, _THIS_IP_)
 #else
 # define lock_map_acquire(l)			do { } while (0)
+# define lock_map_acquire_read(l)		do { } while (0)
 # define lock_map_release(l)			do { } while (0)
 #endif
 
-- 
cgit v1.1


From 925268a06dc2b1ff7bfcc37419a6827a0e739639 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 11 Jan 2011 16:44:01 +0900
Subject: memory hotplug: one more lock on memory hotplug

Now, memory_hotplug_(un)lock() is used for add/remove/offline pages
for avoiding races with hibernation. But this should be held in
online_pages(), too. It seems asymmetric.

There are cases where one has to avoid a race with memory hotplug
notifier and his own local code, and hotplug v.s. hotplug.
This will add a generic solution for avoiding races. In other view,
having lock here has no big impacts. online pages is tend to be
done by udev script at el against each memory section one by one.

Then, it's better to have lock here, too.

Cc: <stable@kernel.org> # 2.6.37
Reviewed-by: Christoph Lameter <cl@linux.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/memory_hotplug.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 31c237a..12b9eb5a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -161,6 +161,12 @@ extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
 #endif
 
+/*
+ * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
+ * notifier will be called under this. 2) offline/online/add/remove memory
+ * will not run simultaneously.
+ */
+
 void lock_memory_hotplug(void);
 void unlock_memory_hotplug(void);
 
-- 
cgit v1.1


From f07745325cbd93afc5d8bcf7539a063d33134075 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 9 Feb 2009 12:05:49 -0800
Subject: xen: define gnttab_set_map_op/unmap_op

Impact: hypercall definitions

These functions populate the gnttab data structures used by the
granttab map and unmap ops and are used in the backend drivers.

Originally xen-unstable.hg 9625:c3bb51c443a7

[ Include Stefano's fix for phys_addr_t ]

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/grant_table.h | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 9a73170..1821aa1 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -37,10 +37,16 @@
 #ifndef __ASM_GNTTAB_H__
 #define __ASM_GNTTAB_H__
 
-#include <asm/xen/hypervisor.h>
+#include <asm/page.h>
+
+#include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
+
+#include <asm/xen/hypervisor.h>
 #include <asm/xen/grant_table.h>
 
+#include <xen/features.h>
+
 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
 #define NR_GRANT_FRAMES 4
 
@@ -107,6 +113,37 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
 				       unsigned long pfn);
 
+static inline void
+gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr,
+		  uint32_t flags, grant_ref_t ref, domid_t domid)
+{
+	if (flags & GNTMAP_contains_pte)
+		map->host_addr = addr;
+	else if (xen_feature(XENFEAT_auto_translated_physmap))
+		map->host_addr = __pa(addr);
+	else
+		map->host_addr = addr;
+
+	map->flags = flags;
+	map->ref = ref;
+	map->dom = domid;
+}
+
+static inline void
+gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr,
+		    uint32_t flags, grant_handle_t handle)
+{
+	if (flags & GNTMAP_contains_pte)
+		unmap->host_addr = addr;
+	else if (xen_feature(XENFEAT_auto_translated_physmap))
+		unmap->host_addr = __pa(addr);
+	else
+		unmap->host_addr = addr;
+
+	unmap->handle = handle;
+	unmap->dev_bus_addr = 0;
+}
+
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 			   unsigned long max_nr_gframes,
 			   struct grant_entry **__shared);
-- 
cgit v1.1


From ab31523c2fcac557226bac72cbdf5fafe01f9a26 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Tue, 14 Dec 2010 18:40:46 +0000
Subject: xen/gntdev: allow usermode to map granted pages

The gntdev driver allows usermode to map granted pages from other
domains.  This is typically used to implement a Xen backend driver
in user mode.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Stefano Stabellini <Stefano.Stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/gntdev.h | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 include/xen/gntdev.h

(limited to 'include')

diff --git a/include/xen/gntdev.h b/include/xen/gntdev.h
new file mode 100644
index 0000000..eb23f41
--- /dev/null
+++ b/include/xen/gntdev.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ * gntdev.h
+ * 
+ * Interface to /dev/xen/gntdev.
+ * 
+ * Copyright (c) 2007, D G Murray
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_GNTDEV_H__
+#define __LINUX_PUBLIC_GNTDEV_H__
+
+struct ioctl_gntdev_grant_ref {
+	/* The domain ID of the grant to be mapped. */
+	uint32_t domid;
+	/* The grant reference of the grant to be mapped. */
+	uint32_t ref;
+};
+
+/*
+ * Inserts the grant references into the mapping table of an instance
+ * of gntdev. N.B. This does not perform the mapping, which is deferred
+ * until mmap() is called with @index as the offset.
+ */
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
+struct ioctl_gntdev_map_grant_ref {
+	/* IN parameters */
+	/* The number of grants to be mapped. */
+	uint32_t count;
+	uint32_t pad;
+	/* OUT parameters */
+	/* The offset to be used on a subsequent call to mmap(). */
+	uint64_t index;
+	/* Variable IN parameter. */
+	/* Array of grant references, of size @count. */
+	struct ioctl_gntdev_grant_ref refs[1];
+};
+
+/*
+ * Removes the grant references from the mapping table of an instance of
+ * of gntdev. N.B. munmap() must be called on the relevant virtual address(es)
+ * before this ioctl is called, or an error will result.
+ */
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))
+struct ioctl_gntdev_unmap_grant_ref {
+	/* IN parameters */
+	/* The offset was returned by the corresponding map operation. */
+	uint64_t index;
+	/* The number of pages to be unmapped. */
+	uint32_t count;
+	uint32_t pad;
+};
+
+/*
+ * Returns the offset in the driver's address space that corresponds
+ * to @vaddr. This can be used to perform a munmap(), followed by an
+ * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
+ * the caller. The number of pages that were allocated at the same time as
+ * @vaddr is returned in @count.
+ *
+ * N.B. Where more than one page has been mapped into a contiguous range, the
+ *      supplied @vaddr must correspond to the start of the range; otherwise
+ *      an error will result. It is only possible to munmap() the entire
+ *      contiguously-allocated range at once, and not any subrange thereof.
+ */
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
+struct ioctl_gntdev_get_offset_for_vaddr {
+	/* IN parameters */
+	/* The virtual address of the first mapped page in a range. */
+	uint64_t vaddr;
+	/* OUT parameters */
+	/* The offset that was used in the initial mmap() operation. */
+	uint64_t offset;
+	/* The number of pages mapped in the VM area that begins at @vaddr. */
+	uint32_t count;
+	uint32_t pad;
+};
+
+/*
+ * Sets the maximum number of grants that may mapped at once by this gntdev
+ * instance.
+ *
+ * N.B. This must be called before any other ioctl is performed on the device.
+ */
+#define IOCTL_GNTDEV_SET_MAX_GRANTS \
+_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants))
+struct ioctl_gntdev_set_max_grants {
+	/* IN parameter */
+	/* The maximum number of grants that may be mapped at once. */
+	uint32_t count;
+};
+
+#endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
cgit v1.1


From 289b777eac19c811b474593b4d2fd14e46340c23 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 10 Dec 2010 14:54:44 +0000
Subject: xen: introduce gnttab_map_refs and gnttab_unmap_refs

gnttab_map_refs maps some grant refs and uses the new m2p override to
set a proper m2p mapping for the granted pages.

gnttab_unmap_refs unmaps the granted refs and removes th mappings from
the m2p override.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/grant_table.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 1821aa1..b1fab6b 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -155,4 +155,9 @@ unsigned int gnttab_max_grant_frames(void);
 
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
+int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+		    struct page **pages, unsigned int count);
+int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
+		      struct page **pages, unsigned int count);
+
 #endif /* __ASM_GNTTAB_H__ */
-- 
cgit v1.1


From 1d1bc8f2074f0b728dfca2a3c16f2f5a3f298ffc Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 4 Oct 2010 23:12:59 -0400
Subject: nfsd4: support BIND_CONN_TO_SESSION

Basic xdr and processing for BIND_CONN_TO_SESSION.  This adds a
connection to the list of connections associated with a session.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfs4.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 26afa30..a591893 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -65,6 +65,9 @@
 
 #define NFS4_CDFC4_FORE	0x1
 #define NFS4_CDFC4_BACK 0x2
+#define NFS4_CDFC4_BOTH 0x3
+#define NFS4_CDFC4_FORE_OR_BOTH 0x3
+#define NFS4_CDFC4_BACK_OR_BOTH 0x7
 
 #define NFS4_SET_TO_SERVER_TIME	0
 #define NFS4_SET_TO_CLIENT_TIME	1
-- 
cgit v1.1


From d75faea330dbd1873c9094e9926ae306590c0998 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 30 Nov 2010 19:15:01 -0500
Subject: rpc: move sk_bc_xprt to svc_xprt

This seems obviously transport-level information even if it's currently
used only by the server socket code.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 include/linux/sunrpc/svcsock.h  | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c8f81da..7ad9751 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -79,6 +79,7 @@ struct svc_xprt {
 	struct list_head	xpt_users;	/* callbacks on free */
 
 	struct net		*xpt_net;
+	struct rpc_xprt		*xpt_bc_xprt;	/* NFSv4.1 backchannel */
 };
 
 static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 1b353a7..04dba23 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -28,7 +28,6 @@ struct svc_sock {
 	/* private TCP part */
 	u32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
-	struct rpc_xprt		*sk_bc_xprt;	/* NFSv4.1 backchannel xprt */
 };
 
 /*
-- 
cgit v1.1


From f0418aa4b1103f959d64dc18273efa04ee0140e9 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 8 Dec 2010 13:48:19 -0500
Subject: rpc: allow xprt_class->setup to return a preexisting xprt

This allows us to reuse the xprt associated with a server connection if
one has already been set up.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xprt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 89d10d2..bef0f53 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -321,6 +321,7 @@ void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_CLOSING		(6)
 #define XPRT_CONNECTION_ABORT	(7)
 #define XPRT_CONNECTION_CLOSE	(8)
+#define XPRT_INITIALIZED	(9)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
-- 
cgit v1.1


From 4cb18728709683c91a5f6f8d5f337bfb498b089a Mon Sep 17 00:00:00 2001
From: "R.Durgadoss" <durgadoss.r@intel.com>
Date: Wed, 27 Oct 2010 03:33:29 +0530
Subject: thermal: Add event notification to thermal framework

This patch adds event notification support to the generic
thermal sysfs framework in the kernel. The notification is in the
form of a netlink event.

Signed-off-by: R.Durgadoss <durgadoss.r@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/thermal.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 1de8b9eb..7846449 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -127,6 +127,37 @@ struct thermal_zone_device {
 	struct thermal_hwmon_attr temp_crit;	/* hwmon sys attr */
 #endif
 };
+/* Adding event notification support elements */
+#define THERMAL_GENL_FAMILY_NAME                "thermal_event"
+#define THERMAL_GENL_VERSION                    0x01
+#define THERMAL_GENL_MCAST_GROUP_NAME           "thermal_mc_group"
+
+enum events {
+	THERMAL_AUX0,
+	THERMAL_AUX1,
+	THERMAL_CRITICAL,
+	THERMAL_DEV_FAULT,
+};
+
+struct thermal_genl_event {
+	u32 orig;
+	enum events event;
+};
+/* attributes of thermal_genl_family */
+enum {
+	THERMAL_GENL_ATTR_UNSPEC,
+	THERMAL_GENL_ATTR_EVENT,
+	__THERMAL_GENL_ATTR_MAX,
+};
+#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
+
+/* commands supported by the thermal_genl_family */
+enum {
+	THERMAL_GENL_CMD_UNSPEC,
+	THERMAL_GENL_CMD_EVENT,
+	__THERMAL_GENL_CMD_MAX,
+};
+#define THERMAL_GENL_CMD_MAX (__THERMAL_GENL_CMD_MAX - 1)
 
 struct thermal_zone_device *thermal_zone_device_register(char *, int, void *,
 							 struct
@@ -146,5 +177,6 @@ struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 							       thermal_cooling_device_ops
 							       *);
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
+extern int generate_netlink_event(u32 orig, enum events event);
 
 #endif /* __THERMAL_H__ */
-- 
cgit v1.1


From 3a37898d507794cfc68a092303e02651d3f01308 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 13 Dec 2010 13:36:15 +0800
Subject: ACPICA: Rename some function and variable names

Some function and variable names are renamed to be consistent with
ACPICA code base.

acpi_raw_enable_gpe -> acpi_ev_add_gpe_reference
acpi_raw_disable_gpe -> acpi_ev_remove_gpe_reference
acpi_gpe_can_wake -> acpi_setup_gpe_for_wake
acpi_gpe_wakeup -> acpi_set_gpe_wake_mask
acpi_update_gpes -> acpi_update_all_gpes
acpi_all_gpes_initialized -> acpi_gbl_all_gpes_initialized
acpi_handler_info -> acpi_gpe_handler_info
...

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h  | 6 +++---
 include/acpi/actypes.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 53b7cfd..5f8ccf1 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -292,11 +292,11 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
-acpi_status acpi_gpe_can_wake(acpi_handle gpe_device, u32 gpe_number);
+acpi_status acpi_setup_gpe_for_wake(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number);
 
-acpi_status acpi_gpe_wakeup(acpi_handle gpe_device, u32 gpe_number, u8 action);
+acpi_status acpi_set_gpe_wake_mask(acpi_handle gpe_device, u32 gpe_number, u8 action);
 
 acpi_status
 acpi_get_gpe_status(acpi_handle gpe_device,
@@ -315,7 +315,7 @@ acpi_install_gpe_block(acpi_handle gpe_device,
 
 acpi_status acpi_remove_gpe_block(acpi_handle gpe_device);
 
-acpi_status acpi_update_gpes(void);
+acpi_status acpi_update_all_gpes(void);
 
 /*
  * Resource interfaces
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 2b134b6..e738939 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -656,11 +656,11 @@ typedef u32 acpi_event_status;
 #define ACPI_GPE_MAX                    0xFF
 #define ACPI_NUM_GPE                    256
 
-/* Actions for acpi_gpe_wakeup, acpi_hw_low_set_gpe */
+/* Actions for acpi_set_gpe_wake_mask, acpi_hw_low_set_gpe */
 
 #define ACPI_GPE_ENABLE                 0
 #define ACPI_GPE_DISABLE                1
-#define ACPI_GPE_COND_ENABLE            2
+#define ACPI_GPE_CONDITIONAL_ENABLE     2
 
 /*
  * GPE info flags - Per GPE
-- 
cgit v1.1


From 8b6cd8ad18def34bfc5045b2a0234329bf94cf78 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 13 Dec 2010 13:38:46 +0800
Subject: ACPICA: New GPE handler callback definition

The new GPE handler callback has 2 additional parameters, gpe_device and
gpe_number.

typedef
u32 (*acpi_gpe_handler) (acpi_handle gpe_device, u32 gpe_number, void *context);

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h  | 4 ++--
 include/acpi/actypes.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 5f8ccf1..b806e56 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -258,11 +258,11 @@ acpi_remove_address_space_handler(acpi_handle device,
 acpi_status
 acpi_install_gpe_handler(acpi_handle gpe_device,
 			 u32 gpe_number,
-			 u32 type, acpi_event_handler address, void *context);
+			 u32 type, acpi_gpe_handler address, void *context);
 
 acpi_status
 acpi_remove_gpe_handler(acpi_handle gpe_device,
-			u32 gpe_number, acpi_event_handler address);
+			u32 gpe_number, acpi_gpe_handler address);
 
 #ifdef ACPI_FUTURE_USAGE
 acpi_status acpi_install_exception_handler(acpi_exception_handler handler);
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index e738939..b9575ad 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -897,6 +897,9 @@ typedef void
 typedef u32(*acpi_event_handler) (void *context);
 
 typedef
+u32 (*acpi_gpe_handler) (acpi_handle gpe_device, u32 gpe_number, void *context);
+
+typedef
 void (*acpi_notify_handler) (acpi_handle device, u32 value, void *context);
 
 typedef
-- 
cgit v1.1


From bba63a296ffab20e08d9e8252d2f0d99050ac859 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 13 Dec 2010 13:39:17 +0800
Subject: ACPICA: Implicit notify support

This feature provides an automatic device notification for wake devices
when a wakeup GPE occurs and there is no corresponding GPE method or
handler. Rather than ignoring such a GPE, an implicit AML Notify
operation is performed on the parent device object.
This feature is not part of the ACPI specification and is provided for
Windows compatibility only.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h  |  6 ++++--
 include/acpi/actypes.h | 37 +++++++++++++++++++++----------------
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index b806e56..9de6a17 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -292,10 +292,12 @@ acpi_status acpi_enable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
 acpi_status acpi_disable_gpe(acpi_handle gpe_device, u32 gpe_number);
 
-acpi_status acpi_setup_gpe_for_wake(acpi_handle gpe_device, u32 gpe_number);
-
 acpi_status acpi_clear_gpe(acpi_handle gpe_device, u32 gpe_number);
 
+acpi_status
+acpi_setup_gpe_for_wake(acpi_handle parent_device,
+			acpi_handle gpe_device, u32 gpe_number);
+
 acpi_status acpi_set_gpe_wake_mask(acpi_handle gpe_device, u32 gpe_number, u8 action);
 
 acpi_status
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index b9575ad..d7274ee 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -664,25 +664,26 @@ typedef u32 acpi_event_status;
 
 /*
  * GPE info flags - Per GPE
- * +-------+---+-+-+
- * |  7:4  |3:2|1|0|
- * +-------+---+-+-+
- *     |     |  | |
- *     |     |  | +--- Interrupt type: edge or level triggered
- *     |     |  +----- GPE can wake the system
- *     |     +-------- Type of dispatch:to method, handler, or none
- *     +-------------- <Reserved>
+ * +-------+-+-+---+
+ * |  7:4  |3|2|1:0|
+ * +-------+-+-+---+
+ *     |    | |  |
+ *     |    | |  +-- Type of dispatch:to method, handler, notify, or none
+ *     |    | +----- Interrupt type: edge or level triggered
+ *     |    +------- Is a Wake GPE
+ *     +------------ <Reserved>
  */
-#define ACPI_GPE_XRUPT_TYPE_MASK        (u8) 0x01
-#define ACPI_GPE_LEVEL_TRIGGERED        (u8) 0x01
-#define ACPI_GPE_EDGE_TRIGGERED         (u8) 0x00
+#define ACPI_GPE_DISPATCH_NONE          (u8) 0x00
+#define ACPI_GPE_DISPATCH_METHOD        (u8) 0x01
+#define ACPI_GPE_DISPATCH_HANDLER       (u8) 0x02
+#define ACPI_GPE_DISPATCH_NOTIFY        (u8) 0x03
+#define ACPI_GPE_DISPATCH_MASK          (u8) 0x03
 
-#define ACPI_GPE_CAN_WAKE		(u8) 0x02
+#define ACPI_GPE_LEVEL_TRIGGERED        (u8) 0x04
+#define ACPI_GPE_EDGE_TRIGGERED         (u8) 0x00
+#define ACPI_GPE_XRUPT_TYPE_MASK        (u8) 0x04
 
-#define ACPI_GPE_DISPATCH_MASK          (u8) 0x0C
-#define ACPI_GPE_DISPATCH_HANDLER       (u8) 0x04
-#define ACPI_GPE_DISPATCH_METHOD        (u8) 0x08
-#define ACPI_GPE_DISPATCH_NOT_USED      (u8) 0x00
+#define ACPI_GPE_CAN_WAKE               (u8) 0x08
 
 /*
  * Flags for GPE and Lock interfaces
@@ -954,6 +955,10 @@ u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported);
 #define ACPI_INTERRUPT_NOT_HANDLED      0x00
 #define ACPI_INTERRUPT_HANDLED          0x01
 
+/* GPE handler return values */
+
+#define ACPI_REENABLE_GPE               0x80
+
 /* Length of 32-bit EISAID values when converted back to a string */
 
 #define ACPI_EISAID_STRING_SIZE         8	/* Includes null terminator */
-- 
cgit v1.1


From a0fcdb237fcd4eaa7e5009b28ef5be07415f287d Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 13 Dec 2010 13:39:26 +0800
Subject: ACPICA: Global event handler

The global event handler is called whenever a general purpose
or fixed ACPI event occurs.

Also update Linux OSL to collect events counter with
global event handler.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h  | 4 ++++
 include/acpi/actypes.h | 8 ++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9de6a17..e8142d5 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -229,6 +229,10 @@ acpi_status
 acpi_install_initialization_handler(acpi_init_handler handler, u32 function);
 
 acpi_status
+acpi_install_global_event_handler(ACPI_GBL_EVENT_HANDLER handler,
+				 void *context);
+
+acpi_status
 acpi_install_fixed_event_handler(u32 acpi_event,
 				 acpi_event_handler handler, void *context);
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index d7274ee..939a431 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -895,6 +895,14 @@ typedef void
 /*
  * Various handlers and callback procedures
  */
+typedef
+void (*ACPI_GBL_EVENT_HANDLER) (u32 event_type,
+			       acpi_handle device,
+			       u32 event_number, void *context);
+
+#define ACPI_EVENT_TYPE_GPE         0
+#define ACPI_EVENT_TYPE_FIXED       1
+
 typedef u32(*acpi_event_handler) (void *context);
 
 typedef
-- 
cgit v1.1


From 1ae5ec903f71c0ffa583ec54d17415892036ee18 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Mon, 27 Dec 2010 09:07:43 +0800
Subject: ACPICA: Update version to 20101209

Version 20101209.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index e8142d5..241b8a0 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20101013
+#define ACPI_CA_VERSION                 0x20101209
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.1


From 25eed40720fc9005c63a1f436e5f8a78836c26ff Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 25 Nov 2010 00:09:15 +0100
Subject: ACPI / PM: Add function for updating device power state consistently

Add function acpi_bus_update_power() for reading the actual power
state of an ACPI device and updating its device->power.state field
in such a way that its power resources' reference counters will
remain consistent with that field.

For this purpose introduce __acpi_bus_set_power() setting the
power state of an ACPI device without updating its
device->power.state field and make acpi_bus_set_power() and
acpi_bus_update_power() use it (acpi_bus_set_power() retains the
current behavior for now).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 359ef11..5d2c4c5 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -330,6 +330,7 @@ acpi_status acpi_bus_get_status_handle(acpi_handle handle,
 int acpi_bus_get_status(struct acpi_device *device);
 int acpi_bus_get_power(acpi_handle handle, int *state);
 int acpi_bus_set_power(acpi_handle handle, int state);
+int acpi_bus_update_power(acpi_handle handle, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
 bool acpi_bus_can_wakeup(acpi_handle handle);
 #ifdef CONFIG_ACPI_PROC_EVENT
-- 
cgit v1.1


From 488a76c52606199100adf09c8eb7cbedbd94e9d9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 25 Nov 2010 00:11:24 +0100
Subject: ACPI / Fan: Rework the handling of power resources

Use the new function acpi_bus_update_power() for manipulating power
resources used by ACPI fan devices, which allows them to be put into
the right state during initialization and resume.  Consequently,
remove the flags.force_power_state field from struct acpi_device,
which is not necessary any more.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 5d2c4c5..8912580 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -149,8 +149,7 @@ struct acpi_device_flags {
 	u32 power_manageable:1;
 	u32 performance_manageable:1;
 	u32 wake_capable:1;	/* Wakeup(_PRW) supported? */
-	u32 force_power_state:1;
-	u32 reserved:22;
+	u32 reserved:23;
 };
 
 /* File System */
-- 
cgit v1.1


From f6767dcf2a4f6e62960912d0affec1e15a246191 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 11 Dec 2010 23:44:39 +0100
Subject: ACPI / PM: Drop acpi_bus_get_power()

There are no more users of acpi_bus_get_power(), so it can be
dropped.  Moreover, it should be dropped, because it modifies
the device->power.state field of an ACPI device without updating
the reference counters of the device's power resources, which is
wrong.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 8912580..673a3f4 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -327,7 +327,6 @@ void acpi_bus_data_handler(acpi_handle handle, void *context);
 acpi_status acpi_bus_get_status_handle(acpi_handle handle,
 				       unsigned long long *sta);
 int acpi_bus_get_status(struct acpi_device *device);
-int acpi_bus_get_power(acpi_handle handle, int *state);
 int acpi_bus_set_power(acpi_handle handle, int state);
 int acpi_bus_update_power(acpi_handle handle, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
-- 
cgit v1.1


From d57d09a480e1db38eeee7629c81289b00f338a15 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 6 Jan 2011 23:41:27 +0100
Subject: ACPI: Drop device flag wake_capable

The wake_capable ACPI device flag is not necessary, because it is
only used in scan.c for recording the information that _PRW is
present for the given device.  That information is only used by
acpi_add_single_object() to decide whether or not to call
acpi_bus_get_wakeup_device_flags(), so the flag may be dropped
if the _PRW check is moved to acpi_bus_get_wakeup_device_flags().
Moreover, acpi_bus_get_wakeup_device_flags() always returns 0,
so it really should be void.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpi_bus.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 20b05cd..78ca429 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -148,8 +148,7 @@ struct acpi_device_flags {
 	u32 suprise_removal_ok:1;
 	u32 power_manageable:1;
 	u32 performance_manageable:1;
-	u32 wake_capable:1;	/* Wakeup(_PRW) supported? */
-	u32 reserved:23;
+	u32 reserved:24;
 };
 
 /* File System */
-- 
cgit v1.1


From d247632c08c674864d438733280422ddb7130ff8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 12 Jan 2011 02:34:59 -0500
Subject: cpuidle: delete NOP CPUIDLE_FLAG_POLL

it serves no purpose

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/cpuidle.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1be416b..dee3285 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -48,7 +48,6 @@ struct cpuidle_state {
 /* Idle State Flags */
 #define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
 #define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
-#define CPUIDLE_FLAG_POLL	(0x10) /* no latency, no savings */
 #define CPUIDLE_FLAG_SHALLOW	(0x20) /* low latency, minimal savings */
 #define CPUIDLE_FLAG_BALANCED	(0x40) /* medium latency, moderate savings */
 #define CPUIDLE_FLAG_DEEP	(0x80) /* high latency, large savings */
-- 
cgit v1.1


From 642f11c53f17aee991d97d14e6922172849ef227 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 12 Jan 2011 02:45:00 -0500
Subject: cpuidle: delete unused CPUIDLE_FLAG_SHALLOW, BALANCED, DEEP
 definitions

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/cpuidle.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index dee3285..c252953 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -48,9 +48,6 @@ struct cpuidle_state {
 /* Idle State Flags */
 #define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
 #define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
-#define CPUIDLE_FLAG_SHALLOW	(0x20) /* low latency, minimal savings */
-#define CPUIDLE_FLAG_BALANCED	(0x40) /* medium latency, moderate savings */
-#define CPUIDLE_FLAG_DEEP	(0x80) /* high latency, large savings */
 #define CPUIDLE_FLAG_IGNORE	(0x100) /* ignore during this idle period */
 #define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */
 
-- 
cgit v1.1


From 956d033fb2eb3f8818260cdf01644bf4dc1a9e33 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 12 Jan 2011 02:51:20 -0500
Subject: cpuidle: CPUIDLE_FLAG_TLB_FLUSHED is specific to intel_idle

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/cpuidle.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index c252953..6be722c 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -49,7 +49,6 @@ struct cpuidle_state {
 #define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
 #define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
 #define CPUIDLE_FLAG_IGNORE	(0x100) /* ignore during this idle period */
-#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
-- 
cgit v1.1


From 5392083748a340f68052c0b83a7ce28b10324972 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 12 Jan 2011 02:56:15 -0500
Subject: cpuidle: CPUIDLE_FLAG_CHECK_BM is omap3_idle specific

Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/cpuidle.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 6be722c..36719ea 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -47,7 +47,6 @@ struct cpuidle_state {
 
 /* Idle State Flags */
 #define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
-#define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
 #define CPUIDLE_FLAG_IGNORE	(0x100) /* ignore during this idle period */
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
-- 
cgit v1.1


From f00c9e44ad1a9660fe8cd3ca15b6cd9497172eab Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 15 Sep 2010 17:38:58 +0200
Subject: quota: Fix deadlock during path resolution

As Al Viro pointed out path resolution during Q_QUOTAON calls to quotactl
is prone to deadlocks. We hold s_umount semaphore for reading during the
path resolution and resolution itself may need to acquire the semaphore
for writing when e. g. autofs mountpoint is passed.

Solve the problem by performing the resolution before we get hold of the
superblock (and thus s_umount semaphore). The whole thing is complicated
by the fact that some filesystems (OCFS2) ignore the path argument. So to
distinguish between filesystem which want the path and which do not we
introduce new .quota_on_meta callback which does not get the path. OCFS2
then uses this callback instead of old .quota_on.

CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Christoph Hellwig <hch@lst.de>
CC: Ted Ts'o <tytso@mit.edu>
CC: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quota.h    | 5 ++++-
 include/linux/quotaops.h | 4 +---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/quota.h b/include/linux/quota.h
index 94c1f03..9a85412 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -322,9 +322,12 @@ struct dquot_operations {
 	qsize_t *(*get_reserved_space) (struct inode *);
 };
 
+struct path;
+
 /* Operations handling requests from userspace */
 struct quotactl_ops {
-	int (*quota_on)(struct super_block *, int, int, char *);
+	int (*quota_on)(struct super_block *, int, int, struct path *);
+	int (*quota_on_meta)(struct super_block *, int, int);
 	int (*quota_off)(struct super_block *, int);
 	int (*quota_sync)(struct super_block *, int, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 223b14c..eb354f6 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -76,11 +76,9 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 int dquot_file_open(struct inode *inode, struct file *file);
 
-int dquot_quota_on(struct super_block *sb, int type, int format_id,
-	char *path);
 int dquot_enable(struct inode *inode, int type, int format_id,
 	unsigned int flags);
-int dquot_quota_on_path(struct super_block *sb, int type, int format_id,
+int dquot_quota_on(struct super_block *sb, int type, int format_id,
  	struct path *path);
 int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
-- 
cgit v1.1


From 2fc72c7b84002ffb3c66918e2a7b0ee607d8b5aa Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@balabit.hu>
Date: Wed, 12 Jan 2011 20:25:08 +0100
Subject: netfilter: fix compilation when conntrack is disabled but tproxy is
 enabled

The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but
failed to update the #ifdef stanzas guarding the defragmentation related
fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c.

This patch adds the required #ifdefs so that IPv6 tproxy can truly be used
without connection tracking.

Original report:
http://marc.info/?l=linux-netdev&m=129010118516341&w=2

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/skbuff.h                         | 15 +++++++++++++++
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 10 ----------
 include/net/netfilter/ipv6/nf_defrag_ipv6.h    | 10 ++++++++++
 3 files changed, 25 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 20ec0a64..bf221d6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
+    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
+#endif
+
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -362,6 +367,8 @@ struct sk_buff {
 	void			(*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	struct sk_buff		*nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -2057,6 +2064,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 	if (nfct)
 		atomic_inc(&nfct->use);
 }
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
 {
 	if (skb)
@@ -2085,6 +2094,8 @@ static inline void nf_reset(struct sk_buff *skb)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 	skb->nfct_reasm = NULL;
 #endif
@@ -2101,6 +2112,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	dst->nfct_reasm = src->nfct_reasm;
 	nf_conntrack_get_reasm(src->nfct_reasm);
 #endif
@@ -2114,6 +2127,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(dst->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(dst->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 1ee717e..a4c9936 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
-			       struct net_device *in,
-			       struct net_device *out,
-			       int (*okfn)(struct sk_buff *));
-
-struct inet_frags_ctl;
-
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
 
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 94dd54d..fd79c9a 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -3,4 +3,14 @@
 
 extern void nf_defrag_ipv6_enable(void);
 
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+			       struct net_device *in,
+			       struct net_device *out,
+			       int (*okfn)(struct sk_buff *));
+
+struct inet_frags_ctl;
+
 #endif /* _NF_DEFRAG_IPV6_H */
-- 
cgit v1.1


From 6fed05c9c9812b5882bc708f4da4fa8d5df2875c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 12 Jan 2011 22:03:20 +0100
Subject: ACPI / PM: Fix build problems for !CONFIG_ACPI related to NVS rework

The recent rework of the NVS saving/restoring code introduced two
build issues for !CONFIG_ACPI, a warning in drivers/acpi/internal.h
and an error in arch/x86/kernel/e820.c.

Fix them by providing suitable static inline definitions of the
relevant functions.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fa7ed6a98..eb176bb 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -254,15 +254,6 @@ void __init acpi_old_suspend_ordering(void);
 void __init acpi_nvs_nosave(void);
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_ACPI_SLEEP
-int suspend_nvs_register(unsigned long start, unsigned long size);
-#else
-static inline int suspend_nvs_register(unsigned long a, unsigned long b)
-{
-	return 0;
-}
-#endif
-
 struct acpi_osc_context {
 	char *uuid_str; /* uuid string */
 	int rev;
@@ -361,4 +352,14 @@ static inline int acpi_table_parse(char *id,
 	return -1;
 }
 #endif	/* !CONFIG_ACPI */
+
+#ifdef CONFIG_ACPI_SLEEP
+int suspend_nvs_register(unsigned long start, unsigned long size);
+#else
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.1


From da995a8aee044bc5d0847e19e351cd48a2cb8bcc Mon Sep 17 00:00:00 2001
From: Aleksey Senin <alex@senin.name>
Date: Thu, 2 Dec 2010 11:44:49 +0000
Subject: IB/mlx4: Handle protocol field in multicast table

The newest device firmware stores IB vs. Ethernet protocol in two bits
in members_count field of multicast group table (0: Infiniband, 1:
Ethernet).  When changing the QP members count for a multicast group,
it important not to reset this information.  When calling multicast
attach first time, the protocol type should be specified.  In this
patch we always set it IB, but in the future we will handle Ethernet
too.  When looking for a QP, the protocol type shoud be checked too.

Signed-off-by: Aleksey Senin <alekseys@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/linux/mlx4/device.h | 10 ++++++++--
 include/linux/mlx4/driver.h |  6 +-----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a7b15bc..0492146 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -144,6 +144,11 @@ enum {
 	MLX4_STAT_RATE_OFFSET	= 5
 };
 
+enum mlx4_protocol {
+	MLX4_PROTOCOL_IB,
+	MLX4_PROTOCOL_EN,
+};
+
 enum {
 	MLX4_MTT_FLAG_PRESENT		= 1
 };
@@ -500,8 +505,9 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
-			  int block_mcast_loopback);
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
+			  int block_mcast_loopback, enum mlx4_protocol protocol);
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol protocol);
 
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index f407cd4..e1eebf7 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -34,6 +34,7 @@
 #define MLX4_DRIVER_H
 
 #include <linux/device.h>
+#include <linux/mlx4/device.h>
 
 struct mlx4_dev;
 
@@ -44,11 +45,6 @@ enum mlx4_dev_event {
 	MLX4_DEV_EVENT_PORT_REINIT,
 };
 
-enum mlx4_protocol {
-	MLX4_PROTOCOL_IB,
-	MLX4_PROTOCOL_EN,
-};
-
 struct mlx4_interface {
 	void *			(*add)	 (struct mlx4_dev *dev);
 	void			(*remove)(struct mlx4_dev *dev, void *context);
-- 
cgit v1.1


From 838b4dc6d8a6bd83a93077ebc6873712c65bf85e Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 12 Jan 2011 15:42:32 +0000
Subject: sched: remove unused backlog in RED stats

The RED statistics structure includes backlog field which is not
set or used by any code.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/red.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/red.h b/include/net/red.h
index 995108e..3319f16 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -97,7 +97,6 @@ struct red_stats {
 	u32		forced_mark;	/* Forced marks, qavg > max_thresh */
 	u32		pdrop;          /* Drops due to queue limits */
 	u32		other;          /* Drops due to drop() calls */
-	u32		backlog;
 };
 
 struct red_parms {
-- 
cgit v1.1


From 84c89557a302e18414a011cc52b1abd034860743 Mon Sep 17 00:00:00 2001
From: Peter Jones <pjones@redhat.com>
Date: Thu, 13 Jan 2011 19:59:47 +0000
Subject: dm ioctl: allow rename to fill empty uuid

Allow the uuid of a mapped device to be set after device creation.
Previously the uuid (which is optional) could only be set by
DM_DEV_CREATE.  If no uuid was supplied it could not be set later.

Sometimes it's necessary to create the device before the uuid is known,
and in such cases the uuid must be filled in after the creation.

This patch extends DM_DEV_RENAME to accept a uuid accompanied by
a new flag DM_UUID_FLAG.  This can only be done once and if no
uuid was previously supplied.  It cannot be used to change an
existing uuid.

DM_VERSION_MINOR is also bumped to 19 to indicate this interface
extension is available.

Signed-off-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/dm-ioctl.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 49eab36..e453e11 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -44,7 +44,7 @@
  * Remove a device, destroy any tables.
  *
  * DM_DEV_RENAME:
- * Rename a device.
+ * Rename a device or set its uuid if none was previously supplied.
  *
  * DM_SUSPEND:
  * This performs both suspend and resume, depending which flag is
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	18
+#define DM_VERSION_MINOR	19
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2010-06-29)"
+#define DM_VERSION_EXTRA	"-ioctl (2010-10-14)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -322,4 +322,10 @@ enum {
  */
 #define DM_UEVENT_GENERATED_FLAG	(1 << 13) /* Out */
 
+/*
+ * If set, rename changes the uuid not the name.  Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG			(1 << 14) /* In */
+
 #endif				/* _LINUX_DM_IOCTL_H */
-- 
cgit v1.1


From 86a54a4802df10d23ccd655e2083e812fe990243 Mon Sep 17 00:00:00 2001
From: Jonathan Brassow <jbrassow@redhat.com>
Date: Thu, 13 Jan 2011 19:59:52 +0000
Subject: dm log userspace: add version number to comms

This patch adds a 'version' field to the 'dm_ulog_request'
structure.

The 'version' field is taken from a portion of the unused
'padding' field in the 'dm_ulog_request' structure.  This
was done to avoid changing the size of the structure and
possibly disrupting backwards compatibility.

The version number will help notify user-space daemons
when a change has been made to the kernel/userspace
log API.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/dm-log-userspace.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
index 0c3c3a2..eeace7d 100644
--- a/include/linux/dm-log-userspace.h
+++ b/include/linux/dm-log-userspace.h
@@ -370,6 +370,16 @@
 #define DM_ULOG_REQUEST_TYPE(request_type) \
 	(DM_ULOG_REQUEST_MASK & (request_type))
 
+/*
+ * DM_ULOG_REQUEST_VERSION is incremented when there is a
+ * change to the way information is passed between kernel
+ * and userspace.  This could be a structure change of
+ * dm_ulog_request or a change in the way requests are
+ * issued/handled.  Changes are outlined here:
+ *	version 1:  Initial implementation
+ */
+#define DM_ULOG_REQUEST_VERSION 1
+
 struct dm_ulog_request {
 	/*
 	 * The local unique identifier (luid) and the universally unique
@@ -383,8 +393,9 @@ struct dm_ulog_request {
 	 */
 	uint64_t luid;
 	char uuid[DM_UUID_LEN];
-	char padding[7];        /* Padding because DM_UUID_LEN = 129 */
+	char padding[3];        /* Padding because DM_UUID_LEN = 129 */
 
+	uint32_t version;       /* See DM_ULOG_REQUEST_VERSION */
 	int32_t error;          /* Used to report back processing errors */
 
 	uint32_t seq;           /* Sequence number for request */
-- 
cgit v1.1


From 9c4376de98719d2768dd919553843de34bb094a6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 13 Jan 2011 19:59:58 +0000
Subject: dm: use non reentrant workqueues if equivalent

kmirrord_wq, kcopyd_work and md->wq are created per dm instance and
serve only a single work item from the dm instance, so non-reentrant
workqueues would provide the same ordering guarantees as ordered ones
while allowing CPU affinity and use of the workqueues for other
purposes.  Switch them to non-reentrant workqueues.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/dm-ioctl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index e453e11..78bbf47 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -268,8 +268,8 @@ enum {
 
 #define DM_VERSION_MAJOR	4
 #define DM_VERSION_MINOR	19
-#define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2010-10-14)"
+#define DM_VERSION_PATCHLEVEL	1
+#define DM_VERSION_EXTRA	"-ioctl (2011-01-07)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.1


From 9d357b0787bb3c91835d5e658c3bda178f9ca419 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jan 2011 20:00:01 +0000
Subject: dm: introduce target callbacks and congestion callback

DM currently implements congestion checking by checking on congestion
in each component device.  For raid456 we need to also check if the
stripe cache is congested.

Add per-target congestion checker callback support.

Extending the target_callbacks structure with additional callback
functions allows for establishing multiple callbacks per-target (a
callback is also needed for unplug).

Cc: linux-raid@vger.kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 2970022..4b1c63d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -193,6 +193,12 @@ struct dm_target {
 	char *error;
 };
 
+/* Each target can link one of these into the table */
+struct dm_target_callbacks {
+	struct list_head list;
+	int (*congested_fn) (struct dm_target_callbacks *, int);
+};
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
@@ -269,6 +275,11 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 			sector_t start, sector_t len, char *params);
 
 /*
+ * Target_ctr should call this if it needs to add any callbacks.
+ */
+void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);
+
+/*
  * Finally call this to make the table ready for use.
  */
 int dm_table_complete(struct dm_table *t);
-- 
cgit v1.1


From 99d03c141b40914b67d63c9d23b8da4386422ed7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 13 Jan 2011 20:00:02 +0000
Subject: dm: per target unplug callback support

Add per-target unplug callback support.

Cc: linux-raid@vger.kernel.org
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 4b1c63d..272496d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -197,6 +197,7 @@ struct dm_target {
 struct dm_target_callbacks {
 	struct list_head list;
 	int (*congested_fn) (struct dm_target_callbacks *, int);
+	void (*unplug_fn)(struct dm_target_callbacks *);
 };
 
 int dm_register_target(struct target_type *t);
-- 
cgit v1.1


From 8d661f1e462d50bd83de87ee628aaf820ce3c66c Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Tue, 11 Jan 2011 16:14:24 -0800
Subject: ieee80211: correct IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK macro

It is defined in include/linux/ieee80211.h. As per IEEE spec.
bit6 to bit15 in block ack parameter represents buffer size.
So the bitmask should be 0xFFC0.

Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Bing Zhao <bzhao@marvell.com>
Cc: stable@kernel.org
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 6042228..294169e 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -959,7 +959,7 @@ struct ieee80211_ht_info {
 /* block-ack parameters */
 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002
 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C
-#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0
+#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0
 #define IEEE80211_DELBA_PARAM_TID_MASK 0xF000
 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
 
-- 
cgit v1.1


From d8a3515e2a9523f8ed56d1f4537d16338bda2bc2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 13 Jan 2011 17:26:46 -0800
Subject: Revert "gpiolib: annotate gpio-intialization with __must_check"

This reverts commit 0fdae42d361bbb431ca0ab0efed5126a94821177, which
wasn't really supposed to go in, and causes lots of annoying warnings.

Quoth Andrew:
  "Complete brainfart - I meant to drop that patch ages ago."

Quoth Greg:
  "Ick, yeah, that patch isn't ok to go in as-is, all of the callers
   need to be fixed up first, which is what I thought we had agreed on..."

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/gpio.h | 10 +++++-----
 include/linux/gpio.h       |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6098cae..ff5c660 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -147,11 +147,11 @@ extern struct gpio_chip *gpiochip_find(void *data,
 /* Always use the library code for GPIO management calls,
  * or when sleeping may be involved.
  */
-extern int __must_check gpio_request(unsigned gpio, const char *label);
+extern int gpio_request(unsigned gpio, const char *label);
 extern void gpio_free(unsigned gpio);
 
-extern int __must_check gpio_direction_input(unsigned gpio);
-extern int __must_check gpio_direction_output(unsigned gpio, int value);
+extern int gpio_direction_input(unsigned gpio);
+extern int gpio_direction_output(unsigned gpio, int value);
 
 extern int gpio_set_debounce(unsigned gpio, unsigned debounce);
 
@@ -192,8 +192,8 @@ struct gpio {
 	const char	*label;
 };
 
-extern int __must_check gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
-extern int __must_check gpio_request_array(struct gpio *array, size_t num);
+extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
+extern int gpio_request_array(struct gpio *array, size_t num);
 extern void gpio_free_array(struct gpio *array, size_t num);
 
 #ifdef CONFIG_GPIO_SYSFS
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index f79d67f..4b47ed9 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -30,7 +30,7 @@ static inline int gpio_is_valid(int number)
 	return 0;
 }
 
-static inline int __must_check gpio_request(unsigned gpio, const char *label)
+static inline int gpio_request(unsigned gpio, const char *label)
 {
 	return -ENOSYS;
 }
@@ -62,12 +62,12 @@ static inline void gpio_free_array(struct gpio *array, size_t num)
 	WARN_ON(1);
 }
 
-static inline int __must_check gpio_direction_input(unsigned gpio)
+static inline int gpio_direction_input(unsigned gpio)
 {
 	return -ENOSYS;
 }
 
-static inline int __must_check gpio_direction_output(unsigned gpio, int value)
+static inline int gpio_direction_output(unsigned gpio, int value)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.1


From 6c9ae009b298753a3baf71298d676a68b5a10c8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 13 Jan 2011 15:45:38 -0800
Subject: irq: use per_cpu kstat_irqs

Use modern per_cpu API to increment {soft|hard}irq counters, and use
per_cpu allocation for (struct irq_desc)->kstats_irq instead of an array.

This gives better SMP/NUMA locality and saves few instructions per irq.

With small nr_cpuids values (8 for example), kstats_irq was a small array
(less than L1_CACHE_BYTES), potentially source of false sharing.

In the !CONFIG_SPARSE_IRQ case, remove the huge, NUMA/cache unfriendly
kstat_irqs_all[NR_IRQS][NR_CPUS] array.

Note: we still populate kstats_irq for all possible irqs in
early_irq_init().  We probably could use on-demand allocations.  (Code
included in alloc_descs()).  Problem is not all IRQS are used with a prior
alloc_descs() call.

kstat_irqs_this_cpu() is not used anymore, remove it.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/irqdesc.h     |  2 +-
 include/linux/kernel_stat.h | 19 +++++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 979c68c..6a64c6f 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -57,7 +57,7 @@ struct irq_desc {
 #endif
 
 	struct timer_rand_state *timer_rand_state;
-	unsigned int		*kstat_irqs;
+	unsigned int __percpu	*kstat_irqs;
 	irq_flow_handler_t	handle_irq;
 	struct irqaction	*action;	/* IRQ action list */
 	unsigned int		status;		/* IRQ status */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44e83ba..0cce2db 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -46,16 +46,14 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 extern unsigned long long nr_context_switches(void);
 
 #ifndef CONFIG_GENERIC_HARDIRQS
-#define kstat_irqs_this_cpu(irq) \
-	(this_cpu_read(kstat.irqs[irq])
 
 struct irq_desc;
 
 static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
 					    struct irq_desc *desc)
 {
-	kstat_this_cpu.irqs[irq]++;
-	kstat_this_cpu.irqs_sum++;
+	__this_cpu_inc(kstat.irqs[irq]);
+	__this_cpu_inc(kstat.irqs_sum);
 }
 
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
@@ -65,17 +63,18 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 #else
 #include <linux/irq.h>
 extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
-#define kstat_irqs_this_cpu(DESC) \
-	((DESC)->kstat_irqs[smp_processor_id()])
-#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\
-	((DESC)->kstat_irqs[smp_processor_id()]++);\
-	kstat_this_cpu.irqs_sum++; } while (0)
+
+#define kstat_incr_irqs_this_cpu(irqno, DESC)		\
+do {							\
+	__this_cpu_inc(*(DESC)->kstat_irqs);		\
+	__this_cpu_inc(kstat.irqs_sum);			\
+} while (0)
 
 #endif
 
 static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
 {
-	kstat_this_cpu.softirqs[irq]++;
+	__this_cpu_inc(kstat.softirqs[irq]);
 }
 
 static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
-- 
cgit v1.1


From 43bb40c9e3aa51a3b038c9df2c9afb4d4685614d Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 13 Jan 2011 15:45:40 -0800
Subject: sched: remove long deprecated CLONE_STOPPED flag

This warning was added in commit bdff746a3915 ("clone: prepare to recycle
CLONE_STOPPED") three years ago.  2.6.26 came and went.  As far as I know,
no-one is actually using CLONE_STOPPED.

Signed-off-by: Dave Jones <davej@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 96e2321..0740253 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -21,7 +21,8 @@
 #define CLONE_DETACHED		0x00400000	/* Unused, ignored */
 #define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
-#define CLONE_STOPPED		0x02000000	/* Start in stopped state */
+/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
+   and is now available for re-use. */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
-- 
cgit v1.1


From 88f5acf88ae6a9778f6d25d0d5d7ec2d57764a97 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:41 -0800
Subject: mm: page allocator: adjust the per-cpu counter threshold when memory
 is low

Commit aa45484 ("calculate a better estimate of NR_FREE_PAGES when memory
is low") noted that watermarks were based on the vmstat NR_FREE_PAGES.  To
avoid synchronization overhead, these counters are maintained on a per-cpu
basis and drained both periodically and when a threshold is above a
threshold.  On large CPU systems, the difference between the estimate and
real value of NR_FREE_PAGES can be very high.  The system can get into a
case where pages are allocated far below the min watermark potentially
causing livelock issues.  The commit solved the problem by taking a better
reading of NR_FREE_PAGES when memory was low.

Unfortately, as reported by Shaohua Li this accurate reading can consume a
large amount of CPU time on systems with many sockets due to cache line
bouncing.  This patch takes a different approach.  For large machines
where counter drift might be unsafe and while kswapd is awake, the per-cpu
thresholds for the target pgdat are reduced to limit the level of drift to
what should be a safe level.  This incurs a performance penalty in heavy
memory pressure by a factor that depends on the workload and the machine
but the machine should function correctly without accidentally exhausting
all memory on a node.  There is an additional cost when kswapd wakes and
sleeps but the event is not expected to be frequent - in Shaohua's test
case, there was one recorded sleep and wake event at least.

To ensure that kswapd wakes up, a safe version of zone_watermark_ok() is
introduced that takes a more accurate reading of NR_FREE_PAGES when called
from wakeup_kswapd, when deciding whether it is really safe to go back to
sleep in sleeping_prematurely() and when deciding if a zone is really
balanced or not in balance_pgdat().  We are still using an expensive
function but limiting how often it is called.

When the test case is reproduced, the time spent in the watermark
functions is reduced.  The following report is on the percentage of time
spent cumulatively spent in the functions zone_nr_free_pages(),
zone_watermark_ok(), __zone_watermark_ok(), zone_watermark_ok_safe(),
zone_page_state_snapshot(), zone_page_state().

vanilla                      11.6615%
disable-threshold            0.2584%

David said:

: We had to pull aa454840 "mm: page allocator: calculate a better estimate
: of NR_FREE_PAGES when memory is low and kswapd is awake" from 2.6.36
: internally because tests showed that it would cause the machine to stall
: as the result of heavy kswapd activity.  I merged it back with this fix as
: it is pending in the -mm tree and it solves the issue we were seeing, so I
: definitely think this should be pushed to -stable (and I would seriously
: consider it for 2.6.37 inclusion even at this late date).

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reported-by: Shaohua Li <shaohua.li@intel.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Tested-by: Nicolas Bareil <nico@chdir.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: <stable@kernel.org>		[2.6.37.1, 2.6.36.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 10 +++-------
 include/linux/vmstat.h |  5 +++++
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 39c24eb..4890662 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -458,12 +458,6 @@ static inline int zone_is_oom_locked(const struct zone *zone)
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
 }
 
-#ifdef CONFIG_SMP
-unsigned long zone_nr_free_pages(struct zone *zone);
-#else
-#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
-#endif /* CONFIG_SMP */
-
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -661,7 +655,9 @@ typedef struct pglist_data {
 extern struct mutex zonelists_mutex;
 void build_all_zonelists(void *data);
 void wakeup_kswapd(struct zone *zone, int order);
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
 enum memmap_context {
 	MEMMAP_EARLY,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index eaaea37..e4cc21c 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,6 +254,8 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
+void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
+void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
 #else /* CONFIG_SMP */
 
 /*
@@ -298,6 +300,9 @@ static inline void __dec_zone_page_state(struct page *page,
 #define dec_zone_page_state __dec_zone_page_state
 #define mod_zone_page_state __mod_zone_page_state
 
+static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+
 static inline void refresh_cpu_vm_stats(int cpu) { }
 #endif
 
-- 
cgit v1.1


From b44129b30652c8771db2265939bb8b463724043d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:43 -0800
Subject: mm: vmstat: use a single setter function and callback for adjusting
 percpu thresholds

reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
errors due to counter drift.  The functions duplicate some code so this
patch replaces them with a single set_pgdat_percpu_threshold() that takes
a callback function to calculate the desired threshold as a parameter.

[akpm@linux-foundation.org: readability tweak]
[kosaki.motohiro@jp.fujitsu.com: set_pgdat_percpu_threshold(): don't use for_each_online_cpu]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e4cc21c..833e676 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,8 +254,11 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+				int (*calculate_pressure)(struct zone *));
 #else /* CONFIG_SMP */
 
 /*
@@ -300,8 +303,7 @@ static inline void __dec_zone_page_state(struct page *page,
 #define dec_zone_page_state __dec_zone_page_state
 #define mod_zone_page_state __mod_zone_page_state
 
-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
 #endif
-- 
cgit v1.1


From 71927e84e0aebfbe5a91565c3b207af25a4e9162 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Thu, 13 Jan 2011 15:45:46 -0800
Subject: writeback: trace wakeup event for background writeback

This tracks when balance_dirty_pages() tries to wakeup the flusher thread
for background writeback (if it was not started already).

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Engelhardt <jengelh@medozas.de>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/writeback.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 89a2b2d..4e249b9 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -81,6 +81,7 @@ DEFINE_EVENT(writeback_class, name, \
 	TP_ARGS(bdi))
 
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_wake_background);
 DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
 DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
-- 
cgit v1.1


From b7aba6984dc048503b69c2a885098cdd430832bf Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:54 -0800
Subject: mm: compaction: add trace events for memory compaction activity

In preparation for a patches promoting the use of memory compaction over
lumpy reclaim, this patch adds trace points for memory compaction
activity.  Using them, we can monitor the scanning activity of the
migration and free page scanners as well as the number and success rates
of pages passed to page migration.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/compaction.h | 74 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 include/trace/events/compaction.h

(limited to 'include')

diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
new file mode 100644
index 0000000..388bcdd
--- /dev/null
+++ b/include/trace/events/compaction.h
@@ -0,0 +1,74 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM compaction
+
+#if !defined(_TRACE_COMPACTION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_COMPACTION_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include "gfpflags.h"
+
+DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
+
+	TP_PROTO(unsigned long nr_scanned,
+		unsigned long nr_taken),
+
+	TP_ARGS(nr_scanned, nr_taken),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr_scanned)
+		__field(unsigned long, nr_taken)
+	),
+
+	TP_fast_assign(
+		__entry->nr_scanned = nr_scanned;
+		__entry->nr_taken = nr_taken;
+	),
+
+	TP_printk("nr_scanned=%lu nr_taken=%lu",
+		__entry->nr_scanned,
+		__entry->nr_taken)
+);
+
+DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_migratepages,
+
+	TP_PROTO(unsigned long nr_scanned,
+		unsigned long nr_taken),
+
+	TP_ARGS(nr_scanned, nr_taken)
+);
+
+DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
+	TP_PROTO(unsigned long nr_scanned,
+		unsigned long nr_taken),
+
+	TP_ARGS(nr_scanned, nr_taken)
+);
+
+TRACE_EVENT(mm_compaction_migratepages,
+
+	TP_PROTO(unsigned long nr_migrated,
+		unsigned long nr_failed),
+
+	TP_ARGS(nr_migrated, nr_failed),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, nr_migrated)
+		__field(unsigned long, nr_failed)
+	),
+
+	TP_fast_assign(
+		__entry->nr_migrated = nr_migrated;
+		__entry->nr_failed = nr_failed;
+	),
+
+	TP_printk("nr_migrated=%lu nr_failed=%lu",
+		__entry->nr_migrated,
+		__entry->nr_failed)
+);
+
+
+#endif /* _TRACE_COMPACTION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.1


From ee64fc9354e515a79c7232cfde65c88ec627308b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:55 -0800
Subject: mm: vmscan: convert lumpy_mode into a bitmask

Currently lumpy_mode is an enum and determines if lumpy reclaim is off,
syncronous or asyncronous.  In preparation for using compaction instead of
lumpy reclaim, this patch converts the flags into a bitmap.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/vmscan.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index c255fcc..be76429 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -25,13 +25,13 @@
 
 #define trace_reclaim_flags(page, sync) ( \
 	(page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
-	(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC)   \
+	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC)   \
 	)
 
 #define trace_shrink_flags(file, sync) ( \
-	(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \
+	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \
 			(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) |  \
-	(sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
+	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
 	)
 
 TRACE_EVENT(mm_vmscan_kswapd_sleep,
-- 
cgit v1.1


From 3e7d344970673c5334cf7b5bb27c8c0942b06126 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:56 -0800
Subject: mm: vmscan: reclaim order-0 and use compaction instead of lumpy
 reclaim

Lumpy reclaim is disruptive.  It reclaims a large number of pages and
ignores the age of the pages it reclaims.  This can incur significant
stalls and potentially increase the number of major faults.

Compaction has reached the point where it is considered reasonably stable
(meaning it has passed a lot of testing) and is a potential candidate for
displacing lumpy reclaim.  This patch introduces an alternative to lumpy
reclaim whe compaction is available called reclaim/compaction.  The basic
operation is very simple - instead of selecting a contiguous range of
pages to reclaim, a number of order-0 pages are reclaimed and then
compaction is later by either kswapd (compact_zone_order()) or direct
compaction (__alloc_pages_direct_compact()).

[akpm@linux-foundation.org: fix build]
[akpm@linux-foundation.org: use conventional task_struct naming]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 14 ++++++++++++++
 include/linux/kernel.h     |  7 +++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 5ac5155..2592883 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,6 +22,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask);
+extern unsigned long compaction_suitable(struct zone *zone, int order);
+extern unsigned long compact_zone_order(struct zone *zone, int order,
+						gfp_t gfp_mask);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -59,6 +62,17 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	return COMPACT_CONTINUE;
 }
 
+static inline unsigned long compaction_suitable(struct zone *zone, int order)
+{
+	return COMPACT_SKIPPED;
+}
+
+static inline unsigned long compact_zone_order(struct zone *zone, int order,
+						gfp_t gfp_mask)
+{
+	return 0;
+}
+
 static inline void defer_compaction(struct zone *zone)
 {
 }
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 57dac70..5a9d905 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -600,6 +600,13 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* This helps us avoid #ifdef CONFIG_COMPACTION */
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_BUILD 1
+#else
+#define COMPACTION_BUILD 0
+#endif
+
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
-- 
cgit v1.1


From 77f1fe6b08b13a87391549c8a820ddc817b6f50e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:57 -0800
Subject: mm: migration: allow migration to operate asynchronously and avoid
 synchronous compaction in the faster path

Migration synchronously waits for writeback if the initial passes fails.
Callers of memory compaction do not necessarily want this behaviour if the
caller is latency sensitive or expects that synchronous migration is not
going to have a significantly better success rate.

This patch adds a sync parameter to migrate_pages() allowing the caller to
indicate if wait_on_page_writeback() is allowed within migration or not.
For reclaim/compaction, try_to_compact_pages() is first called
asynchronously, direct reclaim runs and then try_to_compact_pages() is
called synchronously as there is a greater expectation that it'll succeed.

[akpm@linux-foundation.org: build/merge fix]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 10 ++++++----
 include/linux/migrate.h    | 12 ++++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 2592883..72cba40 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -21,10 +21,11 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
-			int order, gfp_t gfp_mask, nodemask_t *mask);
+			int order, gfp_t gfp_mask, nodemask_t *mask,
+			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-						gfp_t gfp_mask);
+						gfp_t gfp_mask, bool sync);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -57,7 +58,8 @@ static inline bool compaction_deferred(struct zone *zone)
 
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
-			int order, gfp_t gfp_mask, nodemask_t *nodemask)
+			int order, gfp_t gfp_mask, nodemask_t *nodemask,
+			bool sync)
 {
 	return COMPACT_CONTINUE;
 }
@@ -68,7 +70,7 @@ static inline unsigned long compaction_suitable(struct zone *zone, int order)
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-						gfp_t gfp_mask)
+						gfp_t gfp_mask, bool sync)
 {
 	return 0;
 }
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 085527f..fa31902 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,9 +13,11 @@ extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
-			unsigned long private, int offlining);
+			unsigned long private, int offlining,
+			bool sync);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
-			unsigned long private, int offlining);
+			unsigned long private, int offlining,
+			bool sync);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -33,9 +35,11 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
-		unsigned long private, int offlining) { return -ENOSYS; }
+		unsigned long private, int offlining,
+		bool sync) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
-		unsigned long private, int offlining) { return -ENOSYS; }
+		unsigned long private, int offlining,
+		bool sync) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
-- 
cgit v1.1


From 7f0f24967b0349798803260b2e4bf347cffa1990 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:45:58 -0800
Subject: mm: migration: cleanup migrate_pages API by matching types for
 offlining and sync

With the introduction of the boolean sync parameter, the API looks a
little inconsistent as offlining is still an int.  Convert offlining to a
bool for the sake of being tidy.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index fa31902..e39aeec 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,10 +13,10 @@ extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
-			unsigned long private, int offlining,
+			unsigned long private, bool offlining,
 			bool sync);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
-			unsigned long private, int offlining,
+			unsigned long private, bool offlining,
 			bool sync);
 
 extern int fail_migrate_page(struct address_space *,
@@ -35,10 +35,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
-		unsigned long private, int offlining,
+		unsigned long private, bool offlining,
 		bool sync) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
-		unsigned long private, int offlining,
+		unsigned long private, bool offlining,
 		bool sync) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
-- 
cgit v1.1


From f3a310bc4e5ce7e55e1c8e25c31e63af017f3e50 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:46:00 -0800
Subject: mm: vmscan: rename lumpy_mode to reclaim_mode

With compaction being used instead of lumpy reclaim, the name lumpy_mode
and associated variables is a bit misleading.  Rename lumpy_mode to
reclaim_mode which is a better fit.  There is no functional change.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/vmscan.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index be76429..ea422aa 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -25,13 +25,13 @@
 
 #define trace_reclaim_flags(page, sync) ( \
 	(page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
-	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC)   \
+	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC)   \
 	)
 
 #define trace_shrink_flags(file, sync) ( \
-	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \
+	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \
 			(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) |  \
-	(sync & LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
+	(sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
 	)
 
 TRACE_EVENT(mm_vmscan_kswapd_sleep,
-- 
cgit v1.1


From e5a5623b28198aa91ea71ee5d3846757fc76bc87 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 13 Jan 2011 15:46:00 -0800
Subject: mm: remove unused get_vm_area_node

get_vm_area_node() is unused in the kernel and can thus be removed.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 44b54f6..cb73c755 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -90,9 +90,6 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 					unsigned long flags,
 					unsigned long start, unsigned long end,
 					void *caller);
-extern struct vm_struct *get_vm_area_node(unsigned long size,
-					  unsigned long flags, int node,
-					  gfp_t gfp_mask);
 extern struct vm_struct *remove_vm_area(const void *addr);
 
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
-- 
cgit v1.1


From ec3f64fc9c196a304c4b7db3e1ff56d640628509 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 13 Jan 2011 15:46:01 -0800
Subject: mm: remove gfp mask from pcpu_get_vm_areas

pcpu_get_vm_areas() only uses GFP_KERNEL allocations, so remove the gfp_t
formal and use the mask internally.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index cb73c755..c7348b8 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -117,7 +117,7 @@ extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 #ifdef CONFIG_SMP
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     const size_t *sizes, int nr_vms,
-				     size_t align, gfp_t gfp_mask);
+				     size_t align);
 
 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
 #endif
-- 
cgit v1.1


From d0a21265dfb5fa8ae54e90d0fb6d1c215b10a28a Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Thu, 13 Jan 2011 15:46:02 -0800
Subject: mm: unify module_alloc code for vmalloc

Four architectures (arm, mips, sparc, x86) use __vmalloc_area() for
module_init().  Much of the code is duplicated and can be generalized in a
globally accessible function, __vmalloc_node_range().

__vmalloc_node() now calls into __vmalloc_node_range() with a range of
[VMALLOC_START, VMALLOC_END) for functionally equivalent behavior.

Each architecture may then use __vmalloc_node_range() directly to remove
the duplication of code.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c7348b8..4ed6fcd 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -59,8 +59,9 @@ extern void *vmalloc_exec(unsigned long size);
 extern void *vmalloc_32(unsigned long size);
 extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
-extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
-				pgprot_t prot);
+extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
+			unsigned long start, unsigned long end, gfp_t gfp_mask,
+			pgprot_t prot, int node, void *caller);
 extern void vfree(const void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
-- 
cgit v1.1


From dabb16f639820267b3850d804571c70bd93d4e07 Mon Sep 17 00:00:00 2001
From: Mandeep Singh Baines <msb@chromium.org>
Date: Thu, 13 Jan 2011 15:46:05 -0800
Subject: oom: allow a non-CAP_SYS_RESOURCE proces to oom_score_adj down

We'd like to be able to oom_score_adj a process up/down as it
enters/leaves the foreground.  Currently, it is not possible to oom_adj
down without CAP_SYS_RESOURCE.  This patch allows a task to decrease its
oom_score_adj back to the value that a CAP_SYS_RESOURCE thread set it to
or its inherited value at fork.  Assuming the thread that has forked it
has oom_score_adj of 0, each process could decrease it back from 0 upon
activation unless a CAP_SYS_RESOURCE thread elevated it to something
higher.

Alternative considered:

* a setuid binary
* a daemon with CAP_SYS_RESOURCE

Since you don't wan't all processes to be able to reduce their oom_adj, a
setuid or daemon implementation would be complex.  The alternatives also
have much higher overhead.

This patch updated from original patch based on feedback from David
Rientjes.

Signed-off-by: Mandeep Singh Baines <msb@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0740253..f23b5bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -634,6 +634,8 @@ struct signal_struct {
 
 	int oom_adj;		/* OOM kill score adjustment (bit shift) */
 	int oom_score_adj;	/* OOM kill score adjustment */
+	int oom_score_adj_min;	/* OOM kill score adjustment minimum value.
+				 * Only settable by CAP_SYS_RESOURCE. */
 
 	struct mutex cred_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
-- 
cgit v1.1


From 212260aa07135b327752dc02625c68cf4ce04caf Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Thu, 13 Jan 2011 15:46:06 -0800
Subject: mm: clear PageError bit in msync & fsync

Temporary IO failures, eg.  due to loss of both multipath paths, can
permanently leave the PageError bit set on a page, resulting in msync or
fsync returning -EIO over and over again, even if IO is now getting to the
disk correctly.

We already clear the AS_ENOSPC and AS_IO bits in mapping->flags in the
filemap_fdatawait_range function.  Also clearing the PageError bit on the
page allows subsequent msync or fsync calls on this file to return without
an error, if the subsequent IO succeeds.

Unfortunately data written out in the msync or fsync call that returned
-EIO can still get lost, because the page dirty bit appears to not get
restored on IO error.  However, the alternative could be potentially all
of memory filling up with uncleanable dirty pages, hanging the system, so
there is no nice choice here...

Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Valerie Aurora <vaurora@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5f38c46..4805fde 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -198,7 +198,7 @@ static inline int __TestClearPage##uname(struct page *page) { return 0; }
 struct page;	/* forward declaration */
 
 TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked)
-PAGEFLAG(Error, error)
+PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
-- 
cgit v1.1


From 110d74a921f4d272b47ef6104fcf937df808f4c8 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Thu, 13 Jan 2011 15:46:11 -0800
Subject: mm: add FOLL_MLOCK follow_page flag.

Move the code to mlock pages from __mlock_vma_pages_range() to
follow_page().

This allows __mlock_vma_pages_range() to not have to break down work into
16-page batches.

An additional motivation for doing this within the present patch series is
that it'll make it easier for a later chagne to drop mmap_sem when
blocking on disk (we'd like to be able to resume at the page that was read
from disk instead of at the start of a 16-page batch).

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451..9ade803 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1415,6 +1415,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
+#define FOLL_MLOCK	0x40	/* mark page as mlocked */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
-- 
cgit v1.1


From 088e54658f559a13c2b988086512d76fe9e8f846 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 13 Jan 2011 15:46:16 -0800
Subject: mm: remove likely() from mapping_unevictable()

The mapping_unevictable() has a likely() around the mapping parameter.
This mapping parameter comes from page_mapping() which has an unlikely()
that the page will be set as PAGE_MAPPING_ANON, and if so, it will return
NULL.  One would think that this unlikely() means that the mapping
returned by page_mapping() would not be NULL, but where page_mapping() is
used just above mapping_unevictable(), that unlikely() is incorrect most
of the time.  This means that the "likely(mapping)" in
mapping_unevictable() is incorrect most of the time.

Running the annotated branch profiler on my main box which runs firefox,
evolution, xchat and is part of my distcc farm, I had this:

 correct incorrect  %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
12872836 1269443893  98 mapping_unevictable            pagemap.h            51
35935762 1270265395  97 page_mapping                   mm.h                 659
1306198001   143659   0 page_mapping                   mm.h                 657
203131478   121586   0 page_mapping                   mm.h                 657
 5415491     1116   0 page_mapping                   mm.h                 657
74899487     1116   0 page_mapping                   mm.h                 657
203132845      224   0 page_mapping                   mm.h                 659
 5415464       27   0 page_mapping                   mm.h                 659
   13552        0   0 page_mapping                   mm.h                 657
   13552        0   0 page_mapping                   mm.h                 659
  242630        0   0 page_mapping                   mm.h                 657
  242630        0   0 page_mapping                   mm.h                 659
74899487        0   0 page_mapping                   mm.h                 659

The page_mapping() is a static inline, which is why it shows up multiple
times.  The mapping_unevictable() is also a static inline but seems to be
used only once in my setup.

The unlikely in page_mapping() was correct a total of 1909540379 times and
incorrect 1270533123 times, with a 39% being incorrect.  Perhaps this is
enough to remove the unlikely from page_mapping() as well.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Nick Piggin <npiggin@kernel.dk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2d1ffe3..9c66e99 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -48,7 +48,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping)
 
 static inline int mapping_unevictable(struct address_space *mapping)
 {
-	if (likely(mapping))
+	if (mapping)
 		return test_bit(AS_UNEVICTABLE, &mapping->flags);
 	return !!mapping;
 }
-- 
cgit v1.1


From e20e87795834f2f14cb53baf657b91d9c39f92c8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 13 Jan 2011 15:46:17 -0800
Subject: mm: remove unlikely() from page_mapping()

page_mapping() has a unlikely that the mapping has PAGE_MAPPING_ANON set.
But running the annotated branch profiler on a normal desktop system doing
vairous tasks (xchat, evolution, firefox, distcc), it is not really that
unlikely that the mapping here will have the PAGE_MAPPING_ANON flag set:

 correct incorrect  %        Function                  File              Line
 ------- ---------  -        --------                  ----              ----
35935762 1270265395  97 page_mapping                   mm.h                 659
1306198001   143659   0 page_mapping                   mm.h                 657
203131478   121586   0 page_mapping                   mm.h                 657
 5415491     1116   0 page_mapping                   mm.h                 657
74899487     1116   0 page_mapping                   mm.h                 657
203132845      224   0 page_mapping                   mm.h                 659
 5415464       27   0 page_mapping                   mm.h                 659
   13552        0   0 page_mapping                   mm.h                 657
   13552        0   0 page_mapping                   mm.h                 659
  242630        0   0 page_mapping                   mm.h                 657
  242630        0   0 page_mapping                   mm.h                 659
74899487        0   0 page_mapping                   mm.h                 659

The page_mapping() is a static inline, which is why it shows up multiple
times.

The unlikely in page_mapping() was correct a total of 1909540379 times and
incorrect 1270533123 times, with a 39% being incorrect.  With this much of
an error, it's best to simply remove the unlikely and have the compiler
and branch prediction figure this out.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9ade803..57e11b2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -657,7 +657,7 @@ static inline struct address_space *page_mapping(struct page *page)
 	VM_BUG_ON(PageSlab(page));
 	if (unlikely(PageSwapCache(page)))
 		mapping = &swapper_space;
-	else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+	else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
 		mapping = NULL;
 	return mapping;
 }
-- 
cgit v1.1


From 9950474883e027e6e728cbcff25f7f2bf0c96530 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:46:20 -0800
Subject: mm: kswapd: stop high-order balancing when any suitable zone is
 balanced

Simon Kirby reported the following problem

   We're seeing cases on a number of servers where cache never fully
   grows to use all available memory.  Sometimes we see servers with 4 GB
   of memory that never seem to have less than 1.5 GB free, even with a
   constantly-active VM.  In some cases, these servers also swap out while
   this happens, even though they are constantly reading the working set
   into memory.  We have been seeing this happening for a long time; I
   don't think it's anything recent, and it still happens on 2.6.36.

After some debugging work by Simon, Dave Hansen and others, the prevaling
theory became that kswapd is reclaiming order-3 pages requested by SLUB
too aggressive about it.

There are two apparent problems here.  On the target machine, there is a
small Normal zone in comparison to DMA32.  As kswapd tries to balance all
zones, it would continually try reclaiming for Normal even though DMA32
was balanced enough for callers.  The second problem is that
sleeping_prematurely() does not use the same logic as balance_pgdat() when
deciding whether to sleep or not.  This keeps kswapd artifically awake.

A number of tests were run and the figures from previous postings will
look very different for a few reasons.  One, the old figures were forcing
my network card to use GFP_ATOMIC in attempt to replicate Simon's problem.
 Second, I previous specified slub_min_order=3 again in an attempt to
reproduce Simon's problem.  In this posting, I'm depending on Simon to say
whether his problem is fixed or not and these figures are to show the
impact to the ordinary cases.  Finally, the "vmscan" figures are taken
from /proc/vmstat instead of the tracepoints.  There is less information
but recording is less disruptive.

The first test of relevance was postmark with a process running in the
background reading a large amount of anonymous memory in blocks.  The
objective was to vaguely simulate what was happening on Simon's machine
and it's memory intensive enough to have kswapd awake.

POSTMARK
                                            traceonly          kanyzone
Transactions per second:              156.00 ( 0.00%)   153.00 (-1.96%)
Data megabytes read per second:        21.51 ( 0.00%)    21.52 ( 0.05%)
Data megabytes written per second:     29.28 ( 0.00%)    29.11 (-0.58%)
Files created alone per second:       250.00 ( 0.00%)   416.00 (39.90%)
Files create/transact per second:      79.00 ( 0.00%)    76.00 (-3.95%)
Files deleted alone per second:       520.00 ( 0.00%)   420.00 (-23.81%)
Files delete/transact per second:      79.00 ( 0.00%)    76.00 (-3.95%)

MMTests Statistics: duration
User/Sys Time Running Test (seconds)         16.58      17.4
Total Elapsed Time (seconds)                218.48    222.47

VMstat Reclaim Statistics: vmscan
Direct reclaims                                  0          4
Direct reclaim pages scanned                     0        203
Direct reclaim pages reclaimed                   0        184
Kswapd pages scanned                        326631     322018
Kswapd pages reclaimed                      312632     309784
Kswapd low wmark quickly                         1          4
Kswapd high wmark quickly                      122        475
Kswapd skip congestion_wait                      1          0
Pages activated                             700040     705317
Pages deactivated                           212113     203922
Pages written                                 9875       6363

Total pages scanned                         326631    322221
Total pages reclaimed                       312632    309968
%age total pages scanned/reclaimed          95.71%    96.20%
%age total pages scanned/written             3.02%     1.97%

proc vmstat: Faults
Major Faults                                   300       254
Minor Faults                                645183    660284
Page ins                                    493588    486704
Page outs                                  4960088   4986704
Swap ins                                      1230       661
Swap outs                                     9869      6355

Performance is mildly affected because kswapd is no longer doing as much
work and the background memory consumer process is getting in the way.
Note that kswapd scanned and reclaimed fewer pages as it's less aggressive
and overall fewer pages were scanned and reclaimed.  Swap in/out is
particularly reduced again reflecting kswapd throwing out fewer pages.

The slight performance impact is unfortunate here but it looks like a
direct result of kswapd being less aggressive.  As the bug report is about
too many pages being freed by kswapd, it may have to be accepted for now.

The second test is a streaming IO benchmark that was previously used by
Johannes to show regressions in page reclaim.

MICRO
					 traceonly  kanyzone
User/Sys Time Running Test (seconds)         29.29     28.87
Total Elapsed Time (seconds)                492.18    488.79

VMstat Reclaim Statistics: vmscan
Direct reclaims                               2128       1460
Direct reclaim pages scanned               2284822    1496067
Direct reclaim pages reclaimed              148919     110937
Kswapd pages scanned                      15450014   16202876
Kswapd pages reclaimed                     8503697    8537897
Kswapd low wmark quickly                      3100       3397
Kswapd high wmark quickly                     1860       7243
Kswapd skip congestion_wait                    708        801
Pages activated                               9635       9573
Pages deactivated                             1432       1271
Pages written                                  223       1130

Total pages scanned                       17734836  17698943
Total pages reclaimed                      8652616   8648834
%age total pages scanned/reclaimed          48.79%    48.87%
%age total pages scanned/written             0.00%     0.01%

proc vmstat: Faults
Major Faults                                   165       221
Minor Faults                               9655785   9656506
Page ins                                      3880      7228
Page outs                                 37692940  37480076
Swap ins                                         0        69
Swap outs                                       19        15

Again fewer pages are scanned and reclaimed as expected and this time the
test completed faster.  Note that kswapd is hitting its watermarks faster
(low and high wmark quickly) which I expect is due to kswapd reclaiming
fewer pages.

I also ran fs-mark, iozone and sysbench but there is nothing interesting
to report in the figures.  Performance is not significantly changed and
the reclaim statistics look reasonable.

Tgis patch:

When the allocator enters its slow path, kswapd is woken up to balance the
node.  It continues working until all zones within the node are balanced.
For order-0 allocations, this makes perfect sense but for higher orders it
can have unintended side-effects.  If the zone sizes are imbalanced,
kswapd may reclaim heavily within a smaller zone discarding an excessive
number of pages.  The user-visible behaviour is that kswapd is awake and
reclaiming even though plenty of pages are free from a suitable zone.

This patch alters the "balance" logic for high-order reclaim allowing
kswapd to stop if any suitable zone becomes balanced to reduce the number
of pages it reclaims from other zones.  kswapd still tries to ensure that
order-0 watermarks for all zones are met before sleeping.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Eric B Munson <emunson@mgebm.net>
Cc: Simon Kirby <sim@hostway.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4890662..dad3612 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -639,6 +639,7 @@ typedef struct pglist_data {
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+	enum zone_type classzone_idx;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
@@ -654,7 +655,7 @@ typedef struct pglist_data {
 
 extern struct mutex zonelists_mutex;
 void build_all_zonelists(void *data);
-void wakeup_kswapd(struct zone *zone, int order);
+void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-- 
cgit v1.1


From a826e422420b461a6247137c292ff83c4800354a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:31 -0800
Subject: thp: mm: define MADV_HUGEPAGE

Define MADV_HUGEPAGE.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/mman-common.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index 3da9e27..e91392f 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -45,6 +45,8 @@
 #define MADV_MERGEABLE   12		/* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
 
+#define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-- 
cgit v1.1


From e9da73d67729b58bba256123e2b4651e0d8a01ac Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:32 -0800
Subject: thp: compound_lock

Add a new compound_lock() needed to serialize put_page against
__split_huge_page_refcount().

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h         | 34 ++++++++++++++++++++++++++++++++++
 include/linux/page-flags.h | 12 +++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57e11b2..3b1754a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
 #include <linux/mm_types.h>
 #include <linux/range.h>
 #include <linux/pfn.h>
+#include <linux/bit_spinlock.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -305,6 +306,39 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
+static inline void compound_lock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline void compound_unlock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline unsigned long compound_lock_irqsave(struct page *page)
+{
+	unsigned long uninitialized_var(flags);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	local_irq_save(flags);
+	compound_lock(page);
+#endif
+	return flags;
+}
+
+static inline void compound_unlock_irqrestore(struct page *page,
+					      unsigned long flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	compound_unlock(page);
+	local_irq_restore(flags);
+#endif
+}
+
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4805fde..5a743cc 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -108,6 +108,9 @@ enum pageflags {
 #ifdef CONFIG_MEMORY_FAILURE
 	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	PG_compound_lock,
+#endif
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -397,6 +400,12 @@ static inline void __ClearPageTail(struct page *page)
 #define __PG_MLOCKED		0
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __PG_COMPOUND_LOCK		(1 << PG_compound_lock)
+#else
+#define __PG_COMPOUND_LOCK		0
+#endif
+
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
@@ -406,7 +415,8 @@ static inline void __ClearPageTail(struct page *page)
 	 1 << PG_private | 1 << PG_private_2 | \
 	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
-	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
+	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+	 __PG_COMPOUND_LOCK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
-- 
cgit v1.1


From 9180706344487700b40da9eca5dedd3d11cb33b4 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:32 -0800
Subject: thp: alter compound get_page/put_page

Alter compound get_page/put_page to keep references on subpages too, in
order to allow __split_huge_page_refcount to split an hugepage even while
subpages have been pinned by one of the get_user_pages() variants.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b1754a..a2718e1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -353,9 +353,29 @@ static inline int page_count(struct page *page)
 
 static inline void get_page(struct page *page)
 {
-	page = compound_head(page);
-	VM_BUG_ON(atomic_read(&page->_count) == 0);
+	/*
+	 * Getting a normal page or the head of a compound page
+	 * requires to already have an elevated page->_count. Only if
+	 * we're getting a tail page, the elevated page->_count is
+	 * required only in the head page, so for tail pages the
+	 * bugcheck only verifies that the page->_count isn't
+	 * negative.
+	 */
+	VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
 	atomic_inc(&page->_count);
+	/*
+	 * Getting a tail page will elevate both the head and tail
+	 * page->_count(s).
+	 */
+	if (unlikely(PageTail(page))) {
+		/*
+		 * This is safe only because
+		 * __split_huge_page_refcount can't run under
+		 * get_page().
+		 */
+		VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+		atomic_inc(&page->first_page->_count);
+	}
 }
 
 static inline struct page *virt_to_head_page(const void *x)
-- 
cgit v1.1


From 14fd403f2146f740942d78af4e0ee59396ad8eab Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:37 -0800
Subject: thp: export maybe_mkwrite

huge_memory.c needs it too when it fallbacks in copying hugepages into
regular fragmented pages if hugepage allocation fails during COW.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2718e1..6bef67d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -430,6 +430,19 @@ static inline void set_compound_order(struct page *page, unsigned long order)
 }
 
 /*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
+ * servicing faults for write access.  In the normal case, do always want
+ * pte_mkwrite.  But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_flags & VM_WRITE))
+		pte = pte_mkwrite(pte);
+	return pte;
+}
+
+/*
  * Multiple processes may "see" the same page. E.g. for untouched
  * mappings of /dev/null, all processes see the same page full of
  * zeroes, and text pages of executables and shared libraries have
-- 
cgit v1.1


From 5f6e8da70a289d403975907371ce5738c726ad3f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:40 -0800
Subject: thp: special pmd_trans_* functions

These returns 0 at compile time when the config option is disabled, to
allow gcc to eliminate the transparent hugepage function calls at compile
time without additional #ifdefs (only the export of those functions have
to be visible to gcc but they won't be required at link time and
huge_memory.o can be not built at all).

_PAGE_BIT_UNUSED1 is never used for pmd, only on pte.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 6f3c6ae..0ab2cd2 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -348,6 +348,17 @@ extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 				unsigned long size);
 #endif
 
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return 0;
+}
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return 0;
+}
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_GENERIC_PGTABLE_H */
-- 
cgit v1.1


From e2cda322648122dc400c85ada80eaddbc612ef6a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:40 -0800
Subject: thp: add pmd mangling generic functions

Some are needed to build but not actually used on archs not supporting
transparent hugepages.  Others like pmdp_clear_flush are used by x86 too.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable.h | 214 ++++++++++++++++++++++++++++++------------
 1 file changed, 154 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 0ab2cd2..f1eddf7 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -5,67 +5,108 @@
 #ifdef CONFIG_MMU
 
 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-/*
- * Largely same as above, but only sets the access flags (dirty,
- * accessed, and writable). Furthermore, we know it always gets set
- * to a "more permissive" setting, which allows most architectures
- * to optimize this. We return whether the PTE actually changed, which
- * in turn instructs the caller to do things like update__mmu_cache.
- * This used to be done in the caller, but sparc needs minor faults to
- * force that call on sun4c so we changed this macro slightly
- */
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-({									  \
-	int __changed = !pte_same(*(__ptep), __entry);			  \
-	if (__changed) {						  \
-		set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
-		flush_tlb_page(__vma, __address);			  \
-	}								  \
-	__changed;							  \
-})
+extern int ptep_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pte_t *ptep,
+				 pte_t entry, int dirty);
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __address, __ptep)		\
-({									\
-	pte_t __pte = *(__ptep);					\
-	int r = 1;							\
-	if (!pte_young(__pte))						\
-		r = 0;							\
-	else								\
-		set_pte_at((__vma)->vm_mm, (__address),			\
-			   (__ptep), pte_mkold(__pte));			\
-	r;								\
-})
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address,
+					    pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	int r = 1;
+	if (!pte_young(pte))
+		r = 0;
+	else
+		set_pte_at(vma->vm_mm, address, ptep, pte_mkold(pte));
+	return r;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address,
+					    pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+	int r = 1;
+	if (!pmd_young(pmd))
+		r = 0;
+	else
+		set_pmd_at(vma->vm_mm, address, pmdp, pmd_mkold(pmd));
+	return r;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address,
+					    pmd_t *pmdp)
+{
+	BUG();
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep)		\
-({									\
-	int __young;							\
-	__young = ptep_test_and_clear_young(__vma, __address, __ptep);	\
-	if (__young)							\
-		flush_tlb_page(__vma, __address);			\
-	__young;							\
-})
+int ptep_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pte_t *ptep);
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp);
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define ptep_get_and_clear(__mm, __address, __ptep)			\
-({									\
-	pte_t __pte = *(__ptep);					\
-	pte_clear((__mm), (__address), (__ptep));			\
-	__pte;								\
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long address,
+				       pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	pte_clear(mm, address, ptep);
+	return pte;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long address,
+				       pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+	pmd_clear(mm, address, pmdp);
+	return pmd;
 })
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long address,
+				       pmd_t *pmdp)
+{
+	BUG();
+	return __pmd(0);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-#define ptep_get_and_clear_full(__mm, __address, __ptep, __full)	\
-({									\
-	pte_t __pte;							\
-	__pte = ptep_get_and_clear((__mm), (__address), (__ptep));	\
-	__pte;								\
-})
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address, pte_t *ptep,
+					    int full)
+{
+	pte_t pte;
+	pte = ptep_get_and_clear(mm, address, ptep);
+	return pte;
+}
 #endif
 
 /*
@@ -74,20 +115,25 @@
  * not present, or in the process of an address space destruction.
  */
 #ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
-#define pte_clear_not_present_full(__mm, __address, __ptep, __full)	\
-do {									\
-	pte_clear((__mm), (__address), (__ptep));			\
-} while (0)
+static inline void pte_clear_not_present_full(struct mm_struct *mm,
+					      unsigned long address,
+					      pte_t *ptep,
+					      int full)
+{
+	pte_clear(mm, address, ptep);
+}
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
-#define ptep_clear_flush(__vma, __address, __ptep)			\
-({									\
-	pte_t __pte;							\
-	__pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);	\
-	flush_tlb_page(__vma, __address);				\
-	__pte;								\
-})
+extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
+			      unsigned long address,
+			      pte_t *ptep);
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH
+extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+			      unsigned long address,
+			      pmd_t *pmdp);
 #endif
 
 #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -99,8 +145,49 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pmd_t *pmdp)
+{
+	pmd_t old_pmd = *pmdp;
+	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd));
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pmd_t *pmdp)
+{
+	BUG();
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
+
+#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+			      unsigned long address,
+			      pmd_t *pmdp);
+#endif
+
 #ifndef __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(pte_val(A) == pte_val(B))
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+	return pte_val(pte_a) == pte_val(pte_b);
+}
+#endif
+
+#ifndef __HAVE_ARCH_PMD_SAME
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return pmd_val(pmd_a) == pmd_val(pmd_b);
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	BUG();
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
 #ifndef __HAVE_ARCH_PAGE_TEST_DIRTY
@@ -357,6 +444,13 @@ static inline int pmd_trans_splitting(pmd_t pmd)
 {
 	return 0;
 }
+#ifndef __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	BUG();
+	return 0;
+}
+#endif /* __HAVE_ARCH_PMD_WRITE */
 #endif
 
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.1


From 8ac1f8320a0073f28cf9e0491af4cd98f504f92a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:43 -0800
Subject: thp: pte alloc trans splitting

pte alloc routines must wait for split_huge_page if the pmd is not present
and not null (i.e.  pmd_trans_splitting).  The additional branches are
optimized away at compile time by pmd_trans_splitting if the config option
is off.  However we must pass the vma down in order to know the anon_vma
lock to wait for.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6bef67d..14ddd98 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1131,7 +1131,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
 int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 #endif
 
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long address);
 int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
 
 /*
@@ -1200,16 +1201,18 @@ static inline void pgtable_page_dtor(struct page *page)
 	pte_unmap(pte);					\
 } while (0)
 
-#define pte_alloc_map(mm, pmd, address)			\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
-		NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address)				\
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma,	\
+							pmd, address))?	\
+	 NULL: pte_offset_map(pmd, address))
 
 #define pte_alloc_map_lock(mm, pmd, address, ptlp)	\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL,	\
+							pmd, address))?	\
 		NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 extern void free_area_init(unsigned long * zones_size);
-- 
cgit v1.1


From 91a4ee2670e0ee2b0630a0eb24fb9a87ea3acd0a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:44 -0800
Subject: thp: add pmd mmu_notifier helpers

Add mmu notifier helpers to handle pmd huge operations.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 43dcfbd..cbfab1e 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -243,6 +243,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	__pte;								\
 })
 
+#define pmdp_clear_flush_notify(__vma, __address, __pmdp)		\
+({									\
+	pmd_t __pmd;							\
+	struct vm_area_struct *___vma = __vma;				\
+	unsigned long ___address = __address;				\
+	VM_BUG_ON(__address & ~HPAGE_PMD_MASK);				\
+	mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,	\
+					    (__address)+HPAGE_PMD_SIZE);\
+	__pmd = pmdp_clear_flush(___vma, ___address, __pmdp);		\
+	mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,	\
+					  (__address)+HPAGE_PMD_SIZE);	\
+	__pmd;								\
+})
+
+#define pmdp_splitting_flush_notify(__vma, __address, __pmdp)		\
+({									\
+	struct vm_area_struct *___vma = __vma;				\
+	unsigned long ___address = __address;				\
+	VM_BUG_ON(__address & ~HPAGE_PMD_MASK);				\
+	mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,	\
+					    (__address)+HPAGE_PMD_SIZE);\
+	pmdp_splitting_flush(___vma, ___address, __pmdp);		\
+	mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,	\
+					  (__address)+HPAGE_PMD_SIZE);	\
+})
+
 #define ptep_clear_flush_young_notify(__vma, __address, __ptep)		\
 ({									\
 	int __young;							\
@@ -254,6 +280,17 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	__young;							\
 })
 
+#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)		\
+({									\
+	int __young;							\
+	struct vm_area_struct *___vma = __vma;				\
+	unsigned long ___address = __address;				\
+	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
+	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
+						  ___address);		\
+	__young;							\
+})
+
 #define set_pte_at_notify(__mm, __address, __ptep, __pte)		\
 ({									\
 	struct mm_struct *___mm = __mm;					\
@@ -305,7 +342,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 }
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
+#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
 #define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
+#define pmdp_splitting_flush_notify pmdp_splitting_flush
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
-- 
cgit v1.1


From 4e6af67e970a2ac287739a4c526c857b5bda27ec Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:44 -0800
Subject: thp: clear page compound

split_huge_page must transform a compound page to a regular page and needs
ClearPageCompound.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5a743cc..4835cae 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -347,7 +347,7 @@ static inline void set_page_writeback(struct page *page)
  * tests can be used in performance sensitive paths. PageCompound is
  * generally not used in hot code paths.
  */
-__PAGEFLAG(Head, head)
+__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
 __PAGEFLAG(Tail, tail)
 
 static inline int PageCompound(struct page *page)
@@ -355,6 +355,13 @@ static inline int PageCompound(struct page *page)
 	return page->flags & ((1L << PG_head) | (1L << PG_tail));
 
 }
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ClearPageCompound(struct page *page)
+{
+	BUG_ON(!PageHead(page));
+	ClearPageHead(page);
+}
+#endif
 #else
 /*
  * Reduce page flag use as much as possible by overlapping
@@ -392,6 +399,14 @@ static inline void __ClearPageTail(struct page *page)
 	page->flags &= ~PG_head_tail_mask;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ClearPageCompound(struct page *page)
+{
+	BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound));
+	clear_bit(PG_compound, &page->flags);
+}
+#endif
+
 #endif /* !PAGEFLAGS_EXTENDED */
 
 #ifdef CONFIG_MMU
-- 
cgit v1.1


From e7a00c45f29c0155007aa150bf231a70fa470365 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:45 -0800
Subject: thp: add pmd_huge_pte to mm_struct

This increase the size of the mm struct a bit but it is needed to
preallocate one pte for each hugepage so that split_huge_page will not
require a fail path.  Guarantee of success is a fundamental property of
split_huge_page to avoid decrasing swapping reliability and to avoid
adding -ENOMEM fail paths that would otherwise force the hugepage-unaware
VM code to learn rolling back in the middle of its pte mangling operations
(if something we need it to learn handling pmd_trans_huge natively rather
being capable of rollback).  When split_huge_page runs a pte is needed to
succeed the split, to map the newly splitted regular pages with a regular
pte.  This way all existing VM code remains backwards compatible by just
adding a split_huge_page* one liner.  The memory waste of those
preallocated ptes is negligible and so it is worth it.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb7288a..26bc4e2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -310,6 +310,9 @@ struct mm_struct {
 #ifdef CONFIG_MMU_NOTIFIER
 	struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
+#endif
 	/* How many tasks sharing this mm are OOM_DISABLE */
 	atomic_t oom_disable_count;
 };
-- 
cgit v1.1


From 47ad8475c000141eacb3ecda5e5ce4b43a9cd04d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:47 -0800
Subject: thp: clear_copy_huge_page

Move the copy/clear_huge_page functions to common code to share between
hugetlb.c and huge_memory.c.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 14ddd98..cc6ab10 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1589,5 +1589,14 @@ static inline int is_hwpoison_address(unsigned long addr)
 
 extern void dump_page(struct page *page);
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_huge_page(struct page *page,
+			    unsigned long addr,
+			    unsigned int pages_per_huge_page);
+extern void copy_user_huge_page(struct page *dst, struct page *src,
+				unsigned long addr, struct vm_area_struct *vma,
+				unsigned int pages_per_huge_page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
-- 
cgit v1.1


From 936a5fe6e6148c0b3ea0d792b903847d9b9931a1 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:48 -0800
Subject: thp: kvm mmu transparent hugepage support

This should work for both hugetlbfs and transparent hugepages.

[akpm@linux-foundation.org: bring forward PageTransCompound() addition for bisectability]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4835cae..907f160 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -409,6 +409,18 @@ static inline void ClearPageCompound(struct page *page)
 
 #endif /* !PAGEFLAGS_EXTENDED */
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int PageTransCompound(struct page *page)
+{
+	return PageCompound(page);
+}
+#else
+static inline int PageTransCompound(struct page *page)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
-- 
cgit v1.1


From 32dba98e085f8b2b4345887df9abf5e0e93bfc12 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:49 -0800
Subject: thp: _GFP_NO_KSWAPD

Transparent hugepage allocations must be allowed not to invoke kswapd or
any other kind of indirect reclaim (especially when the defrag sysfs is
control disabled).  It's unacceptable to swap out anonymous pages
(potentially anonymous transparent hugepages) in order to create new
transparent hugepages.  This is true for the MADV_HUGEPAGE areas too
(swapping out a kvm virtual machine and so having it suffer an unbearable
slowdown, so another one with guest physical memory marked MADV_HUGEPAGE
can run 30% faster if it is running memory intensive workloads, makes no
sense).  If a transparent hugepage allocation fails the slowdown is minor
and there is total fallback, so kswapd should never be asked to swapout
memory to allow the high order allocation to succeed.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f54adfc..49d2356 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -34,6 +34,7 @@ struct vm_area_struct;
 #else
 #define ___GFP_NOTRACK		0
 #endif
+#define ___GFP_NO_KSWAPD	0x400000u
 
 /*
  * GFP bitmasks..
@@ -81,13 +82,15 @@ struct vm_area_struct;
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
+#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
+
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
  * allocations that simply cannot be supported (e.g. page tables).
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 22	/* Room for 22 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 23	/* Room for 23 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
-- 
cgit v1.1


From 71e3aac0724ffe8918992d76acfe3aad7d8724a5 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:52 -0800
Subject: thp: transparent hugepage core

Lately I've been working to make KVM use hugepages transparently without
the usual restrictions of hugetlbfs.  Some of the restrictions I'd like to
see removed:

1) hugepages have to be swappable or the guest physical memory remains
   locked in RAM and can't be paged out to swap

2) if a hugepage allocation fails, regular pages should be allocated
   instead and mixed in the same vma without any failure and without
   userland noticing

3) if some task quits and more hugepages become available in the
   buddy, guest physical memory backed by regular pages should be
   relocated on hugepages automatically in regions under
   madvise(MADV_HUGEPAGE) (ideally event driven by waking up the
   kernel deamon if the order=HPAGE_PMD_SHIFT-PAGE_SHIFT list becomes
   not null)

4) avoidance of reservation and maximization of use of hugepages whenever
   possible. Reservation (needed to avoid runtime fatal faliures) may be ok for
   1 machine with 1 database with 1 database cache with 1 database cache size
   known at boot time. It's definitely not feasible with a virtualization
   hypervisor usage like RHEV-H that runs an unknown number of virtual machines
   with an unknown size of each virtual machine with an unknown amount of
   pagecache that could be potentially useful in the host for guest not using
   O_DIRECT (aka cache=off).

hugepages in the virtualization hypervisor (and also in the guest!) are
much more important than in a regular host not using virtualization,
becasue with NPT/EPT they decrease the tlb-miss cacheline accesses from 24
to 19 in case only the hypervisor uses transparent hugepages, and they
decrease the tlb-miss cacheline accesses from 19 to 15 in case both the
linux hypervisor and the linux guest both uses this patch (though the
guest will limit the addition speedup to anonymous regions only for
now...).  Even more important is that the tlb miss handler is much slower
on a NPT/EPT guest than for a regular shadow paging or no-virtualization
scenario.  So maximizing the amount of virtual memory cached by the TLB
pays off significantly more with NPT/EPT than without (even if there would
be no significant speedup in the tlb-miss runtime).

The first (and more tedious) part of this work requires allowing the VM to
handle anonymous hugepages mixed with regular pages transparently on
regular anonymous vmas.  This is what this patch tries to achieve in the
least intrusive possible way.  We want hugepages and hugetlb to be used in
a way so that all applications can benefit without changes (as usual we
leverage the KVM virtualization design: by improving the Linux VM at
large, KVM gets the performance boost too).

The most important design choice is: always fallback to 4k allocation if
the hugepage allocation fails!  This is the _very_ opposite of some large
pagecache patches that failed with -EIO back then if a 64k (or similar)
allocation failed...

Second important decision (to reduce the impact of the feature on the
existing pagetable handling code) is that at any time we can split an
hugepage into 512 regular pages and it has to be done with an operation
that can't fail.  This way the reliability of the swapping isn't decreased
(no need to allocate memory when we are short on memory to swap) and it's
trivial to plug a split_huge_page* one-liner where needed without
polluting the VM.  Over time we can teach mprotect, mremap and friends to
handle pmd_trans_huge natively without calling split_huge_page*.  The fact
it can't fail isn't just for swap: if split_huge_page would return -ENOMEM
(instead of the current void) we'd need to rollback the mprotect from the
middle of it (ideally including undoing the split_vma) which would be a
big change and in the very wrong direction (it'd likely be simpler not to
call split_huge_page at all and to teach mprotect and friends to handle
hugepages instead of rolling them back from the middle).  In short the
very value of split_huge_page is that it can't fail.

The collapsing and madvise(MADV_HUGEPAGE) part will remain separated and
incremental and it'll just be an "harmless" addition later if this initial
part is agreed upon.  It also should be noted that locking-wise replacing
regular pages with hugepages is going to be very easy if compared to what
I'm doing below in split_huge_page, as it will only happen when
page_count(page) matches page_mapcount(page) if we can take the PG_lock
and mmap_sem in write mode.  collapse_huge_page will be a "best effort"
that (unlike split_huge_page) can fail at the minimal sign of trouble and
we can try again later.  collapse_huge_page will be similar to how KSM
works and the madvise(MADV_HUGEPAGE) will work similar to
madvise(MADV_MERGEABLE).

The default I like is that transparent hugepages are used at page fault
time.  This can be changed with
/sys/kernel/mm/transparent_hugepage/enabled.  The control knob can be set
to three values "always", "madvise", "never" which mean respectively that
hugepages are always used, or only inside madvise(MADV_HUGEPAGE) regions,
or never used.  /sys/kernel/mm/transparent_hugepage/defrag instead
controls if the hugepage allocation should defrag memory aggressively
"always", only inside "madvise" regions, or "never".

The pmd_trans_splitting/pmd_trans_huge locking is very solid.  The
put_page (from get_user_page users that can't use mmu notifier like
O_DIRECT) that runs against a __split_huge_page_refcount instead was a
pain to serialize in a way that would result always in a coherent page
count for both tail and head.  I think my locking solution with a
compound_lock taken only after the page_first is valid and is still a
PageHead should be safe but it surely needs review from SMP race point of
view.  In short there is no current existing way to serialize the O_DIRECT
final put_page against split_huge_page_refcount so I had to invent a new
one (O_DIRECT loses knowledge on the mapping status by the time gup_fast
returns so...).  And I didn't want to impact all gup/gup_fast users for
now, maybe if we change the gup interface substantially we can avoid this
locking, I admit I didn't think too much about it because changing the gup
unpinning interface would be invasive.

If we ignored O_DIRECT we could stick to the existing compound refcounting
code, by simply adding a get_user_pages_fast_flags(foll_flags) where KVM
(and any other mmu notifier user) would call it without FOLL_GET (and if
FOLL_GET isn't set we'd just BUG_ON if nobody registered itself in the
current task mmu notifier list yet).  But O_DIRECT is fundamental for
decent performance of virtualized I/O on fast storage so we can't avoid it
to solve the race of put_page against split_huge_page_refcount to achieve
a complete hugepage feature for KVM.

Swap and oom works fine (well just like with regular pages ;).  MMU
notifier is handled transparently too, with the exception of the young bit
on the pmd, that didn't have a range check but I think KVM will be fine
because the whole point of hugepages is that EPT/NPT will also use a huge
pmd when they notice gup returns pages with PageCompound set, so they
won't care of a range and there's just the pmd young bit to check in that
case.

NOTE: in some cases if the L2 cache is small, this may slowdown and waste
memory during COWs because 4M of memory are accessed in a single fault
instead of 8k (the payoff is that after COW the program can run faster).
So we might want to switch the copy_huge_page (and clear_huge_page too) to
not temporal stores.  I also extensively researched ways to avoid this
cache trashing with a full prefault logic that would cow in 8k/16k/32k/64k
up to 1M (I can send those patches that fully implemented prefault) but I
concluded they're not worth it and they add an huge additional complexity
and they remove all tlb benefits until the full hugepage has been faulted
in, to save a little bit of memory and some cache during app startup, but
they still don't improve substantially the cache-trashing during startup
if the prefault happens in >4k chunks.  One reason is that those 4k pte
entries copied are still mapped on a perfectly cache-colored hugepage, so
the trashing is the worst one can generate in those copies (cow of 4k page
copies aren't so well colored so they trashes less, but again this results
in software running faster after the page fault).  Those prefault patches
allowed things like a pte where post-cow pages were local 4k regular anon
pages and the not-yet-cowed pte entries were pointing in the middle of
some hugepage mapped read-only.  If it doesn't payoff substantially with
todays hardware it will payoff even less in the future with larger l2
caches, and the prefault logic would blot the VM a lot.  If one is
emebdded transparent_hugepage can be disabled during boot with sysfs or
with the boot commandline parameter transparent_hugepage=0 (or
transparent_hugepage=2 to restrict hugepages inside madvise regions) that
will ensure not a single hugepage is allocated at boot time.  It is simple
enough to just disable transparent hugepage globally and let transparent
hugepages be allocated selectively by applications in the MADV_HUGEPAGE
region (both at page fault time, and if enabled with the
collapse_huge_page too through the kernel daemon).

This patch supports only hugepages mapped in the pmd, archs that have
smaller hugepages will not fit in this patch alone.  Also some archs like
power have certain tlb limits that prevents mixing different page size in
the same regions so they will not fit in this framework that requires
"graceful fallback" to basic PAGE_SIZE in case of physical memory
fragmentation.  hugetlbfs remains a perfect fit for those because its
software limits happen to match the hardware limits.  hugetlbfs also
remains a perfect fit for hugepage sizes like 1GByte that cannot be hoped
to be found not fragmented after a certain system uptime and that would be
very expensive to defragment with relocation, so requiring reservation.
hugetlbfs is the "reservation way", the point of transparent hugepages is
not to have any reservation at all and maximizing the use of cache and
hugepages at all times automatically.

Some performance result:

vmx andrea # LD_PRELOAD=/usr/lib64/libhugetlbfs.so HUGETLB_MORECORE=yes HUGETLB_PATH=/mnt/huge/ ./largep
ages3
memset page fault 1566023
memset tlb miss 453854
memset second tlb miss 453321
random access tlb miss 41635
random access second tlb miss 41658
vmx andrea # LD_PRELOAD=/usr/lib64/libhugetlbfs.so HUGETLB_MORECORE=yes HUGETLB_PATH=/mnt/huge/ ./largepages3
memset page fault 1566471
memset tlb miss 453375
memset second tlb miss 453320
random access tlb miss 41636
random access second tlb miss 41637
vmx andrea # ./largepages3
memset page fault 1566642
memset tlb miss 453417
memset second tlb miss 453313
random access tlb miss 41630
random access second tlb miss 41647
vmx andrea # ./largepages3
memset page fault 1566872
memset tlb miss 453418
memset second tlb miss 453315
random access tlb miss 41618
random access second tlb miss 41659
vmx andrea # echo 0 > /proc/sys/vm/transparent_hugepage
vmx andrea # ./largepages3
memset page fault 2182476
memset tlb miss 460305
memset second tlb miss 460179
random access tlb miss 44483
random access second tlb miss 44186
vmx andrea # ./largepages3
memset page fault 2182791
memset tlb miss 460742
memset second tlb miss 459962
random access tlb miss 43981
random access second tlb miss 43988

============
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define SIZE (3UL*1024*1024*1024)

int main()
{
	char *p = malloc(SIZE), *p2;
	struct timeval before, after;

	gettimeofday(&before, NULL);
	memset(p, 0, SIZE);
	gettimeofday(&after, NULL);
	printf("memset page fault %Lu\n",
	       (after.tv_sec-before.tv_sec)*1000000UL +
	       after.tv_usec-before.tv_usec);

	gettimeofday(&before, NULL);
	memset(p, 0, SIZE);
	gettimeofday(&after, NULL);
	printf("memset tlb miss %Lu\n",
	       (after.tv_sec-before.tv_sec)*1000000UL +
	       after.tv_usec-before.tv_usec);

	gettimeofday(&before, NULL);
	memset(p, 0, SIZE);
	gettimeofday(&after, NULL);
	printf("memset second tlb miss %Lu\n",
	       (after.tv_sec-before.tv_sec)*1000000UL +
	       after.tv_usec-before.tv_usec);

	gettimeofday(&before, NULL);
	for (p2 = p; p2 < p+SIZE; p2 += 4096)
		*p2 = 0;
	gettimeofday(&after, NULL);
	printf("random access tlb miss %Lu\n",
	       (after.tv_sec-before.tv_sec)*1000000UL +
	       after.tv_usec-before.tv_usec);

	gettimeofday(&before, NULL);
	for (p2 = p; p2 < p+SIZE; p2 += 4096)
		*p2 = 0;
	gettimeofday(&after, NULL);
	printf("random access second tlb miss %Lu\n",
	       (after.tv_sec-before.tv_sec)*1000000UL +
	       after.tv_usec-before.tv_usec);

	return 0;
}
============

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h        |   3 ++
 include/linux/huge_mm.h    | 118 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h         |   4 ++
 include/linux/mm_inline.h  |  11 ++++-
 include/linux/page-flags.h |  21 ++++++++
 include/linux/rmap.h       |   2 +
 include/linux/swap.h       |   2 +
 7 files changed, 159 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/huge_mm.h

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 49d2356..d95082c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -109,6 +109,9 @@ struct vm_area_struct;
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
 #define GFP_IOFS	(__GFP_IO | __GFP_FS)
+#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+			 __GFP_NO_KSWAPD)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
new file mode 100644
index 0000000..9301824
--- /dev/null
+++ b/include/linux/huge_mm.h
@@ -0,0 +1,118 @@
+#ifndef _LINUX_HUGE_MM_H
+#define _LINUX_HUGE_MM_H
+
+extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
+				      struct vm_area_struct *vma,
+				      unsigned long address, pmd_t *pmd,
+				      unsigned int flags);
+extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
+			 struct vm_area_struct *vma);
+extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+			       unsigned long address, pmd_t *pmd,
+			       pmd_t orig_pmd);
+extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
+extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
+					  unsigned long addr,
+					  pmd_t *pmd,
+					  unsigned int flags);
+extern int zap_huge_pmd(struct mmu_gather *tlb,
+			struct vm_area_struct *vma,
+			pmd_t *pmd);
+
+enum transparent_hugepage_flag {
+	TRANSPARENT_HUGEPAGE_FLAG,
+	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+	TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
+	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+#ifdef CONFIG_DEBUG_VM
+	TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
+#endif
+};
+
+enum page_check_address_pmd_flag {
+	PAGE_CHECK_ADDRESS_PMD_FLAG,
+	PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
+	PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
+};
+extern pmd_t *page_check_address_pmd(struct page *page,
+				     struct mm_struct *mm,
+				     unsigned long address,
+				     enum page_check_address_pmd_flag flag);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define HPAGE_PMD_SHIFT HPAGE_SHIFT
+#define HPAGE_PMD_MASK HPAGE_MASK
+#define HPAGE_PMD_SIZE HPAGE_SIZE
+
+#define transparent_hugepage_enabled(__vma)				\
+	(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) ||	\
+	 (transparent_hugepage_flags &					\
+	  (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
+	  (__vma)->vm_flags & VM_HUGEPAGE))
+#define transparent_hugepage_defrag(__vma)				\
+	((transparent_hugepage_flags &					\
+	  (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) ||			\
+	 (transparent_hugepage_flags &					\
+	  (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) &&		\
+	  (__vma)->vm_flags & VM_HUGEPAGE))
+#ifdef CONFIG_DEBUG_VM
+#define transparent_hugepage_debug_cow()				\
+	(transparent_hugepage_flags &					\
+	 (1<<TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG))
+#else /* CONFIG_DEBUG_VM */
+#define transparent_hugepage_debug_cow() 0
+#endif /* CONFIG_DEBUG_VM */
+
+extern unsigned long transparent_hugepage_flags;
+extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+			  pmd_t *dst_pmd, pmd_t *src_pmd,
+			  struct vm_area_struct *vma,
+			  unsigned long addr, unsigned long end);
+extern int handle_pte_fault(struct mm_struct *mm,
+			    struct vm_area_struct *vma, unsigned long address,
+			    pte_t *pte, pmd_t *pmd, unsigned int flags);
+extern int split_huge_page(struct page *page);
+extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
+#define split_huge_page_pmd(__mm, __pmd)				\
+	do {								\
+		pmd_t *____pmd = (__pmd);				\
+		if (unlikely(pmd_trans_huge(*____pmd)))			\
+			__split_huge_page_pmd(__mm, ____pmd);		\
+	}  while (0)
+#define wait_split_huge_page(__anon_vma, __pmd)				\
+	do {								\
+		pmd_t *____pmd = (__pmd);				\
+		spin_unlock_wait(&(__anon_vma)->root->lock);		\
+		/*							\
+		 * spin_unlock_wait() is just a loop in C and so the	\
+		 * CPU can reorder anything around it.			\
+		 */							\
+		smp_mb();						\
+		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
+		       pmd_trans_huge(*____pmd));			\
+	} while (0)
+#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
+#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
+#if HPAGE_PMD_ORDER > MAX_ORDER
+#error "hugepages can't be allocated by the buddy allocator"
+#endif
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+#define HPAGE_PMD_SHIFT ({ BUG(); 0; })
+#define HPAGE_PMD_MASK ({ BUG(); 0; })
+#define HPAGE_PMD_SIZE ({ BUG(); 0; })
+
+#define transparent_hugepage_enabled(__vma) 0
+
+#define transparent_hugepage_flags 0UL
+static inline int split_huge_page(struct page *page)
+{
+	return 0;
+}
+#define split_huge_page_pmd(__mm, __pmd)	\
+	do { } while (0)
+#define wait_split_huge_page(__anon_vma, __pmd)	\
+	do { } while (0)
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc6ab10..78adec4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -111,6 +111,9 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
 #define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
+#if BITS_PER_LONG > 32
+#define VM_HUGEPAGE	0x100000000UL	/* MADV_HUGEPAGE marked this vma */
+#endif
 
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP	(VM_RAND_READ | VM_SEQ_READ)
@@ -243,6 +246,7 @@ struct inode;
  * files which need it (119 of them)
  */
 #include <linux/page-flags.h>
+#include <linux/huge_mm.h>
 
 /*
  * Methods to modify the page usage count.
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8835b87..650f31e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -20,14 +20,21 @@ static inline int page_is_file_cache(struct page *page)
 }
 
 static inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
+		       struct list_head *head)
 {
-	list_add(&page->lru, &zone->lru[l].list);
+	list_add(&page->lru, head);
 	__inc_zone_state(zone, NR_LRU_BASE + l);
 	mem_cgroup_add_lru_list(page, l);
 }
 
 static inline void
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+	__add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
+}
+
+static inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
 	list_del(&page->lru);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 907f160..4ca1241 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -410,11 +410,32 @@ static inline void ClearPageCompound(struct page *page)
 #endif /* !PAGEFLAGS_EXTENDED */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * PageHuge() only returns true for hugetlbfs pages, but not for
+ * normal or transparent huge pages.
+ *
+ * PageTransHuge() returns true for both transparent huge and
+ * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
+ * called only in the core VM paths where hugetlbfs pages can't exist.
+ */
+static inline int PageTransHuge(struct page *page)
+{
+	VM_BUG_ON(PageTail(page));
+	return PageHead(page);
+}
+
 static inline int PageTransCompound(struct page *page)
 {
 	return PageCompound(page);
 }
+
 #else
+
+static inline int PageTransHuge(struct page *page)
+{
+	return 0;
+}
+
 static inline int PageTransCompound(struct page *page)
 {
 	return 0;
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bb83c0d..e9fd04c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -198,6 +198,8 @@ enum ttu_flags {
 };
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
+bool is_vma_temporary_stack(struct vm_area_struct *vma);
+
 int try_to_unmap(struct page *, enum ttu_flags flags);
 int try_to_unmap_one(struct page *, struct vm_area_struct *,
 			unsigned long address, enum ttu_flags flags);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index eba53e7..4d55932 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -208,6 +208,8 @@ extern unsigned int nr_free_pagecache_pages(void);
 /* linux/mm/swap.c */
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_add_page_tail(struct zone* zone,
+			      struct page *page, struct page *page_tail);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
-- 
cgit v1.1


From 0af4e98b6b095c74588af04872f83d333c958c32 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:55 -0800
Subject: thp: madvise(MADV_HUGEPAGE)

Add madvise MADV_HUGEPAGE to mark regions that are important to be
hugepage backed.  Return -EINVAL if the vma is not of an anonymous type,
or the feature isn't built into the kernel.  Never silently return
success.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9301824..d9ab70d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -97,6 +97,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #if HPAGE_PMD_ORDER > MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
+extern int hugepage_madvise(unsigned long *vm_flags);
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUG(); 0; })
@@ -113,6 +114,11 @@ static inline int split_huge_page(struct page *page)
 	do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)	\
 	do { } while (0)
+static inline int hugepage_madvise(unsigned long *vm_flags)
+{
+	BUG();
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
-- 
cgit v1.1


From 500d65d471018d9a13b0d51b7e141ed2a3555c1d Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:55 -0800
Subject: thp: pmd_trans_huge migrate bugcheck

No pmd_trans_huge should ever materialize in migration ptes areas, because
we split the hugepage before migration ptes are instantiated.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 78adec4..2ec5138 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1490,6 +1490,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
+#define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
-- 
cgit v1.1


From 79134171df238171daa4c024a42b77b401ccb00b Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:58 -0800
Subject: thp: transparent hugepage vmstat

Add hugepage stat information to /proc/vmstat and /proc/meminfo.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dad3612..02ecb01 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -114,6 +114,7 @@ enum zone_stat_item {
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
+	NR_ANON_TRANSPARENT_HUGEPAGES,
 	NR_VM_ZONE_STAT_ITEMS };
 
 /*
-- 
cgit v1.1


From ba76149f47d8c939efa0acc07a191237af900471 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:46:58 -0800
Subject: thp: khugepaged

Add khugepaged to relocate fragmented pages into hugepages if new
hugepages become available.  (this is indipendent of the defrag logic that
will have to make new hugepages available)

The fundamental reason why khugepaged is unavoidable, is that some memory
can be fragmented and not everything can be relocated.  So when a virtual
machine quits and releases gigabytes of hugepages, we want to use those
freely available hugepages to create huge-pmd in the other virtual
machines that may be running on fragmented memory, to maximize the CPU
efficiency at all times.  The scan is slow, it takes nearly zero cpu time,
except when it copies data (in which case it means we definitely want to
pay for that cpu time) so it seems a good tradeoff.

In addition to the hugepages being released by other process releasing
memory, we have the strong suspicion that the performance impact of
potentially defragmenting hugepages during or before each page fault could
lead to more performance inconsistency than allocating small pages at
first and having them collapsed into large pages later...  if they prove
themselfs to be long lived mappings (khugepaged scan is slow so short
lived mappings have low probability to run into khugepaged if compared to
long lived mappings).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h    |  1 +
 include/linux/khugepaged.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h      |  1 +
 3 files changed, 68 insertions(+)
 create mode 100644 include/linux/khugepaged.h

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d9ab70d..43a694e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -25,6 +25,7 @@ enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
 #ifdef CONFIG_DEBUG_VM
 	TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
 #endif
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
new file mode 100644
index 0000000..552f318
--- /dev/null
+++ b/include/linux/khugepaged.h
@@ -0,0 +1,66 @@
+#ifndef _LINUX_KHUGEPAGED_H
+#define _LINUX_KHUGEPAGED_H
+
+#include <linux/sched.h> /* MMF_VM_HUGEPAGE */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __khugepaged_enter(struct mm_struct *mm);
+extern void __khugepaged_exit(struct mm_struct *mm);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+
+#define khugepaged_enabled()					       \
+	(transparent_hugepage_flags &				       \
+	 ((1<<TRANSPARENT_HUGEPAGE_FLAG) |		       \
+	  (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
+#define khugepaged_always()				\
+	(transparent_hugepage_flags &			\
+	 (1<<TRANSPARENT_HUGEPAGE_FLAG))
+#define khugepaged_req_madv()					\
+	(transparent_hugepage_flags &				\
+	 (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+#define khugepaged_defrag()					\
+	(transparent_hugepage_flags &				\
+	 (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
+
+static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
+		return __khugepaged_enter(mm);
+	return 0;
+}
+
+static inline void khugepaged_exit(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
+		__khugepaged_exit(mm);
+}
+
+static inline int khugepaged_enter(struct vm_area_struct *vma)
+{
+	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
+		if (khugepaged_always() ||
+		    (khugepaged_req_madv() &&
+		     vma->vm_flags & VM_HUGEPAGE))
+			if (__khugepaged_enter(vma->vm_mm))
+				return -ENOMEM;
+	return 0;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	return 0;
+}
+static inline void khugepaged_exit(struct mm_struct *mm)
+{
+}
+static inline int khugepaged_enter(struct vm_area_struct *vma)
+{
+	return 0;
+}
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+{
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* _LINUX_KHUGEPAGED_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f23b5bb..d747f94 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -434,6 +434,7 @@ extern int get_dumpable(struct mm_struct *mm);
 #endif
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
+#define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
-- 
cgit v1.1


From 5f24ce5fd34c3ca1b3d10d30da754732da64d5c0 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:00 -0800
Subject: thp: remove PG_buddy

PG_buddy can be converted to _mapcount == -2.  So the PG_compound_lock can
be added to page->flags without overflowing (because of the sparse section
bits increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y.  This also
has to move the memory hotplug code from _mapcount to lru.next to avoid
any risk of clashes.  We can't use lru.next for PG_buddy removal, but
memory hotplug can use lru.next even more easily than the mapcount
instead.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 14 +++++++++-----
 include/linux/mm.h             | 21 +++++++++++++++++++++
 include/linux/page-flags.h     |  7 +------
 3 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 31c237a..24376fe 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,16 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Types for free bootmem.
- * The normal smallest mapcount is -1. Here is smaller value than it.
+ * Types for free bootmem stored in page->lru.next. These have to be in
+ * some random range in unsigned long space for debugging purposes.
  */
-#define SECTION_INFO		(-1 - 1)
-#define MIX_SECTION_INFO	(-1 - 2)
-#define NODE_INFO		(-1 - 3)
+enum {
+	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
+	MIX_SECTION_INFO,
+	NODE_INFO,
+	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
+};
 
 /*
  * pgdat resizing functions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ec5138..7ab7d2b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -397,6 +397,27 @@ static inline void init_page_count(struct page *page)
 	atomic_set(&page->_count, 1);
 }
 
+/*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+static inline int PageBuddy(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == -2;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+	atomic_set(&page->_mapcount, -2);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+	VM_BUG_ON(!PageBuddy(page));
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4ca1241..0db8037 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -48,9 +48,6 @@
  * struct page (these bits with information) are always mapped into kernel
  * address space...
  *
- * PG_buddy is set to indicate that the page is free and in the buddy system
- * (see mm/page_alloc.c).
- *
  * PG_hwpoison indicates that a page got corrupted in hardware and contains
  * data with incorrect ECC bits that triggered a machine check. Accessing is
  * not safe since it may cause another machine check. Don't touch!
@@ -96,7 +93,6 @@ enum pageflags {
 	PG_swapcache,		/* Swap page: swp_entry_t in private */
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
-	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
 	PG_unevictable,		/* Page is "unevictable"  */
 #ifdef CONFIG_MMU
@@ -233,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
  * risky: they bypass page accounting.
  */
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-__PAGEFLAG(Buddy, buddy)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
 /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
@@ -461,7 +456,7 @@ static inline int PageTransCompound(struct page *page)
 #define PAGE_FLAGS_CHECK_AT_FREE \
 	(1 << PG_lru	 | 1 << PG_locked    | \
 	 1 << PG_private | 1 << PG_private_2 | \
-	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
+	 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
 	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
 	 __PG_COMPOUND_LOCK)
-- 
cgit v1.1


From f2d6bfe9ff0acec30b713614260e78b03d20e909 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 13 Jan 2011 15:47:01 -0800
Subject: thp: add x86 32bit support

Add support for transparent hugepages to x86 32bit.

Share the same VM_ bitflag for VM_MAPPED_COPY.  mm/nommu.c will never
support transparent hugepages.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ab7d2b..9c2695b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -102,7 +102,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_HUGEPAGE	0x01000000	/* MADV_HUGEPAGE marked this vma */
+#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
@@ -111,9 +115,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
 #define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
-#if BITS_PER_LONG > 32
-#define VM_HUGEPAGE	0x100000000UL	/* MADV_HUGEPAGE marked this vma */
-#endif
 
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP	(VM_RAND_READ | VM_SEQ_READ)
-- 
cgit v1.1


From 0ca1634d4143c3579273ca53b993df19f5c98e92 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 13 Jan 2011 15:47:02 -0800
Subject: thp: mincore transparent hugepage support

Handle transparent huge page pmd entries natively instead of splitting
them into subpages.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 43a694e..25125fb 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -19,6 +19,9 @@ extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
 			pmd_t *pmd);
+extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+			unsigned long addr, unsigned long end,
+			unsigned char *vec);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
-- 
cgit v1.1


From cd7548ab360c462118568eebb8c6da3bc303b02e Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 13 Jan 2011 15:47:04 -0800
Subject: thp: mprotect: transparent huge page support

Natively handle huge pmds when changing page tables on behalf of
mprotect().

I left out update_mmu_cache() because we do not need it on x86 anyway but
more importantly the interface works on ptes, not pmds.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 25125fb..c590b08 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -22,6 +22,8 @@ extern int zap_huge_pmd(struct mmu_gather *tlb,
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);
+extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+			unsigned long addr, pgprot_t newprot);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
-- 
cgit v1.1


From 0bbbc0b33d141f78a0d9218a54a47f50621220d3 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:05 -0800
Subject: thp: add numa awareness to hugepage allocations

It's mostly a matter of replacing alloc_pages with alloc_pages_vma after
introducing alloc_pages_vma.  khugepaged needs special handling as the
allocation has to happen inside collapse_huge_page where the vma is known
and an error has to be returned to the outer loop to sleep
alloc_sleep_millisecs in case of failure.  But it retains the more
efficient logic of handling allocation failures in khugepaged in case of
CONFIG_NUMA=n.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d95082c..a3b148a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -331,14 +331,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	return alloc_pages_current(gfp_mask, order);
 }
-extern struct page *alloc_page_vma(gfp_t gfp_mask,
+extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_pages_vma(gfp_mask, order, vma, addr)	\
+	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+#define alloc_page_vma(gfp_mask, vma, addr)	\
+	alloc_pages_vma(gfp_mask, 0, vma, addr)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
-- 
cgit v1.1


From 94fcc585fb85ad7b059c70872489b50044d401f3 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:08 -0800
Subject: thp: avoid breaking huge pmd invariants in case of vma_adjust
 failures

An huge pmd can only be mapped if the corresponding 2M virtual range is
fully contained in the vma.  At times the VM calls split_vma twice, if the
first split_vma succeeds and the second fail, the first split_vma remains
in effect and it's not rolled back.  For split_vma or vma_adjust to fail
an allocation failure is needed so it's a very unlikely event (the out of
memory killer would normally fire before any allocation failure is visible
to kernel and userland and if an out of memory condition happens it's
unlikely to happen exactly here).  Nevertheless it's safer to ensure that
no huge pmd can be left around if the vma is adjusted in a way that can't
fit hugepages anymore at the new vm_start/vm_end address.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c590b08..8275952 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -104,6 +104,19 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
 extern int hugepage_madvise(unsigned long *vm_flags);
+extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+				    unsigned long start,
+				    unsigned long end,
+				    long adjust_next);
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+					 unsigned long start,
+					 unsigned long end,
+					 long adjust_next)
+{
+	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+		return;
+	__vma_adjust_trans_huge(vma, start, end, adjust_next);
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUG(); 0; })
@@ -125,6 +138,12 @@ static inline int hugepage_madvise(unsigned long *vm_flags)
 	BUG();
 	return 0;
 }
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+					 unsigned long start,
+					 unsigned long end,
+					 long adjust_next)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
-- 
cgit v1.1


From 8ee53820edfd1f3b6554c593f337148dd3d7fc91 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:10 -0800
Subject: thp: mmu_notifier_test_young

For GRU and EPT, we need gup-fast to set referenced bit too (this is why
it's correct to return 0 when shadow_access_mask is zero, it requires
gup-fast to set the referenced bit).  qemu-kvm access already sets the
young bit in the pte if it isn't zero-copy, if it's zero copy or a shadow
paging EPT minor fault we relay on gup-fast to signal the page is in
use...

We also need to check the young bits on the secondary pagetables for NPT
and not nested shadow mmu as the data may never get accessed again by the
primary pte.

Without this closer accuracy, we'd have to remove the heuristic that
avoids collapsing hugepages in hugepage virtual regions that have not even
a single subpage in use.

->test_young is full backwards compatible with GRU and other usages that
don't have young bits in pagetables set by the hardware and that should
nuke the secondary mmu mappings when ->clear_flush_young runs just like
EPT does.

Removing the heuristic that checks the young bit in
khugepaged/collapse_huge_page completely isn't so bad either probably but
I thought it was worth it and this makes it reliable.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index cbfab1e..cc2e7df 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,16 @@ struct mmu_notifier_ops {
 				 unsigned long address);
 
 	/*
+	 * test_young is called to check the young/accessed bitflag in
+	 * the secondary pte. This is used to know if the page is
+	 * frequently used without actually clearing the flag or tearing
+	 * down the secondary mapping on the page.
+	 */
+	int (*test_young)(struct mmu_notifier *mn,
+			  struct mm_struct *mm,
+			  unsigned long address);
+
+	/*
 	 * change_pte is called in cases that pte mapping to page is changed:
 	 * for example, when ksm remaps pte to point to a new shared page.
 	 */
@@ -163,6 +173,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 					  unsigned long address);
+extern int __mmu_notifier_test_young(struct mm_struct *mm,
+				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
 extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -186,6 +198,14 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_test_young(mm, address);
+	return 0;
+}
+
 static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 					   unsigned long address, pte_t pte)
 {
@@ -313,6 +333,12 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 					   unsigned long address, pte_t pte)
 {
-- 
cgit v1.1


From 5a03b051ed87e72b959f32a86054e1142ac4cf55 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:11 -0800
Subject: thp: use compaction in kswapd for GFP_ATOMIC order > 0

This takes advantage of memory compaction to properly generate pages of
order > 0 if regular page reclaim fails and priority level becomes more
severe and we don't reach the proper watermarks.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 72cba40..dfa2ed4 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,6 +11,9 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
+#define COMPACT_MODE_DIRECT_RECLAIM	0
+#define COMPACT_MODE_KSWAPD		1
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -25,7 +28,8 @@ extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			bool sync);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 extern unsigned long compact_zone_order(struct zone *zone, int order,
-						gfp_t gfp_mask, bool sync);
+					gfp_t gfp_mask, bool sync,
+					int compact_mode);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -70,9 +74,10 @@ static inline unsigned long compaction_suitable(struct zone *zone, int order)
 }
 
 static inline unsigned long compact_zone_order(struct zone *zone, int order,
-						gfp_t gfp_mask, bool sync)
+					       gfp_t gfp_mask, bool sync,
+					       int compact_mode)
 {
-	return 0;
+	return COMPACT_CONTINUE;
 }
 
 static inline void defer_compaction(struct zone *zone)
-- 
cgit v1.1


From 2c888cfbc1b45508a44763d85ba2e8ac43faff5f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Thu, 13 Jan 2011 15:47:13 -0800
Subject: thp: fix anon memory statistics with transparent hugepages

Count each transparent hugepage as HPAGE_PMD_NR pages in the LRU
statistics, so the Active(anon) and Inactive(anon) statistics in
/proc/meminfo are correct.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h   | 8 ++++++++
 include/linux/mm_inline.h | 8 +++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8275952..9b48c24d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,11 +117,19 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 		return;
 	__vma_adjust_trans_huge(vma, start, end, adjust_next);
 }
+static inline int hpage_nr_pages(struct page *page)
+{
+	if (unlikely(PageTransHuge(page)))
+		return HPAGE_PMD_NR;
+	return 1;
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUG(); 0; })
 #define HPAGE_PMD_SIZE ({ BUG(); 0; })
 
+#define hpage_nr_pages(x) 1
+
 #define transparent_hugepage_enabled(__vma) 0
 
 #define transparent_hugepage_flags 0UL
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 650f31e..8f7d247 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,6 +1,8 @@
 #ifndef LINUX_MM_INLINE_H
 #define LINUX_MM_INLINE_H
 
+#include <linux/huge_mm.h>
+
 /**
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
  * @page: the page to test
@@ -24,7 +26,7 @@ __add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
 		       struct list_head *head)
 {
 	list_add(&page->lru, head);
-	__inc_zone_state(zone, NR_LRU_BASE + l);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
 	mem_cgroup_add_lru_list(page, l);
 }
 
@@ -38,7 +40,7 @@ static inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
 	list_del(&page->lru);
-	__dec_zone_state(zone, NR_LRU_BASE + l);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
 	mem_cgroup_del_lru_list(page, l);
 }
 
@@ -73,7 +75,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			l += LRU_ACTIVE;
 		}
 	}
-	__dec_zone_state(zone, NR_LRU_BASE + l);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
 	mem_cgroup_del_lru_list(page, l);
 }
 
-- 
cgit v1.1


From 37c2ac7872a9387542616f658d20ac25f5bdb32e Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:16 -0800
Subject: thp: compound_trans_order

Read compound_trans_order safe. Noop for CONFIG_TRANSPARENT_HUGEPAGE=n.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9c2695b..ce97a2b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -450,6 +450,20 @@ static inline int compound_order(struct page *page)
 	return (unsigned long)page[1].lru.prev;
 }
 
+static inline int compound_trans_order(struct page *page)
+{
+	int order;
+	unsigned long flags;
+
+	if (!PageHead(page))
+		return 0;
+
+	flags = compound_lock_irqsave(page);
+	order = compound_order(page);
+	compound_unlock_irqrestore(page, flags);
+	return order;
+}
+
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
 	page[1].lru.prev = (void *)order;
-- 
cgit v1.1


From 1ddd6db43a08cba56c7ee920800980862086f1c3 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:17 -0800
Subject: thp: mm: define MADV_NOHUGEPAGE

Define MADV_NOHUGEPAGE.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/mman-common.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h
index e91392f..787abbb 100644
--- a/include/asm-generic/mman-common.h
+++ b/include/asm-generic/mman-common.h
@@ -46,6 +46,7 @@
 #define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
 
 #define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE	15		/* Not worth backing with hugepages */
 
 /* compatibility flags */
 #define MAP_FILE	0
-- 
cgit v1.1


From a664b2d8555c659127bf8fe049a58449d394a707 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:17 -0800
Subject: thp: madvise(MADV_NOHUGEPAGE)

Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be
hugepage backed.  Return -EINVAL if the vma is not of an anonymous type,
or the feature isn't built into the kernel.  Never silently return
success.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h    | 14 ++++++++------
 include/linux/khugepaged.h |  7 ++++---
 include/linux/mm.h         |  1 +
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9b48c24d..a8b7e42 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page,
 #define HPAGE_PMD_SIZE HPAGE_SIZE
 
 #define transparent_hugepage_enabled(__vma)				\
-	(transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) ||	\
-	 (transparent_hugepage_flags &					\
-	  (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
-	  (__vma)->vm_flags & VM_HUGEPAGE))
+	((transparent_hugepage_flags &					\
+	  (1<<TRANSPARENT_HUGEPAGE_FLAG) ||				\
+	  (transparent_hugepage_flags &					\
+	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
+	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
+	 !((__vma)->vm_flags & VM_NOHUGEPAGE))
 #define transparent_hugepage_defrag(__vma)				\
 	((transparent_hugepage_flags &					\
 	  (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) ||			\
@@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #if HPAGE_PMD_ORDER > MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
-extern int hugepage_madvise(unsigned long *vm_flags);
+extern int hugepage_madvise(unsigned long *vm_flags, int advice);
 extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
@@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page)
 	do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)	\
 	do { } while (0)
-static inline int hugepage_madvise(unsigned long *vm_flags)
+static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
 {
 	BUG();
 	return 0;
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 552f318..6b394f0 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm)
 static inline int khugepaged_enter(struct vm_area_struct *vma)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
-		if (khugepaged_always() ||
-		    (khugepaged_req_madv() &&
-		     vma->vm_flags & VM_HUGEPAGE))
+		if ((khugepaged_always() ||
+		     (khugepaged_req_madv() &&
+		      vma->vm_flags & VM_HUGEPAGE)) &&
+		    !(vma->vm_flags & VM_NOHUGEPAGE))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce97a2b..956a355 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_GROWSUP	0x00000200
 #else
 #define VM_GROWSUP	0x00000000
+#define VM_NOHUGEPAGE	0x00000200	/* MADV_NOHUGEPAGE marked this vma */
 #endif
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
-- 
cgit v1.1


From 60ab3244ec85c44276c585a2a20d3750402e1cf4 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:18 -0800
Subject: thp: khugepaged: make khugepaged aware about madvise

MADV_HUGEPAGE and MADV_NOHUGEPAGE were fully effective only if run after
mmap and before touching the memory.  While this is enough for most
usages, it's little effort to make madvise more dynamic at runtime on an
existing mapping by making khugepaged aware about madvise.

MADV_HUGEPAGE: register in khugepaged immediately without waiting a page
fault (that may not ever happen if all pages are already mapped and the
"enabled" knob was set to madvise during the initial page faults).

MADV_NOHUGEPAGE: skip vmas marked VM_NOHUGEPAGE in khugepaged to stop
collapsing pages where not needed.

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8b7e42..bddfba1 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -105,7 +105,8 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #if HPAGE_PMD_ORDER > MAX_ORDER
 #error "hugepages can't be allocated by the buddy allocator"
 #endif
-extern int hugepage_madvise(unsigned long *vm_flags, int advice);
+extern int hugepage_madvise(struct vm_area_struct *vma,
+			    unsigned long *vm_flags, int advice);
 extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
@@ -143,7 +144,8 @@ static inline int split_huge_page(struct page *page)
 	do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)	\
 	do { } while (0)
-static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
+static inline int hugepage_madvise(struct vm_area_struct *vma,
+				   unsigned long *vm_flags, int advice)
 {
 	BUG();
 	return 0;
-- 
cgit v1.1


From 22e5c47ee238abe636655c3862ed28d6eb084ad4 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 13 Jan 2011 15:47:20 -0800
Subject: thp: add compound_trans_head() helper

Cleanup some code with common compound_trans_head helper.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index bddfba1..8e6c8c4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -126,6 +126,23 @@ static inline int hpage_nr_pages(struct page *page)
 		return HPAGE_PMD_NR;
 	return 1;
 }
+static inline struct page *compound_trans_head(struct page *page)
+{
+	if (PageTail(page)) {
+		struct page *head;
+		head = page->first_page;
+		smp_rmb();
+		/*
+		 * head may be a dangling pointer.
+		 * __split_huge_page_refcount clears PageTail before
+		 * overwriting first_page, so if PageTail is still
+		 * there it means the head pointer isn't dangling.
+		 */
+		if (PageTail(page))
+			return head;
+	}
+	return page;
+}
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUG(); 0; })
@@ -144,6 +161,7 @@ static inline int split_huge_page(struct page *page)
 	do { } while (0)
 #define wait_split_huge_page(__anon_vma, __pmd)	\
 	do { } while (0)
+#define compound_trans_head(page) compound_head(page)
 static inline int hugepage_madvise(struct vm_area_struct *vma,
 				   unsigned long *vm_flags, int advice)
 {
-- 
cgit v1.1


From 29c1f677d424e8c5683a837fc4f03fc9f19201d7 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Thu, 13 Jan 2011 15:47:21 -0800
Subject: mm: migration: use rcu_dereference_protected when dereferencing the
 radix tree slot during file page migration

migrate_pages() -> unmap_and_move() only calls rcu_read_lock() for
anonymous pages, as introduced by git commit
989f89c57e6361e7d16fbd9572b5da7d313b073d ("fix rcu_read_lock() in page
migraton").  The point of the RCU protection there is part of getting a
stable reference to anon_vma and is only held for anon pages as file pages
are locked which is sufficient protection against freeing.

However, while a file page's mapping is being migrated, the radix tree is
double checked to ensure it is the expected page.  This uses
radix_tree_deref_slot() -> rcu_dereference() without the RCU lock held
triggering the following warning.

[  173.674290] ===================================================
[  173.676016] [ INFO: suspicious rcu_dereference_check() usage. ]
[  173.676016] ---------------------------------------------------
[  173.676016] include/linux/radix-tree.h:145 invoked rcu_dereference_check() without protection!
[  173.676016]
[  173.676016] other info that might help us debug this:
[  173.676016]
[  173.676016]
[  173.676016] rcu_scheduler_active = 1, debug_locks = 0
[  173.676016] 1 lock held by hugeadm/2899:
[  173.676016]  #0:  (&(&inode->i_data.tree_lock)->rlock){..-.-.}, at: [<c10e3d2b>] migrate_page_move_mapping+0x40/0x1ab
[  173.676016]
[  173.676016] stack backtrace:
[  173.676016] Pid: 2899, comm: hugeadm Not tainted 2.6.37-rc5-autobuild
[  173.676016] Call Trace:
[  173.676016]  [<c128cc01>] ? printk+0x14/0x1b
[  173.676016]  [<c1063502>] lockdep_rcu_dereference+0x7d/0x86
[  173.676016]  [<c10e3db5>] migrate_page_move_mapping+0xca/0x1ab
[  173.676016]  [<c10e41ad>] migrate_page+0x23/0x39
[  173.676016]  [<c10e491b>] buffer_migrate_page+0x22/0x107
[  173.676016]  [<c10e48f9>] ? buffer_migrate_page+0x0/0x107
[  173.676016]  [<c10e425d>] move_to_new_page+0x9a/0x1ae
[  173.676016]  [<c10e47e6>] migrate_pages+0x1e7/0x2fa

This patch introduces radix_tree_deref_slot_protected() which calls
rcu_dereference_protected().  Users of it must pass in the
mapping->tree_lock that is protecting this dereference.  Holding the tree
lock protects against parallel updaters of the radix tree meaning that
rcu_dereference_protected is allowable.

[akpm@linux-foundation.org: remove unneeded casts]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Milton Miller <miltonm@bga.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: <stable@kernel.org>		[2.6.37.early]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index ab2baa5..23241c2 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -146,6 +146,22 @@ static inline void *radix_tree_deref_slot(void **pslot)
 }
 
 /**
+ * radix_tree_deref_slot_protected	- dereference a slot without RCU lock but with tree lock held
+ * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
+ * Returns:	item that was stored in that slot with any direct pointer flag
+ *		removed.
+ *
+ * Similar to radix_tree_deref_slot but only used during migration when a pages
+ * mapping is being moved. The caller does not hold the RCU read lock but it
+ * must hold the tree lock to prevent parallel updates.
+ */
+static inline void *radix_tree_deref_slot_protected(void **pslot,
+							spinlock_t *treelock)
+{
+	return rcu_dereference_protected(*pslot, lockdep_is_held(treelock));
+}
+
+/**
  * radix_tree_deref_retry	- check radix_tree_deref_slot
  * @arg:	pointer returned by radix_tree_deref_slot
  * Returns:	0 if retry is not required, otherwise retry is required
-- 
cgit v1.1


From db16d5ec1f87f17511599bc77857dd1662b5a22f Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Thu, 13 Jan 2011 15:47:35 -0800
Subject: memcg: add page_cgroup flags for dirty page tracking

This patchset provides the ability for each cgroup to have independent
dirty page limits.

Limiting dirty memory is like fixing the max amount of dirty (hard to
reclaim) page cache used by a cgroup.  So, in case of multiple cgroup
writers, they will not be able to consume more than their designated share
of dirty pages and will be forced to perform write-out if they cross that
limit.

The patches are based on a series proposed by Andrea Righi in Mar 2010.

Overview:

- Add page_cgroup flags to record when pages are dirty, in writeback, or nfs
  unstable.

- Extend mem_cgroup to record the total number of pages in each of the
  interesting dirty states (dirty, writeback, unstable_nfs).

- Add dirty parameters similar to the system-wide  /proc/sys/vm/dirty_*
  limits to mem_cgroup.  The mem_cgroup dirty parameters are accessible
  via cgroupfs control files.

- Consider both system and per-memcg dirty limits in page writeback when
  deciding to queue background writeback or block for foreground writeback.

Known shortcomings:

- When a cgroup dirty limit is exceeded, then bdi writeback is employed to
  writeback dirty inodes.  Bdi writeback considers inodes from any cgroup, not
  just inodes contributing dirty pages to the cgroup exceeding its limit.

- When memory.use_hierarchy is set, then dirty limits are disabled.  This is a
  implementation detail.  An enhanced implementation is needed to check the
  chain of parents to ensure that no dirty limit is exceeded.

Performance data:
- A page fault microbenchmark workload was used to measure performance, which
  can be called in read or write mode:
        f = open(foo. $cpu)
        truncate(f, 4096)
        alarm(60)
        while (1) {
                p = mmap(f, 4096)
                if (write)
			*p = 1
		else
			x = *p
                munmap(p)
        }

- The workload was called for several points in the patch series in different
  modes:
  - s_read is a single threaded reader
  - s_write is a single threaded writer
  - p_read is a 16 thread reader, each operating on a different file
  - p_write is a 16 thread writer, each operating on a different file

- Measurements were collected on a 16 core non-numa system using "perf stat
  --repeat 3".  The -a option was used for parallel (p_*) runs.

- All numbers are page fault rate (M/sec).  Higher is better.

- To compare the performance of a kernel without non-memcg compare the first and
  last rows, neither has memcg configured.  The first row does not include any
  of these memcg patches.

- To compare the performance of using memcg dirty limits, compare the baseline
  (2nd row titled "w/ memcg") with the the code and memcg enabled (2nd to last
  row titled "all patches").

                           root_cgroup                    child_cgroup
                 s_read s_write p_read p_write   s_read s_write p_read p_write
mmotm w/o memcg   0.428  0.390   0.429  0.388
mmotm w/ memcg    0.411  0.378   0.391  0.362     0.412  0.377   0.385  0.363
all patches       0.384  0.360   0.370  0.348     0.381  0.363   0.368  0.347
all patches       0.431  0.402   0.427  0.395
  w/o memcg

This patch:

Add additional flags to page_cgroup to track dirty pages within a
mem_cgroup.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrea Righi <arighi@develer.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index b02195d..fdb5a92 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -40,6 +40,9 @@ enum {
 	PCG_USED, /* this object is in use. */
 	PCG_ACCT_LRU, /* page has been accounted for */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	PCG_FILE_DIRTY, /* page is dirty */
+	PCG_FILE_WRITEBACK, /* page is under writeback */
+	PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
 	PCG_MIGRATION, /* under page migration */
 };
 
@@ -59,6 +62,10 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
 static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
 	{ return test_and_clear_bit(PCG_##lname, &pc->flags);  }
 
+#define TESTSETPCGFLAG(uname, lname)			\
+static inline int TestSetPageCgroup##uname(struct page_cgroup *pc)	\
+	{ return test_and_set_bit(PCG_##lname, &pc->flags);  }
+
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
 CLEARPCGFLAG(Cache, CACHE)
@@ -78,6 +85,22 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
 
+SETPCGFLAG(FileDirty, FILE_DIRTY)
+CLEARPCGFLAG(FileDirty, FILE_DIRTY)
+TESTPCGFLAG(FileDirty, FILE_DIRTY)
+TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY)
+TESTSETPCGFLAG(FileDirty, FILE_DIRTY)
+
+SETPCGFLAG(FileWriteback, FILE_WRITEBACK)
+CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK)
+TESTPCGFLAG(FileWriteback, FILE_WRITEBACK)
+
+SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+
 SETPCGFLAG(Migration, MIGRATION)
 CLEARPCGFLAG(Migration, MIGRATION)
 TESTPCGFLAG(Migration, MIGRATION)
-- 
cgit v1.1


From 2a7106f2cb0768d00fe8c1eb42a754a7d8518f08 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Thu, 13 Jan 2011 15:47:37 -0800
Subject: memcg: create extensible page stat update routines

Replace usage of the mem_cgroup_update_file_mapped() memcg
statistic update routine with two new routines:
* mem_cgroup_inc_page_stat()
* mem_cgroup_dec_page_stat()

As before, only the file_mapped statistic is managed.  However, these more
general interfaces allow for new statistics to be more easily added.  New
statistics are added with memcg dirty page accounting.

Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrea Righi <arighi@develer.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 159a0762..067115c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -25,6 +25,11 @@ struct page_cgroup;
 struct page;
 struct mm_struct;
 
+/* Stats that can be updated by kernel. */
+enum mem_cgroup_page_stat_item {
+	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
+};
+
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -121,7 +126,22 @@ static inline bool mem_cgroup_disabled(void)
 	return false;
 }
 
-void mem_cgroup_update_file_mapped(struct page *page, int val);
+void mem_cgroup_update_page_stat(struct page *page,
+				 enum mem_cgroup_page_stat_item idx,
+				 int val);
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+					    enum mem_cgroup_page_stat_item idx)
+{
+	mem_cgroup_update_page_stat(page, idx, 1);
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+					    enum mem_cgroup_page_stat_item idx)
+{
+	mem_cgroup_update_page_stat(page, idx, -1);
+}
+
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
@@ -293,8 +313,13 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
-static inline void mem_cgroup_update_file_mapped(struct page *page,
-							int val)
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+					    enum mem_cgroup_page_stat_item idx)
+{
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+					    enum mem_cgroup_page_stat_item idx)
 {
 }
 
-- 
cgit v1.1


From dbd4ea78f002df283c95d9774837041735fa1bf9 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 13 Jan 2011 15:47:38 -0800
Subject: memcg: add lock to synchronize page accounting and migration

Introduce a new bit spin lock, PCG_MOVE_LOCK, to synchronize the page
accounting and migration code.  This reworks the locking scheme of
_update_stat() and _move_account() by adding new lock bit PCG_MOVE_LOCK,
which is always taken under IRQ disable.

1. If pages are being migrated from a memcg, then updates to that
   memcg page statistics are protected by grabbing PCG_MOVE_LOCK using
   move_lock_page_cgroup().  In an upcoming commit, memcg dirty page
   accounting will be updating memcg page accounting (specifically: num
   writeback pages) from IRQ context (softirq).  Avoid a deadlocking
   nested spin lock attempt by disabling irq on the local processor when
   grabbing the PCG_MOVE_LOCK.

2. lock for update_page_stat is used only for avoiding race with
   move_account().  So, IRQ awareness of lock_page_cgroup() itself is not
   a problem.  The problem is between mem_cgroup_update_page_stat() and
   mem_cgroup_move_account_page().

Trade-off:
  * Changing lock_page_cgroup() to always disable IRQ (or
    local_bh) has some impacts on performance and I think
    it's bad to disable IRQ when it's not necessary.
  * adding a new lock makes move_account() slower.  Score is
    here.

Performance Impact: moving a 8G anon process.

Before:
	real    0m0.792s
	user    0m0.000s
	sys     0m0.780s

After:
	real    0m0.854s
	user    0m0.000s
	sys     0m0.842s

This score is bad but planned patches for optimization can reduce
this impact.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Andrea Righi <arighi@develer.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index fdb5a92..5b0c971 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -35,15 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page);
 
 enum {
 	/* flags for mem_cgroup */
-	PCG_LOCK,  /* page cgroup is locked */
+	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
 	PCG_CACHE, /* charged as cache */
 	PCG_USED, /* this object is in use. */
-	PCG_ACCT_LRU, /* page has been accounted for */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
 	PCG_FILE_DIRTY, /* page is dirty */
 	PCG_FILE_WRITEBACK, /* page is under writeback */
 	PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
-	PCG_MIGRATION, /* under page migration */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
 };
 
 #define TESTPCGFLAG(uname, lname)			\
@@ -117,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 
 static inline void lock_page_cgroup(struct page_cgroup *pc)
 {
+	/*
+	 * Don't take this lock in IRQ context.
+	 * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
+	 */
 	bit_spin_lock(PCG_LOCK, &pc->flags);
 }
 
@@ -130,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc)
 	return bit_spin_is_locked(PCG_LOCK, &pc->flags);
 }
 
+static inline void move_lock_page_cgroup(struct page_cgroup *pc,
+	unsigned long *flags)
+{
+	/*
+	 * We know updates to pc->flags of page cache's stats are from both of
+	 * usual context or IRQ context. Disable IRQ to avoid deadlock.
+	 */
+	local_irq_save(*flags);
+	bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
+}
+
+static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
+	unsigned long *flags)
+{
+	bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
+	local_irq_restore(*flags);
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
-- 
cgit v1.1


From 50de1dd967d4ba3b8a90ebe7a4f5feca24191317 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Thu, 13 Jan 2011 15:47:43 -0800
Subject: memcg: fix memory migration of shmem swapcache

In the current implementation mem_cgroup_end_migration() decides whether
the page migration has succeeded or not by checking "oldpage->mapping".

But if we are tring to migrate a shmem swapcache, the page->mapping of it
is NULL from the begining, so the check would be invalid.  As a result,
mem_cgroup_end_migration() assumes the migration has succeeded even if
it's not, so "newpage" would be freed while it's not uncharged.

This patch fixes it by passing mem_cgroup_end_migration() the result of
the page migration.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 067115c..6a576f9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -98,7 +98,7 @@ extern int
 mem_cgroup_prepare_migration(struct page *page,
 	struct page *newpage, struct mem_cgroup **ptr);
 extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
-	struct page *oldpage, struct page *newpage);
+	struct page *oldpage, struct page *newpage, bool migration_ok);
 
 /*
  * For memory reclaim.
@@ -251,8 +251,7 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 }
 
 static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
-					struct page *oldpage,
-					struct page *newpage)
+		struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
 
-- 
cgit v1.1


From 9ce137eee4febaabca81143be07d4205d2bd52d4 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 11 Jan 2011 14:07:12 -0500
Subject: nfsd: don't support msnfs export option

We've long had these pointless #ifdef MSNFS's sprinkled throughout the
code--pointless because MSNFS is always defined (and we give no config
option to make that easy to change).  So we could just remove the
ifdef's and compile the resulting code unconditionally.

But as long as we're there: why not just rip out this code entirely?
The only purpose is to implement the "msnfs" export option which turns
on Windows-like behavior in some cases, and:

	- the export option isn't documented anywhere;
	- the userland utilities (which would need to be able to parse
	  "msnfs" in an export file) don't support it;
	- I don't know how to maintain this, as I don't know what the
	  proper behavior is; and
	- google shows no evidence that anyone has ever used this.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 8ae78a6..bd31615 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -35,7 +35,7 @@
 #define NFSEXP_NOHIDE		0x0200
 #define NFSEXP_NOSUBTREECHECK	0x0400
 #define	NFSEXP_NOAUTHNLM	0x0800		/* Don't authenticate NLM requests - just trust */
-#define NFSEXP_MSNFS		0x1000	/* do silly things that MS clients expect */
+#define NFSEXP_MSNFS		0x1000	/* do silly things that MS clients expect; no longer supported */
 #define NFSEXP_FSID		0x2000
 #define	NFSEXP_CROSSMOUNT	0x4000
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
-- 
cgit v1.1


From 2c6755988afc003a0332406a134fb6a1626f9b28 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 14 Jan 2011 02:36:43 +0000
Subject: fs: hlist UP debug fixup

Po-Yu Chuang <ratbert.chuang@gmail.com> noticed that hlist_bl_set_first could
crash on a UP system when LIST_BL_LOCKMASK is 0, because

	LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));

always evaulates to true.

Fix the expression, and also avoid a dependency between bit spinlock
implementation and list bl code (list code shouldn't know anything
except that bit 0 is set when adding and removing elements). Eventually
if a good use case comes up, we might use this list to store 1 or more
arbitrary bits of data, so it really shouldn't be tied to locking either,
but for now they are helpful for debugging.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 include/linux/list_bl.h    | 5 +++--
 include/linux/rculist_bl.h | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 9ee97e7..b2adbb4 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -16,7 +16,7 @@
  * some fast and compact auxiliary data.
  */
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#if defined(CONFIG_SMP)
 #define LIST_BL_LOCKMASK	1UL
 #else
 #define LIST_BL_LOCKMASK	0UL
@@ -62,7 +62,8 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h,
 					struct hlist_bl_node *n)
 {
 	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
-	LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+	LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) !=
+							LIST_BL_LOCKMASK);
 	h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK);
 }
 
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
index b872b49..cf1244f 100644
--- a/include/linux/rculist_bl.h
+++ b/include/linux/rculist_bl.h
@@ -11,7 +11,8 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h,
 					struct hlist_bl_node *n)
 {
 	LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
-	LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK));
+	LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) !=
+							LIST_BL_LOCKMASK);
 	rcu_assign_pointer(h->first,
 		(struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK));
 }
-- 
cgit v1.1


From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 12 Jan 2011 12:13:14 +0000
Subject: net: remove dev_txq_stats_fold()

After recent changes, (percpu stats on vlan/tunnels...), we dont need
anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters.

Only remaining users are ixgbe, sch_teql, gianfar & macvlan :

1) ixgbe can be converted to use existing tx_ring counters.

2) macvlan incremented txq->tx_dropped, it can use the
dev->stats.tx_dropped counter.

3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats)
    Now we have ndo_get_stats64(), use it, even for "unsigned long"
fields (No need to bring back a struct net_device_stats)

4) gianfar adds a stats structure per tx queue to hold
tx_bytes/tx_packets

This removes a lockdep warning (and possible lockup) in rndis gadget,
calling dev_get_stats() from hard IRQ context.

Ref: http://www.spinics.net/lists/netdev/msg149202.html

Reported-by: Neil Jones <neiljay@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Sandeep Gopalpet <sandeep.kumar@freescale.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be4957c..d971346 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -520,9 +520,6 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
-	u64			tx_bytes;
-	u64			tx_packets;
-	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -2265,8 +2262,6 @@ extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
 extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 					       struct rtnl_link_stats64 *storage);
-extern void		dev_txq_stats_fold(const struct net_device *dev,
-					   struct rtnl_link_stats64 *stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
-- 
cgit v1.1


From 78d07369462e9feeaa5db301b0aa70e9dcb40b48 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 13 Jan 2011 11:51:03 +0000
Subject: ipsec: update MAX_AH_AUTH_LEN to support sha512

icv_truncbits is set to 256 for sha512, so update
MAX_AH_AUTH_LEN to 64.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ah.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ah.h b/include/net/ah.h
index be7798d..ca95b98 100644
--- a/include/net/ah.h
+++ b/include/net/ah.h
@@ -4,7 +4,7 @@
 #include <linux/skbuff.h>
 
 /* This is the maximum truncated ICV length that we know of. */
-#define MAX_AH_AUTH_LEN	16
+#define MAX_AH_AUTH_LEN	64
 
 struct crypto_ahash;
 
-- 
cgit v1.1


From 51e7eed79c41180919ff94942895ba38467d9ad4 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 12 Jan 2011 22:14:56 +0000
Subject: etherdevice.h: Add is_unicast_ether_addr function

From a check for !is_multicast_ether_addr it is not always obvious that
we're checking for a unicast address. So add this helper function to
make those code paths easier to read.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index bec8b82..ab68f78 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -99,6 +99,17 @@ static inline int is_broadcast_ether_addr(const u8 *addr)
 }
 
 /**
+ * is_unicast_ether_addr - Determine if the Ethernet address is unicast
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if the address is a unicast address.
+ */
+static inline int is_unicast_ether_addr(const u8 *addr)
+{
+	return !is_multicast_ether_addr(addr);
+}
+
+/**
  * is_valid_ether_addr - Determine if the given Ethernet address is valid
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
-- 
cgit v1.1


From bba958783b1b4cb0a9420f4e11082467132a334c Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 14 Jan 2011 15:57:47 +0900
Subject: mmc: sh_mmcif: Convert to __raw_xxx() I/O accessors.

When using the I/O accessors in raw mode from the boot stubs we don't
want to bother with any of the complexity associated with readl/writel
and friends. Furthermore, utilization within the context of the host
driver itself is all performed on an ioremapped window, so using the
__raw variants there doesn't pose any problem either.

If and when barriers need to be added in the future, these will need to
be explicitly written out, but this is so far not a concern for any of
the affected CPUs in question.

This fixes up the link error introduced by the ARM tree via its barrier
refactoring:

	arch/arm/boot/compressed/mmcif-sh7372.o: In function `mmcif_loader':
	mmcif-sh7372.c:(.text+0x9e8): undefined reference to `outer_cache

Following the change in:

	http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=6275/1

Reported-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/mmc/sh_mmcif.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index bf17350..38d3930 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -94,12 +94,12 @@ struct sh_mmcif_plat_data {
 
 static inline u32 sh_mmcif_readl(void __iomem *addr, int reg)
 {
-	return readl(addr + reg);
+	return __raw_readl(addr + reg);
 }
 
 static inline void sh_mmcif_writel(void __iomem *addr, int reg, u32 val)
 {
-	writel(val, addr + reg);
+	__raw_writel(val, addr + reg);
 }
 
 #define SH_MMCIF_BBS 512 /* boot block size */
-- 
cgit v1.1


From 412dc11d3fd01f96fdf4a8cbfbc5584a17dab7c8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 24 Nov 2010 18:01:41 +0000
Subject: mfd: Add WM8326 support

The WM8326 is a high performance variant of the WM832x series with
no software visible differences.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm831x/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index a1239c4..903280d 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -245,6 +245,7 @@ enum wm831x_parent {
 	WM8320 = 0x8320,
 	WM8321 = 0x8321,
 	WM8325 = 0x8325,
+	WM8326 = 0x8326,
 };
 
 struct wm831x {
-- 
cgit v1.1


From 4c90aa94f6b3e33f57faaf19ef9819195dff61d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 26 Nov 2010 17:19:34 +0000
Subject: mfd: Provide pm_runtime_no_callbacks flag in cell data

Allow MFD cells to have pm_runtime_no_callbacks() called on them during
registration. This causes the runtime PM framework to ignore them,
allowing use of runtime PM to suspend the device as a whole even if
not all drivers for the MFD can usefully implement runtime PM. For
example, RTCs are likely to run continuously regardless of the power
state of the system.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 5582ab3..835996e 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -47,6 +47,12 @@ struct mfd_cell {
 
 	/* don't check for resource conflicts */
 	bool			ignore_resource_conflicts;
+
+	/*
+	 * Disable runtime PM callbacks for this subdevice - see
+	 * pm_runtime_no_callbacks().
+	 */
+	bool			pm_runtime_no_callbacks;
 };
 
 extern int mfd_add_devices(struct device *parent, int id,
-- 
cgit v1.1


From e098aded79f24e2024139e82f778ff9db6dc142a Mon Sep 17 00:00:00 2001
From: Mattias Wallin <mattias.wallin@stericsson.com>
Date: Thu, 2 Dec 2010 15:40:31 +0100
Subject: mfd: ab8500-core ioresources irq for subdrivers added

This patch adds the ioresources used by subdrivers to
retrieve their interrupt.

Signed-off-by: Mattias Wallin <mattias.wallin@stericsson.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/ab8500.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h
index 85cf2c28..1ad1696 100644
--- a/include/linux/mfd/ab8500.h
+++ b/include/linux/mfd/ab8500.h
@@ -91,8 +91,8 @@
 #define AB8500_INT_ID_DET_R4F		93
 #define AB8500_INT_USB_CHG_DET_DONE	94
 #define AB8500_INT_USB_CH_TH_PROT_F	96
-#define AB8500_INT_USB_CH_TH_PROP_R	97
-#define AB8500_INT_MAIN_CH_TH_PROP_F	98
+#define AB8500_INT_USB_CH_TH_PROT_R	97
+#define AB8500_INT_MAIN_CH_TH_PROT_F	98
 #define AB8500_INT_MAIN_CH_TH_PROT_R	99
 #define AB8500_INT_USB_CHARGER_NOT_OKF	103
 
-- 
cgit v1.1


From cdd137c9c86c201ddb7f42ec978d2da45e7b7a17 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Thu, 23 Dec 2010 17:53:36 +0900
Subject: mfd: MAX8998/LP3974 hibernation support

This patch makes the driver to save and restore register values
for hibernation.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8998-private.h | 2 ++
 include/linux/mfd/max8998.h         | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h
index 7363dea..effa5d3 100644
--- a/include/linux/mfd/max8998-private.h
+++ b/include/linux/mfd/max8998-private.h
@@ -159,10 +159,12 @@ struct max8998_dev {
 	u8 irq_masks_cur[MAX8998_NUM_IRQ_REGS];
 	u8 irq_masks_cache[MAX8998_NUM_IRQ_REGS];
 	int type;
+	bool wakeup;
 };
 
 int max8998_irq_init(struct max8998_dev *max8998);
 void max8998_irq_exit(struct max8998_dev *max8998);
+int max8998_irq_resume(struct max8998_dev *max8998);
 
 extern int max8998_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest);
 extern int max8998_bulk_read(struct i2c_client *i2c, u8 reg, int count,
diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index f8c9f88..686a744 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -88,6 +88,7 @@ struct max8998_platform_data {
 	int				buck1_set1;
 	int				buck1_set2;
 	int				buck2_set3;
+	bool				wakeup;
 };
 
 #endif /*  __LINUX_MFD_MAX8998_H */
-- 
cgit v1.1


From 337ce5d1c5759644cea6c47220ce7e84f0398362 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Tue, 4 Jan 2011 14:17:39 +0900
Subject: mfd: Support LP3974 RTC

The first releases of LP3974 have a large delay in RTC registers,
which requires 2 seconds of delay after writing to a rtc register
(recommended by National Semiconductor's engineers)
before reading it.

If "rtc_delay" field of the platform data is true, the rtc driver
assumes that such delays are required. Although we have not seen
LP3974s without requiring such delays, we assume that such LP3974s
will be released soon (or they have done so already) and they are
supported by "lp3974" without setting "rtc_delay" at the platform
data.

This patch adds delays with msleep when writing values to RTC registers
if the platform data has rtc_delay set.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8998.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index 686a744..c22b9a4 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -76,6 +76,9 @@ struct max8998_regulator_data {
  * @buck1_set1: BUCK1 gpio pin 1 to set output voltage
  * @buck1_set2: BUCK1 gpio pin 2 to set output voltage
  * @buck2_set3: BUCK2 gpio pin to set output voltage
+ * @wakeup: Allow to wake up from suspend
+ * @rtc_delay: LP3974 RTC chip bug that requires delay after a register
+ * write before reading it.
  */
 struct max8998_platform_data {
 	struct max8998_regulator_data	*regulators;
@@ -89,6 +92,7 @@ struct max8998_platform_data {
 	int				buck1_set2;
 	int				buck2_set3;
 	bool				wakeup;
+	bool				rtc_delay;
 };
 
 #endif /*  __LINUX_MFD_MAX8998_H */
-- 
cgit v1.1


From 735a3d9efdc5aeebe201008e6655b235c7f02aeb Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Tue, 11 Jan 2011 12:20:05 +0100
Subject: regulator: Support MAX8998/LP3974 DVS-GPIO

The previous driver did not support BUCK1-DVS3, BUCK1-DVS4, and
BUCK2-DVS2 modes. This patch adds such modes and an option to block
setting buck1/2 voltages out of the preset values.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8998.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index c22b9a4..61daa16 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -70,12 +70,20 @@ struct max8998_regulator_data {
  * @num_regulators: number of regultors used
  * @irq_base: base IRQ number for max8998, required for IRQs
  * @ono: power onoff IRQ number for max8998
- * @buck1_max_voltage1: BUCK1 maximum alowed voltage register 1
- * @buck1_max_voltage2: BUCK1 maximum alowed voltage register 2
- * @buck2_max_voltage: BUCK2 maximum alowed voltage
+ * @buck_voltage_lock: Do NOT change the values of the following six
+ *   registers set by buck?_voltage?. The voltage of BUCK1/2 cannot
+ *   be other than the preset values.
+ * @buck1_voltage1: BUCK1 DVS mode 1 voltage register
+ * @buck1_voltage2: BUCK1 DVS mode 2 voltage register
+ * @buck1_voltage3: BUCK1 DVS mode 3 voltage register
+ * @buck1_voltage4: BUCK1 DVS mode 4 voltage register
+ * @buck2_voltage1: BUCK2 DVS mode 1 voltage register
+ * @buck2_voltage2: BUCK2 DVS mode 2 voltage register
  * @buck1_set1: BUCK1 gpio pin 1 to set output voltage
  * @buck1_set2: BUCK1 gpio pin 2 to set output voltage
+ * @buck1_default_idx: Default for BUCK1 gpio pin 1, 2
  * @buck2_set3: BUCK2 gpio pin to set output voltage
+ * @buck2_default_idx: Default for BUCK2 gpio pin.
  * @wakeup: Allow to wake up from suspend
  * @rtc_delay: LP3974 RTC chip bug that requires delay after a register
  * write before reading it.
@@ -85,12 +93,18 @@ struct max8998_platform_data {
 	int				num_regulators;
 	int				irq_base;
 	int				ono;
-	int                             buck1_max_voltage1;
-	int                             buck1_max_voltage2;
-	int                             buck2_max_voltage;
+	bool				buck_voltage_lock;
+	int				buck1_voltage1;
+	int				buck1_voltage2;
+	int				buck1_voltage3;
+	int				buck1_voltage4;
+	int				buck2_voltage1;
+	int				buck2_voltage2;
 	int				buck1_set1;
 	int				buck1_set2;
+	int				buck1_default_idx;
 	int				buck2_set3;
+	int				buck2_default_idx;
 	bool				wakeup;
 	bool				rtc_delay;
 };
-- 
cgit v1.1


From 92d50a4132977b932ed830fa58c05deeb5c524f0 Mon Sep 17 00:00:00 2001
From: Mattias Wallin <mattias.wallin@stericsson.com>
Date: Tue, 7 Dec 2010 11:20:47 +0100
Subject: mfd: ab8500-core chip version cut 2.0 support

This patch adds support for chip version 2.0 or cut 2.0.
One new interrupt latch register - latch 12 - is introduced.

Signed-off-by: Mattias Wallin <mattias.wallin@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/ab8500.h | 53 ++++++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h
index 1ad1696..37f56b7 100644
--- a/include/linux/mfd/ab8500.h
+++ b/include/linux/mfd/ab8500.h
@@ -74,30 +74,37 @@
 #define AB8500_INT_ACC_DETECT_21DB_F	37
 #define AB8500_INT_ACC_DETECT_21DB_R	38
 #define AB8500_INT_GP_SW_ADC_CONV_END	39
-#define AB8500_INT_BTEMP_LOW		72
-#define AB8500_INT_BTEMP_LOW_MEDIUM	73
-#define AB8500_INT_BTEMP_MEDIUM_HIGH	74
-#define AB8500_INT_BTEMP_HIGH		75
-#define AB8500_INT_USB_CHARGER_NOT_OK	81
-#define AB8500_INT_ID_WAKEUP_R		82
-#define AB8500_INT_ID_DET_R1R		84
-#define AB8500_INT_ID_DET_R2R		85
-#define AB8500_INT_ID_DET_R3R		86
-#define AB8500_INT_ID_DET_R4R		87
-#define AB8500_INT_ID_WAKEUP_F		88
-#define AB8500_INT_ID_DET_R1F		90
-#define AB8500_INT_ID_DET_R2F		91
-#define AB8500_INT_ID_DET_R3F		92
-#define AB8500_INT_ID_DET_R4F		93
-#define AB8500_INT_USB_CHG_DET_DONE	94
-#define AB8500_INT_USB_CH_TH_PROT_F	96
-#define AB8500_INT_USB_CH_TH_PROT_R	97
-#define AB8500_INT_MAIN_CH_TH_PROT_F	98
-#define AB8500_INT_MAIN_CH_TH_PROT_R	99
-#define AB8500_INT_USB_CHARGER_NOT_OKF	103
+#define AB8500_INT_ADP_SOURCE_ERROR	72
+#define AB8500_INT_ADP_SINK_ERROR	73
+#define AB8500_INT_ADP_PROBE_PLUG	74
+#define AB8500_INT_ADP_PROBE_UNPLUG	75
+#define AB8500_INT_ADP_SENSE_OFF	76
+#define AB8500_INT_USB_PHY_POWER_ERR	78
+#define AB8500_INT_USB_LINK_STATUS	79
+#define AB8500_INT_BTEMP_LOW		80
+#define AB8500_INT_BTEMP_LOW_MEDIUM	81
+#define AB8500_INT_BTEMP_MEDIUM_HIGH	82
+#define AB8500_INT_BTEMP_HIGH		83
+#define AB8500_INT_USB_CHARGER_NOT_OK	89
+#define AB8500_INT_ID_WAKEUP_R		90
+#define AB8500_INT_ID_DET_R1R		92
+#define AB8500_INT_ID_DET_R2R		93
+#define AB8500_INT_ID_DET_R3R		94
+#define AB8500_INT_ID_DET_R4R		95
+#define AB8500_INT_ID_WAKEUP_F		96
+#define AB8500_INT_ID_DET_R1F		98
+#define AB8500_INT_ID_DET_R2F		99
+#define AB8500_INT_ID_DET_R3F		100
+#define AB8500_INT_ID_DET_R4F		101
+#define AB8500_INT_USB_CHG_DET_DONE	102
+#define AB8500_INT_USB_CH_TH_PROT_F	104
+#define AB8500_INT_USB_CH_TH_PROT_R    105
+#define AB8500_INT_MAIN_CH_TH_PROT_F   106
+#define AB8500_INT_MAIN_CH_TH_PROT_R	107
+#define AB8500_INT_USB_CHARGER_NOT_OKF	111
 
-#define AB8500_NR_IRQS			104
-#define AB8500_NUM_IRQ_REGS		13
+#define AB8500_NR_IRQS			112
+#define AB8500_NUM_IRQ_REGS		14
 
 /**
  * struct ab8500 - ab8500 internal structure
-- 
cgit v1.1


From 32385c7cf60a78375b63afc4f02001df84dfd1a0 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 14 Jan 2011 13:12:45 +0000
Subject: kernel: fix hlist_bl again

__d_rehash is dereferencing an almost-NULL pointer on my ARM926.
CONFIG_SMP=n and CONFIG_DEBUG_SPINLOCK=y.

The faulting instruction is:    strne   r3, [r2, #4]
and as can be seen from the register dump below, r2 is 0x00000001, hence
the faulting 0x00000005 address.

__d_rehash is essentially:

       spin_lock_bucket(b);
       entry->d_flags &= ~DCACHE_UNHASHED;
       hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
       spin_unlock_bucket(b);

which is:

       bit_spin_lock(0, (unsigned long *)&b->head.first);
       entry->d_flags &= ~DCACHE_UNHASHED;
       hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
       __bit_spin_unlock(0, (unsigned long *)&b->head.first);

bit_spin_lock(0, ptr) sets bit 0 of *ptr, in this case b->head.first if
CONFIG_SMP or CONFIG_DEBUG_SPINLOCK is set:

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
       while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
               while (test_bit(bitnum, addr)) {
                       preempt_enable();
                       cpu_relax();
                       preempt_disable();
               }
       }
#endif

So, b->head.first starts off NULL, and becomes a non-NULL (address 1).
hlist_bl_add_head_rcu() does this:

static inline void hlist_bl_add_head_rcu(struct hlist_bl_node *n,
                                       struct hlist_bl_head *h)
{
       first = hlist_bl_first(h);
       n->next = first;
       if (first)
               first->pprev = &n->next;

It is the store to first->pprev which is faulting.

hlist_bl_first():

static inline struct hlist_bl_node *hlist_bl_first(struct hlist_bl_head *h)
{
       return (struct hlist_bl_node *)
               ((unsigned long)h->first & ~LIST_BL_LOCKMASK);
}

but:
#if defined(CONFIG_SMP)
#define LIST_BL_LOCKMASK        1UL
#else
#define LIST_BL_LOCKMASK        0UL
#endif

So, we have one piece of code which sets bit 0 of addresses, and another
bit of code which doesn't clear it before dereferencing the pointer if
!CONFIG_SMP && CONFIG_DEBUG_SPINLOCK.  With the patch below, I can again
sucessfully boot the kernel on my Versatile PB/926 platform.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/list_bl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index b2adbb4..5bad17d1 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -16,7 +16,7 @@
  * some fast and compact auxiliary data.
  */
 
-#if defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 #define LIST_BL_LOCKMASK	1UL
 #else
 #define LIST_BL_LOCKMASK	0UL
-- 
cgit v1.1


From 359ab9f5b154cbd807a11e22792235f0f36b0cd5 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Fri, 14 Jan 2011 14:46:11 +0900
Subject: power_supply: Add MAX17042 Fuel Gauge Driver

The MAX17042 is a fuel gauge with an I2C interface for lithium-ion
betteries. Unlike its predecessor MAX17040, MAX17042 uses 16bit
registers. Besides, MAX17042 has much more features than MAX17040; e.g.,
a thermistor, current and current accumulation measurement, battery
internal resistance estimate, average values of measurement, and others.

This patch implements a driver for MAX17042.
In this initial release, we have implemented the most basic features of
a fuel gauge: measure the battery capacity and voltage.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/power/max17042_battery.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 include/linux/power/max17042_battery.h

(limited to 'include')

diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
new file mode 100644
index 0000000..7995deb
--- /dev/null
+++ b/include/linux/power/max17042_battery.h
@@ -0,0 +1,30 @@
+/*
+ * Fuel gauge driver for Maxim 17042 / 8966 / 8997
+ *  Note that Maxim 8966 and 8997 are mfd and this is its subdevice.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __MAX17042_BATTERY_H_
+#define __MAX17042_BATTERY_H_
+
+struct max17042_platform_data {
+	bool enable_current_sense;
+};
+
+#endif /* __MAX17042_BATTERY_H_ */
-- 
cgit v1.1


From 836cb711ad7960e52625b24195d6e70b79ab0816 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa@bluextal.(none)>
Date: Fri, 14 Jan 2011 14:56:53 +0900
Subject: Revert update for dirty_ratio for memcg.

The flags added by commit db16d5ec1f87f17511599bc77857dd1662b5a22f
has no user now. We believe we'll use it soon but considering
patch reviewing, the change itself should be folded into incoming
set of "dirty ratio for memcg" patches.

So, it's better to drop this change from current mainline tree.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5b0c971..6d6cb7a 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -42,9 +42,6 @@ enum {
 	/* flags for mem_cgroup and file and I/O status */
 	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	PCG_FILE_DIRTY, /* page is dirty */
-	PCG_FILE_WRITEBACK, /* page is under writeback */
-	PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
 	/* No lock in page_cgroup */
 	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
 };
@@ -65,10 +62,6 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
 static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)	\
 	{ return test_and_clear_bit(PCG_##lname, &pc->flags);  }
 
-#define TESTSETPCGFLAG(uname, lname)			\
-static inline int TestSetPageCgroup##uname(struct page_cgroup *pc)	\
-	{ return test_and_set_bit(PCG_##lname, &pc->flags);  }
-
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
 CLEARPCGFLAG(Cache, CACHE)
@@ -88,22 +81,6 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
 
-SETPCGFLAG(FileDirty, FILE_DIRTY)
-CLEARPCGFLAG(FileDirty, FILE_DIRTY)
-TESTPCGFLAG(FileDirty, FILE_DIRTY)
-TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY)
-TESTSETPCGFLAG(FileDirty, FILE_DIRTY)
-
-SETPCGFLAG(FileWriteback, FILE_WRITEBACK)
-CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK)
-TESTPCGFLAG(FileWriteback, FILE_WRITEBACK)
-
-SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
-CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
-TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
-TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
-TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
-
 SETPCGFLAG(Migration, MIGRATION)
 CLEARPCGFLAG(Migration, MIGRATION)
 TESTPCGFLAG(Migration, MIGRATION)
-- 
cgit v1.1


From 323b7fe8f8f6d5ac6214382cf30e8b3a80b265c9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 14 Jan 2011 09:34:29 +0100
Subject: include/gpio.h: remove remaining __must_check-annotiations

Commit 5f829e405ec4e96f711165a4a7b55c271d4363e2 (gpiolib: add missing functions
to generic fallback) also introduced two.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Greg KH <gregkh@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gpio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 4b47ed9..32720ba 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -35,13 +35,13 @@ static inline int gpio_request(unsigned gpio, const char *label)
 	return -ENOSYS;
 }
 
-static inline int __must_check gpio_request_one(unsigned gpio,
+static inline int gpio_request_one(unsigned gpio,
 					unsigned long flags, const char *label)
 {
 	return -ENOSYS;
 }
 
-static inline int __must_check gpio_request_array(struct gpio *array, size_t num)
+static inline int gpio_request_array(struct gpio *array, size_t num)
 {
 	return -ENOSYS;
 }
-- 
cgit v1.1


From c66ac9db8d4ad9994a02b3e933ea2ccc643e1fe5 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 17 Dec 2010 11:11:26 -0800
Subject: [SCSI] target: Add LIO target core v4.0.0-rc6

LIO target is a full featured in-kernel target framework with the
following feature set:

High-performance, non-blocking, multithreaded architecture with SIMD
support.

Advanced SCSI feature set:

    * Persistent Reservations (PRs)
    * Asymmetric Logical Unit Assignment (ALUA)
    * Protocol and intra-nexus multiplexing, load-balancing and failover (MC/S)
    * Full Error Recovery (ERL=0,1,2)
    * Active/active task migration and session continuation (ERL=2)
    * Thin LUN provisioning (UNMAP and WRITE_SAMExx)

Multiprotocol target plugins

Storage media independence:

    * Virtualization of all storage media; transparent mapping of IO to LUNs
    * No hard limits on number of LUNs per Target; maximum LUN size ~750 TB
    * Backstores: SATA, SAS, SCSI, BluRay, DVD, FLASH, USB, ramdisk, etc.

Standards compliance:

    * Full compliance with IETF (RFC 3720)
    * Full implementation of SPC-4 PRs and ALUA

Significant code cleanups done by Christoph Hellwig.

[jejb: fix up for new block bdev exclusive interface. Minor fixes from
 Randy Dunlap and Dan Carpenter.]
Signed-off-by: Nicholas A. Bellinger <nab@linux-iscsi.org>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/target/configfs_macros.h             | 147 +++++
 include/target/target_core_base.h            | 937 +++++++++++++++++++++++++++
 include/target/target_core_configfs.h        |  52 ++
 include/target/target_core_device.h          |  61 ++
 include/target/target_core_fabric_configfs.h | 106 +++
 include/target/target_core_fabric_lib.h      |  28 +
 include/target/target_core_fabric_ops.h      | 100 +++
 include/target/target_core_tmr.h             |  43 ++
 include/target/target_core_tpg.h             |  35 +
 include/target/target_core_transport.h       | 351 ++++++++++
 10 files changed, 1860 insertions(+)
 create mode 100644 include/target/configfs_macros.h
 create mode 100644 include/target/target_core_base.h
 create mode 100644 include/target/target_core_configfs.h
 create mode 100644 include/target/target_core_device.h
 create mode 100644 include/target/target_core_fabric_configfs.h
 create mode 100644 include/target/target_core_fabric_lib.h
 create mode 100644 include/target/target_core_fabric_ops.h
 create mode 100644 include/target/target_core_tmr.h
 create mode 100644 include/target/target_core_tpg.h
 create mode 100644 include/target/target_core_transport.h

(limited to 'include')

diff --git a/include/target/configfs_macros.h b/include/target/configfs_macros.h
new file mode 100644
index 0000000..7fe7460
--- /dev/null
+++ b/include/target/configfs_macros.h
@@ -0,0 +1,147 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * configfs_macros.h - extends macros for configfs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * Based on kobject.h:
+ *      Copyright (c) 2002-2003	Patrick Mochel
+ *      Copyright (c) 2002-2003	Open Source Development Labs
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ *
+ * Added CONFIGFS_EATTR() macros from original configfs.h macros
+ * Copright (C) 2008-2009 Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * Please read Documentation/filesystems/configfs.txt before using the
+ * configfs interface, ESPECIALLY the parts about reference counts and
+ * item destructors.
+ */
+
+#ifndef _CONFIGFS_MACROS_H_
+#define _CONFIGFS_MACROS_H_
+
+#include <linux/configfs.h>
+
+/*
+ * Users often need to create attribute structures for their configurable
+ * attributes, containing a configfs_attribute member and function pointers
+ * for the show() and store() operations on that attribute. If they don't
+ * need anything else on the extended attribute structure, they can use
+ * this macro to define it.  The argument _name isends up as
+ * 'struct _name_attribute, as well as names of to CONFIGFS_ATTR_OPS() below.
+ * The argument _item is the name of the structure containing the
+ * struct config_item or struct config_group structure members
+ */
+#define CONFIGFS_EATTR_STRUCT(_name, _item)				\
+struct _name##_attribute {						\
+	struct configfs_attribute attr;					\
+	ssize_t (*show)(struct _item *, char *);			\
+	ssize_t (*store)(struct _item *, const char *, size_t);		\
+}
+
+/*
+ * With the extended attribute structure, users can use this macro
+ * (similar to sysfs' __ATTR) to make defining attributes easier.
+ * An example:
+ * #define MYITEM_EATTR(_name, _mode, _show, _store)	\
+ * struct myitem_attribute childless_attr_##_name =	\
+ *         __CONFIGFS_EATTR(_name, _mode, _show, _store)
+ */
+#define __CONFIGFS_EATTR(_name, _mode, _show, _store)			\
+{									\
+	.attr	= {							\
+			.ca_name = __stringify(_name),			\
+			.ca_mode = _mode,				\
+			.ca_owner = THIS_MODULE,			\
+	},								\
+	.show	= _show,						\
+	.store	= _store,						\
+}
+/* Here is a readonly version, only requiring a show() operation */
+#define __CONFIGFS_EATTR_RO(_name, _show)				\
+{									\
+	.attr	= {							\
+			.ca_name = __stringify(_name),			\
+			.ca_mode = 0444,				\
+			.ca_owner = THIS_MODULE,			\
+	},								\
+	.show	= _show,						\
+}
+
+/*
+ * With these extended attributes, the simple show_attribute() and
+ * store_attribute() operations need to call the show() and store() of the
+ * attributes.  This is a common pattern, so we provide a macro to define
+ * them.  The argument _name is the name of the attribute defined by
+ * CONFIGFS_ATTR_STRUCT(). The argument _item is the name of the structure
+ * containing the struct config_item or struct config_group structure member.
+ * The argument _item_member is the actual name of the struct config_* struct
+ * in your _item structure.  Meaning  my_structure->some_config_group.
+ *		                      ^^_item^^^^^  ^^_item_member^^^
+ * This macro expects the attributes to be named "struct <name>_attribute".
+ */
+#define CONFIGFS_EATTR_OPS_TO_FUNC(_name, _item, _item_member)		\
+static struct _item *to_##_name(struct config_item *ci)			\
+{									\
+	return (ci) ? container_of(to_config_group(ci), struct _item,	\
+		_item_member) : NULL;					\
+}
+
+#define CONFIGFS_EATTR_OPS_SHOW(_name, _item)				\
+static ssize_t _name##_attr_show(struct config_item *item,		\
+				 struct configfs_attribute *attr,	\
+				 char *page)				\
+{									\
+	struct _item *_item = to_##_name(item);				\
+	struct _name##_attribute * _name##_attr =			\
+		container_of(attr, struct _name##_attribute, attr);	\
+	ssize_t ret = 0;						\
+									\
+	if (_name##_attr->show)						\
+		ret = _name##_attr->show(_item, page);			\
+	return ret;							\
+}
+
+#define CONFIGFS_EATTR_OPS_STORE(_name, _item)				\
+static ssize_t _name##_attr_store(struct config_item *item,		\
+				  struct configfs_attribute *attr,	\
+				  const char *page, size_t count)	\
+{									\
+	struct _item *_item = to_##_name(item);				\
+	struct _name##_attribute * _name##_attr =			\
+		container_of(attr, struct _name##_attribute, attr);	\
+	ssize_t ret = -EINVAL;						\
+									\
+	if (_name##_attr->store)					\
+		ret = _name##_attr->store(_item, page, count);		\
+	return ret;							\
+}
+
+#define CONFIGFS_EATTR_OPS(_name, _item, _item_member)			\
+	CONFIGFS_EATTR_OPS_TO_FUNC(_name, _item, _item_member);		\
+	CONFIGFS_EATTR_OPS_SHOW(_name, _item);				\
+	CONFIGFS_EATTR_OPS_STORE(_name, _item);
+
+#define CONFIGFS_EATTR_OPS_RO(_name, _item, _item_member)		\
+	CONFIGFS_EATTR_OPS_TO_FUNC(_name, _item, _item_member);		\
+	CONFIGFS_EATTR_OPS_SHOW(_name, _item);
+
+#endif /* _CONFIGFS_MACROS_H_ */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
new file mode 100644
index 0000000..07fdfb6
--- /dev/null
+++ b/include/target/target_core_base.h
@@ -0,0 +1,937 @@
+#ifndef TARGET_CORE_BASE_H
+#define TARGET_CORE_BASE_H
+
+#include <linux/in.h>
+#include <linux/configfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_cmnd.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include "target_core_mib.h"
+
+#define TARGET_CORE_MOD_VERSION		"v4.0.0-rc6"
+#define SHUTDOWN_SIGS	(sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGABRT))
+
+/* Used by transport_generic_allocate_iovecs() */
+#define TRANSPORT_IOV_DATA_BUFFER		5
+/* Maximum Number of LUNs per Target Portal Group */
+#define TRANSPORT_MAX_LUNS_PER_TPG		256
+/*
+ * By default we use 32-byte CDBs in TCM Core and subsystem plugin code.
+ *
+ * Note that both include/scsi/scsi_cmnd.h:MAX_COMMAND_SIZE and
+ * include/linux/blkdev.h:BLOCK_MAX_CDB as of v2.6.36-rc4 still use
+ * 16-byte CDBs by default and require an extra allocation for
+ * 32-byte CDBs to becasue of legacy issues.
+ *
+ * Within TCM Core there are no such legacy limitiations, so we go ahead
+ * use 32-byte CDBs by default and use include/scsi/scsi.h:scsi_command_size()
+ * within all TCM Core and subsystem plugin code.
+ */
+#define TCM_MAX_COMMAND_SIZE			32
+/*
+ * From include/scsi/scsi_cmnd.h:SCSI_SENSE_BUFFERSIZE, currently
+ * defined 96, but the real limit is 252 (or 260 including the header)
+ */
+#define TRANSPORT_SENSE_BUFFER			SCSI_SENSE_BUFFERSIZE
+/* Used by transport_send_check_condition_and_sense() */
+#define SPC_SENSE_KEY_OFFSET			2
+#define SPC_ASC_KEY_OFFSET			12
+#define SPC_ASCQ_KEY_OFFSET			13
+#define TRANSPORT_IQN_LEN			224
+/* Used by target_core_store_alua_lu_gp() and target_core_alua_lu_gp_show_attr_members() */
+#define LU_GROUP_NAME_BUF			256
+/* Used by core_alua_store_tg_pt_gp_info() and target_core_alua_tg_pt_gp_show_attr_members() */
+#define TG_PT_GROUP_NAME_BUF			256
+/* Used to parse VPD into struct t10_vpd */
+#define VPD_TMP_BUF_SIZE			128
+/* Used by transport_generic_cmd_sequencer() */
+#define READ_BLOCK_LEN          		6
+#define READ_CAP_LEN            		8
+#define READ_POSITION_LEN       		20
+#define INQUIRY_LEN				36
+/* Used by transport_get_inquiry_vpd_serial() */
+#define INQUIRY_VPD_SERIAL_LEN			254
+/* Used by transport_get_inquiry_vpd_device_ident() */
+#define INQUIRY_VPD_DEVICE_IDENTIFIER_LEN	254
+
+/* struct se_hba->hba_flags */
+enum hba_flags_table {
+	HBA_FLAGS_INTERNAL_USE	= 0x01,
+	HBA_FLAGS_PSCSI_MODE	= 0x02,
+};
+
+/* struct se_lun->lun_status */
+enum transport_lun_status_table {
+	TRANSPORT_LUN_STATUS_FREE = 0,
+	TRANSPORT_LUN_STATUS_ACTIVE = 1,
+};
+
+/* struct se_portal_group->se_tpg_type */
+enum transport_tpg_type_table {
+	TRANSPORT_TPG_TYPE_NORMAL = 0,
+	TRANSPORT_TPG_TYPE_DISCOVERY = 1,
+};
+
+/* Used for generate timer flags */
+enum timer_flags_table {
+	TF_RUNNING	= 0x01,
+	TF_STOP		= 0x02,
+};
+
+/* Special transport agnostic struct se_cmd->t_states */
+enum transport_state_table {
+	TRANSPORT_NO_STATE	= 0,
+	TRANSPORT_NEW_CMD	= 1,
+	TRANSPORT_DEFERRED_CMD	= 2,
+	TRANSPORT_WRITE_PENDING	= 3,
+	TRANSPORT_PROCESS_WRITE	= 4,
+	TRANSPORT_PROCESSING	= 5,
+	TRANSPORT_COMPLETE_OK	= 6,
+	TRANSPORT_COMPLETE_FAILURE = 7,
+	TRANSPORT_COMPLETE_TIMEOUT = 8,
+	TRANSPORT_PROCESS_TMR	= 9,
+	TRANSPORT_TMR_COMPLETE	= 10,
+	TRANSPORT_ISTATE_PROCESSING = 11,
+	TRANSPORT_ISTATE_PROCESSED = 12,
+	TRANSPORT_KILL		= 13,
+	TRANSPORT_REMOVE	= 14,
+	TRANSPORT_FREE		= 15,
+	TRANSPORT_NEW_CMD_MAP	= 16,
+};
+
+/* Used for struct se_cmd->se_cmd_flags */
+enum se_cmd_flags_table {
+	SCF_SUPPORTED_SAM_OPCODE	= 0x00000001,
+	SCF_TRANSPORT_TASK_SENSE	= 0x00000002,
+	SCF_EMULATED_TASK_SENSE		= 0x00000004,
+	SCF_SCSI_DATA_SG_IO_CDB		= 0x00000008,
+	SCF_SCSI_CONTROL_SG_IO_CDB	= 0x00000010,
+	SCF_SCSI_CONTROL_NONSG_IO_CDB	= 0x00000020,
+	SCF_SCSI_NON_DATA_CDB		= 0x00000040,
+	SCF_SCSI_CDB_EXCEPTION		= 0x00000080,
+	SCF_SCSI_RESERVATION_CONFLICT	= 0x00000100,
+	SCF_CMD_PASSTHROUGH_NOALLOC	= 0x00000200,
+	SCF_SE_CMD_FAILED		= 0x00000400,
+	SCF_SE_LUN_CMD			= 0x00000800,
+	SCF_SE_ALLOW_EOO		= 0x00001000,
+	SCF_SE_DISABLE_ONLINE_CHECK	= 0x00002000,
+	SCF_SENT_CHECK_CONDITION	= 0x00004000,
+	SCF_OVERFLOW_BIT		= 0x00008000,
+	SCF_UNDERFLOW_BIT		= 0x00010000,
+	SCF_SENT_DELAYED_TAS		= 0x00020000,
+	SCF_ALUA_NON_OPTIMIZED		= 0x00040000,
+	SCF_DELAYED_CMD_FROM_SAM_ATTR	= 0x00080000,
+	SCF_PASSTHROUGH_SG_TO_MEM	= 0x00100000,
+	SCF_PASSTHROUGH_CONTIG_TO_SG	= 0x00200000,
+	SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00400000,
+	SCF_EMULATE_SYNC_CACHE		= 0x00800000,
+	SCF_EMULATE_CDB_ASYNC		= 0x01000000,
+	SCF_EMULATE_SYNC_UNMAP		= 0x02000000
+};
+
+/* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
+enum transport_lunflags_table {
+	TRANSPORT_LUNFLAGS_NO_ACCESS		= 0x00,
+	TRANSPORT_LUNFLAGS_INITIATOR_ACCESS	= 0x01,
+	TRANSPORT_LUNFLAGS_READ_ONLY		= 0x02,
+	TRANSPORT_LUNFLAGS_READ_WRITE		= 0x04,
+};
+
+/* struct se_device->dev_status */
+enum transport_device_status_table {
+	TRANSPORT_DEVICE_ACTIVATED		= 0x01,
+	TRANSPORT_DEVICE_DEACTIVATED		= 0x02,
+	TRANSPORT_DEVICE_QUEUE_FULL		= 0x04,
+	TRANSPORT_DEVICE_SHUTDOWN		= 0x08,
+	TRANSPORT_DEVICE_OFFLINE_ACTIVATED	= 0x10,
+	TRANSPORT_DEVICE_OFFLINE_DEACTIVATED	= 0x20,
+};
+
+/*
+ * Used by transport_send_check_condition_and_sense() and se_cmd->scsi_sense_reason
+ * to signal which ASC/ASCQ sense payload should be built.
+ */
+enum tcm_sense_reason_table {
+	TCM_NON_EXISTENT_LUN			= 0x01,
+	TCM_UNSUPPORTED_SCSI_OPCODE		= 0x02,
+	TCM_INCORRECT_AMOUNT_OF_DATA		= 0x03,
+	TCM_UNEXPECTED_UNSOLICITED_DATA		= 0x04,
+	TCM_SERVICE_CRC_ERROR			= 0x05,
+	TCM_SNACK_REJECTED			= 0x06,
+	TCM_SECTOR_COUNT_TOO_MANY		= 0x07,
+	TCM_INVALID_CDB_FIELD			= 0x08,
+	TCM_INVALID_PARAMETER_LIST		= 0x09,
+	TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE	= 0x0a,
+	TCM_UNKNOWN_MODE_PAGE			= 0x0b,
+	TCM_WRITE_PROTECTED			= 0x0c,
+	TCM_CHECK_CONDITION_ABORT_CMD		= 0x0d,
+	TCM_CHECK_CONDITION_UNIT_ATTENTION	= 0x0e,
+	TCM_CHECK_CONDITION_NOT_READY		= 0x0f,
+};
+
+struct se_obj {
+	atomic_t obj_access_count;
+} ____cacheline_aligned;
+
+/*
+ * Used by TCM Core internally to signal if ALUA emulation is enabled or
+ * disabled, or running in with TCM/pSCSI passthrough mode
+ */
+typedef enum {
+	SPC_ALUA_PASSTHROUGH,
+	SPC2_ALUA_DISABLED,
+	SPC3_ALUA_EMULATED
+} t10_alua_index_t;
+
+/*
+ * Used by TCM Core internally to signal if SAM Task Attribute emulation
+ * is enabled or disabled, or running in with TCM/pSCSI passthrough mode
+ */
+typedef enum {
+	SAM_TASK_ATTR_PASSTHROUGH,
+	SAM_TASK_ATTR_UNTAGGED,
+	SAM_TASK_ATTR_EMULATED
+} t10_task_attr_index_t;
+
+struct se_cmd;
+
+struct t10_alua {
+	t10_alua_index_t alua_type;
+	/* ALUA Target Port Group ID */
+	u16	alua_tg_pt_gps_counter;
+	u32	alua_tg_pt_gps_count;
+	spinlock_t tg_pt_gps_lock;
+	struct se_subsystem_dev *t10_sub_dev;
+	/* Used for default ALUA Target Port Group */
+	struct t10_alua_tg_pt_gp *default_tg_pt_gp;
+	/* Used for default ALUA Target Port Group ConfigFS group */
+	struct config_group alua_tg_pt_gps_group;
+	int (*alua_state_check)(struct se_cmd *, unsigned char *, u8 *);
+	struct list_head tg_pt_gps_list;
+} ____cacheline_aligned;
+
+struct t10_alua_lu_gp {
+	u16	lu_gp_id;
+	int	lu_gp_valid_id;
+	u32	lu_gp_members;
+	atomic_t lu_gp_shutdown;
+	atomic_t lu_gp_ref_cnt;
+	spinlock_t lu_gp_lock;
+	struct config_group lu_gp_group;
+	struct list_head lu_gp_list;
+	struct list_head lu_gp_mem_list;
+} ____cacheline_aligned;
+
+struct t10_alua_lu_gp_member {
+	int lu_gp_assoc:1;
+	atomic_t lu_gp_mem_ref_cnt;
+	spinlock_t lu_gp_mem_lock;
+	struct t10_alua_lu_gp *lu_gp;
+	struct se_device *lu_gp_mem_dev;
+	struct list_head lu_gp_mem_list;
+} ____cacheline_aligned;
+
+struct t10_alua_tg_pt_gp {
+	u16	tg_pt_gp_id;
+	int	tg_pt_gp_valid_id;
+	int	tg_pt_gp_alua_access_status;
+	int	tg_pt_gp_alua_access_type;
+	int	tg_pt_gp_nonop_delay_msecs;
+	int	tg_pt_gp_trans_delay_msecs;
+	int	tg_pt_gp_pref;
+	int	tg_pt_gp_write_metadata;
+	/* Used by struct t10_alua_tg_pt_gp->tg_pt_gp_md_buf_len */
+#define ALUA_MD_BUF_LEN				1024
+	u32	tg_pt_gp_md_buf_len;
+	u32	tg_pt_gp_members;
+	atomic_t tg_pt_gp_alua_access_state;
+	atomic_t tg_pt_gp_ref_cnt;
+	spinlock_t tg_pt_gp_lock;
+	struct mutex tg_pt_gp_md_mutex;
+	struct se_subsystem_dev *tg_pt_gp_su_dev;
+	struct config_group tg_pt_gp_group;
+	struct list_head tg_pt_gp_list;
+	struct list_head tg_pt_gp_mem_list;
+} ____cacheline_aligned;
+
+struct t10_alua_tg_pt_gp_member {
+	int tg_pt_gp_assoc:1;
+	atomic_t tg_pt_gp_mem_ref_cnt;
+	spinlock_t tg_pt_gp_mem_lock;
+	struct t10_alua_tg_pt_gp *tg_pt_gp;
+	struct se_port *tg_pt;
+	struct list_head tg_pt_gp_mem_list;
+} ____cacheline_aligned;
+
+struct t10_vpd {
+	unsigned char device_identifier[INQUIRY_VPD_DEVICE_IDENTIFIER_LEN];
+	int protocol_identifier_set;
+	u32 protocol_identifier;
+	u32 device_identifier_code_set;
+	u32 association;
+	u32 device_identifier_type;
+	struct list_head vpd_list;
+} ____cacheline_aligned;
+
+struct t10_wwn {
+	unsigned char vendor[8];
+	unsigned char model[16];
+	unsigned char revision[4];
+	unsigned char unit_serial[INQUIRY_VPD_SERIAL_LEN];
+	spinlock_t t10_vpd_lock;
+	struct se_subsystem_dev *t10_sub_dev;
+	struct config_group t10_wwn_group;
+	struct list_head t10_vpd_list;
+} ____cacheline_aligned;
+
+
+/*
+ * Used by TCM Core internally to signal if >= SPC-3 peristent reservations
+ * emulation is enabled or disabled, or running in with TCM/pSCSI passthrough
+ * mode
+ */
+typedef enum {
+	SPC_PASSTHROUGH,
+	SPC2_RESERVATIONS,
+	SPC3_PERSISTENT_RESERVATIONS
+} t10_reservations_index_t;
+
+struct t10_pr_registration {
+	/* Used for fabrics that contain WWN+ISID */
+#define PR_REG_ISID_LEN				16
+	/* PR_REG_ISID_LEN + ',i,0x' */
+#define PR_REG_ISID_ID_LEN			(PR_REG_ISID_LEN + 5)
+	char pr_reg_isid[PR_REG_ISID_LEN];
+	/* Used during APTPL metadata reading */
+#define PR_APTPL_MAX_IPORT_LEN			256
+	unsigned char pr_iport[PR_APTPL_MAX_IPORT_LEN];
+	/* Used during APTPL metadata reading */
+#define PR_APTPL_MAX_TPORT_LEN			256
+	unsigned char pr_tport[PR_APTPL_MAX_TPORT_LEN];
+	/* For writing out live meta data */
+	unsigned char *pr_aptpl_buf;
+	u16 pr_aptpl_rpti;
+	u16 pr_reg_tpgt;
+	/* Reservation effects all target ports */
+	int pr_reg_all_tg_pt;
+	/* Activate Persistence across Target Power Loss */
+	int pr_reg_aptpl;
+	int pr_res_holder;
+	int pr_res_type;
+	int pr_res_scope;
+	/* Used for fabric initiator WWPNs using a ISID */
+	int isid_present_at_reg:1;
+	u32 pr_res_mapped_lun;
+	u32 pr_aptpl_target_lun;
+	u32 pr_res_generation;
+	u64 pr_reg_bin_isid;
+	u64 pr_res_key;
+	atomic_t pr_res_holders;
+	struct se_node_acl *pr_reg_nacl;
+	struct se_dev_entry *pr_reg_deve;
+	struct se_lun *pr_reg_tg_pt_lun;
+	struct list_head pr_reg_list;
+	struct list_head pr_reg_abort_list;
+	struct list_head pr_reg_aptpl_list;
+	struct list_head pr_reg_atp_list;
+	struct list_head pr_reg_atp_mem_list;
+} ____cacheline_aligned;
+
+/*
+ * This set of function pointer ops is set based upon SPC3_PERSISTENT_RESERVATIONS,
+ * SPC2_RESERVATIONS or SPC_PASSTHROUGH in drivers/target/target_core_pr.c:
+ * core_setup_reservations()
+ */
+struct t10_reservation_ops {
+	int (*t10_reservation_check)(struct se_cmd *, u32 *);
+	int (*t10_seq_non_holder)(struct se_cmd *, unsigned char *, u32);
+	int (*t10_pr_register)(struct se_cmd *);
+	int (*t10_pr_clear)(struct se_cmd *);
+};
+
+struct t10_reservation_template {
+	/* Reservation effects all target ports */
+	int pr_all_tg_pt;
+	/* Activate Persistence across Target Power Loss enabled
+	 * for SCSI device */
+	int pr_aptpl_active;
+	/* Used by struct t10_reservation_template->pr_aptpl_buf_len */
+#define PR_APTPL_BUF_LEN			8192
+	u32 pr_aptpl_buf_len;
+	u32 pr_generation;
+	t10_reservations_index_t res_type;
+	spinlock_t registration_lock;
+	spinlock_t aptpl_reg_lock;
+	/*
+	 * This will always be set by one individual I_T Nexus.
+	 * However with all_tg_pt=1, other I_T Nexus from the
+	 * same initiator can access PR reg/res info on a different
+	 * target port.
+	 *
+	 * There is also the 'All Registrants' case, where there is
+	 * a single *pr_res_holder of the reservation, but all
+	 * registrations are considered reservation holders.
+	 */
+	struct se_node_acl *pr_res_holder;
+	struct list_head registration_list;
+	struct list_head aptpl_reg_list;
+	struct t10_reservation_ops pr_ops;
+} ____cacheline_aligned;
+
+struct se_queue_req {
+	int			state;
+	void			*cmd;
+	struct list_head	qr_list;
+} ____cacheline_aligned;
+
+struct se_queue_obj {
+	atomic_t		queue_cnt;
+	spinlock_t		cmd_queue_lock;
+	struct list_head	qobj_list;
+	wait_queue_head_t	thread_wq;
+} ____cacheline_aligned;
+
+/*
+ * Used one per struct se_cmd to hold all extra struct se_task
+ * metadata.  This structure is setup and allocated in
+ * drivers/target/target_core_transport.c:__transport_alloc_se_cmd()
+ */
+struct se_transport_task {
+	unsigned char		*t_task_cdb;
+	unsigned char		__t_task_cdb[TCM_MAX_COMMAND_SIZE];
+	unsigned long long	t_task_lba;
+	int			t_tasks_failed;
+	int			t_tasks_fua;
+	int			t_tasks_bidi:1;
+	u32			t_task_cdbs;
+	u32			t_tasks_check;
+	u32			t_tasks_no;
+	u32			t_tasks_sectors;
+	u32			t_tasks_se_num;
+	u32			t_tasks_se_bidi_num;
+	u32			t_tasks_sg_chained_no;
+	atomic_t		t_fe_count;
+	atomic_t		t_se_count;
+	atomic_t		t_task_cdbs_left;
+	atomic_t		t_task_cdbs_ex_left;
+	atomic_t		t_task_cdbs_timeout_left;
+	atomic_t		t_task_cdbs_sent;
+	atomic_t		t_transport_aborted;
+	atomic_t		t_transport_active;
+	atomic_t		t_transport_complete;
+	atomic_t		t_transport_queue_active;
+	atomic_t		t_transport_sent;
+	atomic_t		t_transport_stop;
+	atomic_t		t_transport_timeout;
+	atomic_t		transport_dev_active;
+	atomic_t		transport_lun_active;
+	atomic_t		transport_lun_fe_stop;
+	atomic_t		transport_lun_stop;
+	spinlock_t		t_state_lock;
+	struct completion	t_transport_stop_comp;
+	struct completion	transport_lun_fe_stop_comp;
+	struct completion	transport_lun_stop_comp;
+	struct scatterlist	*t_tasks_sg_chained;
+	struct scatterlist	t_tasks_sg_bounce;
+	void			*t_task_buf;
+	/*
+	 * Used for pre-registered fabric SGL passthrough WRITE and READ
+	 * with the special SCF_PASSTHROUGH_CONTIG_TO_SG case for TCM_Loop
+	 * and other HW target mode fabric modules.
+	 */
+	struct scatterlist	*t_task_pt_sgl;
+	struct list_head	*t_mem_list;
+	/* Used for BIDI READ */
+	struct list_head	*t_mem_bidi_list;
+	struct list_head	t_task_list;
+} ____cacheline_aligned;
+
+struct se_task {
+	unsigned char	task_sense;
+	struct scatterlist *task_sg;
+	struct scatterlist *task_sg_bidi;
+	u8		task_scsi_status;
+	u8		task_flags;
+	int		task_error_status;
+	int		task_state_flags;
+	int		task_padded_sg:1;
+	unsigned long long	task_lba;
+	u32		task_no;
+	u32		task_sectors;
+	u32		task_size;
+	u32		task_sg_num;
+	u32		task_sg_offset;
+	enum dma_data_direction	task_data_direction;
+	struct se_cmd *task_se_cmd;
+	struct se_device	*se_dev;
+	struct completion	task_stop_comp;
+	atomic_t	task_active;
+	atomic_t	task_execute_queue;
+	atomic_t	task_timeout;
+	atomic_t	task_sent;
+	atomic_t	task_stop;
+	atomic_t	task_state_active;
+	struct timer_list	task_timer;
+	struct se_device *se_obj_ptr;
+	struct list_head t_list;
+	struct list_head t_execute_list;
+	struct list_head t_state_list;
+} ____cacheline_aligned;
+
+#define TASK_CMD(task)	((struct se_cmd *)task->task_se_cmd)
+#define TASK_DEV(task)	((struct se_device *)task->se_dev)
+
+struct se_cmd {
+	/* SAM response code being sent to initiator */
+	u8			scsi_status;
+	u8			scsi_asc;
+	u8			scsi_ascq;
+	u8			scsi_sense_reason;
+	u16			scsi_sense_length;
+	/* Delay for ALUA Active/NonOptimized state access in milliseconds */
+	int			alua_nonop_delay;
+	/* See include/linux/dma-mapping.h */
+	enum dma_data_direction	data_direction;
+	/* For SAM Task Attribute */
+	int			sam_task_attr;
+	/* Transport protocol dependent state, see transport_state_table */
+	enum transport_state_table t_state;
+	/* Transport protocol dependent state for out of order CmdSNs */
+	int			deferred_t_state;
+	/* Transport specific error status */
+	int			transport_error_status;
+	/* See se_cmd_flags_table */
+	u32			se_cmd_flags;
+	u32			se_ordered_id;
+	/* Total size in bytes associated with command */
+	u32			data_length;
+	/* SCSI Presented Data Transfer Length */
+	u32			cmd_spdtl;
+	u32			residual_count;
+	u32			orig_fe_lun;
+	/* Persistent Reservation key */
+	u64			pr_res_key;
+	atomic_t                transport_sent;
+	/* Used for sense data */
+	void			*sense_buffer;
+	struct list_head	se_delayed_list;
+	struct list_head	se_ordered_list;
+	struct list_head	se_lun_list;
+	struct se_device      *se_dev;
+	struct se_dev_entry   *se_deve;
+	struct se_device	*se_obj_ptr;
+	struct se_device	*se_orig_obj_ptr;
+	struct se_lun		*se_lun;
+	/* Only used for internal passthrough and legacy TCM fabric modules */
+	struct se_session	*se_sess;
+	struct se_tmr_req	*se_tmr_req;
+	/* t_task is setup to t_task_backstore in transport_init_se_cmd() */
+	struct se_transport_task *t_task;
+	struct se_transport_task t_task_backstore;
+	struct target_core_fabric_ops *se_tfo;
+	int (*transport_emulate_cdb)(struct se_cmd *);
+	void (*transport_split_cdb)(unsigned long long, u32 *, unsigned char *);
+	void (*transport_wait_for_tasks)(struct se_cmd *, int, int);
+	void (*transport_complete_callback)(struct se_cmd *);
+} ____cacheline_aligned;
+
+#define T_TASK(cmd)     ((struct se_transport_task *)(cmd->t_task))
+#define CMD_TFO(cmd) ((struct target_core_fabric_ops *)cmd->se_tfo)
+
+struct se_tmr_req {
+	/* Task Management function to be preformed */
+	u8			function;
+	/* Task Management response to send */
+	u8			response;
+	int			call_transport;
+	/* Reference to ITT that Task Mgmt should be preformed */
+	u32			ref_task_tag;
+	/* 64-bit encoded SAM LUN from $FABRIC_MOD TMR header */
+	u64			ref_task_lun;
+	void 			*fabric_tmr_ptr;
+	struct se_cmd		*task_cmd;
+	struct se_cmd		*ref_cmd;
+	struct se_device	*tmr_dev;
+	struct se_lun		*tmr_lun;
+	struct list_head	tmr_list;
+} ____cacheline_aligned;
+
+struct se_ua {
+	u8			ua_asc;
+	u8			ua_ascq;
+	struct se_node_acl	*ua_nacl;
+	struct list_head	ua_dev_list;
+	struct list_head	ua_nacl_list;
+} ____cacheline_aligned;
+
+struct se_node_acl {
+	char			initiatorname[TRANSPORT_IQN_LEN];
+	/* Used to signal demo mode created ACL, disabled by default */
+	int			dynamic_node_acl:1;
+	u32			queue_depth;
+	u32			acl_index;
+	u64			num_cmds;
+	u64			read_bytes;
+	u64			write_bytes;
+	spinlock_t		stats_lock;
+	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
+	atomic_t		acl_pr_ref_count;
+	/* Used for MIB access */
+	atomic_t		mib_ref_count;
+	struct se_dev_entry	*device_list;
+	struct se_session	*nacl_sess;
+	struct se_portal_group *se_tpg;
+	spinlock_t		device_list_lock;
+	spinlock_t		nacl_sess_lock;
+	struct config_group	acl_group;
+	struct config_group	acl_attrib_group;
+	struct config_group	acl_auth_group;
+	struct config_group	acl_param_group;
+	struct config_group	*acl_default_groups[4];
+	struct list_head	acl_list;
+	struct list_head	acl_sess_list;
+} ____cacheline_aligned;
+
+struct se_session {
+	/* Used for MIB access */
+	atomic_t		mib_ref_count;
+	u64			sess_bin_isid;
+	struct se_node_acl	*se_node_acl;
+	struct se_portal_group *se_tpg;
+	void			*fabric_sess_ptr;
+	struct list_head	sess_list;
+	struct list_head	sess_acl_list;
+} ____cacheline_aligned;
+
+#define SE_SESS(cmd)		((struct se_session *)(cmd)->se_sess)
+#define SE_NODE_ACL(sess)	((struct se_node_acl *)(sess)->se_node_acl)
+
+struct se_device;
+struct se_transform_info;
+struct scatterlist;
+
+struct se_lun_acl {
+	char			initiatorname[TRANSPORT_IQN_LEN];
+	u32			mapped_lun;
+	struct se_node_acl	*se_lun_nacl;
+	struct se_lun		*se_lun;
+	struct list_head	lacl_list;
+	struct config_group	se_lun_group;
+}  ____cacheline_aligned;
+
+struct se_dev_entry {
+	int			def_pr_registered:1;
+	/* See transport_lunflags_table */
+	u32			lun_flags;
+	u32			deve_cmds;
+	u32			mapped_lun;
+	u32			average_bytes;
+	u32			last_byte_count;
+	u32			total_cmds;
+	u32			total_bytes;
+	u64			pr_res_key;
+	u64			creation_time;
+	u32			attach_count;
+	u64			read_bytes;
+	u64			write_bytes;
+	atomic_t		ua_count;
+	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
+	atomic_t		pr_ref_count;
+	struct se_lun_acl	*se_lun_acl;
+	spinlock_t		ua_lock;
+	struct se_lun		*se_lun;
+	struct list_head	alua_port_list;
+	struct list_head	ua_list;
+}  ____cacheline_aligned;
+
+struct se_dev_limits {
+	/* Max supported HW queue depth */
+	u32		hw_queue_depth;
+	/* Max supported virtual queue depth */
+	u32		queue_depth;
+	/* From include/linux/blkdev.h for the other HW/SW limits. */
+	struct queue_limits limits;
+} ____cacheline_aligned;
+
+struct se_dev_attrib {
+	int		emulate_dpo;
+	int		emulate_fua_write;
+	int		emulate_fua_read;
+	int		emulate_write_cache;
+	int		emulate_ua_intlck_ctrl;
+	int		emulate_tas;
+	int		emulate_tpu;
+	int		emulate_tpws;
+	int		emulate_reservations;
+	int		emulate_alua;
+	int		enforce_pr_isids;
+	u32		hw_block_size;
+	u32		block_size;
+	u32		hw_max_sectors;
+	u32		max_sectors;
+	u32		optimal_sectors;
+	u32		hw_queue_depth;
+	u32		queue_depth;
+	u32		task_timeout;
+	u32		max_unmap_lba_count;
+	u32		max_unmap_block_desc_count;
+	u32		unmap_granularity;
+	u32		unmap_granularity_alignment;
+	struct se_subsystem_dev *da_sub_dev;
+	struct config_group da_group;
+} ____cacheline_aligned;
+
+struct se_subsystem_dev {
+/* Used for struct se_subsystem_dev-->se_dev_alias, must be less than PAGE_SIZE */
+#define SE_DEV_ALIAS_LEN		512
+	unsigned char	se_dev_alias[SE_DEV_ALIAS_LEN];
+/* Used for struct se_subsystem_dev->se_dev_udev_path[], must be less than PAGE_SIZE */
+#define SE_UDEV_PATH_LEN		512
+	unsigned char	se_dev_udev_path[SE_UDEV_PATH_LEN];
+	u32		su_dev_flags;
+	struct se_hba *se_dev_hba;
+	struct se_device *se_dev_ptr;
+	struct se_dev_attrib se_dev_attrib;
+	/* T10 Asymmetric Logical Unit Assignment for Target Ports */
+	struct t10_alua	t10_alua;
+	/* T10 Inquiry and VPD WWN Information */
+	struct t10_wwn	t10_wwn;
+	/* T10 SPC-2 + SPC-3 Reservations */
+	struct t10_reservation_template t10_reservation;
+	spinlock_t      se_dev_lock;
+	void            *se_dev_su_ptr;
+	struct list_head g_se_dev_list;
+	struct config_group se_dev_group;
+	/* For T10 Reservations */
+	struct config_group se_dev_pr_group;
+} ____cacheline_aligned;
+
+#define T10_ALUA(su_dev)	(&(su_dev)->t10_alua)
+#define T10_RES(su_dev)		(&(su_dev)->t10_reservation)
+#define T10_PR_OPS(su_dev)	(&(su_dev)->t10_reservation.pr_ops)
+
+struct se_device {
+	/* Set to 1 if thread is NOT sleeping on thread_sem */
+	u8			thread_active;
+	u8			dev_status_timer_flags;
+	/* RELATIVE TARGET PORT IDENTIFER Counter */
+	u16			dev_rpti_counter;
+	/* Used for SAM Task Attribute ordering */
+	u32			dev_cur_ordered_id;
+	u32			dev_flags;
+	u32			dev_port_count;
+	/* See transport_device_status_table */
+	u32			dev_status;
+	u32			dev_tcq_window_closed;
+	/* Physical device queue depth */
+	u32			queue_depth;
+	/* Used for SPC-2 reservations enforce of ISIDs */
+	u64			dev_res_bin_isid;
+	t10_task_attr_index_t	dev_task_attr_type;
+	/* Pointer to transport specific device structure */
+	void 			*dev_ptr;
+	u32			dev_index;
+	u64			creation_time;
+	u32			num_resets;
+	u64			num_cmds;
+	u64			read_bytes;
+	u64			write_bytes;
+	spinlock_t		stats_lock;
+	/* Active commands on this virtual SE device */
+	atomic_t		active_cmds;
+	atomic_t		simple_cmds;
+	atomic_t		depth_left;
+	atomic_t		dev_ordered_id;
+	atomic_t		dev_tur_active;
+	atomic_t		execute_tasks;
+	atomic_t		dev_status_thr_count;
+	atomic_t		dev_hoq_count;
+	atomic_t		dev_ordered_sync;
+	struct se_obj		dev_obj;
+	struct se_obj		dev_access_obj;
+	struct se_obj		dev_export_obj;
+	struct se_queue_obj	*dev_queue_obj;
+	struct se_queue_obj	*dev_status_queue_obj;
+	spinlock_t		delayed_cmd_lock;
+	spinlock_t		ordered_cmd_lock;
+	spinlock_t		execute_task_lock;
+	spinlock_t		state_task_lock;
+	spinlock_t		dev_alua_lock;
+	spinlock_t		dev_reservation_lock;
+	spinlock_t		dev_state_lock;
+	spinlock_t		dev_status_lock;
+	spinlock_t		dev_status_thr_lock;
+	spinlock_t		se_port_lock;
+	spinlock_t		se_tmr_lock;
+	/* Used for legacy SPC-2 reservationsa */
+	struct se_node_acl	*dev_reserved_node_acl;
+	/* Used for ALUA Logical Unit Group membership */
+	struct t10_alua_lu_gp_member *dev_alua_lu_gp_mem;
+	/* Used for SPC-3 Persistent Reservations */
+	struct t10_pr_registration *dev_pr_res_holder;
+	struct list_head	dev_sep_list;
+	struct list_head	dev_tmr_list;
+	struct timer_list	dev_status_timer;
+	/* Pointer to descriptor for processing thread */
+	struct task_struct	*process_thread;
+	pid_t			process_thread_pid;
+	struct task_struct		*dev_mgmt_thread;
+	struct list_head	delayed_cmd_list;
+	struct list_head	ordered_cmd_list;
+	struct list_head	execute_task_list;
+	struct list_head	state_task_list;
+	/* Pointer to associated SE HBA */
+	struct se_hba		*se_hba;
+	struct se_subsystem_dev *se_sub_dev;
+	/* Pointer to template of function pointers for transport */
+	struct se_subsystem_api *transport;
+	/* Linked list for struct se_hba struct se_device list */
+	struct list_head	dev_list;
+	/* Linked list for struct se_global->g_se_dev_list */
+	struct list_head	g_se_dev_list;
+}  ____cacheline_aligned;
+
+#define SE_DEV(cmd)		((struct se_device *)(cmd)->se_lun->lun_se_dev)
+#define SU_DEV(dev)		((struct se_subsystem_dev *)(dev)->se_sub_dev)
+#define DEV_ATTRIB(dev)		(&(dev)->se_sub_dev->se_dev_attrib)
+#define DEV_T10_WWN(dev)	(&(dev)->se_sub_dev->t10_wwn)
+
+struct se_hba {
+	u16			hba_tpgt;
+	u32			hba_id;
+	/* See hba_flags_table */
+	u32			hba_flags;
+	/* Virtual iSCSI devices attached. */
+	u32			dev_count;
+	u32			hba_index;
+	atomic_t		dev_mib_access_count;
+	atomic_t		load_balance_queue;
+	atomic_t		left_queue_depth;
+	/* Maximum queue depth the HBA can handle. */
+	atomic_t		max_queue_depth;
+	/* Pointer to transport specific host structure. */
+	void			*hba_ptr;
+	/* Linked list for struct se_device */
+	struct list_head	hba_dev_list;
+	struct list_head	hba_list;
+	spinlock_t		device_lock;
+	spinlock_t		hba_queue_lock;
+	struct config_group	hba_group;
+	struct mutex		hba_access_mutex;
+	struct se_subsystem_api *transport;
+}  ____cacheline_aligned;
+
+#define SE_HBA(d)		((struct se_hba *)(d)->se_hba)
+
+struct se_lun {
+	/* See transport_lun_status_table */
+	enum transport_lun_status_table lun_status;
+	u32			lun_access;
+	u32			lun_flags;
+	u32			unpacked_lun;
+	atomic_t		lun_acl_count;
+	spinlock_t		lun_acl_lock;
+	spinlock_t		lun_cmd_lock;
+	spinlock_t		lun_sep_lock;
+	struct completion	lun_shutdown_comp;
+	struct list_head	lun_cmd_list;
+	struct list_head	lun_acl_list;
+	struct se_device	*lun_se_dev;
+	struct config_group	lun_group;
+	struct se_port	*lun_sep;
+} ____cacheline_aligned;
+
+#define SE_LUN(c)		((struct se_lun *)(c)->se_lun)
+
+struct se_port {
+	/* RELATIVE TARGET PORT IDENTIFER */
+	u16		sep_rtpi;
+	int		sep_tg_pt_secondary_stat;
+	int		sep_tg_pt_secondary_write_md;
+	u32		sep_index;
+	struct scsi_port_stats sep_stats;
+	/* Used for ALUA Target Port Groups membership */
+	atomic_t	sep_tg_pt_gp_active;
+	atomic_t	sep_tg_pt_secondary_offline;
+	/* Used for PR ALL_TG_PT=1 */
+	atomic_t	sep_tg_pt_ref_cnt;
+	spinlock_t	sep_alua_lock;
+	struct mutex	sep_tg_pt_md_mutex;
+	struct t10_alua_tg_pt_gp_member *sep_alua_tg_pt_gp_mem;
+	struct se_lun *sep_lun;
+	struct se_portal_group *sep_tpg;
+	struct list_head sep_alua_list;
+	struct list_head sep_list;
+} ____cacheline_aligned;
+
+struct se_tpg_np {
+	struct config_group	tpg_np_group;
+} ____cacheline_aligned;
+
+struct se_portal_group {
+	/* Type of target portal group, see transport_tpg_type_table */
+	enum transport_tpg_type_table se_tpg_type;
+	/* Number of ACLed Initiator Nodes for this TPG */
+	u32			num_node_acls;
+	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
+	atomic_t		tpg_pr_ref_count;
+	/* Spinlock for adding/removing ACLed Nodes */
+	spinlock_t		acl_node_lock;
+	/* Spinlock for adding/removing sessions */
+	spinlock_t		session_lock;
+	spinlock_t		tpg_lun_lock;
+	/* Pointer to $FABRIC_MOD portal group */
+	void			*se_tpg_fabric_ptr;
+	struct list_head	se_tpg_list;
+	/* linked list for initiator ACL list */
+	struct list_head	acl_node_list;
+	struct se_lun		*tpg_lun_list;
+	struct se_lun		tpg_virt_lun0;
+	/* List of TCM sessions assoicated wth this TPG */
+	struct list_head	tpg_sess_list;
+	/* Pointer to $FABRIC_MOD dependent code */
+	struct target_core_fabric_ops *se_tpg_tfo;
+	struct se_wwn		*se_tpg_wwn;
+	struct config_group	tpg_group;
+	struct config_group	*tpg_default_groups[6];
+	struct config_group	tpg_lun_group;
+	struct config_group	tpg_np_group;
+	struct config_group	tpg_acl_group;
+	struct config_group	tpg_attrib_group;
+	struct config_group	tpg_param_group;
+} ____cacheline_aligned;
+
+#define TPG_TFO(se_tpg)	((struct target_core_fabric_ops *)(se_tpg)->se_tpg_tfo)
+
+struct se_wwn {
+	struct target_fabric_configfs *wwn_tf;
+	struct config_group	wwn_group;
+} ____cacheline_aligned;
+
+struct se_global {
+	u16			alua_lu_gps_counter;
+	int			g_sub_api_initialized;
+	u32			in_shutdown;
+	u32			alua_lu_gps_count;
+	u32			g_hba_id_counter;
+	struct config_group	target_core_hbagroup;
+	struct config_group	alua_group;
+	struct config_group	alua_lu_gps_group;
+	struct list_head	g_lu_gps_list;
+	struct list_head	g_se_tpg_list;
+	struct list_head	g_hba_list;
+	struct list_head	g_se_dev_list;
+	struct se_hba		*g_lun0_hba;
+	struct se_subsystem_dev *g_lun0_su_dev;
+	struct se_device	*g_lun0_dev;
+	struct t10_alua_lu_gp	*default_lu_gp;
+	spinlock_t		g_device_lock;
+	spinlock_t		hba_lock;
+	spinlock_t		se_tpg_lock;
+	spinlock_t		lu_gps_lock;
+	spinlock_t		plugin_class_lock;
+} ____cacheline_aligned;
+
+#endif /* TARGET_CORE_BASE_H */
diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h
new file mode 100644
index 0000000..40e6e74
--- /dev/null
+++ b/include/target/target_core_configfs.h
@@ -0,0 +1,52 @@
+#define TARGET_CORE_CONFIGFS_VERSION TARGET_CORE_MOD_VERSION
+
+#define TARGET_CORE_CONFIG_ROOT	"/sys/kernel/config"
+
+#define TARGET_CORE_NAME_MAX_LEN	64
+#define TARGET_FABRIC_NAME_SIZE		32
+
+extern struct target_fabric_configfs *target_fabric_configfs_init(
+				struct module *, const char *);
+extern void target_fabric_configfs_free(struct target_fabric_configfs *);
+extern int target_fabric_configfs_register(struct target_fabric_configfs *);
+extern void target_fabric_configfs_deregister(struct target_fabric_configfs *);
+
+struct target_fabric_configfs_template {
+	struct config_item_type tfc_discovery_cit;
+	struct config_item_type	tfc_wwn_cit;
+	struct config_item_type tfc_tpg_cit;
+	struct config_item_type tfc_tpg_base_cit;
+	struct config_item_type tfc_tpg_lun_cit;
+	struct config_item_type tfc_tpg_port_cit;
+	struct config_item_type tfc_tpg_np_cit;
+	struct config_item_type tfc_tpg_np_base_cit;
+	struct config_item_type tfc_tpg_attrib_cit;
+	struct config_item_type tfc_tpg_param_cit;
+	struct config_item_type tfc_tpg_nacl_cit;
+	struct config_item_type tfc_tpg_nacl_base_cit;
+	struct config_item_type tfc_tpg_nacl_attrib_cit;
+	struct config_item_type tfc_tpg_nacl_auth_cit;
+	struct config_item_type tfc_tpg_nacl_param_cit;
+	struct config_item_type tfc_tpg_mappedlun_cit;
+};
+
+struct target_fabric_configfs {
+	char			tf_name[TARGET_FABRIC_NAME_SIZE];
+	atomic_t		tf_access_cnt;
+	struct list_head	tf_list;
+	struct config_group	tf_group;
+	struct config_group	tf_disc_group;
+	struct config_group	*tf_default_groups[2];
+	/* Pointer to fabric's config_item */
+	struct config_item	*tf_fabric;
+	/* Passed from fabric modules */
+	struct config_item_type	*tf_fabric_cit;
+	/* Pointer to target core subsystem */
+	struct configfs_subsystem *tf_subsys;
+	/* Pointer to fabric's struct module */
+	struct module *tf_module;
+	struct target_core_fabric_ops tf_ops;
+	struct target_fabric_configfs_template tf_cit_tmpl;
+};
+
+#define TF_CIT_TMPL(tf) (&(tf)->tf_cit_tmpl)
diff --git a/include/target/target_core_device.h b/include/target/target_core_device.h
new file mode 100644
index 0000000..52b18a5
--- /dev/null
+++ b/include/target/target_core_device.h
@@ -0,0 +1,61 @@
+#ifndef TARGET_CORE_DEVICE_H
+#define TARGET_CORE_DEVICE_H
+
+extern int transport_get_lun_for_cmd(struct se_cmd *, unsigned char *, u32);
+extern int transport_get_lun_for_tmr(struct se_cmd *, u32);
+extern struct se_dev_entry *core_get_se_deve_from_rtpi(
+					struct se_node_acl *, u16);
+extern int core_free_device_list_for_node(struct se_node_acl *,
+					struct se_portal_group *);
+extern void core_dec_lacl_count(struct se_node_acl *, struct se_cmd *);
+extern void core_update_device_list_access(u32, u32, struct se_node_acl *);
+extern int core_update_device_list_for_node(struct se_lun *, struct se_lun_acl *, u32,
+					u32, struct se_node_acl *,
+					struct se_portal_group *, int);
+extern void core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *);
+extern int core_dev_export(struct se_device *, struct se_portal_group *,
+					struct se_lun *);
+extern void core_dev_unexport(struct se_device *, struct se_portal_group *,
+					struct se_lun *);
+extern int transport_core_report_lun_response(struct se_cmd *);
+extern void se_release_device_for_hba(struct se_device *);
+extern void se_release_vpd_for_dev(struct se_device *);
+extern void se_clear_dev_ports(struct se_device *);
+extern int se_free_virtual_device(struct se_device *, struct se_hba *);
+extern int se_dev_check_online(struct se_device *);
+extern int se_dev_check_shutdown(struct se_device *);
+extern void se_dev_set_default_attribs(struct se_device *, struct se_dev_limits *);
+extern int se_dev_set_task_timeout(struct se_device *, u32);
+extern int se_dev_set_max_unmap_lba_count(struct se_device *, u32);
+extern int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
+extern int se_dev_set_unmap_granularity(struct se_device *, u32);
+extern int se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
+extern int se_dev_set_emulate_dpo(struct se_device *, int);
+extern int se_dev_set_emulate_fua_write(struct se_device *, int);
+extern int se_dev_set_emulate_fua_read(struct se_device *, int);
+extern int se_dev_set_emulate_write_cache(struct se_device *, int);
+extern int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
+extern int se_dev_set_emulate_tas(struct se_device *, int);
+extern int se_dev_set_emulate_tpu(struct se_device *, int);
+extern int se_dev_set_emulate_tpws(struct se_device *, int);
+extern int se_dev_set_enforce_pr_isids(struct se_device *, int);
+extern int se_dev_set_queue_depth(struct se_device *, u32);
+extern int se_dev_set_max_sectors(struct se_device *, u32);
+extern int se_dev_set_optimal_sectors(struct se_device *, u32);
+extern int se_dev_set_block_size(struct se_device *, u32);
+extern struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_hba *,
+					struct se_device *, u32);
+extern int core_dev_del_lun(struct se_portal_group *, u32);
+extern struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);
+extern struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *,
+							u32, char *, int *);
+extern int core_dev_add_initiator_node_lun_acl(struct se_portal_group *,
+						struct se_lun_acl *, u32, u32);
+extern int core_dev_del_initiator_node_lun_acl(struct se_portal_group *,
+						struct se_lun *, struct se_lun_acl *);
+extern void core_dev_free_initiator_node_lun_acl(struct se_portal_group *,
+						struct se_lun_acl *lacl);
+extern int core_dev_setup_virtual_lun0(void);
+extern void core_dev_release_virtual_lun0(void);
+
+#endif /* TARGET_CORE_DEVICE_H */
diff --git a/include/target/target_core_fabric_configfs.h b/include/target/target_core_fabric_configfs.h
new file mode 100644
index 0000000..a26fb75
--- /dev/null
+++ b/include/target/target_core_fabric_configfs.h
@@ -0,0 +1,106 @@
+/*
+ * Used for tfc_wwn_cit attributes
+ */
+
+#include <target/configfs_macros.h>
+
+CONFIGFS_EATTR_STRUCT(target_fabric_nacl_attrib, se_node_acl);
+#define TF_NACL_ATTRIB_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_nacl_attrib_attribute _fabric##_nacl_attrib_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_nacl_attrib_show_##_name,				\
+	_fabric##_nacl_attrib_store_##_name);
+
+CONFIGFS_EATTR_STRUCT(target_fabric_nacl_auth, se_node_acl);
+#define TF_NACL_AUTH_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_nacl_auth_attribute _fabric##_nacl_auth_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_nacl_auth_show_##_name,				\
+	_fabric##_nacl_auth_store_##_name);
+
+#define TF_NACL_AUTH_ATTR_RO(_fabric, _name)				\
+static struct target_fabric_nacl_auth_attribute _fabric##_nacl_auth_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_fabric##_nacl_auth_show_##_name);
+
+CONFIGFS_EATTR_STRUCT(target_fabric_nacl_param, se_node_acl);
+#define TF_NACL_PARAM_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_nacl_param_attribute _fabric##_nacl_param_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_nacl_param_show_##_name,				\
+	_fabric##_nacl_param_store_##_name);
+
+#define TF_NACL_PARAM_ATTR_RO(_fabric, _name)				\
+static struct target_fabric_nacl_param_attribute _fabric##_nacl_param_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_fabric##_nacl_param_show_##_name);
+
+
+CONFIGFS_EATTR_STRUCT(target_fabric_nacl_base, se_node_acl);
+#define TF_NACL_BASE_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_nacl_base_attribute _fabric##_nacl_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_nacl_show_##_name,					\
+	_fabric##_nacl_store_##_name);
+
+#define TF_NACL_BASE_ATTR_RO(_fabric, _name)				\
+static struct target_fabric_nacl_base_attribute _fabric##_nacl_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_fabric##_nacl_show_##_name);
+
+CONFIGFS_EATTR_STRUCT(target_fabric_np_base, se_tpg_np);
+#define TF_NP_BASE_ATTR(_fabric, _name, _mode)				\
+static struct target_fabric_np_base_attribute _fabric##_np_##_name =	\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_np_show_##_name,					\
+	_fabric##_np_store_##_name);
+
+CONFIGFS_EATTR_STRUCT(target_fabric_tpg_attrib, se_portal_group);
+#define TF_TPG_ATTRIB_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_tpg_attrib_attribute _fabric##_tpg_attrib_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_tpg_attrib_show_##_name,				\
+	_fabric##_tpg_attrib_store_##_name);
+
+
+CONFIGFS_EATTR_STRUCT(target_fabric_tpg_param, se_portal_group);
+#define TF_TPG_PARAM_ATTR(_fabric, _name, _mode)			\
+static struct target_fabric_tpg_param_attribute _fabric##_tpg_param_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_tpg_param_show_##_name,				\
+	_fabric##_tpg_param_store_##_name);
+
+
+CONFIGFS_EATTR_STRUCT(target_fabric_tpg, se_portal_group);
+#define TF_TPG_BASE_ATTR(_fabric, _name, _mode)				\
+static struct target_fabric_tpg_attribute _fabric##_tpg_##_name =	\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_tpg_show_##_name,					\
+	_fabric##_tpg_store_##_name);
+
+
+CONFIGFS_EATTR_STRUCT(target_fabric_wwn, target_fabric_configfs);
+#define TF_WWN_ATTR(_fabric, _name, _mode)				\
+static struct target_fabric_wwn_attribute _fabric##_wwn_##_name =	\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_wwn_show_attr_##_name,				\
+	_fabric##_wwn_store_attr_##_name);
+
+#define TF_WWN_ATTR_RO(_fabric, _name)					\
+static struct target_fabric_wwn_attribute _fabric##_wwn_##_name =	\
+	__CONFIGFS_EATTR_RO(_name,					\
+	_fabric##_wwn_show_attr_##_name);
+
+CONFIGFS_EATTR_STRUCT(target_fabric_discovery, target_fabric_configfs);
+#define TF_DISC_ATTR(_fabric, _name, _mode)				\
+static struct target_fabric_discovery_attribute _fabric##_disc_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	_fabric##_disc_show_##_name,					\
+	_fabric##_disc_store_##_name);
+
+#define TF_DISC_ATTR_RO(_fabric, _name)					\
+static struct target_fabric_discovery_attribute _fabric##_disc_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_fabric##_disc_show_##_name);
+
+extern int target_fabric_setup_cits(struct target_fabric_configfs *);
diff --git a/include/target/target_core_fabric_lib.h b/include/target/target_core_fabric_lib.h
new file mode 100644
index 0000000..c2f8d0e
--- /dev/null
+++ b/include/target/target_core_fabric_lib.h
@@ -0,0 +1,28 @@
+#ifndef TARGET_CORE_FABRIC_LIB_H
+#define TARGET_CORE_FABRIC_LIB_H
+
+extern u8 sas_get_fabric_proto_ident(struct se_portal_group *);
+extern u32 sas_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *, unsigned char *);
+extern u32 sas_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *);
+extern char *sas_parse_pr_out_transport_id(struct se_portal_group *,
+			const char *, u32 *, char **);
+
+extern u8 fc_get_fabric_proto_ident(struct se_portal_group *);
+extern u32 fc_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *, unsigned char *);
+extern u32 fc_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *);
+extern char *fc_parse_pr_out_transport_id(struct se_portal_group *,
+			const char *, u32 *, char **);
+
+extern u8 iscsi_get_fabric_proto_ident(struct se_portal_group *);
+extern u32 iscsi_get_pr_transport_id(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *, unsigned char *);
+extern u32 iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *,
+			struct t10_pr_registration *, int *);
+extern char *iscsi_parse_pr_out_transport_id(struct se_portal_group *,
+			const char *, u32 *, char **);
+
+#endif /* TARGET_CORE_FABRIC_LIB_H */
diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h
new file mode 100644
index 0000000..f3ac12b
--- /dev/null
+++ b/include/target/target_core_fabric_ops.h
@@ -0,0 +1,100 @@
+/* Defined in target_core_configfs.h */
+struct target_fabric_configfs;
+
+struct target_core_fabric_ops {
+	struct configfs_subsystem *tf_subsys;
+	/*
+	 * Optional to signal struct se_task->task_sg[] padding entries
+	 * for scatterlist chaining using transport_do_task_sg_link(),
+	 * disabled by default
+	 */
+	int task_sg_chaining:1;
+	char *(*get_fabric_name)(void);
+	u8 (*get_fabric_proto_ident)(struct se_portal_group *);
+	char *(*tpg_get_wwn)(struct se_portal_group *);
+	u16 (*tpg_get_tag)(struct se_portal_group *);
+	u32 (*tpg_get_default_depth)(struct se_portal_group *);
+	u32 (*tpg_get_pr_transport_id)(struct se_portal_group *,
+				struct se_node_acl *,
+				struct t10_pr_registration *, int *,
+				unsigned char *);
+	u32 (*tpg_get_pr_transport_id_len)(struct se_portal_group *,
+				struct se_node_acl *,
+				struct t10_pr_registration *, int *);
+	char *(*tpg_parse_pr_out_transport_id)(struct se_portal_group *,
+				const char *, u32 *, char **);
+	int (*tpg_check_demo_mode)(struct se_portal_group *);
+	int (*tpg_check_demo_mode_cache)(struct se_portal_group *);
+	int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *);
+	int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *);
+	struct se_node_acl *(*tpg_alloc_fabric_acl)(
+					struct se_portal_group *);
+	void (*tpg_release_fabric_acl)(struct se_portal_group *,
+					struct se_node_acl *);
+	u32 (*tpg_get_inst_index)(struct se_portal_group *);
+	/*
+	 * Optional function pointer for TCM to perform command map
+	 * from TCM processing thread context, for those struct se_cmd
+	 * initally allocated in interrupt context.
+	 */
+	int (*new_cmd_map)(struct se_cmd *);
+	/*
+	 * Optional function pointer for TCM fabric modules that use
+	 * Linux/NET sockets to allocate struct iovec array to struct se_cmd
+	 */
+	int (*alloc_cmd_iovecs)(struct se_cmd *);
+	/*
+	 * Optional to release struct se_cmd and fabric dependent allocated
+	 * I/O descriptor in transport_cmd_check_stop()
+	 */
+	void (*check_stop_free)(struct se_cmd *);
+	void (*release_cmd_to_pool)(struct se_cmd *);
+	void (*release_cmd_direct)(struct se_cmd *);
+	/*
+	 * Called with spin_lock_bh(struct se_portal_group->session_lock held.
+	 */
+	int (*shutdown_session)(struct se_session *);
+	void (*close_session)(struct se_session *);
+	void (*stop_session)(struct se_session *, int, int);
+	void (*fall_back_to_erl0)(struct se_session *);
+	int (*sess_logged_in)(struct se_session *);
+	u32 (*sess_get_index)(struct se_session *);
+	/*
+	 * Used only for SCSI fabrics that contain multi-value TransportIDs
+	 * (like iSCSI).  All other SCSI fabrics should set this to NULL.
+	 */
+	u32 (*sess_get_initiator_sid)(struct se_session *,
+				      unsigned char *, u32);
+	int (*write_pending)(struct se_cmd *);
+	int (*write_pending_status)(struct se_cmd *);
+	void (*set_default_node_attributes)(struct se_node_acl *);
+	u32 (*get_task_tag)(struct se_cmd *);
+	int (*get_cmd_state)(struct se_cmd *);
+	void (*new_cmd_failure)(struct se_cmd *);
+	int (*queue_data_in)(struct se_cmd *);
+	int (*queue_status)(struct se_cmd *);
+	int (*queue_tm_rsp)(struct se_cmd *);
+	u16 (*set_fabric_sense_len)(struct se_cmd *, u32);
+	u16 (*get_fabric_sense_len)(void);
+	int (*is_state_remove)(struct se_cmd *);
+	u64 (*pack_lun)(unsigned int);
+	/*
+	 * fabric module calls for target_core_fabric_configfs.c
+	 */
+	struct se_wwn *(*fabric_make_wwn)(struct target_fabric_configfs *,
+				struct config_group *, const char *);
+	void (*fabric_drop_wwn)(struct se_wwn *);
+	struct se_portal_group *(*fabric_make_tpg)(struct se_wwn *,
+				struct config_group *, const char *);
+	void (*fabric_drop_tpg)(struct se_portal_group *);
+	int (*fabric_post_link)(struct se_portal_group *,
+				struct se_lun *);
+	void (*fabric_pre_unlink)(struct se_portal_group *,
+				struct se_lun *);
+	struct se_tpg_np *(*fabric_make_np)(struct se_portal_group *,
+				struct config_group *, const char *);
+	void (*fabric_drop_np)(struct se_tpg_np *);
+	struct se_node_acl *(*fabric_make_nodeacl)(struct se_portal_group *,
+				struct config_group *, const char *);
+	void (*fabric_drop_nodeacl)(struct se_node_acl *);
+};
diff --git a/include/target/target_core_tmr.h b/include/target/target_core_tmr.h
new file mode 100644
index 0000000..6c8248b
--- /dev/null
+++ b/include/target/target_core_tmr.h
@@ -0,0 +1,43 @@
+#ifndef TARGET_CORE_TMR_H
+#define TARGET_CORE_TMR_H
+
+/* task management function values */
+#ifdef ABORT_TASK
+#undef ABORT_TASK
+#endif /* ABORT_TASK */
+#define ABORT_TASK				1
+#ifdef ABORT_TASK_SET
+#undef ABORT_TASK_SET
+#endif /* ABORT_TASK_SET */
+#define ABORT_TASK_SET				2
+#ifdef CLEAR_ACA
+#undef CLEAR_ACA
+#endif /* CLEAR_ACA */
+#define CLEAR_ACA				3
+#ifdef CLEAR_TASK_SET
+#undef CLEAR_TASK_SET
+#endif /* CLEAR_TASK_SET */
+#define CLEAR_TASK_SET				4
+#define LUN_RESET				5
+#define TARGET_WARM_RESET			6
+#define TARGET_COLD_RESET			7
+#define TASK_REASSIGN				8
+
+/* task management response values */
+#define TMR_FUNCTION_COMPLETE			0
+#define TMR_TASK_DOES_NOT_EXIST			1
+#define TMR_LUN_DOES_NOT_EXIST			2
+#define TMR_TASK_STILL_ALLEGIANT		3
+#define TMR_TASK_FAILOVER_NOT_SUPPORTED		4
+#define TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED	5
+#define TMR_FUNCTION_AUTHORIZATION_FAILED	6
+#define TMR_FUNCTION_REJECTED			255
+
+extern struct kmem_cache *se_tmr_req_cache;
+
+extern struct se_tmr_req *core_tmr_alloc_req(struct se_cmd *, void *, u8);
+extern void core_tmr_release_req(struct se_tmr_req *);
+extern int core_tmr_lun_reset(struct se_device *, struct se_tmr_req *,
+				struct list_head *, struct se_cmd *);
+
+#endif /* TARGET_CORE_TMR_H */
diff --git a/include/target/target_core_tpg.h b/include/target/target_core_tpg.h
new file mode 100644
index 0000000..77e1872
--- /dev/null
+++ b/include/target/target_core_tpg.h
@@ -0,0 +1,35 @@
+#ifndef TARGET_CORE_TPG_H
+#define TARGET_CORE_TPG_H
+
+extern struct se_node_acl *__core_tpg_get_initiator_node_acl(struct se_portal_group *tpg,
+						const char *);
+extern struct se_node_acl *core_tpg_get_initiator_node_acl(struct se_portal_group *tpg,
+						unsigned char *);
+extern void core_tpg_add_node_to_devs(struct se_node_acl *,
+						struct se_portal_group *);
+extern struct se_node_acl *core_tpg_check_initiator_node_acl(
+						struct se_portal_group *,
+						unsigned char *);
+extern void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
+extern void core_tpg_wait_for_mib_ref(struct se_node_acl *);
+extern void core_tpg_clear_object_luns(struct se_portal_group *);
+extern struct se_node_acl *core_tpg_add_initiator_node_acl(
+					struct se_portal_group *,
+					struct se_node_acl *,
+					const char *, u32);
+extern int core_tpg_del_initiator_node_acl(struct se_portal_group *,
+						struct se_node_acl *, int);
+extern int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *,
+						unsigned char *, u32, int);
+extern int core_tpg_register(struct target_core_fabric_ops *,
+					struct se_wwn *,
+					struct se_portal_group *, void *,
+					int);
+extern int core_tpg_deregister(struct se_portal_group *);
+extern struct se_lun *core_tpg_pre_addlun(struct se_portal_group *, u32);
+extern int core_tpg_post_addlun(struct se_portal_group *, struct se_lun *, u32,
+				void *);
+extern struct se_lun *core_tpg_pre_dellun(struct se_portal_group *, u32, int *);
+extern int core_tpg_post_dellun(struct se_portal_group *, struct se_lun *);
+
+#endif /* TARGET_CORE_TPG_H */
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
new file mode 100644
index 0000000..66f44e5
--- /dev/null
+++ b/include/target/target_core_transport.h
@@ -0,0 +1,351 @@
+#ifndef TARGET_CORE_TRANSPORT_H
+#define TARGET_CORE_TRANSPORT_H
+
+#define TARGET_CORE_VERSION			TARGET_CORE_MOD_VERSION
+
+/* Attempts before moving from SHORT to LONG */
+#define PYX_TRANSPORT_WINDOW_CLOSED_THRESHOLD	3
+#define PYX_TRANSPORT_WINDOW_CLOSED_WAIT_SHORT	3  /* In milliseconds */
+#define PYX_TRANSPORT_WINDOW_CLOSED_WAIT_LONG	10 /* In milliseconds */
+
+#define PYX_TRANSPORT_STATUS_INTERVAL		5 /* In seconds */
+
+#define PYX_TRANSPORT_SENT_TO_TRANSPORT		0
+#define PYX_TRANSPORT_WRITE_PENDING		1
+
+#define PYX_TRANSPORT_UNKNOWN_SAM_OPCODE	-1
+#define PYX_TRANSPORT_HBA_QUEUE_FULL		-2
+#define PYX_TRANSPORT_REQ_TOO_MANY_SECTORS	-3
+#define PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES	-4
+#define PYX_TRANSPORT_INVALID_CDB_FIELD		-5
+#define PYX_TRANSPORT_INVALID_PARAMETER_LIST	-6
+#define PYX_TRANSPORT_LU_COMM_FAILURE		-7
+#define PYX_TRANSPORT_UNKNOWN_MODE_PAGE		-8
+#define PYX_TRANSPORT_WRITE_PROTECTED		-9
+#define PYX_TRANSPORT_TASK_TIMEOUT		-10
+#define PYX_TRANSPORT_RESERVATION_CONFLICT	-11
+#define PYX_TRANSPORT_ILLEGAL_REQUEST		-12
+#define PYX_TRANSPORT_USE_SENSE_REASON		-13
+
+#ifndef SAM_STAT_RESERVATION_CONFLICT
+#define SAM_STAT_RESERVATION_CONFLICT		0x18
+#endif
+
+#define TRANSPORT_PLUGIN_FREE			0
+#define TRANSPORT_PLUGIN_REGISTERED		1
+
+#define TRANSPORT_PLUGIN_PHBA_PDEV		1
+#define TRANSPORT_PLUGIN_VHBA_PDEV		2
+#define TRANSPORT_PLUGIN_VHBA_VDEV		3
+
+/* For SE OBJ Plugins, in seconds */
+#define TRANSPORT_TIMEOUT_TUR			10
+#define TRANSPORT_TIMEOUT_TYPE_DISK		60
+#define TRANSPORT_TIMEOUT_TYPE_ROM		120
+#define TRANSPORT_TIMEOUT_TYPE_TAPE		600
+#define TRANSPORT_TIMEOUT_TYPE_OTHER		300
+
+/* For se_task->task_state_flags */
+#define TSF_EXCEPTION_CLEARED			0x01
+
+/*
+ * struct se_subsystem_dev->su_dev_flags
+*/
+#define SDF_FIRMWARE_VPD_UNIT_SERIAL		0x00000001
+#define SDF_EMULATED_VPD_UNIT_SERIAL		0x00000002
+#define SDF_USING_UDEV_PATH			0x00000004
+#define SDF_USING_ALIAS				0x00000008
+
+/*
+ * struct se_device->dev_flags
+ */
+#define DF_READ_ONLY				0x00000001
+#define DF_SPC2_RESERVATIONS			0x00000002
+#define DF_SPC2_RESERVATIONS_WITH_ISID		0x00000004
+
+/* struct se_dev_attrib sanity values */
+/* 10 Minutes */
+#define DA_TASK_TIMEOUT_MAX			600
+/* Default max_unmap_lba_count */
+#define DA_MAX_UNMAP_LBA_COUNT			0
+/* Default max_unmap_block_desc_count */
+#define DA_MAX_UNMAP_BLOCK_DESC_COUNT		0
+/* Default unmap_granularity */
+#define DA_UNMAP_GRANULARITY_DEFAULT		0
+/* Default unmap_granularity_alignment */
+#define DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT	0
+/* Emulation for Direct Page Out */
+#define DA_EMULATE_DPO				0
+/* Emulation for Forced Unit Access WRITEs */
+#define DA_EMULATE_FUA_WRITE			1
+/* Emulation for Forced Unit Access READs */
+#define DA_EMULATE_FUA_READ			0
+/* Emulation for WriteCache and SYNCHRONIZE_CACHE */
+#define DA_EMULATE_WRITE_CACHE			0
+/* Emulation for UNIT ATTENTION Interlock Control */
+#define DA_EMULATE_UA_INTLLCK_CTRL		0
+/* Emulation for TASK_ABORTED status (TAS) by default */
+#define DA_EMULATE_TAS				1
+/* Emulation for Thin Provisioning UNMAP using block/blk-lib.c:blkdev_issue_discard() */
+#define DA_EMULATE_TPU				0
+/*
+ * Emulation for Thin Provisioning WRITE_SAME w/ UNMAP=1 bit using
+ * block/blk-lib.c:blkdev_issue_discard()
+ */
+#define DA_EMULATE_TPWS				0
+/* No Emulation for PSCSI by default */
+#define DA_EMULATE_RESERVATIONS			0
+/* No Emulation for PSCSI by default */
+#define DA_EMULATE_ALUA				0
+/* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
+#define DA_ENFORCE_PR_ISIDS			1
+#define DA_STATUS_MAX_SECTORS_MIN		16
+#define DA_STATUS_MAX_SECTORS_MAX		8192
+
+#define SE_MODE_PAGE_BUF			512
+
+#define MOD_MAX_SECTORS(ms, bs)			(ms % (PAGE_SIZE / bs))
+
+struct se_mem;
+struct se_subsystem_api;
+
+extern int init_se_global(void);
+extern void release_se_global(void);
+extern void transport_init_queue_obj(struct se_queue_obj *);
+extern int transport_subsystem_check_init(void);
+extern int transport_subsystem_register(struct se_subsystem_api *);
+extern void transport_subsystem_release(struct se_subsystem_api *);
+extern void transport_load_plugins(void);
+extern struct se_session *transport_init_session(void);
+extern void __transport_register_session(struct se_portal_group *,
+					struct se_node_acl *,
+					struct se_session *, void *);
+extern void transport_register_session(struct se_portal_group *,
+					struct se_node_acl *,
+					struct se_session *, void *);
+extern void transport_free_session(struct se_session *);
+extern void transport_deregister_session_configfs(struct se_session *);
+extern void transport_deregister_session(struct se_session *);
+extern void transport_cmd_finish_abort(struct se_cmd *, int);
+extern void transport_cmd_finish_abort_tmr(struct se_cmd *);
+extern void transport_complete_sync_cache(struct se_cmd *, int);
+extern void transport_complete_task(struct se_task *, int);
+extern void transport_add_task_to_execute_queue(struct se_task *,
+						struct se_task *,
+						struct se_device *);
+unsigned char *transport_dump_cmd_direction(struct se_cmd *);
+extern void transport_dump_dev_state(struct se_device *, char *, int *);
+extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
+					unsigned long long, char *, int *);
+extern void transport_dump_vpd_proto_id(struct t10_vpd *,
+					unsigned char *, int);
+extern void transport_set_vpd_proto_id(struct t10_vpd *, unsigned char *);
+extern int transport_dump_vpd_assoc(struct t10_vpd *,
+					unsigned char *, int);
+extern int transport_set_vpd_assoc(struct t10_vpd *, unsigned char *);
+extern int transport_dump_vpd_ident_type(struct t10_vpd *,
+					unsigned char *, int);
+extern int transport_set_vpd_ident_type(struct t10_vpd *, unsigned char *);
+extern int transport_dump_vpd_ident(struct t10_vpd *,
+					unsigned char *, int);
+extern int transport_set_vpd_ident(struct t10_vpd *, unsigned char *);
+extern struct se_device *transport_add_device_to_core_hba(struct se_hba *,
+					struct se_subsystem_api *,
+					struct se_subsystem_dev *, u32,
+					void *, struct se_dev_limits *,
+					const char *, const char *);
+extern void transport_device_setup_cmd(struct se_cmd *);
+extern void transport_init_se_cmd(struct se_cmd *,
+					struct target_core_fabric_ops *,
+					struct se_session *, u32, int, int,
+					unsigned char *);
+extern void transport_free_se_cmd(struct se_cmd *);
+extern int transport_generic_allocate_tasks(struct se_cmd *, unsigned char *);
+extern int transport_generic_handle_cdb(struct se_cmd *);
+extern int transport_generic_handle_cdb_map(struct se_cmd *);
+extern int transport_generic_handle_data(struct se_cmd *);
+extern void transport_new_cmd_failure(struct se_cmd *);
+extern int transport_generic_handle_tmr(struct se_cmd *);
+extern void __transport_stop_task_timer(struct se_task *, unsigned long *);
+extern unsigned char transport_asciihex_to_binaryhex(unsigned char val[2]);
+extern int transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *, u32,
+				struct scatterlist *, u32);
+extern int transport_clear_lun_from_sessions(struct se_lun *);
+extern int transport_check_aborted_status(struct se_cmd *, int);
+extern int transport_send_check_condition_and_sense(struct se_cmd *, u8, int);
+extern void transport_send_task_abort(struct se_cmd *);
+extern void transport_release_cmd_to_pool(struct se_cmd *);
+extern void transport_generic_free_cmd(struct se_cmd *, int, int, int);
+extern void transport_generic_wait_for_cmds(struct se_cmd *, int);
+extern u32 transport_calc_sg_num(struct se_task *, struct se_mem *, u32);
+extern int transport_map_mem_to_sg(struct se_task *, struct list_head *,
+					void *, struct se_mem *,
+					struct se_mem **, u32 *, u32 *);
+extern void transport_do_task_sg_chain(struct se_cmd *);
+extern void transport_generic_process_write(struct se_cmd *);
+extern int transport_generic_do_tmr(struct se_cmd *);
+/* From target_core_alua.c */
+extern int core_alua_check_nonop_delay(struct se_cmd *);
+
+/*
+ * Each se_transport_task_t can have N number of possible struct se_task's
+ * for the storage transport(s) to possibly execute.
+ * Used primarily for splitting up CDBs that exceed the physical storage
+ * HBA's maximum sector count per task.
+ */
+struct se_mem {
+	struct page	*se_page;
+	u32		se_len;
+	u32		se_off;
+	struct list_head se_list;
+} ____cacheline_aligned;
+
+/*
+ * 	Each type of disk transport supported MUST have a template defined
+ *	within its .h file.
+ */
+struct se_subsystem_api {
+	/*
+	 * The Name. :-)
+	 */
+	char name[16];
+	/*
+	 * Transport Type.
+	 */
+	u8 transport_type;
+	/*
+	 * struct module for struct se_hba references
+	 */
+	struct module *owner;
+	/*
+	 * Used for global se_subsystem_api list_head
+	 */
+	struct list_head sub_api_list;
+	/*
+	 * For SCF_SCSI_NON_DATA_CDB
+	 */
+	int (*cdb_none)(struct se_task *);
+	/*
+	 * For SCF_SCSI_CONTROL_NONSG_IO_CDB
+	 */
+	int (*map_task_non_SG)(struct se_task *);
+	/*
+	 * For SCF_SCSI_DATA_SG_IO_CDB and SCF_SCSI_CONTROL_SG_IO_CDB
+	 */
+	int (*map_task_SG)(struct se_task *);
+	/*
+	 * attach_hba():
+	 */
+	int (*attach_hba)(struct se_hba *, u32);
+	/*
+	 * detach_hba():
+	 */
+	void (*detach_hba)(struct se_hba *);
+	/*
+	 * pmode_hba(): Used for TCM/pSCSI subsystem plugin HBA ->
+	 *		Linux/SCSI struct Scsi_Host passthrough
+	*/
+	int (*pmode_enable_hba)(struct se_hba *, unsigned long);
+	/*
+	 * allocate_virtdevice():
+	 */
+	void *(*allocate_virtdevice)(struct se_hba *, const char *);
+	/*
+	 * create_virtdevice(): Only for Virtual HBAs
+	 */
+	struct se_device *(*create_virtdevice)(struct se_hba *,
+				struct se_subsystem_dev *, void *);
+	/*
+	 * free_device():
+	 */
+	void (*free_device)(void *);
+
+	/*
+	 * dpo_emulated():
+	 */
+	int (*dpo_emulated)(struct se_device *);
+	/*
+	 * fua_write_emulated():
+	 */
+	int (*fua_write_emulated)(struct se_device *);
+	/*
+	 * fua_read_emulated():
+	 */
+	int (*fua_read_emulated)(struct se_device *);
+	/*
+	 * write_cache_emulated():
+	 */
+	int (*write_cache_emulated)(struct se_device *);
+	/*
+	 * transport_complete():
+	 *
+	 * Use transport_generic_complete() for majority of DAS transport
+	 * drivers.  Provided out of convenience.
+	 */
+	int (*transport_complete)(struct se_task *task);
+	struct se_task *(*alloc_task)(struct se_cmd *);
+	/*
+	 * do_task():
+	 */
+	int (*do_task)(struct se_task *);
+	/*
+	 * Used by virtual subsystem plugins IBLOCK and FILEIO to emulate
+	 * UNMAP and WRITE_SAME_* w/ UNMAP=1 <-> Linux/Block Discard
+	 */
+	int (*do_discard)(struct se_device *, sector_t, u32);
+	/*
+	 * Used  by virtual subsystem plugins IBLOCK and FILEIO to emulate
+	 * SYNCHRONIZE_CACHE_* <-> Linux/Block blkdev_issue_flush()
+	 */
+	void (*do_sync_cache)(struct se_task *);
+	/*
+	 * free_task():
+	 */
+	void (*free_task)(struct se_task *);
+	/*
+	 * check_configfs_dev_params():
+	 */
+	ssize_t (*check_configfs_dev_params)(struct se_hba *, struct se_subsystem_dev *);
+	/*
+	 * set_configfs_dev_params():
+	 */
+	ssize_t (*set_configfs_dev_params)(struct se_hba *, struct se_subsystem_dev *,
+						const char *, ssize_t);
+	/*
+	 * show_configfs_dev_params():
+	 */
+	ssize_t (*show_configfs_dev_params)(struct se_hba *, struct se_subsystem_dev *,
+						char *);
+	/*
+	 * get_cdb():
+	 */
+	unsigned char *(*get_cdb)(struct se_task *);
+	/*
+	 * get_device_rev():
+	 */
+	u32 (*get_device_rev)(struct se_device *);
+	/*
+	 * get_device_type():
+	 */
+	u32 (*get_device_type)(struct se_device *);
+	/*
+	 * Get the sector_t from a subsystem backstore..
+	 */
+	sector_t (*get_blocks)(struct se_device *);
+	/*
+	 * do_se_mem_map():
+	 */
+	int (*do_se_mem_map)(struct se_task *, struct list_head *, void *,
+				struct se_mem *, struct se_mem **, u32 *, u32 *);
+	/*
+	 * get_sense_buffer():
+	 */
+	unsigned char *(*get_sense_buffer)(struct se_task *);
+} ____cacheline_aligned;
+
+#define TRANSPORT(dev)		((dev)->transport)
+#define HBA_TRANSPORT(hba)	((hba)->transport)
+
+extern struct se_global *se_global;
+
+#endif /* TARGET_CORE_TRANSPORT_H */
-- 
cgit v1.1


From c94fbe1d9e1e9b1a1f82eb0b53b1cf53bcf9712b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 14 Jan 2011 11:25:58 -0500
Subject: tracing: Only process module tracepoints once

The commit:

 9f987b3141f086de27832514aad9f50a53f754
 tracing: Include module.h in define_trace.h

only solved half the problem. If the trace/events/module.h header is
included at the time of define_trace.h (or in ftrace.h within it),
the module.h TRACE_SYSTEM will override the current TRACE_SYSTEM
macro.

Since define_trace.h is included when CREATE_TRACE_POINTS is set,
and the first thing it does is to #undef CREATE_TRACE_POINTS,
by placing the module.h TRACE_SYSTEM inside a
 #ifdef CREATE_TRACE_POINTS
we can prevent it from overriding the TRACE_SYSTEM that is
being processed, and still process the module.h tracepoints
when the module code defines CREATE_TRACE_POINTS and includes
the trace/events/module.h header.

As with commit 9f987b3141, this is only an issue if module.h
is not included before the trace/events/<event>.h file is
included, which (luckily) has not happened yet.

Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/module.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index c7bb2f0..c6bae36 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -1,5 +1,15 @@
+/*
+ * Because linux/module.h has tracepoints in the header, and ftrace.h
+ * eventually includes this file, define_trace.h includes linux/module.h
+ * But we do not want the module.h to override the TRACE_SYSTEM macro
+ * variable that define_trace.h is processing, so we only set it
+ * when module events are being processed, which would happen when
+ * CREATE_TRACE_POINTS is defined.
+ */
+#ifdef CREATE_TRACE_POINTS
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM module
+#endif
 
 #if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_MODULE_H
-- 
cgit v1.1


From 415e12b2379239973feab91850b0dce985c6058a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 7 Jan 2011 00:55:09 +0100
Subject: PCI/ACPI: Request _OSC control once for each root bridge (v3)

Move the evaluation of acpi_pci_osc_control_set() (to request control of
PCI Express native features) into acpi_pci_root_add() to avoid calling
it many times for the same root complex with the same arguments.
Additionally, check if all of the requisite _OSC support bits are set
before calling acpi_pci_osc_control_set() for a given root complex.

References: https://bugzilla.kernel.org/show_bug.cgi?id=20232
Reported-by: Ozan Caglayan <ozan@pardus.org.tr>
Tested-by: Ozan Caglayan <ozan@pardus.org.tr>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/acpi/apei.h      |  6 ++++++
 include/linux/pci-acpi.h |  6 ++++++
 include/linux/pci.h      | 11 +++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index b336502..c4dbb13 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -19,6 +19,12 @@
 extern int hest_disable;
 extern int erst_disable;
 
+#ifdef CONFIG_ACPI_APEI
+void __init acpi_hest_init(void);
+#else
+static inline void acpi_hest_init(void) { return; }
+#endif
+
 typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
 int apei_hest_parse(apei_hest_func_t func, void *data);
 
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index c8b6473..479d9bb 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -40,4 +40,10 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 { return NULL; }
 #endif
 
+#ifdef CONFIG_ACPI_APEI
+extern bool aer_acpi_firmware_first(void);
+#else
+static inline bool aer_acpi_firmware_first(void) { return false; }
+#endif
+
 #endif	/* _PCI_ACPI_H_ */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 63cbadc..12dd86a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -994,6 +994,9 @@ extern void pci_restore_msi_state(struct pci_dev *dev);
 extern int pci_msi_enabled(void);
 #endif
 
+extern bool pcie_ports_disabled;
+extern bool pcie_ports_auto;
+
 #ifndef CONFIG_PCIEASPM
 static inline int pcie_aspm_enabled(void)
 {
@@ -1003,6 +1006,14 @@ static inline int pcie_aspm_enabled(void)
 extern int pcie_aspm_enabled(void);
 #endif
 
+#ifdef CONFIG_PCIEAER
+void pci_no_aer(void);
+bool pci_aer_available(void);
+#else
+static inline void pci_no_aer(void) { }
+static inline bool pci_aer_available(void) { return false; }
+#endif
+
 #ifndef CONFIG_PCIE_ECRC
 static inline void pcie_set_ecrc_checking(struct pci_dev *dev)
 {
-- 
cgit v1.1


From b6e335aeeb114dccb07eaa09e8b62ff9510cf745 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 29 Dec 2010 13:21:23 +0100
Subject: PCI/PM: Use pm_wakeup_event() directly for reporting wakeup events

After recent changes related to wakeup events pm_wakeup_event()
automatically checks if the given device is configured to signal wakeup,
so pci_wakeup_event() may be a static inline function calling
pm_wakeup_event() directly.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 12dd86a..9e67dcd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -820,7 +820,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 bool pci_dev_run_wake(struct pci_dev *dev);
 bool pci_check_pme_status(struct pci_dev *dev);
-void pci_wakeup_event(struct pci_dev *dev);
 void pci_pme_wakeup_bus(struct pci_bus *bus);
 
 static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
-- 
cgit v1.1


From 49731baa41df404c2c3f44555869ab387363af43 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 14 Jan 2011 18:43:57 +0100
Subject: block: restore multiple bd_link_disk_holder() support

Commit e09b457b (block: simplify holder symlink handling) incorrectly
assumed that there is only one link at maximum.  dm may use multiple
links and expects block layer to track reference count for each link,
which is different from and unrelated to the exclusive device holder
identified by @holder when the device is opened.

Remove the single holder assumption and automatic removal of the link
and revive the per-link reference count tracking.  The code
essentially behaves the same as before commit e09b457b sans the
unnecessary kobject reference count dancing.

While at it, note that this facility should not be used by anyone else
than the current ones.  Sysfs symlinks shouldn't be abused like this
and the whole thing doesn't belong in the block layer at all.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Milan Broz <mbroz@redhat.com>
Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Cc: Neil Brown <neilb@suse.de>
Cc: linux-raid@vger.kernel.org
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 include/linux/fs.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3984f23..fb21903 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -666,7 +666,7 @@ struct block_device {
 	int			bd_holders;
 	bool			bd_write_holder;
 #ifdef CONFIG_SYSFS
-	struct gendisk *	bd_holder_disk;	/* for sysfs slave linkng */
+	struct list_head	bd_holder_disks;
 #endif
 	struct block_device *	bd_contains;
 	unsigned		bd_block_size;
@@ -2058,12 +2058,18 @@ extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
 extern int blkdev_put(struct block_device *bdev, fmode_t mode);
 #ifdef CONFIG_SYSFS
 extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+extern void bd_unlink_disk_holder(struct block_device *bdev,
+				  struct gendisk *disk);
 #else
 static inline int bd_link_disk_holder(struct block_device *bdev,
 				      struct gendisk *disk)
 {
 	return 0;
 }
+static inline void bd_unlink_disk_holder(struct block_device *bdev,
+					 struct gendisk *disk)
+{
+}
 #endif
 #endif
 
-- 
cgit v1.1


From 3e8b3b90fecedcf20d895c4e6ad01a379fe252bf Mon Sep 17 00:00:00 2001
From: Hanno Boeck <hanno@hboeck.de>
Date: Fri, 14 Jan 2011 19:14:47 +0100
Subject: ALSA: constify functions in ac97

Signed-off-by: Hanno Boeck <hanno@hboeck.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/ac97_codec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 4940045..b602f47 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -477,7 +477,7 @@ struct snd_ac97_template {
 
 struct snd_ac97 {
 	/* -- lowlevel (hardware) driver specific -- */
-	struct snd_ac97_build_ops * build_ops;
+	const struct snd_ac97_build_ops *build_ops;
 	void *private_data;
 	void (*private_free) (struct snd_ac97 *ac97);
 	/* --- */
-- 
cgit v1.1


From ab0724ffee24409a7f81afb539b2ac29090bff3e Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Sat, 15 Jan 2011 00:07:22 +0100
Subject: PCI / ACPI: Fix build issue in pci_root.c for !CONFIG_PCIEPORTBUS

The compilation of drivers/acpi/pci_root.c fails if
CONFIG_PCIEPORTBUS is unset.  Fix the problem.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e67dcd..559d028 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -993,8 +993,13 @@ extern void pci_restore_msi_state(struct pci_dev *dev);
 extern int pci_msi_enabled(void);
 #endif
 
+#ifdef CONFIG_PCIEPORTBUS
 extern bool pcie_ports_disabled;
 extern bool pcie_ports_auto;
+#else
+#define pcie_ports_disabled	true
+#define pcie_ports_auto		false
+#endif
 
 #ifndef CONFIG_PCIEASPM
 static inline int pcie_aspm_enabled(void)
-- 
cgit v1.1


From 0029227f1bc30b6c809ae751f9e7af6cef900997 Mon Sep 17 00:00:00 2001
From: Andiry Xu <andiry.xu@amd.com>
Date: Mon, 27 Dec 2010 17:39:02 +0800
Subject: xHCI: synchronize irq in xhci_suspend()

Synchronize the interrupts instead of free them in xhci_suspend(). This will
prevent a double free when the host is suspended and then the card removed.

Set the flag hcd->msix_enabled when using MSI-X, and check the flag in
suspend_common(). MSI-X synchronization will be handled by xhci_suspend(),
and MSI/INTx will be synchronized in suspend_common().

This patch should be queued for the 2.6.37 stable tree.

Reported-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andiry Xu <andiry.xu@amd.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@kernel.org
---
 include/linux/usb/hcd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index dd6ee49..a854fe8 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -112,6 +112,7 @@ struct usb_hcd {
 	/* Flags that get set only during HCD registration or removal. */
 	unsigned		rh_registered:1;/* is root hub registered? */
 	unsigned		rh_pollable:1;	/* may we poll the root hub? */
+	unsigned		msix_enabled:1;	/* driver has MSI-X enabled? */
 
 	/* The next flag is a stopgap, to be removed when all the HCDs
 	 * support the new root-hub polling mechanism. */
-- 
cgit v1.1


From 3632ef8909118db9584e1bed9538dc180adb32f8 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sat, 15 Jan 2011 09:27:00 +1000
Subject: Revert "drm: Update fbdev fb_fix_screeninfo"

This reverts commit dfe63bb0ad9810db13aab0058caba97866e0a681.

This commit was causing nouveau not to work properly, for -rc1 I'd
prefer it worked and we can look if this is useful for 2.6.39.

Cc: James Simmons <jsimmons@infradead.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/drm/drm_fb_helper.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index aac27bd..f22e7fe 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -121,6 +121,9 @@ int drm_fb_helper_setcolreg(unsigned regno,
 void drm_fb_helper_restore(void);
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height);
+void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
+			    uint32_t depth);
+
 int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info);
 
 bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
-- 
cgit v1.1


From 98d530fe246b65fbd3cdeeeca319a80c46cb4793 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sat, 1 Jan 2011 23:00:23 +0000
Subject: Fix dmaengine_submit() return type

desc->tx_submit's return type is dma_cookie_t, not int.  Therefore,
dmaengine_submit() should match this return type as it's just
wrapping this detail.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9d8688b..830935b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -532,7 +532,7 @@ static inline int dmaengine_resume(struct dma_chan *chan)
 	return dmaengine_device_control(chan, DMA_RESUME, 0);
 }
 
-static inline int dmaengine_submit(struct dma_async_tx_descriptor *desc)
+static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
 {
 	return desc->tx_submit(desc);
 }
-- 
cgit v1.1


From 96a608a4bfd8468c21881b3f92024923886eb015 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 14 Jan 2011 17:51:11 -0800
Subject: ARM: PL08x: fix a warning

drivers/dma/amba-pl08x.c: In function 'pl08x_start_txd':
drivers/dma/amba-pl08x.c:205: warning: dereferencing 'void *' pointer

We never dereference llis_va aside from assigning it to a struct
pl08x_lli pointer or calculating the address of array element 0.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 933b4ed..5b87b6a 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -118,7 +118,7 @@ struct pl08x_txd {
 	dma_addr_t dst_addr;
 	size_t len;
 	dma_addr_t llis_bus;
-	void *llis_va;
+	struct pl08x_lli *llis_va;
 	bool active;
 	/* Default cctl value for LLIs */
 	u32 cctl;
-- 
cgit v1.1


From 9875cf806403fae66b2410a3c2cc820d97731e04 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 18:45:21 +0000
Subject: Add a dentry op to handle automounting rather than abusing
 follow_link()

Add a dentry op (d_automount) to handle automounting directories rather than
abusing the follow_link() inode operation.  The operation is keyed off a new
dentry flag (DCACHE_NEED_AUTOMOUNT).

This also makes it easier to add an AT_ flag to suppress terminal segment
automount during pathwalk and removes the need for the kludge code in the
pathwalk algorithm to handle directories with follow_link() semantics.

The ->d_automount() dentry operation:

	struct vfsmount *(*d_automount)(struct path *mountpoint);

takes a pointer to the directory to be mounted upon, which is expected to
provide sufficient data to determine what should be mounted.  If successful, it
should return the vfsmount struct it creates (which it should also have added
to the namespace using do_add_mount() or similar).  If there's a collision with
another automount attempt, NULL should be returned.  If the directory specified
by the parameter should be used directly rather than being mounted upon,
-EISDIR should be returned.  In any other case, an error code should be
returned.

The ->d_automount() operation is called with no locks held and may sleep.  At
this point the pathwalk algorithm will be in ref-walk mode.

Within fs/namei.c itself, a new pathwalk subroutine (follow_automount()) is
added to handle mountpoints.  It will return -EREMOTE if the automount flag was
set, but no d_automount() op was supplied, -ELOOP if we've encountered too many
symlinks or mountpoints, -EISDIR if the walk point should be used without
mounting and 0 if successful.  The path will be updated to point to the mounted
filesystem if a successful automount took place.

__follow_mount() is replaced by follow_managed() which is more generic
(especially with the patch that adds ->d_manage()).  This handles transits from
directories during pathwalk, including automounting and skipping over
mountpoints (and holding processes with the next patch).

__follow_mount_rcu() will jump out of RCU-walk mode if it encounters an
automount point with nothing mounted on it.

follow_dotdot*() does not handle automounts as you don't want to trigger them
whilst following "..".

I've also extracted the mount/don't-mount logic from autofs4 and included it
here.  It makes the mount go ahead anyway if someone calls open() or creat(),
tries to traverse the directory, tries to chdir/chroot/etc. into the directory,
or sticks a '/' on the end of the pathname.  If they do a stat(), however,
they'll only trigger the automount if they didn't also say O_NOFOLLOW.

I've also added an inode flag (S_AUTOMOUNT) so that filesystems can mark their
inodes as automount points.  This flag is automatically propagated to the
dentry as DCACHE_NEED_AUTOMOUNT by __d_instantiate().  This saves NFS and could
save AFS a private flag bit apiece, but is not strictly necessary.  It would be
preferable to do the propagation in d_set_d_op(), but that doesn't normally
have access to the inode.

[AV: fixed breakage in case if __follow_mount_rcu() fails and nameidata_drop_rcu()
succeeds in RCU case of do_lookup(); we need to fall through to non-RCU case after
that, rather than just returning with ungrabbed *path]

Signed-off-by: David Howells <dhowells@redhat.com>
Was-Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 7 ++++++-
 include/linux/fs.h     | 2 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59fcd24..ee6c26d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -167,6 +167,7 @@ struct dentry_operations {
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
+	struct vfsmount *(*d_automount)(struct path *);
 } ____cacheline_aligned;
 
 /*
@@ -205,13 +206,17 @@ struct dentry_operations {
 
 #define DCACHE_CANT_MOUNT	0x0100
 #define DCACHE_GENOCIDE		0x0200
-#define DCACHE_MOUNTED		0x0400	/* is a mountpoint */
 
 #define DCACHE_OP_HASH		0x1000
 #define DCACHE_OP_COMPARE	0x2000
 #define DCACHE_OP_REVALIDATE	0x4000
 #define DCACHE_OP_DELETE	0x8000
 
+#define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
+#define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
+#define DCACHE_MANAGED_DENTRY \
+	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT)
+
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3984f23..4c00ed5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -242,6 +242,7 @@ struct inodes_stat_t {
 #define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */
 #define S_PRIVATE	512	/* Inode is fs-internal */
 #define S_IMA		1024	/* Inode has an associated IMA struct */
+#define S_AUTOMOUNT	2048	/* Automount/referral quasi-directory */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -277,6 +278,7 @@ struct inodes_stat_t {
 #define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
 #define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
 #define IS_IMA(inode)		((inode)->i_flags & S_IMA)
+#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
 
 /* the read-only stuff doesn't really belong here, but any other place is
    probably as bad and I don't want to create yet another include file. */
-- 
cgit v1.1


From cc53ce53c86924bfe98a12ea20b7465038a08792 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 18:45:26 +0000
Subject: Add a dentry op to allow processes to be held during pathwalk transit

Add a dentry op (d_manage) to permit a filesystem to hold a process and make it
sleep when it tries to transit away from one of that filesystem's directories
during a pathwalk.  The operation is keyed off a new dentry flag
(DCACHE_MANAGE_TRANSIT).

The filesystem is allowed to be selective about which processes it holds and
which it permits to continue on or prohibits from transiting from each flagged
directory.  This will allow autofs to hold up client processes whilst letting
its userspace daemon through to maintain the directory or the stuff behind it
or mounted upon it.

The ->d_manage() dentry operation:

	int (*d_manage)(struct path *path, bool mounting_here);

takes a pointer to the directory about to be transited away from and a flag
indicating whether the transit is undertaken by do_add_mount() or
do_move_mount() skipping through a pile of filesystems mounted on a mountpoint.

It should return 0 if successful and to let the process continue on its way;
-EISDIR to prohibit the caller from skipping to overmounted filesystems or
automounting, and to use this directory; or some other error code to return to
the user.

->d_manage() is called with namespace_sem writelocked if mounting_here is true
and no other locks held, so it may sleep.  However, if mounting_here is true,
it may not initiate or wait for a mount or unmount upon the parameter
directory, even if the act is actually performed by userspace.

Within fs/namei.c, follow_managed() is extended to check with d_manage() first
on each managed directory, before transiting away from it or attempting to
automount upon it.

follow_down() is renamed follow_down_one() and should only be used where the
filesystem deliberately intends to avoid management steps (e.g. autofs).

A new follow_down() is added that incorporates the loop done by all other
callers of follow_down() (do_add/move_mount(), autofs and NFSD; whilst AFS, NFS
and CIFS do use it, their use is removed by converting them to use
d_automount()).  The new follow_down() calls d_manage() as appropriate.  It
also takes an extra parameter to indicate if it is being called from mount code
(with namespace_sem writelocked) which it passes to d_manage().  follow_down()
ignores automount points so that it can be used to mount on them.

__follow_mount_rcu() is made to abort rcu-walk mode if it hits a directory with
DCACHE_MANAGE_TRANSIT set on the basis that we're probably going to have to
sleep.  It would be possible to enter d_manage() in rcu-walk mode too, and have
that determine whether to abort or not itself.  That would allow the autofs
daemon to continue on in rcu-walk mode.

Note that DCACHE_MANAGE_TRANSIT on a directory should be cleared when it isn't
required as every tranist from that directory will cause d_manage() to be
invoked.  It can always be set again when necessary.

==========================
WHAT THIS MEANS FOR AUTOFS
==========================

Autofs currently uses the lookup() inode op and the d_revalidate() dentry op to
trigger the automounting of indirect mounts, and both of these can be called
with i_mutex held.

autofs knows that the i_mutex will be held by the caller in lookup(), and so
can drop it before invoking the daemon - but this isn't so for d_revalidate(),
since the lock is only held on _some_ of the code paths that call it.  This
means that autofs can't risk dropping i_mutex from its d_revalidate() function
before it calls the daemon.

The bug could manifest itself as, for example, a process that's trying to
validate an automount dentry that gets made to wait because that dentry is
expired and needs cleaning up:

	mkdir         S ffffffff8014e05a     0 32580  24956
	Call Trace:
	 [<ffffffff885371fd>] :autofs4:autofs4_wait+0x674/0x897
	 [<ffffffff80127f7d>] avc_has_perm+0x46/0x58
	 [<ffffffff8009fdcf>] autoremove_wake_function+0x0/0x2e
	 [<ffffffff88537be6>] :autofs4:autofs4_expire_wait+0x41/0x6b
	 [<ffffffff88535cfc>] :autofs4:autofs4_revalidate+0x91/0x149
	 [<ffffffff80036d96>] __lookup_hash+0xa0/0x12f
	 [<ffffffff80057a2f>] lookup_create+0x46/0x80
	 [<ffffffff800e6e31>] sys_mkdirat+0x56/0xe4

versus the automount daemon which wants to remove that dentry, but can't
because the normal process is holding the i_mutex lock:

	automount     D ffffffff8014e05a     0 32581      1              32561
	Call Trace:
	 [<ffffffff80063c3f>] __mutex_lock_slowpath+0x60/0x9b
	 [<ffffffff8000ccf1>] do_path_lookup+0x2ca/0x2f1
	 [<ffffffff80063c89>] .text.lock.mutex+0xf/0x14
	 [<ffffffff800e6d55>] do_rmdir+0x77/0xde
	 [<ffffffff8005d229>] tracesys+0x71/0xe0
	 [<ffffffff8005d28d>] tracesys+0xd5/0xe0

which means that the system is deadlocked.

This patch allows autofs to hold up normal processes whilst the daemon goes
ahead and does things to the dentry tree behind the automouter point without
risking a deadlock as almost no locks are held in d_manage() and none in
d_automount().

Signed-off-by: David Howells <dhowells@redhat.com>
Was-Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 11 +++++++++--
 include/linux/namei.h  |  3 ++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ee6c26d..1a87760 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -168,6 +168,7 @@ struct dentry_operations {
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
+	int (*d_manage)(struct dentry *, bool);
 } ____cacheline_aligned;
 
 /*
@@ -214,8 +215,9 @@ struct dentry_operations {
 
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
+#define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
 #define DCACHE_MANAGED_DENTRY \
-	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT)
+	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
 extern seqlock_t rename_lock;
 
@@ -404,7 +406,12 @@ static inline void dont_mount(struct dentry *dentry)
 
 extern void dput(struct dentry *);
 
-static inline int d_mountpoint(struct dentry *dentry)
+static inline bool d_managed(struct dentry *dentry)
+{
+	return dentry->d_flags & DCACHE_MANAGED_DENTRY;
+}
+
+static inline bool d_mountpoint(struct dentry *dentry)
 {
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 18d06ad..8ef2c78 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -79,7 +79,8 @@ extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 
-extern int follow_down(struct path *);
+extern int follow_down_one(struct path *);
+extern int follow_down(struct path *, bool);
 extern int follow_up(struct path *);
 
 extern struct dentry *lock_rename(struct dentry *, struct dentry *);
-- 
cgit v1.1


From 6f45b65672c8017d5e210e338bb5858a938ef445 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 18:45:31 +0000
Subject: Add an AT_NO_AUTOMOUNT flag to suppress terminal automount

Add an AT_NO_AUTOMOUNT flag to suppress terminal automounting of automount
point directories.  This can be used by fstatat() users to permit the
gathering of attributes on an automount point and also prevent
mass-automounting of a directory of automount points by ls.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fcntl.h | 1 +
 include/linux/namei.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index afc00af..a562fa5 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -45,6 +45,7 @@
 #define AT_REMOVEDIR		0x200   /* Remove directory instead of
                                            unlinking file.  */
 #define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
+#define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8ef2c78..f276d4f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -45,6 +45,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - ending slashes ok even for nonexistent files
  *  - internal "there are more path components" flag
  *  - dentry cache is untrusted; force a real lookup
+ *  - suppress terminal automount
  */
 #define LOOKUP_FOLLOW		0x0001
 #define LOOKUP_DIRECTORY	0x0002
@@ -53,6 +54,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_PARENT		0x0010
 #define LOOKUP_REVAL		0x0020
 #define LOOKUP_RCU		0x0040
+#define LOOKUP_NO_AUTOMOUNT	0x0080
 /*
  * Intent data
  */
-- 
cgit v1.1


From 36d43a43761b004ad1879ac21471d8fc5f3157ec Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 18:45:42 +0000
Subject: NFS: Use d_automount() rather than abusing follow_link()

Make NFS use the new d_automount() dentry operation rather than abusing
follow_link() on directories.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0779bb8..6023efa 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -215,7 +215,6 @@ struct nfs_inode {
 #define NFS_INO_ADVISE_RDPLUS	(0)		/* advise readdirplus */
 #define NFS_INO_STALE		(1)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
-#define NFS_INO_MOUNTPOINT	(3)		/* inode is remote mountpoint */
 #define NFS_INO_FLUSHING	(4)		/* inode is flushing out data */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
-- 
cgit v1.1


From 1972580bb4edea3ed6fe273b2ca72f44f10f8c86 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Fri, 14 Jan 2011 18:46:40 +0000
Subject: autofs4: Bump version

Increase the autofs module sub-version so we can tell what kernel
implementation is being used from user space debug logging.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/auto_fs4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 8b49ac4..e02982f 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -24,7 +24,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	5
 
-#define AUTOFS_PROTO_SUBVERSION		1
+#define AUTOFS_PROTO_SUBVERSION		2
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
-- 
cgit v1.1


From ab90911ff90cdab59b31c045c3f0ae480d14f29d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 18:46:51 +0000
Subject: Allow d_manage() to be used in RCU-walk mode

Allow d_manage() to be called from pathwalk when it is in RCU-walk mode as well
as when it is in Ref-walk mode.  This permits __follow_mount_rcu() to call
d_manage() directly.  d_manage() needs a parameter to indicate that it is in
RCU-walk mode as it isn't allowed to sleep if in that mode (but should return
-ECHILD instead).

autofs4_d_manage() can then be set to retain RCU-walk mode if the daemon
accesses it and otherwise request dropping back to ref-walk mode.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 1a87760..f958c19 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -168,7 +168,7 @@ struct dentry_operations {
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
-	int (*d_manage)(struct dentry *, bool);
+	int (*d_manage)(struct dentry *, bool, bool);
 } ____cacheline_aligned;
 
 /*
-- 
cgit v1.1


From ea5b778a8b98c85a87d66bf844904f9c3802b869 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Jan 2011 19:10:03 +0000
Subject: Unexport do_add_mount() and add in follow_automount(), not
 ->d_automount()

Unexport do_add_mount() and make ->d_automount() return the vfsmount to be
added rather than calling do_add_mount() itself.  follow_automount() will then
do the addition.

This slightly complicates things as ->d_automount() normally wants to add the
new vfsmount to an expiration list and start an expiration timer.  The problem
with that is that the vfsmount will be deleted if it has a refcount of 1 and
the timer will not repeat if the expiration list is empty.

To this end, we require the vfsmount to be returned from d_automount() with a
refcount of (at least) 2.  One of these refs will be dropped unconditionally.
In addition, follow_automount() must get a 3rd ref around the call to
do_add_mount() lest it eat a ref and return an error, leaving the mount we
have open to being expired as we would otherwise have only 1 ref on it.

d_automount() should also add the the vfsmount to the expiration list (by
calling mnt_set_expiry()) and start the expiration timer before returning, if
this mechanism is to be used.  The vfsmount will be unlinked from the
expiration list by follow_automount() if do_add_mount() fails.

This patch also fixes the call to do_add_mount() for AFS to propagate the mount
flags from the parent vfsmount.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mount.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 1869ea2..af4765e 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -110,12 +110,7 @@ extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data);
 
-struct nameidata;
-
-struct path;
-extern int do_add_mount(struct vfsmount *newmnt, struct path *path,
-			int mnt_flags, struct list_head *fslist);
-
+extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
 extern dev_t name_to_dev_t(char *name);
-- 
cgit v1.1


From 672c54466d24994eb9633f993862c89539504a42 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 13 Jan 2011 15:36:09 -0700
Subject: dt/flattree: Return virtual address from
 early_init_dt_alloc_memory_arch()

The physical address is never used by the device tree code when
allocating memory for unflattening.  Change the architecture's alloc
hook to return the virutal address instead.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_fdt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 0ef22a1..c84d900 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -97,7 +97,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
-extern u64 early_init_dt_alloc_memory_arch(u64 size, u64 align);
+extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
 /*
-- 
cgit v1.1


From f03c65993b98eeb909a4012ce7833c5857d74755 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Jan 2011 22:30:21 -0500
Subject: sanitize vfsmount refcounting changes

Instead of splitting refcount between (per-cpu) mnt_count
and (SMP-only) mnt_longrefs, make all references contribute
to mnt_count again and keep track of how many are longterm
ones.

Accounting rules for longterm count:
	* 1 for each fs_struct.root.mnt
	* 1 for each fs_struct.pwd.mnt
	* 1 for having non-NULL ->mnt_ns
	* decrement to 0 happens only under vfsmount lock exclusive

That allows nice common case for mntput() - since we can't drop the
final reference until after mnt_longterm has reached 0 due to the rules
above, mntput() can grab vfsmount lock shared and check mnt_longterm.
If it turns out to be non-zero (which is the common case), we know
that this is not the final mntput() and can just blindly decrement
percpu mnt_count.  Otherwise we grab vfsmount lock exclusive and
do usual decrement-and-check of percpu mnt_count.

For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm();
namespace.c uses the latter in places where we don't already hold
vfsmount lock exclusive and opencodes a few remaining spots where
we need to manipulate mnt_longterm.

Note that we mostly revert the code outside of fs/namespace.c back
to what we used to have; in particular, normal code doesn't need
to care about two kinds of references, etc.  And we get to keep
the optimization Nick's variant had bought us...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mount.h | 4 +---
 include/linux/path.h  | 2 --
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index af4765e..604f122 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -60,7 +60,7 @@ struct vfsmount {
 	struct super_block *mnt_sb;	/* pointer to superblock */
 #ifdef CONFIG_SMP
 	struct mnt_pcp __percpu *mnt_pcp;
-	atomic_t mnt_longrefs;
+	atomic_t mnt_longterm;		/* how many of the refs are longterm */
 #else
 	int mnt_count;
 	int mnt_writers;
@@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
-extern void mntput_long(struct vfsmount *mnt);
-extern struct vfsmount *mntget_long(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
diff --git a/include/linux/path.h b/include/linux/path.h
index a581e8c..edc98de 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,9 +10,7 @@ struct path {
 };
 
 extern void path_get(struct path *);
-extern void path_get_long(struct path *);
 extern void path_put(struct path *);
-extern void path_put_long(struct path *);
 
 static inline int path_equal(const struct path *path1, const struct path *path2)
 {
-- 
cgit v1.1


From fc8fe1e992ae0326a88edbe4d6793e840bbdd4ff Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 16 Jan 2011 20:42:43 +0100
Subject: PCI / ACPI: Fix build of the AER driver for CONFIG_ACPI unset

After commit 415e12b23792 ("PCI/ACPI: Request _OSC control once for each
root bridge (v3)") include/linux/pci-acpi.h is included by
drivers/pci/pcie/aer/aerdrv.c and if CONFIG_ACPI is unset, the bogus and
unnecessary alternative definition of acpi_find_root_bridge_handle()
causes a build error to occur.

Remove the offending piece of garbage.

Reported-and-tested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci-acpi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 479d9bb..4462350 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -35,9 +35,6 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
 	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pbus),
 					      pbus->number);
 }
-#else
-static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
-{ return NULL; }
 #endif
 
 #ifdef CONFIG_ACPI_APEI
-- 
cgit v1.1


From b3697c0255d9d73eaaa4deb4512e3f0ff97b3b71 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Sun, 16 Jan 2011 13:10:39 -0800
Subject: fix non-x86 build failure in pmdp_get_and_clear

pmdp_get_and_clear/pmdp_clear_flush/pmdp_splitting_flush were trapped as
BUG() and they were defined only to diminish the risk of build issues on
not-x86 archs and to be consistent with the generic pte methods previously
defined in include/asm-generic/pgtable.h.

But they are causing more trouble than they were supposed to solve, so
it's simpler not to define them when THP is off.

This is also correcting the export of pmdp_splitting_flush which is
currently unused (x86 isn't using the generic implementation in
mm/pgtable-generic.c and no other arch needs that [yet]).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Sam Ravnborg <sam@ravnborg.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable.h | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f1eddf7..31b6188 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -87,14 +87,6 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 	pmd_clear(mm, address, pmdp);
 	return pmd;
 })
-#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
-				       unsigned long address,
-				       pmd_t *pmdp)
-{
-	BUG();
-	return __pmd(0);
-}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
@@ -163,9 +155,9 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 #endif
 
 #ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
-			      unsigned long address,
-			      pmd_t *pmdp);
+extern pmd_t pmdp_splitting_flush(struct vm_area_struct *vma,
+				  unsigned long address,
+				  pmd_t *pmdp);
 #endif
 
 #ifndef __HAVE_ARCH_PTE_SAME
-- 
cgit v1.1


From 94ae85220a07d357d4937086c490854f63344de4 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sun, 16 Jan 2011 20:18:05 +0000
Subject: ARM: PL08x: cleanup comments

Cleanup the formatting of comments, remove some which don't make sense
anymore.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
[fix conflict with 96a608a4]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/amba/pl08x.h | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 5b87b6a..3111385 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -12,7 +12,6 @@
  *
  * Please credit ARM.com
  * Documentation: ARM DDI 0196D
- *
  */
 
 #ifndef AMBA_PL08X_H
@@ -55,8 +54,8 @@ enum {
  * @circular_buffer: whether the buffer passed in is circular and
  * shall simply be looped round round (like a record baby round
  * round round round)
- * @single: the device connected to this channel will request single
- * DMA transfers, not bursts. (Bursts are default.)
+ * @single: the device connected to this channel will request single DMA
+ * transfers, not bursts. (Bursts are default.)
  * @periph_buses: the device connected to this channel is accessible via
  * these buses (use PL08X_AHB1 | PL08X_AHB2).
  */
@@ -78,8 +77,7 @@ struct pl08x_channel_data {
  * @addr: current address
  * @maxwidth: the maximum width of a transfer on this bus
  * @buswidth: the width of this bus in bytes: 1, 2 or 4
- * @fill_bytes: bytes required to fill to the next bus memory
- * boundary
+ * @fill_bytes: bytes required to fill to the next bus memory boundary
  */
 struct pl08x_bus_data {
 	dma_addr_t addr;
@@ -92,10 +90,10 @@ struct pl08x_bus_data {
  * struct pl08x_phy_chan - holder for the physical channels
  * @id: physical index to this channel
  * @lock: a lock to use when altering an instance of this struct
- * @signal: the physical signal (aka channel) serving this
- * physical channel right now
- * @serving: the virtual channel currently being served by this
- * physical channel
+ * @signal: the physical signal (aka channel) serving this physical channel
+ * right now
+ * @serving: the virtual channel currently being served by this physical
+ * channel
  */
 struct pl08x_phy_chan {
 	unsigned int id;
@@ -119,7 +117,6 @@ struct pl08x_txd {
 	size_t len;
 	dma_addr_t llis_bus;
 	struct pl08x_lli *llis_va;
-	bool active;
 	/* Default cctl value for LLIs */
 	u32 cctl;
 	/*
@@ -130,8 +127,8 @@ struct pl08x_txd {
 };
 
 /**
- * struct pl08x_dma_chan_state - holds the PL08x specific virtual
- * channel states
+ * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * states
  * @PL08X_CHAN_IDLE: the channel is idle
  * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
  * channel and is running a transfer on it
@@ -152,7 +149,7 @@ enum pl08x_dma_chan_state {
  * @chan: wrappped abstract channel
  * @phychan: the physical channel utilized by this channel, if there is one
  * @phychan_hold: if non-zero, hold on to the physical channel even if we
- *  have no pending entries
+ * have no pending entries
  * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
  * @name: name of channel
  * @cd: channel platform data
@@ -166,8 +163,8 @@ enum pl08x_dma_chan_state {
  * @host: a pointer to the host (internal use)
  * @state: whether the channel is idle, paused, running etc
  * @slave: whether this channel is a device (slave) or for memcpy
- * @waiting: a TX descriptor on this channel which is waiting for
- * a physical channel to become available
+ * @waiting: a TX descriptor on this channel which is waiting for a physical
+ * channel to become available
  */
 struct pl08x_dma_chan {
 	struct dma_chan chan;
@@ -189,16 +186,16 @@ struct pl08x_dma_chan {
 };
 
 /**
- * struct pl08x_platform_data - the platform configuration for the
- * PL08x PrimeCells.
+ * struct pl08x_platform_data - the platform configuration for the PL08x
+ * PrimeCells.
  * @slave_channels: the channels defined for the different devices on the
  * platform, all inclusive, including multiplexed channels. The available
- * physical channels will be multiplexed around these signals as they
- * are requested, just enumerate all possible channels.
- * @get_signal: request a physical signal to be used for a DMA
- * transfer immediately: if there is some multiplexing or similar blocking
- * the use of the channel the transfer can be denied by returning
- * less than zero, else it returns the allocated signal number
+ * physical channels will be multiplexed around these signals as they are
+ * requested, just enumerate all possible channels.
+ * @get_signal: request a physical signal to be used for a DMA transfer
+ * immediately: if there is some multiplexing or similar blocking the use
+ * of the channel the transfer can be denied by returning less than zero,
+ * else it returns the allocated signal number
  * @put_signal: indicate to the platform that this physical signal is not
  * running any DMA transfer and multiplexing can be recycled
  * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2
-- 
cgit v1.1


From f06267104dd9112f11586830d22501d0e26245ea Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 Oct 2010 15:24:36 +0000
Subject: RDMA: Update workqueue usage

* ib_wq is added, which is used as the common workqueue for infiniband
  instead of the system workqueue.  All system workqueue usages
  including flush_scheduled_work() callers are converted to use and
  flush ib_wq.

* cancel_delayed_work() + flush_scheduled_work() converted to
  cancel_delayed_work_sync().

* qib_wq is removed and ib_wq is used instead.

This is to prepare for deprecation of flush_scheduled_work().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 include/rdma/ib_verbs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e04c488..55cd0a0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -47,10 +47,13 @@
 #include <linux/list.h>
 #include <linux/rwsem.h>
 #include <linux/scatterlist.h>
+#include <linux/workqueue.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
+extern struct workqueue_struct *ib_wq;
+
 union ib_gid {
 	u8	raw[16];
 	struct {
-- 
cgit v1.1


From 2fe17c1075836b66678ed2a305fd09b6773883aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 Jan 2011 13:07:43 +0100
Subject: fallocate should be a file operation

Currently all filesystems except XFS implement fallocate asynchronously,
while XFS forced a commit.  Both of these are suboptimal - in case of O_SYNC
I/O we really want our allocation on disk, especially for the !KEEP_SIZE
case where we actually grow the file with user-visible zeroes.  On the
other hand always commiting the transaction is a bad idea for fast-path
uses of fallocate like for example in recent Samba versions.   Given
that block allocation is a data plane operation anyway change it from
an inode operation to a file operation so that we have the file structure
available that lets us check for O_SYNC.

This also includes moving the code around for a few of the filesystems,
and remove the already unnedded S_ISDIR checks given that we only wire
up fallocate for regular files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 177b4dde..09b5bd6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1552,6 +1552,8 @@ struct file_operations {
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
 	int (*setlease)(struct file *, long, struct file_lock **);
+	long (*fallocate)(struct file *file, int mode, loff_t offset,
+			  loff_t len);
 };
 
 #define IPERM_FLAG_RCU	0x0001
@@ -1582,8 +1584,6 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
-	long (*fallocate)(struct inode *inode, int mode, loff_t offset,
-			  loff_t len);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 } ____cacheline_aligned;
-- 
cgit v1.1


From c2b3e74b78b24cb367289a75a2bd30e569e56e0e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 13 Dec 2010 19:38:08 -0500
Subject: fs: Remove unlikely() from fput_light()

In fput_light(), there's an unlikely(fput_needed), which running on
my normal desktop doing firefox, xchat, evolution and part of my distcc farm,
and running the annotate branch profiler shows that the unlikely is not
very unlikely.

 correct incorrect  %        Function             File              Line
 ------- ---------  -        --------             ----              ----
       0       48 100 fput_light                file.h               26
115828710 897415279  88 fput_light              file.h               26
865271179 5286128445  85 fput_light             file.h               26
19568539  8923664  31 fput_light                file.h               26
12353677  3562279  22 fput_light                file.h               26
  267691    67062  20 fput_light                file.h               26
15014853   348172   2 fput_light                file.h               26
  209258      205   0 fput_light                file.h               26
 1364164        0   0 fput_light                file.h               26

Which gives 1032903812 times it was correct and 6203351846 times it was
incorrect, or 85% incorrect.

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/file.h b/include/linux/file.h
index b1e1297..e85baeb 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -23,7 +23,7 @@ extern struct file *alloc_file(struct path *, fmode_t mode,
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
-	if (unlikely(fput_needed))
+	if (fput_needed)
 		fput(file);
 }
 
-- 
cgit v1.1


From ecf5632dd189ab4c366cef853d6e5fe7adfe52e5 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Sun, 16 Jan 2011 23:28:17 +0900
Subject: fs: fix address space warnings in ioctl_fiemap()

The fi_extents_start field of struct fiemap_extent_info is a
user pointer but was not marked as __user. This makes sparse
emit following warnings:

  CHECK   fs/ioctl.c
fs/ioctl.c:114:26: warning: incorrect type in argument 1 (different address spaces)
fs/ioctl.c:114:26:    expected void [noderef] <asn:1>*dst
fs/ioctl.c:114:26:    got struct fiemap_extent *[assigned] dest
fs/ioctl.c:202:14: warning: incorrect type in argument 1 (different address spaces)
fs/ioctl.c:202:14:    expected void const volatile [noderef] <asn:1>*<noident>
fs/ioctl.c:202:14:    got struct fiemap_extent *[assigned] fi_extents_start
fs/ioctl.c:212:27: warning: incorrect type in argument 1 (different address spaces)
fs/ioctl.c:212:27:    expected void [noderef] <asn:1>*dst
fs/ioctl.c:212:27:    got char *<noident>

Also add 'ufiemap' variable to eliminate unnecessary casts.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09b5bd6..32b38cd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1483,8 +1483,8 @@ struct fiemap_extent_info {
 	unsigned int fi_flags;		/* Flags as passed from user */
 	unsigned int fi_extents_mapped;	/* Number of mapped extents */
 	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
-	struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
-						 * array */
+	struct fiemap_extent __user *fi_extents_start; /* Start of
+							fiemap_extent array */
 };
 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
 			    u64 phys, u64 len, u32 flags);
-- 
cgit v1.1


From 2a8652f4e0d11ee27b1d2870c600fd1300661a6e Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@polito.it>
Date: Wed, 3 Nov 2010 11:11:15 +0100
Subject: ecryptfs: moved ECRYPTFS_SUPER_MAGIC definition to linux/magic.h

The definition of ECRYPTFS_SUPER_MAGIC has been moved to the include
file 'linux/magic.h' to become available to other kernel subsystems.

Signed-off-by: Roberto Sassu <roberto.sassu@polito.it>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 include/linux/magic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/magic.h b/include/linux/magic.h
index ff690d0..62730ea 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -16,6 +16,7 @@
 #define TMPFS_MAGIC		0x01021994
 #define HUGETLBFS_MAGIC 	0x958458f6	/* some random number */
 #define SQUASHFS_MAGIC		0x73717368
+#define ECRYPTFS_SUPER_MAGIC	0xf15f
 #define EFS_SUPER_MAGIC		0x414A53
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
-- 
cgit v1.1


From d2763b4f44e16f44cc4156c9591e74df9dcd88be Mon Sep 17 00:00:00 2001
From: Naveen Kumar Gaddipati <naveen.gaddipati@stericsson.com>
Date: Mon, 17 Jan 2011 20:40:58 -0800
Subject: Input: bu21013_ts - remove duplicate resolution parameters

Remove duplicate display resolution parameters from platform data as
one pair is quite enough.

Signed-off-by: Naveen Kumar Gaddipati <naveen.gaddipati@stericsson.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input/bu21013.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index e470d38..05e0328 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -12,8 +12,6 @@
  * @cs_en:	pointer to the cs enable function
  * @cs_dis:	pointer to the cs disable function
  * @irq_read_val:    pointer to read the pen irq value function
- * @x_max_res: xmax resolution
- * @y_max_res: ymax resolution
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
  * @cs_pin: chip select pin
@@ -29,8 +27,6 @@ struct bu21013_platform_device {
 	int (*cs_en)(int reset_pin);
 	int (*cs_dis)(int reset_pin);
 	int (*irq_read_val)(void);
-	int x_max_res;
-	int y_max_res;
 	int touch_x_max;
 	int touch_y_max;
 	unsigned int cs_pin;
-- 
cgit v1.1


From b4e104eaeb8cd4329a23e0e4ebf166681b1d182d Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 17 Jan 2011 11:05:40 +0800
Subject: ACPICA: Update all ACPICA copyrights and signons to 2011

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acexcep.h          | 2 +-
 include/acpi/acnames.h          | 2 +-
 include/acpi/acoutput.h         | 2 +-
 include/acpi/acpi.h             | 2 +-
 include/acpi/acpiosxf.h         | 2 +-
 include/acpi/acpixf.h           | 2 +-
 include/acpi/acrestyp.h         | 2 +-
 include/acpi/actbl.h            | 2 +-
 include/acpi/actbl1.h           | 2 +-
 include/acpi/actbl2.h           | 2 +-
 include/acpi/actypes.h          | 2 +-
 include/acpi/platform/acenv.h   | 2 +-
 include/acpi/platform/acgcc.h   | 2 +-
 include/acpi/platform/aclinux.h | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 17714be..5b6c391 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index 9cf736e..fc1575f 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index bc4a6de..ef1cef7 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index a091cab..de39915 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 65b3f58..a3252a5 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -8,7 +8,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 241b8a0..2f38e29 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -6,7 +6,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index e552635..0a66cc4 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index ad20016..7e42bfe 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index c637b75..981349d 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index d4136b2..0fc15df 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 939a431..64f838b 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index a3e334a..5af3ed5 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 5dcb953..e228893 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 572189e..5d2a5e9 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -5,7 +5,7 @@
  *****************************************************************************/
 
 /*
- * Copyright (C) 2000 - 2010, Intel Corp.
+ * Copyright (C) 2000 - 2011, Intel Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
-- 
cgit v1.1


From 8d5f0a647395c1323787df675d2805cad54fc89f Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 17 Jan 2011 10:31:08 +0800
Subject: ACPICA: Update version to 20110112

Version 20110112.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 2f38e29..e46ec95 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -47,7 +47,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20101209
+#define ACPI_CA_VERSION                 0x20110112
 
 #include "actypes.h"
 #include "actbl.h"
-- 
cgit v1.1


From 9af39713feb53da96ba23fa94a73ffd0de50a815 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 18 Dec 2010 09:20:59 -0300
Subject: [media] saa7146: Convert from .ioctl to .unlocked_ioctl

Convert saa7146 to use core-assisted locking.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/saa7146.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index ac7ce00..7982714 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -115,7 +115,7 @@ struct saa7146_dev
 
 	/* different device locks */
 	spinlock_t			slock;
-	struct mutex			lock;
+	struct mutex			v4l2_lock;
 
 	unsigned char			__iomem *mem;		/* pointer to mapped IO memory */
 	u32				revision;	/* chip revision; needed for bug-workarounds*/
-- 
cgit v1.1


From 3c7c9370fb645f4713e0fbbe69425d8db9b47a13 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 8 Jan 2011 07:08:02 -0300
Subject: [media] v4l2-subdev: remove core.s_config and
 v4l2_i2c_new_subdev_cfg()

The core.s_config op was meant for legacy drivers that needed to work with old
pre-2.6.26 kernels. This is no longer relevant. Unfortunately, this op was
incorrectly called from several drivers.

Replace those occurences with proper i2c_board_info structs and call
v4l2_i2c_new_subdev_board.

After these changes v4l2_i2c_new_subdev_cfg() was no longer used, so remove
that function as well.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/mt9v011.h     | 17 +++++++++++++++++
 include/media/v4l2-common.h | 13 +------------
 include/media/v4l2-subdev.h |  6 +-----
 3 files changed, 19 insertions(+), 17 deletions(-)
 create mode 100644 include/media/mt9v011.h

(limited to 'include')

diff --git a/include/media/mt9v011.h b/include/media/mt9v011.h
new file mode 100644
index 0000000..ea29fc7
--- /dev/null
+++ b/include/media/mt9v011.h
@@ -0,0 +1,17 @@
+/* mt9v011 sensor
+ *
+ * Copyright (C) 2011 Hans Verkuil <hverkuil@xs4all.nl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __MT9V011_H__
+#define __MT9V011_H__
+
+struct mt9v011_platform_data {
+	unsigned xtal;	/* Hz */
+};
+
+#endif
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 2d65b35..a659319 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -138,21 +138,10 @@ struct v4l2_subdev_ops;
 
 /* Load an i2c module and return an initialized v4l2_subdev struct.
    The client_type argument is the name of the chip that's on the adapter. */
-struct v4l2_subdev *v4l2_i2c_new_subdev_cfg(struct v4l2_device *v4l2_dev,
+struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
 		struct i2c_adapter *adapter, const char *client_type,
-		int irq, void *platform_data,
 		u8 addr, const unsigned short *probe_addrs);
 
-/* Load an i2c module and return an initialized v4l2_subdev struct.
-   The client_type argument is the name of the chip that's on the adapter. */
-static inline struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
-		struct i2c_adapter *adapter, const char *client_type,
-		u8 addr, const unsigned short *probe_addrs)
-{
-	return v4l2_i2c_new_subdev_cfg(v4l2_dev, adapter, client_type, 0, NULL,
-				       addr, probe_addrs);
-}
-
 struct i2c_board_info;
 
 struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index b0316a7..42fbe46 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -106,10 +106,7 @@ struct v4l2_subdev_io_pin_config {
 	u8 strength;	/* Pin drive strength */
 };
 
-/* s_config: if set, then it is always called by the v4l2_i2c_new_subdev*
-	functions after the v4l2_subdev was registered. It is used to pass
-	platform data to the subdev which can be used during initialization.
-
+/*
    s_io_pin_config: configure one or more chip I/O pins for chips that
 	multiplex different internal signal pads out to IO pins.  This function
 	takes a pointer to an array of 'n' pin configuration entries, one for
@@ -141,7 +138,6 @@ struct v4l2_subdev_io_pin_config {
 struct v4l2_subdev_core_ops {
 	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
-	int (*s_config)(struct v4l2_subdev *sd, int irq, void *platform_data);
 	int (*s_io_pin_config)(struct v4l2_subdev *sd, size_t n,
 				      struct v4l2_subdev_io_pin_config *pincfg);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
-- 
cgit v1.1


From 45f6f84af3ae9db19f39bc5d0976d626b0ef626e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 8 Jan 2011 07:15:53 -0300
Subject: [media] v4l2-subdev: add (un)register internal ops

Some subdevs need to call into the board code after they are registered
and have a valid struct v4l2_device pointer. The s_config op was abused
for this, but now that it is removed we need a cleaner way of solving this.

So this patch adds a struct with internal ops that the v4l2 core can call.

Currently only two ops exist: register and unregister. Subdevs can implement
these to call the board code and pass it the v4l2_device pointer, which the
board code can then use to get access to the struct that embeds the
v4l2_device.

It is expected that in the future open and close ops will also be added.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-subdev.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 42fbe46..daf1e57 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -411,6 +411,21 @@ struct v4l2_subdev_ops {
 	const struct v4l2_subdev_sensor_ops	*sensor;
 };
 
+/*
+ * Internal ops. Never call this from drivers, only the v4l2 framework can call
+ * these ops.
+ *
+ * registered: called when this subdev is registered. When called the v4l2_dev
+ *	field is set to the correct v4l2_device.
+ *
+ * unregistered: called when this subdev is unregistered. When called the
+ *	v4l2_dev field is still set to the correct v4l2_device.
+ */
+struct v4l2_subdev_internal_ops {
+	int (*registered)(struct v4l2_subdev *sd);
+	void (*unregistered)(struct v4l2_subdev *sd);
+};
+
 #define V4L2_SUBDEV_NAME_SIZE 32
 
 /* Set this flag if this subdev is a i2c device. */
@@ -427,6 +442,8 @@ struct v4l2_subdev {
 	u32 flags;
 	struct v4l2_device *v4l2_dev;
 	const struct v4l2_subdev_ops *ops;
+	/* Never call these internal ops from within a driver! */
+	const struct v4l2_subdev_internal_ops *internal_ops;
 	/* The control handler of this subdev. May be NULL. */
 	struct v4l2_ctrl_handler *ctrl_handler;
 	/* name must be unique */
-- 
cgit v1.1


From 2a863793beaa0fc9ee7aeb87efe85544a6b129c0 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 11 Jan 2011 14:45:03 -0300
Subject: [media] v4l2-ctrls: v4l2_ctrl_handler_setup must set is_new to 1

Renamed has_new to is_new.

Drivers can use the is_new field to determine if a new value was specified
for a control. The v4l2_ctrl_handler_setup() must always set this to 1 since
the setup has to force a full update of all controls.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-ctrls.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index d69ab4a..fcc9a0c 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -53,8 +53,10 @@ struct v4l2_ctrl_ops {
   * @handler:	The handler that owns the control.
   * @cluster:	Point to start of cluster array.
   * @ncontrols:	Number of controls in cluster array.
-  * @has_new:	Internal flag: set when there is a valid new value.
   * @done:	Internal flag: set for each processed control.
+  * @is_new:	Set when the user specified a new value for this control. It
+  *		is also set when called from v4l2_ctrl_handler_setup. Drivers
+  *		should never set this flag.
   * @is_private: If set, then this control is private to its handler and it
   *		will not be added to any other handlers. Drivers can set
   *		this flag.
@@ -97,9 +99,9 @@ struct v4l2_ctrl {
 	struct v4l2_ctrl_handler *handler;
 	struct v4l2_ctrl **cluster;
 	unsigned ncontrols;
-	unsigned int has_new:1;
 	unsigned int done:1;
 
+	unsigned int is_new:1;
 	unsigned int is_private:1;
 	unsigned int is_volatile:1;
 
-- 
cgit v1.1


From 5aad724280b9f8ffff3a55311ef0ba35ebb4099a Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Thu, 6 Jan 2011 16:59:36 -0300
Subject: [media] rc: fix up and genericize some time unit conversions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ene_ir driver was using a private define of MS_TO_NS, which is meant
to be microseconds to nanoseconds. The mceusb driver copied it,
intending to use is a milliseconds to microseconds. Lets move the
defines to a common location, expand and standardize them a touch, so
that we now have:

  MS_TO_NS - milliseconds to nanoseconds
  MS_TO_US - milliseconds to microseconds
  US_TO_NS - microseconds to nanoseconds

Reported-by: David Härdeman <david@hardeman.nu>
CC: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/rc-core.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/media/rc-core.h b/include/media/rc-core.h
index a23c1fc..2963263 100644
--- a/include/media/rc-core.h
+++ b/include/media/rc-core.h
@@ -183,6 +183,9 @@ static inline void init_ir_raw_event(struct ir_raw_event *ev)
 }
 
 #define IR_MAX_DURATION         0xFFFFFFFF      /* a bit more than 4 seconds */
+#define US_TO_NS(usec)		((usec) * 1000)
+#define MS_TO_US(msec)		((msec) * 1000)
+#define MS_TO_NS(msec)		((msec) * 1000 * 1000)
 
 void ir_raw_event_handle(struct rc_dev *dev);
 int ir_raw_event_store(struct rc_dev *dev, struct ir_raw_event *ev);
-- 
cgit v1.1


From 01c40c048b0f3f377e6d27b35fd99f04efcc21dd Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 19 Nov 2010 11:20:06 -0300
Subject: [media] v4l: Include linux/videodev2.h in media/v4l2-ctrls.h

The later makes extensive use of structures defined in the former.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-ctrls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index fcc9a0c..97d0638 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -23,6 +23,7 @@
 
 #include <linux/list.h>
 #include <linux/device.h>
+#include <linux/videodev2.h>
 
 /* forward references */
 struct v4l2_ctrl_handler;
-- 
cgit v1.1


From 765c2a964b49bd06b61a52991519281c85d82b67 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 19 Jan 2011 12:06:52 +0530
Subject: Bluetooth: Fix race condition with conn->sec_level

The conn->sec_level value is supposed to represent the current level of
security that the connection has. However, by assigning to it before
requesting authentication it will have the wrong value during the
authentication procedure. To fix this a pending_sec_level variable is
added which is used to track the desired security level while making
sure that sec_level always represents the current level of security.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a29feb0..d2cf884 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -184,6 +184,7 @@ struct hci_conn {
 	__u32		 link_mode;
 	__u8             auth_type;
 	__u8             sec_level;
+	__u8		 pending_sec_level;
 	__u8             power_save;
 	__u16            disc_timeout;
 	unsigned long	 pend;
-- 
cgit v1.1


From 4580ccc04ddd8c17a470573a7fdb8def2e036dfa Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Tue, 18 Jan 2011 22:39:00 +0000
Subject: sctp: user perfect name for Delayed SACK Timer option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The option name of Delayed SACK Timer should be SCTP_DELAYED_SACK,
not SCTP_DELAYED_ACK.

Left SCTP_DELAYED_ACK be concomitant with SCTP_DELAYED_SACK,
for making compatibility with existing applications.

Reference:
8.1.19.  Get or Set Delayed SACK Timer (SCTP_DELAYED_SACK)
（http://tools.ietf.org/html/draft-ietf-tsvwg-sctpsocket-25)

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/user.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 2b2769c..92eedc0 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -78,6 +78,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_GET_PEER_ADDR_INFO	15
 #define SCTP_DELAYED_ACK_TIME	16
 #define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME
+#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME
 #define SCTP_CONTEXT	17
 #define SCTP_FRAGMENT_INTERLEAVE	18
 #define SCTP_PARTIAL_DELIVERY_POINT	19 /* Set/Get partial delivery point */
-- 
cgit v1.1


From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 20 Jan 2011 12:06:35 +0100
Subject: lockdep: Move early boot local IRQ enable/disable status to
 init/main.c

During early boot, local IRQ is disabled until IRQ subsystem is
properly initialized.  During this time, no one should enable
local IRQ and some operations which usually are not allowed with
IRQ disabled, e.g. operations which might sleep or require
communications with other processors, are allowed.

lockdep tracked this with early_boot_irqs_off/on() callbacks.
As other subsystems need this information too, move it to
init/main.c and make it generally available.  While at it,
toggle the boolean to early_boot_irqs_disabled instead of
enabled so that it can be initialized with %false and %true
indicates the exceptional condition.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <20110120110635.GB6036@htj.dyndns.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h  | 2 ++
 include/linux/lockdep.h | 8 --------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5a9d905..d07d805 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -243,6 +243,8 @@ extern int test_taint(unsigned flag);
 extern unsigned long get_taint(void);
 extern int root_mountflags;
 
+extern bool early_boot_irqs_disabled;
+
 /* Values used for system_state */
 extern enum system_states {
 	SYSTEM_BOOTING,
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 71c09b26..f638fd7 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -436,16 +436,8 @@ do {								\
 #endif /* CONFIG_LOCKDEP */
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-extern void early_boot_irqs_off(void);
-extern void early_boot_irqs_on(void);
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
-static inline void early_boot_irqs_off(void)
-{
-}
-static inline void early_boot_irqs_on(void)
-{
-}
 static inline void print_irqtrace_events(struct task_struct *curr)
 {
 }
-- 
cgit v1.1


From ca3e021417eed30ec2b64ce88eb0acf64aa9bc29 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 20 Jan 2011 14:44:24 -0800
Subject: memcg: fix USED bit handling at uncharge in THP

Now, under THP:

at charge:
  - PageCgroupUsed bit is set to all page_cgroup on a hugepage.
    ....set to 512 pages.
at uncharge
  - PageCgroupUsed bit is unset on the head page.

So, some pages will remain with "Used" bit.

This patch fixes that Used bit is set only to the head page.
Used bits for tail pages will be set at splitting if necessary.

This patch adds this lock order:
   compound_lock() -> page_cgroup_move_lock().

[akpm@linux-foundation.org: fix warning]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6a576f9..f512e18 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -146,6 +146,10 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask);
 u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+#endif
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -335,6 +339,11 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *mem)
 	return 0;
 }
 
+static inline void mem_cgroup_split_huge_fixup(struct page *head,
+						struct page *tail)
+{
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.1


From 2d6d9fd3a54a28c6f67f26eb6c74803307a1b11e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 19 Jan 2011 22:27:14 +0100
Subject: ACPI: Introduce acpi_os_ioremap()

Commit ca9b600be38c ("ACPI / PM: Make suspend_nvs_save() use
acpi_os_map_memory()") attempted to prevent the code in osl.c and nvs.c
from using different ioremap() variants by making the latter use
acpi_os_map_memory() for mapping the NVS pages.  However, that also
requires acpi_os_unmap_memory() to be used for unmapping them, which
causes synchronize_rcu() to be executed many times in a row
unnecessarily and introduces substantial delays during resume on some
systems.

Instead of using acpi_os_map_memory() for mapping the NVS pages in nvs.c
introduce acpi_os_ioremap() calling ioremap_cache() and make the code in
both osl.c and nvs.c use it.

Reported-by: Jeff Chua <jeff.chua.linux@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h    |  3 ---
 include/linux/acpi_io.h | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/acpi_io.h

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index eb176bb..a2e910e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -306,9 +306,6 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
 
-int acpi_os_map_generic_address(struct acpi_generic_address *addr);
-void acpi_os_unmap_generic_address(struct acpi_generic_address *addr);
-
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
diff --git a/include/linux/acpi_io.h b/include/linux/acpi_io.h
new file mode 100644
index 0000000..7180013
--- /dev/null
+++ b/include/linux/acpi_io.h
@@ -0,0 +1,16 @@
+#ifndef _ACPI_IO_H_
+#define _ACPI_IO_H_
+
+#include <linux/io.h>
+#include <acpi/acpi.h>
+
+static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
+					    acpi_size size)
+{
+       return ioremap_cache(phys, size);
+}
+
+int acpi_os_map_generic_address(struct acpi_generic_address *addr);
+void acpi_os_unmap_generic_address(struct acpi_generic_address *addr);
+
+#endif
-- 
cgit v1.1


From 9190b3b3208d052d98cb601fcc192f3f71a5658b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Jan 2011 23:31:33 -0800
Subject: net_sched: accurate bytes/packets stats/rates

In commit 44b8288308ac9d (net_sched: pfifo_head_drop problem), we fixed
a problem with pfifo_head drops that incorrectly decreased
sch->bstats.bytes and sch->bstats.packets

Several qdiscs (CHOKe, SFQ, pfifo_head, ...) are able to drop a
previously enqueued packet, and bstats cannot be changed, so
bstats/rates are not accurate (over estimated)

This patch changes the qdisc_bstats updates to be done at dequeue() time
instead of enqueue() time. bstats counters no longer account for dropped
frames, and rates are more correct, since enqueue() bursts dont have
effect on dequeue() rate.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e9eee99..160a407 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -445,7 +445,6 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 {
 	__skb_queue_tail(list, skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	qdisc_bstats_update(sch, skb);
 
 	return NET_XMIT_SUCCESS;
 }
@@ -460,8 +459,10 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
 {
 	struct sk_buff *skb = __skb_dequeue(list);
 
-	if (likely(skb != NULL))
+	if (likely(skb != NULL)) {
 		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		qdisc_bstats_update(sch, skb);
+	}
 
 	return skb;
 }
@@ -474,10 +475,11 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
 static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
 					      struct sk_buff_head *list)
 {
-	struct sk_buff *skb = __qdisc_dequeue_head(sch, list);
+	struct sk_buff *skb = __skb_dequeue(list);
 
 	if (likely(skb != NULL)) {
 		unsigned int len = qdisc_pkt_len(skb);
+		sch->qstats.backlog -= len;
 		kfree_skb(skb);
 		return len;
 	}
-- 
cgit v1.1


From 1c77ff22f539ceaa64ea43d6a26d867e84602cb7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Jan 2011 19:41:35 +0100
Subject: genirq: Remove __do_IRQ

All architectures are finally converted. Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Ungerer <gerg@uclinux.org>
Cc: Michal Simek <monstr@monstr.eu>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Jeff Dike <jdike@addtoit.com>
---
 include/linux/irqdesc.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 6a64c6f..c1a95b7 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -101,13 +101,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 #define get_irq_desc_msi(desc)		((desc)->irq_data.msi_desc)
 
 /*
- * Monolithic do_IRQ implementation.
- */
-#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
-extern unsigned int __do_IRQ(unsigned int irq);
-#endif
-
-/*
  * Architectures call this to let the generic IRQ layer
  * handle an interrupt. If the descriptor is attached to an
  * irqchip-style controller then we call the ->handle_irq() handler,
@@ -115,14 +108,7 @@ extern unsigned int __do_IRQ(unsigned int irq);
  */
 static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
-#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 	desc->handle_irq(irq, desc);
-#else
-	if (likely(desc->handle_irq))
-		desc->handle_irq(irq, desc);
-	else
-		__do_IRQ(irq);
-#endif
 }
 
 static inline void generic_handle_irq(unsigned int irq)
-- 
cgit v1.1


From 1daeddd5962acad1bea55e524fc0fadf32654a21 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 13 Jan 2011 09:30:49 -0800
Subject: rtc: Cleanup removed UIE emulation declaration

rtc_dev_update_irq_enable_emul was removed in commit
042620a018afcfba1d678062b62e463b9e43a68d (UIE emulation is
now handled via hrtimer), but the declaration was missed.
This patch cleans it up.

Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML-Reference: <1294939849-20608-1-git-send-email-john.stultz@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rtc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 3c995b4..ea76069 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -235,8 +235,6 @@ extern int rtc_irq_set_freq(struct rtc_device *rtc,
 				struct rtc_task *task, int freq);
 extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
-extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
-						unsigned int enabled);
 
 void rtc_aie_update_irq(void *private);
 void rtc_uie_update_irq(void *private);
-- 
cgit v1.1


From aa0be0f4659f91f31e45adc422b1788cb36ffddc Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 20 Jan 2011 15:26:12 -0800
Subject: RTC: Propagate error handling via rtc_timer_enqueue properly

In cases where RTC hardware does not support alarms, the virtualized
RTC interfaces did not have a way to propagate the error up to userland.

This patch extends rtc_timer_enqueue so it catches errors from the hardware
and returns them upwards to the virtualized interfaces. To simplify error
handling, it also internalizes the management of the timer->enabled bit
into rtc_timer_enqueue and rtc_timer_remove.

Also makes rtc_timer_enqueue and rtc_timer_remove static.

Reported-by: David Daney <ddaney@caviumnetworks.com>
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Diagnosed-by: David Daney <ddaney@caviumnetworks.com>
Tested-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
LKML-Reference: <1295565973-14358-1-git-send-email-john.stultz@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rtc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index ea76069..a0b639f 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -244,8 +244,6 @@ int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 
-void rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
-void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 void rtc_timer_init(struct rtc_timer *timer, void (*f)(void* p), void* data);
 int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer* timer,
 			ktime_t expires, ktime_t period);
-- 
cgit v1.1


From 3dece370ecc7c6152b3fdd21c6de28179f6e643d Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Fri, 21 Jan 2011 08:49:56 +0100
Subject: mm: System without MMU do not need pte_mkwrite

The patch "thp: export maybe_mkwrite" (commit 14fd403f2146) breaks
systems without MMU.

Error log:

    CC      arch/microblaze/mm/init.o
  In file included from include/linux/mman.h:14,
                   from arch/microblaze/mm/consistent.c:24:
  include/linux/mm.h: In function 'maybe_mkwrite':
  include/linux/mm.h:482: error: implicit declaration of function 'pte_mkwrite'
  include/linux/mm.h:482: error: incompatible types in assignment

Signed-off-by: Michal Simek <monstr@monstr.eu>
CC: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 956a355..f6385fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -470,6 +470,7 @@ static inline void set_compound_order(struct page *page, unsigned long order)
 	page[1].lru.prev = (void *)order;
 }
 
+#ifdef CONFIG_MMU
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
  * servicing faults for write access.  In the normal case, do always want
@@ -482,6 +483,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 		pte = pte_mkwrite(pte);
 	return pte;
 }
+#endif
 
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
-- 
cgit v1.1


From d14fc1a74e846d7851f24fc9519fe87dc12a1231 Mon Sep 17 00:00:00 2001
From: Libor Pechacek <lpechacek@suse.cz>
Date: Fri, 14 Jan 2011 14:30:21 +0100
Subject: USB: serial: handle Data Carrier Detect changes

Alan's commit 335f8514f200e63d689113d29cb7253a5c282967 introduced
.carrier_raised function in several drivers.  That also means
tty_port_block_til_ready can now suspend the process trying to open the serial
port when Carrier Detect is low and put it into tty_port.open_wait queue.  We
need to wake up the process when Carrier Detect goes high and trigger TTY
hangup when CD goes low.

Some of the devices do not report modem status line changes, or at least we
don't understand the status message, so for those we remove .carrier_raised
again.

Signed-off-by: Libor Pechacek <lpechacek@suse.cz>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/serial.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 16d682f..c904913 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -347,6 +347,9 @@ extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
 extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port,
 					unsigned int ch);
 extern int usb_serial_handle_break(struct usb_serial_port *port);
+extern void usb_serial_handle_dcd_change(struct usb_serial_port *usb_port,
+					 struct tty_struct *tty,
+					 unsigned int status);
 
 
 extern int usb_serial_bus_register(struct usb_serial_driver *device);
-- 
cgit v1.1


From e94965ed5beb23c6fabf7ed31f625e66d7ff28de Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@vmware.com>
Date: Wed, 15 Dec 2010 14:00:19 -0800
Subject: module: show version information for built-in modules in sysfs

Currently only drivers that are built as modules have their versions
shown in /sys/module/<module_name>/version, but this information might
also be useful for built-in drivers as well. This especially important
for drivers that do not define any parameters - such drivers, if
built-in, are completely invisible from userspace.

This patch changes MODULE_VERSION() macro so that in case when we are
compiling built-in module, version information is stored in a separate
section. Kernel then uses this data to create 'version' sysfs attribute
in the same fashion it creates attributes for module parameters.

Signed-off-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/asm-generic/vmlinux.lds.h |  7 +++++++
 include/linux/module.h            | 27 +++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 6864933..6ebb810 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -364,6 +364,13 @@
 		VMLINUX_SYMBOL(__start___param) = .;			\
 		*(__param)						\
 		VMLINUX_SYMBOL(__stop___param) = .;			\
+	}								\
+									\
+	/* Built-in module versions. */					\
+	__modver : AT(ADDR(__modver) - LOAD_OFFSET) {			\
+		VMLINUX_SYMBOL(__start___modver) = .;			\
+		*(__modver)						\
+		VMLINUX_SYMBOL(__stop___modver) = .;			\
 		. = ALIGN((align));					\
 		VMLINUX_SYMBOL(__end_rodata) = .;			\
 	}								\
diff --git a/include/linux/module.h b/include/linux/module.h
index 8b17fd8..50efcd3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -58,6 +58,12 @@ struct module_attribute {
 	void (*free)(struct module *);
 };
 
+struct module_version_attribute {
+	struct module_attribute mattr;
+	const char *module_name;
+	const char *version;
+};
+
 struct module_kobject
 {
 	struct kobject kobj;
@@ -161,7 +167,28 @@ extern struct module __this_module;
   Using this automatically adds a checksum of the .c files and the
   local headers in "srcversion".
 */
+
+#ifdef MODULE
 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
+#else
+#define MODULE_VERSION(_version)					\
+	extern ssize_t __modver_version_show(struct module_attribute *,	\
+					     struct module *, char *);	\
+	static struct module_version_attribute __modver_version_attr	\
+	__used								\
+    __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \
+	= {								\
+		.mattr	= {						\
+			.attr	= {					\
+				.name	= "version",			\
+				.mode	= S_IRUGO,			\
+			},						\
+			.show	= __modver_version_show,		\
+		},							\
+		.module_name	= KBUILD_MODNAME,			\
+		.version	= _version,				\
+	}
+#endif
 
 /* Optional firmware file (or files) needed by the module
  * format is simply firmware file name.  Multiple firmware
-- 
cgit v1.1


From 3b90a5b292321b2acac3921f77046ae195aef53f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 24 Jan 2011 14:32:51 -0600
Subject: module: fix linker error for MODULE_VERSION when !MODULE and
 CONFIG_SYSFS=n

lib/built-in.o:(__modver+0x8): undefined reference to `__modver_version_show'
lib/built-in.o:(__modver+0x2c): undefined reference to `__modver_version_show'

Simplest to just not emit anything: if they've disabled SYSFS they probably
want the smallest kernel possible.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index 50efcd3..e7c6385 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -168,7 +168,7 @@ extern struct module __this_module;
   local headers in "srcversion".
 */
 
-#ifdef MODULE
+#if defined(MODULE) || !defined(CONFIG_SYSFS)
 #define MODULE_VERSION(_version) MODULE_INFO(version, _version)
 #else
 #define MODULE_VERSION(_version)					\
-- 
cgit v1.1


From b75be4204e7871869b2c268c00783703197aaa7d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Wed, 5 Jan 2011 13:27:04 +0100
Subject: param: add null statement to compiled-in module params

Add an unused struct declaration statement requiring a
terminating semicolon to the compile-in case to provoke an
error if __MODULE_INFO() is used without the terminating
semicolon. Previously MODULE_ALIAS("foo") (no semicolon)
compiled fine if MODULE was not selected.

Cc: Dan Carpenter <error27@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 112adf8..07b4195 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -16,15 +16,17 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
-#ifdef MODULE
 #define ___module_cat(a,b) __mod_ ## a ## b
 #define __module_cat(a,b) ___module_cat(a,b)
+#ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
 static const char __module_cat(name,__LINE__)[]				  \
   __used __attribute__((section(".modinfo"), unused, aligned(1)))	  \
   = __stringify(tag) "=" info
 #else  /* !MODULE */
-#define __MODULE_INFO(tag, name, info)
+/* This struct is here for syntactic coherency, it is not used */
+#define __MODULE_INFO(tag, name, info)					  \
+  struct __module_cat(name,__LINE__) {}
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
-- 
cgit v1.1


From 7ef88ad561457c0346355dfd1f53e503ddfde719 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 24 Jan 2011 14:45:10 -0600
Subject: BUILD_BUG_ON: make it handle more cases

BUILD_BUG_ON used to use the optimizer to do code elimination or fail
at link time; it was changed to first the size of a negative array (a
nicer compile time error), then (in
8c87df457cb58fe75b9b893007917cf8095660a0) to a bitfield.

This forced us to change some non-constant cases to MAYBE_BUILD_BUG_ON();
as Jan points out in that commit, it didn't work as intended anyway.

bitfields: needs a literal constant at parse time, and can't be put under
	"if (__builtin_constant_p(x))" for example.
negative array: can handle anything, but if the compiler can't tell it's
	a constant, silently has no effect.
link time: breaks link if the compiler can't determine the value, but the
	linker output is not usually as informative as a compiler error.

If we use the negative-array-size method *and* the link time trick,
we get the ability to use BUILD_BUG_ON() under __builtin_constant_p()
branches, and maximal ability for the compiler to detect errors at
build time.

We also document it thoroughly.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jan Beulich <JBeulich@novell.com>
Acked-by: Hollis Blanchard <hollisb@us.ibm.com>
---
 include/linux/kernel.h | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d07d805..864712f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -575,12 +575,6 @@ struct sysinfo {
 	char _f[20-2*sizeof(long)-sizeof(int)];	/* Padding: libc5 uses this.. */
 };
 
-/* Force a compilation error if condition is true */
-#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
-
-/* Force a compilation error if condition is constant and true */
-#define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
-
 /* Force a compilation error if a constant expression is not a power of 2 */
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
@@ -592,6 +586,33 @@ struct sysinfo {
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @cond: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but
+ * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
+ * to inline functions).  So as a fallback we use the optimizer; if it can't
+ * prove the condition is false, it will cause a link error on the undefined
+ * "__build_bug_on_failed".  This error message can be harder to track down
+ * though, hence the two different methods.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+extern int __build_bug_on_failed;
+#define BUILD_BUG_ON(condition)					\
+	do {							\
+		((void)sizeof(char[1 - 2*!!(condition)]));	\
+		if (condition) __build_bug_on_failed = 1;	\
+	} while(0)
+#endif
+#define MAYBE_BUILD_BUG_ON(condition) BUILD_BUG_ON(condition)
+
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
-- 
cgit v1.1


From 1765e3a4933ea0870fabd755feffc5473c4363ce Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 24 Jan 2011 14:45:10 -0600
Subject: Remove MAYBE_BUILD_BUG_ON

Now BUILD_BUG_ON() can handle optimizable constants, we don't need
MAYBE_BUILD_BUG_ON any more.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/gfp.h           | 2 +-
 include/linux/kernel.h        | 1 -
 include/linux/kmemcheck.h     | 2 +-
 include/linux/virtio_config.h | 5 ++++-
 4 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a3b148a..0b84c61 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -249,7 +249,7 @@ static inline enum zone_type gfp_zone(gfp_t flags)
 					 ((1 << ZONES_SHIFT) - 1);
 
 	if (__builtin_constant_p(bit))
-		MAYBE_BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
 	else {
 #ifdef CONFIG_DEBUG_VM
 		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 864712f..e2f4d6a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -611,7 +611,6 @@ extern int __build_bug_on_failed;
 		if (condition) __build_bug_on_failed = 1;	\
 	} while(0)
 #endif
-#define MAYBE_BUILD_BUG_ON(condition) BUILD_BUG_ON(condition)
 
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 08d7dc4..39f8453 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -76,7 +76,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
 									\
 		_n = (long) &((ptr)->name##_end)			\
 			- (long) &((ptr)->name##_begin);		\
-		MAYBE_BUILD_BUG_ON(_n < 0);				\
+		BUILD_BUG_ON(_n < 0);					\
 									\
 		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
 	} while (0)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 0093dd7..800617b 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -109,7 +109,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
 				      unsigned int fbit)
 {
 	/* Did you forget to fix assumptions on max features? */
-	MAYBE_BUILD_BUG_ON(fbit >= 32);
+	if (__builtin_constant_p(fbit))
+		BUILD_BUG_ON(fbit >= 32);
+	else
+		BUG_ON(fbit >= 32);
 
 	if (fbit < VIRTIO_TRANSPORT_F_START)
 		virtio_check_driver_offered_feature(vdev, fbit);
-- 
cgit v1.1


From 8c6a98b22b750c9eb52653ba643faa17db8d3881 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Mon, 24 Jan 2011 09:31:38 -0800
Subject: Input: sysrq - ensure sysrq_enabled and __sysrq_enabled are
 consistent

Currently sysrq_enabled and __sysrq_enabled are initialised separately
and inconsistently, leading to sysrq being actually enabled by reported
as not enabled in sysfs.  The first change to the sysfs configurable
synchronises these two:

    static int __read_mostly sysrq_enabled = 1;
    static int __sysrq_enabled;

Add a common define to carry the default for these preventing them becoming
out of sync again.  Default this to 1 to mirror previous behaviour.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Cc: stable@kernel.org
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/sysrq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 387fa7d..7faf933 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 
+/* Enable/disable SYSRQ support by default (0==no, 1==yes). */
+#define SYSRQ_DEFAULT_ENABLE	1
+
 /* Possible values of bitmask for enabling sysrq functions */
 /* 0x0001 is reserved for enable everything */
 #define SYSRQ_ENABLE_LOG	0x0002
-- 
cgit v1.1


From ac61c46f4f7665ab4548e90430c37b2529e16cff Mon Sep 17 00:00:00 2001
From: David Dillow <dillowda@ornl.gov>
Date: Sun, 16 Jan 2011 15:12:39 -0500
Subject: [SCSI] fix incorrect value of SCSI_MAX_SG_CHAIN_SEGMENTS due to
 include file ordering

If the compiled object doesn't include linux/scatterlist.h before
scsi/scsi.h, it will get an incorrect definition of
SCSI_MAX_SG_CHAIN_SEGMENTS.

Signed-off-by: David Dillow <dillowda@ornl.gov>
Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/scsi/scsi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 648d233..b76d400 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -9,6 +9,7 @@
 #define _SCSI_SCSI_H
 
 #include <linux/types.h>
+#include <linux/scatterlist.h>
 
 struct scsi_cmnd;
 
-- 
cgit v1.1


From 58bbf018a70c562437eeae121a5d021ba7fe56a5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Mon, 24 Jan 2011 17:14:26 -0500
Subject: drm/radeon/kms: add new radeon_info ioctl query for clock crystal
 freq

Needed for timer queries in the 3D driver.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@gmail.com>
---
 include/drm/radeon_drm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index e95a86b..e5c607a 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -907,6 +907,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_TILING_CONFIG	0x06
 #define RADEON_INFO_WANT_HYPERZ		0x07
 #define RADEON_INFO_WANT_CMASK		0x08 /* get access to CMASK on r300 */
+#define RADEON_INFO_CLOCK_CRYSTAL_FREQ	0x09 /* clock crystal frequency */
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
cgit v1.1


From eb03355660b44cf6b1ed2f895085b9de8f74efbc Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 24 Jan 2011 15:11:08 +0000
Subject: drm: Add an interface to reset the device

Iterate over the attached CRTCs, encoders and connectors and call the
supplied reset vfunc in order to reset any cached state back to unknown.
Useful after an invalidation event such as a GPU reset or resuming.

Tested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm/drm_crtc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index acd7fad..801be59 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -275,6 +275,7 @@ struct drm_pending_vblank_event;
 
 /**
  * drm_crtc_funcs - control CRTCs for a given device
+ * @reset: reset CRTC after state has been invalidate (e.g. resume)
  * @dpms: control display power levels
  * @save: save CRTC state
  * @resore: restore CRTC state
@@ -302,6 +303,8 @@ struct drm_crtc_funcs {
 	void (*save)(struct drm_crtc *crtc); /* suspend? */
 	/* Restore CRTC state */
 	void (*restore)(struct drm_crtc *crtc); /* resume? */
+	/* Reset CRTC state */
+	void (*reset)(struct drm_crtc *crtc);
 
 	/* cursor controls */
 	int (*cursor_set)(struct drm_crtc *crtc, struct drm_file *file_priv,
@@ -379,6 +382,7 @@ struct drm_crtc {
  * @dpms: set power state (see drm_crtc_funcs above)
  * @save: save connector state
  * @restore: restore connector state
+ * @reset: reset connector after state has been invalidate (e.g. resume)
  * @mode_valid: is this mode valid on the given connector?
  * @mode_fixup: try to fixup proposed mode for this connector
  * @mode_set: set this mode
@@ -396,6 +400,7 @@ struct drm_connector_funcs {
 	void (*dpms)(struct drm_connector *connector, int mode);
 	void (*save)(struct drm_connector *connector);
 	void (*restore)(struct drm_connector *connector);
+	void (*reset)(struct drm_connector *connector);
 
 	/* Check to see if anything is attached to the connector.
 	 * @force is set to false whilst polling, true when checking the
@@ -413,6 +418,7 @@ struct drm_connector_funcs {
 };
 
 struct drm_encoder_funcs {
+	void (*reset)(struct drm_encoder *encoder);
 	void (*destroy)(struct drm_encoder *encoder);
 };
 
@@ -656,6 +662,7 @@ extern struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
 						   struct drm_display_mode *mode);
 extern void drm_mode_debug_printmodeline(struct drm_display_mode *mode);
 extern void drm_mode_config_init(struct drm_device *dev);
+extern void drm_mode_config_reset(struct drm_device *dev);
 extern void drm_mode_config_cleanup(struct drm_device *dev);
 extern void drm_mode_set_name(struct drm_display_mode *mode);
 extern bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2);
-- 
cgit v1.1


From 731f3f482ad3b2c58a1af2d0a9a634a82803706a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 21 Jan 2011 03:05:28 +0000
Subject: NFS: nfsacl_{encode,decode} should return signed integer

Clean up.

The nfsacl_encode() and nfsacl_decode() functions return negative
errno values, and each call site verifies that the returned value
is not negative.  Change the synopsis of both of these functions
to reflect this usage.

Document the synopsis and return values.

Reported-by: Trond Myklebust <trond.myklebust@netapp.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfsacl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
index f321b57..fabcb1e 100644
--- a/include/linux/nfsacl.h
+++ b/include/linux/nfsacl.h
@@ -51,10 +51,10 @@ nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default)
 	return w;
 }
 
-extern unsigned int
+extern int
 nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
 	      struct posix_acl *acl, int encode_entries, int typeflag);
-extern unsigned int
+extern int
 nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
 	      struct posix_acl **pacl);
 
-- 
cgit v1.1


From f61f6da0d53842e849bab7f69e1431bd3de1136d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 21 Jan 2011 03:05:38 +0000
Subject: NFS: Prevent memory allocation failure in nfsacl_encode()

nfsacl_encode() allocates memory in certain cases.  This of course
is not guaranteed to work.

Since commit 9f06c719 "SUNRPC: New xdr_streams XDR encoder API", the
kernel's XDR encoders can't return a result indicating possibly a
failure, so a memory allocation failure in nfsacl_encode() has become
fatal (ie, the XDR code Oopses) in some cases.

However, the allocated memory is a tiny fixed amount, on the order
of 40-50 bytes.  We can easily use a stack-allocated buffer for
this, with only a wee bit of nose-holding.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/posix_acl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index d68283a..54211c1 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -71,6 +71,7 @@ posix_acl_release(struct posix_acl *acl)
 
 /* posix_acl.c */
 
+extern void posix_acl_init(struct posix_acl *, int);
 extern struct posix_acl *posix_acl_alloc(int, gfp_t);
 extern struct posix_acl *posix_acl_clone(const struct posix_acl *, gfp_t);
 extern int posix_acl_valid(const struct posix_acl *);
-- 
cgit v1.1


From 778be232a207e79088ba70d832ac25dfea6fbf1a Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Tue, 25 Jan 2011 15:38:01 +0000
Subject: NFS do not find client in NFSv4 pg_authenticate

The information required to find the nfs_client cooresponding to the incoming
back channel request is contained in the NFS layer. Perform minimal checking
in the RPC layer pg_authenticate method, and push more detailed checking into
the NFS layer where the nfs_client can be found.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/bc_xprt.h  | 13 -------------
 include/linux/sunrpc/svc_xprt.h |  1 -
 2 files changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index c50b458..0828842 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -47,14 +47,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 		return 1;
 	return 0;
 }
-static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
-{
-	if (svc_is_backchannel(rqstp))
-		return (struct nfs4_sessionid *)
-			rqstp->rq_server->sv_bc_xprt->xpt_bc_sid;
-	return NULL;
-}
-
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
@@ -67,11 +59,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 	return 0;
 }
 
-static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
-{
-	return NULL;
-}
-
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 }
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 059877b..7ad9751 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -77,7 +77,6 @@ struct svc_xprt {
 	size_t			xpt_remotelen;	/* length of address */
 	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct list_head	xpt_users;	/* callbacks on free */
-	void			*xpt_bc_sid;	/* back channel session ID */
 
 	struct net		*xpt_net;
 	struct rpc_xprt		*xpt_bc_xprt;	/* NFSv4.1 backchannel */
-- 
cgit v1.1


From ac751efa6a0d70f2c9daef5c7e3a92270f5c2dff Mon Sep 17 00:00:00 2001
From: Torben Hohn <torbenh@gmx.de>
Date: Tue, 25 Jan 2011 15:07:35 -0800
Subject: console: rename acquire/release_console_sem() to
 console_lock/unlock()

The -rt patches change the console_semaphore to console_mutex.  As a
result, a quite large chunk of the patches changes all
acquire/release_console_sem() to acquire/release_console_mutex()

This commit makes things use more neutral function names which dont make
implications about the underlying lock.

The only real change is the return value of console_trylock which is
inverted from try_acquire_console_sem()

This patch also paves the way to switching console_sem from a semaphore to
a mutex.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: make console_trylock return 1 on success, per Geert]
Signed-off-by: Torben Hohn <torbenh@gmx.de>
Cc: Thomas Gleixner <tglx@tglx.de>
Cc: Greg KH <gregkh@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/console.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/console.h b/include/linux/console.h
index 9774fe6..7453cfd 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -139,9 +139,9 @@ extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_n
 extern void register_console(struct console *);
 extern int unregister_console(struct console *);
 extern struct console *console_drivers;
-extern void acquire_console_sem(void);
-extern int try_acquire_console_sem(void);
-extern void release_console_sem(void);
+extern void console_lock(void);
+extern int console_trylock(void);
+extern void console_unlock(void);
 extern void console_conditional_schedule(void);
 extern void console_unblank(void);
 extern struct tty_driver *console_device(int *);
-- 
cgit v1.1


From 52fe7c9cc1637110ba4e0e6fe5d07cc0786d62de Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Sat, 29 Jan 2011 13:10:37 +0000
Subject: caif: bugfix - add caif headers for userspace usage.

Add caif_socket.h and if_caif.h to the kernel header files
exported for use by userspace.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      | 1 +
 include/linux/caif/Kbuild | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 include/linux/caif/Kbuild

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2296d8b..b0ada6f 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -1,5 +1,6 @@
 header-y += byteorder/
 header-y += can/
+header-y += caif/
 header-y += dvb/
 header-y += hdlc/
 header-y += isdn/
diff --git a/include/linux/caif/Kbuild b/include/linux/caif/Kbuild
new file mode 100644
index 0000000..a9cf250
--- /dev/null
+++ b/include/linux/caif/Kbuild
@@ -0,0 +1,2 @@
+header-y += caif_socket.h
+header-y += if_caif.h
-- 
cgit v1.1


From 709b46e8d90badda1898caea50483c12af178e96 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 29 Jan 2011 16:15:56 +0000
Subject: net: Add compat ioctl support for the ipv4 multicast ioctl
 SIOCGETSGCNT

SIOCGETSGCNT is not a unique ioctl value as it it maps tio SIOCPROTOPRIVATE +1,
which unfortunately means the existing infrastructure for compat networking
ioctls is insufficient.  A trivial compact ioctl implementation would conflict
with:

SIOCAX25ADDUID
SIOCAIPXPRISLT
SIOCGETSGCNT_IN6
SIOCGETSGCNT
SIOCRSSCAUSE
SIOCX25SSUBSCRIP
SIOCX25SDTEFACILITIES

To make this work I have updated the compat_ioctl decode path to mirror the
the normal ioctl decode path.  I have added an ipv4 inet_compat_ioctl function
so that I can have ipv4 specific compat ioctls.   I have added a compat_ioctl
function into struct proto so I can break out ioctls by which kind of ip socket
I am using.  I have added a compat_raw_ioctl function because SIOCGETSGCNT only
works on raw sockets.  I have added a ipmr_compat_ioctl that mirrors the normal
ipmr_ioctl.

This was necessary because unfortunately the struct layout for the SIOCGETSGCNT
has unsigned longs in it so changes between 32bit and 64bit kernels.

This change was sufficient to run a 32bit ip multicast routing daemon on a
64bit kernel.

Reported-by: Bill Fenner <fenner@aristanetworks.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 1 +
 include/net/sock.h     | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 0fa7a3a..b21d567 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -150,6 +150,7 @@ static inline int ip_mroute_opt(int opt)
 extern int ip_mroute_setsockopt(struct sock *, int, char __user *, unsigned int);
 extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
+extern int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
 extern int ip_mr_init(void);
 #else
 static inline
diff --git a/include/net/sock.h b/include/net/sock.h
index d884d26..bc1cf7d8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -753,6 +753,8 @@ struct proto {
 					int level,
 					int optname, char __user *optval,
 					int __user *option);
+	int			(*compat_ioctl)(struct sock *sk,
+					unsigned int cmd, unsigned long arg);
 #endif
 	int			(*sendmsg)(struct kiocb *iocb, struct sock *sk,
 					   struct msghdr *msg, size_t len);
-- 
cgit v1.1


From 78c6e170badd22c86a5b50a7eb038a02024b8f03 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 31 Jan 2011 10:48:04 +0000
Subject: drm/i915: Suppress spurious vblank interrupts

Hugh Dickins found that characters in xterm were going missing and oft
delayed. Being the curious type, he managed to associate this with the
new high-precision vblank patches; disabling these he found, restored
the orderliness of his characters.

The oddness begins when one realised that Hugh was not using vblanks at
all on his system (fvwm and some xterms). Instead, all he had to go on
were warning of a pipe underrun, curiously enough at around 60Hz. He
poked and found that in addition to the underrun warning, the hardware
was flagging the start of a new frame, a vblank, which in turn was
kicking off the pending vblank processing code.

There is little we can do for the underruns on Hugh's machine, a
Crestline [965GM], which must have its FIFO watermarks set to 8.
However, we do not need to process the vblank if we know that they are
disabled...

Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm/drmP.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a4694c6..fe29aad 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1367,7 +1367,7 @@ extern int drm_vblank_wait(struct drm_device *dev, unsigned int *vbl_seq);
 extern u32 drm_vblank_count(struct drm_device *dev, int crtc);
 extern u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 				     struct timeval *vblanktime);
-extern void drm_handle_vblank(struct drm_device *dev, int crtc);
+extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern void drm_vblank_off(struct drm_device *dev, int crtc);
-- 
cgit v1.1


From ffbbf2da9e578dc7b7ae4f945412c4b74f54b20e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 31 Jan 2011 09:29:14 -0800
Subject: kernel.h: fix kernel-doc warning

Fix kernel-doc warning in kernel.h from commit 7ef88ad56145
("BUILD_BUG_ON: make it handle more cases"):

  Warning(include/linux/kernel.h:605): No description found for parameter 'condition'
  Warning(include/linux/kernel.h:605): Excess function parameter 'cond' description in 'BUILD_BUG_ON'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e2f4d6a..2fe6e84 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -588,7 +588,7 @@ struct sysinfo {
 
 /**
  * BUILD_BUG_ON - break compile if a condition is true.
- * @cond: the condition which the compiler should know is false.
+ * @condition: the condition which the compiler should know is false.
  *
  * If you have some code which relies on certain constants being equal, or
  * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
-- 
cgit v1.1


From 3db7e93d3308fb882884b9f024235d6fbf542034 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 1 Feb 2011 16:06:30 +0100
Subject: netfilter: ecache: always set events bits, filter them later

For the following rule:

iptables -I PREROUTING -t raw -j CT --ctevents assured

The event delivered looks like the following:

 [UPDATE] tcp      6 src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Note that the TCP protocol state is not included. For that reason
the CT event filtering is not very useful for conntrackd.

To resolve this issue, instead of conditionally setting the CT events
bits based on the ctmask, we always set them and perform the filtering
in the late stage, just before the delivery.

Thus, the event delivered looks like the following:

 [UPDATE] tcp      6 432000 ESTABLISHED src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_ecache.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 96ba5f7..349cefe 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -77,9 +77,6 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	if (e == NULL)
 		return;
 
-	if (!(e->ctmask & (1 << event)))
-		return;
-
 	set_bit(event, &e->cache);
 }
 
-- 
cgit v1.1


From 63a507800c8aca5a1891d598ae13f829346e8e39 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeucher@gmail.com>
Date: Tue, 1 Feb 2011 19:06:46 -0500
Subject: drm/radeon: remove 0x4243 pci id

0x4243 is a PCI bridge, not a GPU.

Fixes:
https://bugs.freedesktop.org/show_bug.cgi?id=33815

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index fe29ae3..5ff1194 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -28,7 +28,6 @@
 	{0x1002, 0x4156, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV350}, \
 	{0x1002, 0x4237, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS200|RADEON_IS_IGP}, \
 	{0x1002, 0x4242, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R200}, \
-	{0x1002, 0x4243, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R200}, \
 	{0x1002, 0x4336, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS100|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x4337, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS200|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x4437, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS200|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \
-- 
cgit v1.1


From 19942822df65ee4a47c2e6d6d70cace1b7f01710 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 1 Feb 2011 15:52:43 -0800
Subject: memcg: prevent endless loop when charging huge pages to near-limit
 group

If reclaim after a failed charging was unsuccessful, the limits are
checked again, just in case they settled by means of other tasks.

This is all fine as long as every charge is of size PAGE_SIZE, because in
that case, being below the limit means having at least PAGE_SIZE bytes
available.

But with transparent huge pages, we may end up in an endless loop where
charging and reclaim fail, but we keep going because the limits are not
yet exceeded, although not allowing for a huge page.

Fix this up by explicitely checking for enough room, not just whether we
are within limits.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index fcb9884..a5930cb 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -182,6 +182,26 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt)
 	return ret;
 }
 
+/**
+ * res_counter_check_margin - check if the counter allows charging
+ * @cnt: the resource counter to check
+ * @bytes: the number of bytes to check the remaining space against
+ *
+ * Returns a boolean value on whether the counter can be charged
+ * @bytes or whether this would exceed the limit.
+ */
+static inline bool res_counter_check_margin(struct res_counter *cnt,
+					    unsigned long bytes)
+{
+	bool ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cnt->lock, flags);
+	ret = cnt->limit - cnt->usage >= bytes;
+	spin_unlock_irqrestore(&cnt->lock, flags);
+	return ret;
+}
+
 static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
 {
 	bool ret;
-- 
cgit v1.1


From 1a44bc8c7cfe69756a116d38aef992d50fc1969d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 1 Feb 2011 15:52:46 -0800
Subject: vfs: sparse: remove a warning on OPEN_FMODE()

AND-ing FMODE_* constant with normal integer results in following
sparse warnings. Fix it.

 fs/open.c:662:21: warning: restricted fmode_t degrades to integer
 fs/anon_inodes.c:123:34: warning: restricted fmode_t degrades to integer

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 32b38cd..675678a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2555,9 +2555,11 @@ int proc_nr_inodes(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
 
+#define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
+
 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
-					    (flag & FMODE_NONOTIFY)))
+					    (flag & __FMODE_NONOTIFY)))
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
-- 
cgit v1.1


From 3cd90ea42f2c15f928b70ed66f6d8ed0a8e7aadd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Tue, 1 Feb 2011 15:52:46 -0800
Subject: vfs: sparse: add __FMODE_EXEC

FMODE_EXEC is a constant type of fmode_t but was used with normal integer
constants.  This results in following warnings from sparse.  Fix it using
new macro __FMODE_EXEC.

 fs/exec.c:116:58: warning: restricted fmode_t degrades to integer
 fs/exec.c:689:58: warning: restricted fmode_t degrades to integer
 fs/fcntl.c:777:9: warning: restricted fmode_t degrades to integer

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 675678a..bd32159 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2555,6 +2555,7 @@ int proc_nr_inodes(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
 
+#define __FMODE_EXEC		((__force int) FMODE_EXEC)
 #define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
 
 #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
-- 
cgit v1.1


From e4a9ea5ee7c8812a7bf0c3fb725ceeaa3d4c2fcc Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Thu, 27 Jan 2011 09:15:30 -0500
Subject: tracing: Replace trace_event struct array with pointer array

Currently the trace_event structures are placed in the _ftrace_events
section, and at link time, the linker makes one large array of all
the trace_event structures. On boot up, this array is read (much like
the initcall sections) and the events are processed.

The problem is that there is no guarantee that gcc will place complex
structures nicely together in an array format. Two structures in the
same file may be placed awkwardly, because gcc has no clue that they
are suppose to be in an array.

A hack was used previous to force the alignment to 4, to pack the
structures together. But this caused alignment issues with other
architectures (sparc).

Instead of packing the structures into an array, the structures' addresses
are now put into the _ftrace_event section. As pointers are always the
natural alignment, gcc should always pack them tightly together
(otherwise initcall, extable, etc would also fail).

By having the pointers to the structures in the section, we can still
iterate the trace_events without causing unnecessary alignment problems
with other architectures, or depending on the current behaviour of
gcc that will likely change in the future just to tick us kernel developers
off a little more.

The _ftrace_event section is also moved into the .init.data section
as it is now only needed at boot up.

Suggested-by: David Miller <davem@davemloft.net>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h |  7 +++----
 include/linux/module.h            |  2 +-
 include/linux/syscalls.h          | 10 ++++++----
 include/trace/ftrace.h            | 24 +++++++++++++-----------
 4 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 6ebb810..f53708b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -124,7 +124,8 @@
 #endif
 
 #ifdef CONFIG_EVENT_TRACING
-#define FTRACE_EVENTS()	VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
+#define FTRACE_EVENTS()	. = ALIGN(8);					\
+			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
 			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
 #else
@@ -179,9 +180,6 @@
 	TRACE_PRINTKS()							\
 									\
 	STRUCT_ALIGN();							\
-	FTRACE_EVENTS()							\
-									\
-	STRUCT_ALIGN();							\
 	TRACE_SYSCALLS()
 
 /*
@@ -482,6 +480,7 @@
 	KERNEL_CTORS()							\
 	*(.init.rodata)							\
 	MCOUNT_REC()							\
+	FTRACE_EVENTS()							\
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
diff --git a/include/linux/module.h b/include/linux/module.h
index e7c6385..7695a30 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -389,7 +389,7 @@ struct module
 	unsigned int num_trace_bprintk_fmt;
 #endif
 #ifdef CONFIG_EVENT_TRACING
-	struct ftrace_event_call *trace_events;
+	struct ftrace_event_call **trace_events;
 	unsigned int num_trace_events;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 18cd068..45508fe 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -128,28 +128,30 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
 	static struct ftrace_event_call __used				\
-	  __attribute__((__aligned__(4)))				\
-	  __attribute__((section("_ftrace_events")))			\
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
 		.class			= &event_class_syscall_enter,	\
 		.event.funcs            = &enter_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
 	};								\
+	static struct ftrace_event_call __used				\
+	  __attribute__((section("_ftrace_events")))			\
+	 *__event_enter_##sname = &event_enter_##sname;			\
 	__TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static struct syscall_metadata					\
 	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
 	static struct ftrace_event_call __used				\
-	  __attribute__((__aligned__(4)))				\
-	  __attribute__((section("_ftrace_events")))			\
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
 		.class			= &event_class_syscall_exit,	\
 		.event.funcs		= &exit_syscall_print_funcs,	\
 		.data			= (void *)&__syscall_meta_##sname,\
 	};								\
+	static struct ftrace_event_call __used				\
+	  __attribute__((section("_ftrace_events")))			\
+	*__event_exit_##sname = &event_exit_##sname;			\
 	__TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)				\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index e16610c..3e68366 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -446,14 +446,16 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *	.reg			= ftrace_event_reg,
  * };
  *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
+ * static struct ftrace_event_call event_<call> = {
  *	.name			= "<call>",
  *	.class			= event_class_<template>,
  *	.event			= &ftrace_event_type_<call>,
  *	.print_fmt		= print_fmt_<call>,
  * };
+ * // its only safe to use pointers when doing linker tricks to
+ * // create an array.
+ * static struct ftrace_event_call __used
+ * __attribute__((section("_ftrace_events"))) *__event_<call> = &event_<call>;
  *
  */
 
@@ -579,28 +581,28 @@ static struct ftrace_event_class __used event_class_##call = {		\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
 									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
+static struct ftrace_event_call __used event_##call = {			\
 	.name			= #call,				\
 	.class			= &event_class_##template,		\
 	.event.funcs		= &ftrace_event_type_funcs_##template,	\
 	.print_fmt		= print_fmt_##template,			\
-};
+};									\
+static struct ftrace_event_call __used					\
+__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 									\
 static const char print_fmt_##call[] = print;				\
 									\
-static struct ftrace_event_call __used					\
-__attribute__((__aligned__(4)))						\
-__attribute__((section("_ftrace_events"))) event_##call = {		\
+static struct ftrace_event_call __used event_##call = {			\
 	.name			= #call,				\
 	.class			= &event_class_##template,		\
 	.event.funcs		= &ftrace_event_type_funcs_##call,	\
 	.print_fmt		= print_fmt_##call,			\
-}
+};									\
+static struct ftrace_event_call __used					\
+__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-- 
cgit v1.1


From 654986462939cd7ec18f276c6379a334dac106a7 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Wed, 26 Jan 2011 17:26:22 -0500
Subject: tracepoints: Fix section alignment using pointer array

Make the tracepoints more robust, making them solid enough to handle compiler
changes by not relying on anything based on compiler-specific behavior with
respect to structure alignment. Implement an approach proposed by David Miller:
use an array of const pointers to refer to the individual structures, and export
this pointer array through the linker script rather than the structures per se.
It will consume 32 extra bytes per tracepoint (24 for structure padding and 8
for the pointers), but are less likely to break due to compiler changes.

History:

commit 7e066fb8 tracepoints: add DECLARE_TRACE() and DEFINE_TRACE()
added the aligned(32) type and variable attribute to the tracepoint structures
to deal with gcc happily aligning statically defined structures on 32-byte
multiples.

One attempt was to use a 8-byte alignment for tracepoint structures by applying
both the variable and type attribute to tracepoint structures definitions and
declarations. It worked fine with gcc 4.5.1, but broke with gcc 4.4.4 and 4.4.5.

The reason is that the "aligned" attribute only specify the _minimum_ alignment
for a structure, leaving both the compiler and the linker free to align on
larger multiples. Because tracepoint.c expects the structures to be placed as an
array within each section, up-alignment cause NULL-pointer exceptions due to the
extra unexpected padding.

(this patch applies on top of -tip)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: David S. Miller <davem@davemloft.net>
LKML-Reference: <20110126222622.GA10794@Krystal>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h |  8 +++++---
 include/linux/module.h            |  2 +-
 include/linux/tracepoint.h        | 35 ++++++++++++++++++++---------------
 3 files changed, 26 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f53708b..57b1b68 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -166,10 +166,8 @@
 	CPU_KEEP(exit.data)						\
 	MEM_KEEP(init.data)						\
 	MEM_KEEP(exit.data)						\
-	. = ALIGN(32);							\
-	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
+	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
-	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\
 	/* implement dynamic printk debug */				\
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___verbose) = .;                          \
@@ -218,6 +216,10 @@
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
+		. = ALIGN(8);						\
+		VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;		\
+		*(__tracepoints_ptrs)	/* Tracepoints: pointer array */\
+		VMLINUX_SYMBOL(__stop___tracepoints_ptrs) = .;		\
 		*(__markers_strings)	/* Markers: strings */		\
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
diff --git a/include/linux/module.h b/include/linux/module.h
index 7695a30..9bdf27c 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -377,7 +377,7 @@ struct module
 	   keeping pointers to this stuff */
 	char *args;
 #ifdef CONFIG_TRACEPOINTS
-	struct tracepoint *tracepoints;
+	struct tracepoint * const *tracepoints_ptrs;
 	unsigned int num_tracepoints;
 #endif
 #ifdef HAVE_JUMP_LABEL
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c681461..97c84a5 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -33,12 +33,7 @@ struct tracepoint {
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
 	struct tracepoint_func __rcu *funcs;
-} __attribute__((aligned(32)));		/*
-					 * Aligned on 32 bytes because it is
-					 * globally visible and gcc happily
-					 * align these on the structure size.
-					 * Keep in sync with vmlinux.lds.h.
-					 */
+};
 
 /*
  * Connect a probe to a tracepoint.
@@ -61,15 +56,15 @@ extern void tracepoint_probe_update_all(void);
 
 struct tracepoint_iter {
 	struct module *module;
-	struct tracepoint *tracepoint;
+	struct tracepoint * const *tracepoint;
 };
 
 extern void tracepoint_iter_start(struct tracepoint_iter *iter);
 extern void tracepoint_iter_next(struct tracepoint_iter *iter);
 extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
 extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
-extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
-	struct tracepoint *begin, struct tracepoint *end);
+extern int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
+	struct tracepoint * const *begin, struct tracepoint * const *end);
 
 /*
  * tracepoint_synchronize_unregister must be called between the last tracepoint
@@ -84,11 +79,13 @@ static inline void tracepoint_synchronize_unregister(void)
 #define PARAMS(args...) args
 
 #ifdef CONFIG_TRACEPOINTS
-extern void tracepoint_update_probe_range(struct tracepoint *begin,
-	struct tracepoint *end);
+extern
+void tracepoint_update_probe_range(struct tracepoint * const *begin,
+	struct tracepoint * const *end);
 #else
-static inline void tracepoint_update_probe_range(struct tracepoint *begin,
-	struct tracepoint *end)
+static inline
+void tracepoint_update_probe_range(struct tracepoint * const *begin,
+	struct tracepoint * const *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
 
@@ -174,12 +171,20 @@ do_trace:								\
 	{								\
 	}
 
+/*
+ * We have no guarantee that gcc and the linker won't up-align the tracepoint
+ * structures, so we create an array of pointers that will be used for iteration
+ * on the tracepoints.
+ */
 #define DEFINE_TRACE_FN(name, reg, unreg)				\
 	static const char __tpstrtab_##name[]				\
 	__attribute__((section("__tracepoints_strings"))) = #name;	\
 	struct tracepoint __tracepoint_##name				\
-	__attribute__((section("__tracepoints"), aligned(32))) =	\
-		{ __tpstrtab_##name, 0, reg, unreg, NULL }
+	__attribute__((section("__tracepoints"))) =			\
+		{ __tpstrtab_##name, 0, reg, unreg, NULL };		\
+	static struct tracepoint * const __tracepoint_ptr_##name __used	\
+	__attribute__((section("__tracepoints_ptrs"))) =		\
+		&__tracepoint_##name;
 
 #define DEFINE_TRACE(name)						\
 	DEFINE_TRACE_FN(name, NULL, NULL);
-- 
cgit v1.1


From 3d56e331b6537671c66f1b510bed0f1e0331dfc8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 2 Feb 2011 17:06:09 -0500
Subject: tracing: Replace syscall_meta_data struct array with pointer array

Currently the syscall_meta structures for the syscall tracepoints are
placed in the __syscall_metadata section, and at link time, the linker
makes one large array of all these syscall metadata structures. On boot
up, this array is read (much like the initcall sections) and the syscall
data is processed.

The problem is that there is no guarantee that gcc will place complex
structures nicely together in an array format. Two structures in the
same file may be placed awkwardly, because gcc has no clue that they
are suppose to be in an array.

A hack was used previous to force the alignment to 4, to pack the
structures together. But this caused alignment issues with other
architectures (sparc).

Instead of packing the structures into an array, the structures' addresses
are now put into the __syscall_metadata section. As pointers are always the
natural alignment, gcc should always pack them tightly together
(otherwise initcall, extable, etc would also fail).

By having the pointers to the structures in the section, we can still
iterate the trace_events without causing unnecessary alignment problems
with other architectures, or depending on the current behaviour of
gcc that will likely change in the future just to tick us kernel developers
off a little more.

The __syscall_metadata section is also moved into the .init.data section
as it is now only needed at boot up.

Suggested-by: David Miller <davem@davemloft.net>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h |  9 ++++-----
 include/linux/syscalls.h          | 18 +++++++++---------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 57b1b68..fe77e33 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -141,7 +141,8 @@
 #endif
 
 #ifdef CONFIG_FTRACE_SYSCALLS
-#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .;	\
+#define TRACE_SYSCALLS() . = ALIGN(8);					\
+			 VMLINUX_SYMBOL(__start_syscalls_metadata) = .;	\
 			 *(__syscalls_metadata)				\
 			 VMLINUX_SYMBOL(__stop_syscalls_metadata) = .;
 #else
@@ -175,10 +176,7 @@
 	VMLINUX_SYMBOL(__stop___verbose) = .;				\
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
-	TRACE_PRINTKS()							\
-									\
-	STRUCT_ALIGN();							\
-	TRACE_SYSCALLS()
+	TRACE_PRINTKS()
 
 /*
  * Data section helpers
@@ -483,6 +481,7 @@
 	*(.init.rodata)							\
 	MCOUNT_REC()							\
 	FTRACE_EVENTS()							\
+	TRACE_SYSCALLS()						\
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 45508fe..98664db 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -125,8 +125,7 @@ extern struct trace_event_functions enter_syscall_print_funcs;
 extern struct trace_event_functions exit_syscall_print_funcs;
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
-	static struct syscall_metadata					\
-	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
+	static struct syscall_metadata __syscall_meta_##sname;		\
 	static struct ftrace_event_call __used				\
 	  event_enter_##sname = {					\
 		.name                   = "sys_enter"#sname,		\
@@ -140,8 +139,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	__TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
-	static struct syscall_metadata					\
-	__attribute__((__aligned__(4))) __syscall_meta_##sname;		\
+	static struct syscall_metadata __syscall_meta_##sname;		\
 	static struct ftrace_event_call __used				\
 	  event_exit_##sname = {					\
 		.name                   = "sys_exit"#sname,		\
@@ -158,8 +156,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 	SYSCALL_TRACE_ENTER_EVENT(sname);			\
 	SYSCALL_TRACE_EXIT_EVENT(sname);			\
 	static struct syscall_metadata __used			\
-	  __attribute__((__aligned__(4)))			\
-	  __attribute__((section("__syscalls_metadata")))	\
 	  __syscall_meta_##sname = {				\
 		.name 		= "sys"#sname,			\
 		.nb_args 	= nb,				\
@@ -168,14 +164,15 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.enter_event	= &event_enter_##sname,		\
 		.exit_event	= &event_exit_##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
-	};
+	};							\
+	static struct syscall_metadata __used			\
+	  __attribute__((section("__syscalls_metadata")))	\
+	 *__p_syscall_meta_##sname = &__syscall_meta_##sname;
 
 #define SYSCALL_DEFINE0(sname)					\
 	SYSCALL_TRACE_ENTER_EVENT(_##sname);			\
 	SYSCALL_TRACE_EXIT_EVENT(_##sname);			\
 	static struct syscall_metadata __used			\
-	  __attribute__((__aligned__(4)))			\
-	  __attribute__((section("__syscalls_metadata")))	\
 	  __syscall_meta__##sname = {				\
 		.name 		= "sys_"#sname,			\
 		.nb_args 	= 0,				\
@@ -183,6 +180,9 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 		.exit_event	= &event_exit__##sname,		\
 		.enter_fields	= LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
 	};							\
+	static struct syscall_metadata __used			\
+	  __attribute__((section("__syscalls_metadata")))	\
+	 *__p_syscall_meta_##sname = &__syscall_meta__##sname;	\
 	asmlinkage long sys_##sname(void)
 #else
 #define SYSCALL_DEFINE0(name)	   asmlinkage long sys_##name(void)
-- 
cgit v1.1


From e2d57766e6744f2956975dd2086d82957187b0f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 3 Feb 2011 17:59:32 -0800
Subject: net: Provide compat support for SIOCGETMIFCNT_IN6 and
 SIOCGETSGCNT_IN6.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 6091ab7..9d2deb2 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -136,6 +136,7 @@ extern int ip6_mroute_setsockopt(struct sock *, int, char __user *, unsigned int
 extern int ip6_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
 extern int ip6_mr_input(struct sk_buff *skb);
 extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
+extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
 extern int ip6_mr_init(void);
 extern void ip6_mr_cleanup(void);
 #else
-- 
cgit v1.1


From 38db9e1db1c91c953b2a539130257ce91533c9f6 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 28 Jan 2011 05:43:40 +0000
Subject: include/net/genetlink.h: Allow genlmsg_cancel to accept a NULL
 argument

nlmsg_cancel can accept NULL as its second argument, so for similarity,
this patch extends genlmsg_cancel to be able to accept a NULL second
argument as well.

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 8a64b81..b4c7c1c 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -195,7 +195,8 @@ static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
  */
 static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
 {
-	nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+	if (hdr)
+		nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
 }
 
 /**
-- 
cgit v1.1


From 8cf28f1f4de58c70e6af657bb46ca8f304c073d4 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <pkondeti@codeaurora.org>
Date: Fri, 4 Feb 2011 10:08:18 +0530
Subject: USB: Fix trout build failure with ci13xxx_msm gadget

This patch fixes the below compilation errors.

  CC      drivers/usb/gadget/ci13xxx_msm.o
  CC      net/mac80211/led.o
  drivers/usb/gadget/ci13xxx_msm.c: In function 'ci13xxx_msm_notify_event':
  drivers/usb/gadget/ci13xxx_msm.c:42: error: 'USB_AHBBURST' undeclared (first use in this function)
  drivers/usb/gadget/ci13xxx_msm.c:42: error: (Each undeclared identifier is reported only once
  drivers/usb/gadget/ci13xxx_msm.c:42: error: for each function it appears in.)
  drivers/usb/gadget/ci13xxx_msm.c:43: error: 'USB_AHBMODE' undeclared (first use in this function)
make[4]: *** [drivers/usb/gadget/ci13xxx_msm.o] Error 1
make[3]: *** [drivers/usb/gadget] Error 2

MSM USB driver is not supported on boards like trout (MSM7201) which
has an external PHY.

Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/msm_hsusb_hw.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index b92e173..7d1babb 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -16,12 +16,8 @@
 #ifndef __LINUX_USB_GADGET_MSM72K_UDC_H__
 #define __LINUX_USB_GADGET_MSM72K_UDC_H__
 
-#ifdef CONFIG_ARCH_MSM7X00A
-#define USB_SBUSCFG          (MSM_USB_BASE + 0x0090)
-#else
 #define USB_AHBBURST         (MSM_USB_BASE + 0x0090)
 #define USB_AHBMODE          (MSM_USB_BASE + 0x0098)
-#endif
 #define USB_CAPLENGTH        (MSM_USB_BASE + 0x0100) /* 8 bit */
 
 #define USB_USBCMD           (MSM_USB_BASE + 0x0140)
-- 
cgit v1.1


From 872434d69c644b8aa5088b835598dc3cd9832aff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 5 Feb 2011 16:25:25 +0100
Subject: genirq: Add missing status flags to modification mask

The mask which filters out the valid bits which can be set via
irq_modify_status() is missing IRQ_NO_BALANCING, which breaks UV.

Add IRQ_PER_CPU as well to avoid another one line patch for 39.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index abde252..80fcb53 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -74,7 +74,8 @@ typedef	void (*irq_flow_handler_t)(unsigned int irq,
 
 #define IRQF_MODIFY_MASK	\
 	(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
-	 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL)
+	 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
+	 IRQ_PER_CPU)
 
 #ifdef CONFIG_IRQ_PER_CPU
 # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
-- 
cgit v1.1


From 5084f89303c0a138f66bf74662753f46878989bb Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 31 Jan 2011 13:06:37 +0530
Subject: virtio: console: Update Copyright

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_console.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index a85064d..e4d3335 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -7,7 +7,8 @@
  * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
  * anyone can use the definitions to implement compatible drivers/servers.
  *
- * Copyright (C) Red Hat, Inc., 2009, 2010
+ * Copyright (C) Red Hat, Inc., 2009, 2010, 2011
+ * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011
  */
 
 /* Feature bits */
-- 
cgit v1.1


From 3a9dda7602e566014a859faaf8490e6454683ab1 Mon Sep 17 00:00:00 2001
From: Alexey Orishko <alexey.orishko@gmail.com>
Date: Mon, 7 Feb 2011 09:45:09 +0000
Subject: CDC NCM errata updates for cdc.h

Changes are based on the following documents:
- CDC NCM errata:
http://www.usb.org/developers/devclass_docs/NCM10_012011.zip
- CDC and WMC errata link:
http://www.usb.org/developers/devclass_docs/CDC1.2_WMC1.1_012011.zip

Signed-off-by: Alexey Orishko <alexey.orishko@stericsson.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/cdc.h | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h
index 5e86dc7..81a9279 100644
--- a/include/linux/usb/cdc.h
+++ b/include/linux/usb/cdc.h
@@ -89,7 +89,7 @@ struct usb_cdc_acm_descriptor {
 
 #define USB_CDC_COMM_FEATURE	0x01
 #define USB_CDC_CAP_LINE	0x02
-#define USB_CDC_CAP_BRK	0x04
+#define USB_CDC_CAP_BRK		0x04
 #define USB_CDC_CAP_NOTIFY	0x08
 
 /* "Union Functional Descriptor" from CDC spec 5.2.3.8 */
@@ -271,6 +271,11 @@ struct usb_cdc_notification {
 	__le16	wLength;
 } __attribute__ ((packed));
 
+struct usb_cdc_speed_change {
+	__le32	DLBitRRate;	/* contains the downlink bit rate (IN pipe) */
+	__le32	ULBitRate;	/* contains the uplink bit rate (OUT pipe) */
+} __attribute__ ((packed));
+
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -292,7 +297,7 @@ struct usb_cdc_ncm_ntb_parameters {
 	__le16	wNdpOutDivisor;
 	__le16	wNdpOutPayloadRemainder;
 	__le16	wNdpOutAlignment;
-	__le16	wPadding2;
+	__le16	wNtbOutMaxDatagrams;
 } __attribute__ ((packed));
 
 /*
@@ -307,7 +312,7 @@ struct usb_cdc_ncm_nth16 {
 	__le16	wHeaderLength;
 	__le16	wSequence;
 	__le16	wBlockLength;
-	__le16	wFpIndex;
+	__le16	wNdpIndex;
 } __attribute__ ((packed));
 
 struct usb_cdc_ncm_nth32 {
@@ -315,7 +320,7 @@ struct usb_cdc_ncm_nth32 {
 	__le16	wHeaderLength;
 	__le16	wSequence;
 	__le32	dwBlockLength;
-	__le32	dwFpIndex;
+	__le32	dwNdpIndex;
 } __attribute__ ((packed));
 
 /*
@@ -337,7 +342,7 @@ struct usb_cdc_ncm_dpe16 {
 struct usb_cdc_ncm_ndp16 {
 	__le32	dwSignature;
 	__le16	wLength;
-	__le16	wNextFpIndex;
+	__le16	wNextNdpIndex;
 	struct	usb_cdc_ncm_dpe16 dpe16[0];
 } __attribute__ ((packed));
 
@@ -375,6 +380,7 @@ struct usb_cdc_ncm_ndp32 {
 #define USB_CDC_NCM_NCAP_ENCAP_COMMAND			(1 << 2)
 #define USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE		(1 << 3)
 #define USB_CDC_NCM_NCAP_CRC_MODE			(1 << 4)
+#define	USB_CDC_NCM_NCAP_NTB_INPUT_SIZE			(1 << 5)
 
 /* CDC NCM subclass Table 6-3: NTB Parameter Structure */
 #define USB_CDC_NCM_NTB16_SUPPORTED			(1 << 0)
@@ -392,6 +398,13 @@ struct usb_cdc_ncm_ndp32 {
 #define USB_CDC_NCM_NTB_MIN_IN_SIZE			2048
 #define USB_CDC_NCM_NTB_MIN_OUT_SIZE			2048
 
+/* NTB Input Size Structure */
+struct usb_cdc_ncm_ndp_input_size {
+	__le32	dwNtbInMaxSize;
+	__le16	wNtbInMaxDatagrams;
+	__le16	wReserved;
+} __attribute__ ((packed));
+
 /* CDC NCM subclass 6.2.11 SetCrcMode */
 #define USB_CDC_NCM_CRC_NOT_APPENDED			0x00
 #define USB_CDC_NCM_CRC_APPENDED			0x01
-- 
cgit v1.1


From 6037b715d6fab139742c3df8851db4c823081561 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Wed, 9 Feb 2011 22:11:51 -0800
Subject: security: add cred argument to security_capable()

Expand security_capable() to include cred, so that it can be usable in a
wider range of call sites.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index c642bb8..b2b7f97 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1662,7 +1662,7 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(int cap);
+int security_capable(const struct cred *cred, int cap);
 int security_real_capable(struct task_struct *tsk, int cap);
 int security_real_capable_noaudit(struct task_struct *tsk, int cap);
 int security_sysctl(struct ctl_table *table, int op);
@@ -1856,9 +1856,9 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(int cap)
+static inline int security_capable(const struct cred *cred, int cap)
 {
-	return cap_capable(current, current_cred(), cap, SECURITY_CAP_AUDIT);
+	return cap_capable(current, cred, cap, SECURITY_CAP_AUDIT);
 }
 
 static inline int security_real_capable(struct task_struct *tsk, int cap)
-- 
cgit v1.1


From 795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 13 Feb 2011 16:37:07 -0800
Subject: klist: Fix object alignment on 64-bit.

Commit c0e69a5bbc6f ("klist.c: bit 0 in pointer can't be used as flag")
intended to make sure that all klist objects were at least pointer size
aligned, but used the constant "4" which only works on 32-bit.

Use "sizeof(void *)" which is correct in all cases.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: stable <stable@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/klist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index e91a4e5..a370ce5 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -22,7 +22,7 @@ struct klist {
 	struct list_head	k_list;
 	void			(*get)(struct klist_node *);
 	void			(*put)(struct klist_node *);
-} __attribute__ ((aligned (4)));
+} __attribute__ ((aligned (sizeof(void *))));
 
 #define KLIST_INIT(_name, _get, _put)					\
 	{ .k_lock	= __SPIN_LOCK_UNLOCKED(_name.k_lock),		\
-- 
cgit v1.1