From 00e4d116a7ef94eb910be037912b0b2fc09f608b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 22 Sep 2006 09:33:58 +0100
Subject: [DCCP]: Allow default/fallback service code.

This has been discussed on dccp@vger and removes the necessity for applications
to supply service codes in each and every case.

If an application does not want to provide a service code, that's fine, it will
be given 0. Otherwise, service codes can be set via socket options as before.

This patch has been tested using various client/server configurations
(including listening on multiple service codes).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 include/linux/dccp.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 2d7671c..29f9291 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -404,6 +404,7 @@ struct dccp_service_list {
 };
 
 #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
+#define DCCP_SERVICE_CODE_IS_ABSENT 		 0
 
 static inline int dccp_list_has_service(const struct dccp_service_list *sl,
 					const __be32 service)
@@ -484,11 +485,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
 	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
 }
 
-static inline int dccp_service_not_initialized(const struct sock *sk)
-{
-	return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
-}
-
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
-- 
cgit v1.1


From b83eff641ed39bd631535b9a8971e161b056f541 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Fri, 22 Sep 2006 14:25:36 +1200
Subject: [DCCP]: Introduce constants for CCID numbers

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 include/linux/dccp.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 29f9291..d6f4ec4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -169,6 +169,12 @@ enum {
 	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP CCIDS */
+enum {
+	DCCPC_CCID2 = 2,
+	DCCPC_CCID3 = 3,
+};
+
 /* DCCP features */
 enum {
 	DCCPF_RESERVED = 0,
@@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* initial values for each feature */
 #define DCCPF_INITIAL_SEQUENCE_WINDOW		100
 #define DCCPF_INITIAL_ACK_RATIO			2
-#define DCCPF_INITIAL_CCID			2
+#define DCCPF_INITIAL_CCID			DCCPC_CCID2
 #define DCCPF_INITIAL_SEND_ACK_VECTOR		1
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
-- 
cgit v1.1


From 0d6c7ae22d4fadbc83677ea1a6144eea8911e319 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sun, 24 Sep 2006 19:28:47 -0700
Subject: [NETFILTER]: Add dscp,DSCP headers to header-y

This patch adds xt_dscp.h and xt_DSCP.h to the kernel headers which are
exported via 'make headers_install'. These are necessary for userspace
to add rules using dscp match and DSCP target.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/Kbuild | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9a285ce..312bd2f 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -10,6 +10,8 @@ header-y += xt_connmark.h
 header-y += xt_CONNMARK.h
 header-y += xt_conntrack.h
 header-y += xt_dccp.h
+header-y += xt_dscp.h
+header-y += xt_DSCP.h
 header-y += xt_esp.h
 header-y += xt_helper.h
 header-y += xt_length.h
-- 
cgit v1.1


From a6d967a485c67ec8a1276261f39d81ace6a3e308 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Mon, 25 Sep 2006 15:33:09 -0400
Subject: [libata] No need for all those arch libata-portmap.h headers

They all contain the same thing.  Instead, have a single generic one in
include/asm-generic, and permit an arch to override as needed.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1ef3d39..d6a3d4b 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -36,7 +36,15 @@
 #include <linux/workqueue.h>
 #include <scsi/scsi_host.h>
 
+/*
+ * Define if arch has non-standard setup.  This is a _PCI_ standard
+ * not a legacy or ISA standard.
+ */
+#ifdef CONFIG_ATA_NONSTANDARD
 #include <asm/libata-portmap.h>
+#else
+#include <asm-generic/libata-portmap.h>
+#endif
 
 /*
  * compile-time options: to be removed as soon as all the drivers are
-- 
cgit v1.1


From baef186519c69b11cf7e48c26e75feb1e6173baa Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 8 Sep 2006 16:04:05 -0400
Subject: [PATCH] WE-21 support (core API)

This is version 21 of the Wireless Extensions. Changelog :
	o finishes migrating the ESSID API (remove the +1)
	o netdev->get_wireless_stats is no more
	o long/short retry

This is a redacted version of a patch originally submitted by Jean
Tourrilhes.  I removed most of the additions, in order to minimize
future support requirements for nl80211 (or other WE successor).

CC: Jean Tourrilhes <jt@hpl.hp.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/netdevice.h |  1 -
 include/linux/wireless.h  | 24 ++++++++++++++++++++----
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4328912..ac4f502 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -334,7 +334,6 @@ struct net_device
 
 
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
-	struct iw_statistics*	(*get_wireless_stats)(struct net_device *dev);
 
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 1358856..a50a013 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -1,7 +1,7 @@
 /*
  * This file define a set of standard wireless extensions
  *
- * Version :	20	17.2.06
+ * Version :	21	14.3.06
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
  * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
@@ -69,9 +69,14 @@
 
 /***************************** INCLUDES *****************************/
 
+/* This header is used in user-space, therefore need to be sanitised
+ * for that purpose. Those includes are usually not compatible with glibc.
+ * To know which includes to use in user-space, check iwlib.h. */
+#ifdef __KERNEL__
 #include <linux/types.h>		/* for "caddr_t" et al		*/
 #include <linux/socket.h>		/* for "struct sockaddr" et al	*/
 #include <linux/if.h>			/* for IFNAMSIZ and co... */
+#endif	/* __KERNEL__ */
 
 /***************************** VERSION *****************************/
 /*
@@ -80,7 +85,7 @@
  * (there is some stuff that will be added in the future...)
  * I just plan to increment with each new version.
  */
-#define WIRELESS_EXT	20
+#define WIRELESS_EXT	21
 
 /*
  * Changes :
@@ -208,6 +213,14 @@
  * V19 to V20
  * ----------
  *	- RtNetlink requests support (SET/GET)
+ *
+ * V20 to V21
+ * ----------
+ *	- Remove (struct net_device *)->get_wireless_stats()
+ *	- Change length in ESSID and NICK to strlen() instead of strlen()+1
+ *	- Add IW_RETRY_SHORT/IW_RETRY_LONG retry modifiers
+ *	- Power/Retry relative values no longer * 100000
+ *	- Add explicit flag to tell stats are in 802.11k RCPI : IW_QUAL_RCPI
  */
 
 /**************************** CONSTANTS ****************************/
@@ -448,6 +461,7 @@
 #define IW_QUAL_QUAL_INVALID	0x10	/* Driver doesn't provide value */
 #define IW_QUAL_LEVEL_INVALID	0x20
 #define IW_QUAL_NOISE_INVALID	0x40
+#define IW_QUAL_RCPI		0x80	/* Level + Noise are 802.11k RCPI */
 #define IW_QUAL_ALL_INVALID	0x70
 
 /* Frequency flags */
@@ -500,10 +514,12 @@
 #define IW_RETRY_TYPE		0xF000	/* Type of parameter */
 #define IW_RETRY_LIMIT		0x1000	/* Maximum number of retries*/
 #define IW_RETRY_LIFETIME	0x2000	/* Maximum duration of retries in us */
-#define IW_RETRY_MODIFIER	0x000F	/* Modify a parameter */
+#define IW_RETRY_MODIFIER	0x00FF	/* Modify a parameter */
 #define IW_RETRY_MIN		0x0001	/* Value is a minimum  */
 #define IW_RETRY_MAX		0x0002	/* Value is a maximum */
 #define IW_RETRY_RELATIVE	0x0004	/* Value is not in seconds/ms/us */
+#define IW_RETRY_SHORT		0x0010	/* Value is for short packets  */
+#define IW_RETRY_LONG		0x0020	/* Value is for long packets */
 
 /* Scanning request flags */
 #define IW_SCAN_DEFAULT		0x0000	/* Default scan of the driver */
@@ -1017,7 +1033,7 @@ struct	iw_range
 	/* Note : this frequency list doesn't need to fit channel numbers,
 	 * because each entry contain its channel index */
 
-	__u32		enc_capa; /* IW_ENC_CAPA_* bit field */
+	__u32		enc_capa;	/* IW_ENC_CAPA_* bit field */
 };
 
 /*
-- 
cgit v1.1


From 0b680e753724d31a9c45f059d1aad29df54584a1 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 22 Sep 2006 21:54:10 -0700
Subject: [PATCH] bonding: Add priv_flag to avoid event mishandling

Add priv_flag to specifically identify bonding-involved devices.  Needed
because IFF_MASTER is an unreliable identifier (vlan interfaces above bonding
will inherit IFF_MASTER).  Misidentification of devices would cause
notifier events for other devices to be erroneously processed by bonding,
causing various havoc.

Bug discovered by Martin Papik <martin.papik@ipsec.info>; this patch is
modified from his original.

Signed-off-by: Martin Papik <martin.papik@ipsec.info>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/if.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if.h b/include/linux/if.h
index cd080d7..a023ec1 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -59,6 +59,7 @@
 #define IFF_SLAVE_INACTIVE	0x4	/* bonding slave not the curr. active */
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
+#define IFF_BONDING	0x20		/* bonding master or slave	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
-- 
cgit v1.1


From f5b2b966f032f22d3a289045a5afd4afa09f09c6 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Fri, 22 Sep 2006 21:54:53 -0700
Subject: [PATCH] bonding: Validate probe replies in ARP monitor

	Add logic to check ARP request / reply packets used for ARP
monitor link integrity checking.

	The current method simply examines the slave device to see if it
has sent and received traffic; this can be fooled by extraneous traffic.
For example, if multiple hosts running bonding are behind a common
switch, the probe traffic from the multiple instances of bonding will
update the tx/rx times on each other's slave devices.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/if.h        | 1 +
 include/linux/netdevice.h | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if.h b/include/linux/if.h
index a023ec1..8018c2e 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -60,6 +60,7 @@
 #define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
 #define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
 #define IFF_BONDING	0x20		/* bonding master or slave	*/
+#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4328912..afd80ef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 }
 
 /* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast.
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
  */
 static inline int skb_bond_should_drop(struct sk_buff *skb)
 {
@@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb)
 
 	if (master &&
 	    (dev->priv_flags & IFF_SLAVE_INACTIVE)) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __constant_htons(ETH_P_ARP))
+			return 0;
+
 		if (master->priv_flags & IFF_MASTER_ALB) {
 			if (skb->pkt_type != PACKET_BROADCAST &&
 			    skb->pkt_type != PACKET_MULTICAST)
-- 
cgit v1.1


From ddd5d35a8f7c1924049e8b6877b3177c1787e6a3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@insightbb.com>
Date: Thu, 10 Aug 2006 01:18:18 -0400
Subject: class_device_create(): make fmt argument 'const char *'

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 1e5f30d..1fec285 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -277,7 +277,7 @@ extern struct class_device *class_device_create(struct class *cls,
 						struct class_device *parent,
 						dev_t devt,
 						struct device *device,
-						char *fmt, ...)
+						const char *fmt, ...)
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-- 
cgit v1.1


From 5cbe5f8a5897470698222ac9924429056e57d84c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 10 Aug 2006 01:19:19 -0400
Subject: device_create(): make fmt argument 'const char *'

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 1fec285..8d92013 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -384,7 +384,7 @@ extern void device_reprobe(struct device *dev);
  * Easy functions for dynamically creating devices on the fly
  */
 extern struct device *device_create(struct class *cls, struct device *parent,
-				    dev_t devt, char *fmt, ...)
+				    dev_t devt, const char *fmt, ...)
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
 
-- 
cgit v1.1


From ab7d7371acc68fa9130b079a9ba879191202035f Mon Sep 17 00:00:00 2001
From: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Date: Wed, 13 Sep 2006 15:34:05 +0200
Subject: Driver core: add const to class_create

Adds const to class_create second parameter, because:

struct class {
	const char * name;

	/*...*/
}

Signed-off-by: Miguel Ojeda Sandonis <maxextreme@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8d92013..8a648cd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -271,7 +271,7 @@ struct class_interface {
 extern int class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
-extern struct class *class_create(struct module *owner, char *name);
+extern struct class *class_create(struct module *owner, const char *name);
 extern void class_destroy(struct class *cls);
 extern struct class_device *class_device_create(struct class *cls,
 						struct class_device *parent,
-- 
cgit v1.1


From 7c8265f51073bc8632a99de78d5fd19117ed78b7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 24 Jun 2006 14:50:29 -0700
Subject: Suspend infrastructure cleanup and extension

Allow devices to participate in the suspend process more intimately,
in particular, allow the final phase (with interrupts disabled) to
also be open to normal devices, not just system devices.

Also, allow classes to participate in device suspend.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 11 +++++++++--
 include/linux/pm.h     |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8a648cd..b40be6f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -51,8 +51,12 @@ struct bus_type {
 	int		(*probe)(struct device * dev);
 	int		(*remove)(struct device * dev);
 	void		(*shutdown)(struct device * dev);
-	int		(*suspend)(struct device * dev, pm_message_t state);
-	int		(*resume)(struct device * dev);
+
+	int (*suspend_prepare)(struct device * dev, pm_message_t state);
+	int (*suspend)(struct device * dev, pm_message_t state);
+	int (*suspend_late)(struct device * dev, pm_message_t state);
+	int (*resume_early)(struct device * dev);
+	int (*resume)(struct device * dev);
 };
 
 extern int bus_register(struct bus_type * bus);
@@ -154,6 +158,9 @@ struct class {
 
 	void	(*release)(struct class_device *dev);
 	void	(*class_release)(struct class *class);
+
+	int	(*suspend)(struct device *, pm_message_t state);
+	int	(*resume)(struct device *);
 };
 
 extern int class_register(struct class *);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 658c1b9..096fb6f 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -190,6 +190,7 @@ extern void device_resume(void);
 extern suspend_disk_method_t pm_disk_mode;
 
 extern int device_suspend(pm_message_t state);
+extern int device_prepare_suspend(pm_message_t state);
 
 #define device_set_wakeup_enable(dev,val) \
 	((dev)->power.should_wakeup = !!(val))
-- 
cgit v1.1


From cbd69dbbf1adfce6e048f15afc8629901ca9dae5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sat, 24 Jun 2006 14:50:29 -0700
Subject: Suspend changes for PCI core

Changes the PCI core to use the new suspend infrastructure changes.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8565b81..4b2e6294 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -345,7 +345,10 @@ struct pci_driver {
 	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
+	int  (*suspend_prepare) (struct pci_dev *dev, pm_message_t state);
 	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
+	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
+	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 	int  (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable);   /* Enable wake event */
 	void (*shutdown) (struct pci_dev *dev);
-- 
cgit v1.1


From 82bb67f2c1f9ef438c56ac24e7dca027fe7289b5 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Mon, 14 Aug 2006 23:11:04 -0700
Subject: PM: define PM_EVENT_PRETHAW

This adds a new pm_message_t event type to use when preparing to restore a
swsusp snapshot.  Devices that have been initialized by Linux after resume
(rather than left in power-up-reset state) may need to be reset; this new
event type give drivers the chance to do that.

The drivers that will care about this are those which understand more hardware
states than just "on" and "reset", relying on hardware state during resume()
methods to be either the state left by the preceding suspend(), or a
power-lost reset.  The best current example of this class of drivers are USB
host controller drivers, which currently do not work through swsusp when
they're statically linked.

When the swsusp freeze/thaw mechanism kicks in, a troublesome third state
could exist: one state set up by a different kernel instance, before a
snapshot image is resumed.  This mechanism lets drivers prevent that state.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pm.h | 62 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 096fb6f..6b27e07 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -142,29 +142,61 @@ typedef struct pm_message {
 } pm_message_t;
 
 /*
- * There are 4 important states driver can be in:
- * ON     -- driver is working
- * FREEZE -- stop operations and apply whatever policy is applicable to a
- *           suspended driver of that class, freeze queues for block like IDE
- *           does, drop packets for ethernet, etc... stop DMA engine too etc...
- *           so a consistent image can be saved; but do not power any hardware
- *           down.
- * SUSPEND - like FREEZE, but hardware is doing as much powersaving as
- *           possible. Roughly pci D3.
+ * Several driver power state transitions are externally visible, affecting
+ * the state of pending I/O queues and (for drivers that touch hardware)
+ * interrupts, wakeups, DMA, and other hardware state.  There may also be
+ * internal transitions to various low power modes, which are transparent
+ * to the rest of the driver stack (such as a driver that's ON gating off
+ * clocks which are not in active use).
  *
- * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3
- * (SUSPEND).  We'll need to fix the drivers. So yes, putting 3 to all different
- * defines is intentional, and will go away as soon as drivers are fixed.  Also
- * note that typedef is neccessary, we'll probably want to switch to
- *   typedef struct pm_message_t { int event; int flags; } pm_message_t
- * or something similar soon.
+ * One transition is triggered by resume(), after a suspend() call; the
+ * message is implicit:
+ *
+ * ON		Driver starts working again, responding to hardware events
+ * 		and software requests.  The hardware may have gone through
+ * 		a power-off reset, or it may have maintained state from the
+ * 		previous suspend() which the driver will rely on while
+ * 		resuming.  On most platforms, there are no restrictions on
+ * 		availability of resources like clocks during resume().
+ *
+ * Other transitions are triggered by messages sent using suspend().  All
+ * these transitions quiesce the driver, so that I/O queues are inactive.
+ * That commonly entails turning off IRQs and DMA; there may be rules
+ * about how to quiesce that are specific to the bus or the device's type.
+ * (For example, network drivers mark the link state.)  Other details may
+ * differ according to the message:
+ *
+ * SUSPEND	Quiesce, enter a low power device state appropriate for
+ * 		the upcoming system state (such as PCI_D3hot), and enable
+ * 		wakeup events as appropriate.
+ *
+ * FREEZE	Quiesce operations so that a consistent image can be saved;
+ * 		but do NOT otherwise enter a low power device state, and do
+ * 		NOT emit system wakeup events.
+ *
+ * PRETHAW	Quiesce as if for FREEZE; additionally, prepare for restoring
+ * 		the system from a snapshot taken after an earlier FREEZE.
+ * 		Some drivers will need to reset their hardware state instead
+ * 		of preserving it, to ensure that it's never mistaken for the
+ * 		state which that earlier snapshot had set up.
+ *
+ * A minimally power-aware driver treats all messages as SUSPEND, fully
+ * reinitializes its device during resume() -- whether or not it was reset
+ * during the suspend/resume cycle -- and can't issue wakeup events.
+ *
+ * More power-aware drivers may also use low power states at runtime as
+ * well as during system sleep states like PM_SUSPEND_STANDBY.  They may
+ * be able to use wakeup events to exit from runtime low-power states,
+ * or from system low-power states such as standby or suspend-to-RAM.
  */
 
 #define PM_EVENT_ON 0
 #define PM_EVENT_FREEZE 1
 #define PM_EVENT_SUSPEND 2
+#define PM_EVENT_PRETHAW 3
 
 #define PMSG_FREEZE	((struct pm_message){ .event = PM_EVENT_FREEZE, })
+#define PMSG_PRETHAW	((struct pm_message){ .event = PM_EVENT_PRETHAW, })
 #define PMSG_SUSPEND	((struct pm_message){ .event = PM_EVENT_SUSPEND, })
 #define PMSG_ON		((struct pm_message){ .event = PM_EVENT_ON, })
 
-- 
cgit v1.1


From 1d3a82af45428c5e8deaa119cdeb79611ae46371 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Wed, 30 Aug 2006 14:09:47 -0700
Subject: PM: no suspend_prepare() phase

Remove the new suspend_prepare() phase.  It doesn't seem very usable,
has never been tested, doesn't address fault cleanup, and would need
a sibling resume_complete(); plus there are no real use cases.  It
could be restored later if those issues get resolved.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Cc: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 include/linux/pci.h    | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index b40be6f..b364646 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -52,7 +52,6 @@ struct bus_type {
 	int		(*remove)(struct device * dev);
 	void		(*shutdown)(struct device * dev);
 
-	int (*suspend_prepare)(struct device * dev, pm_message_t state);
 	int (*suspend)(struct device * dev, pm_message_t state);
 	int (*suspend_late)(struct device * dev, pm_message_t state);
 	int (*resume_early)(struct device * dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4b2e6294..9514bbf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -345,7 +345,6 @@ struct pci_driver {
 	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
-	int  (*suspend_prepare) (struct pci_dev *dev, pm_message_t state);
 	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 	int  (*resume_early) (struct pci_dev *dev);
-- 
cgit v1.1


From 386415d88b1ae50304f9c61aa3e0db082fa90428 Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 3 Sep 2006 13:16:45 -0700
Subject: PM: platform_bus and late_suspend/early_resume

Teach platform_bus about the new suspend_late/resume_early PM calls,
issued with IRQs off.  Do we really need sysdev and friends any more,
or can janitors start switching its users over to platform_device so
we can do a minor code-ectomy?

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/platform_device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 782090c..29cd6de 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -49,6 +49,8 @@ struct platform_driver {
 	int (*remove)(struct platform_device *);
 	void (*shutdown)(struct platform_device *);
 	int (*suspend)(struct platform_device *, pm_message_t state);
+	int (*suspend_late)(struct platform_device *, pm_message_t state);
+	int (*resume_early)(struct platform_device *);
 	int (*resume)(struct platform_device *);
 	struct device_driver driver;
 };
-- 
cgit v1.1


From de0ff00d723fd821d372496e2c084805644aa5e1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 27 Jun 2006 00:06:09 -0700
Subject: Driver core: add groups support to struct device

This is needed for the network class devices in order to be able to
convert over to use struct device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index b364646..994d3eb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -344,6 +344,7 @@ struct device {
 	struct list_head	node;
 	struct class		*class;		/* optional*/
 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+	struct attribute_group	**groups;	/* optional groups */
 
 	void	(*release)(struct device * dev);
 };
-- 
cgit v1.1


From 2620efef7029bb040430f50f0fc148f2d5e002ad Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 28 Jun 2006 16:19:58 -0700
Subject: Driver core: add ability for classes to handle devices properly

This adds two new callbacks to the class structure:
	int	(*dev_uevent)(struct device *dev, char **envp, int num_envp,
			char *buffer, int buffer_size);
	void	(*dev_release)(struct device *dev);

And one pointer:
	struct device_attribute		* dev_attrs;

which all corrispond with the same thing as the "normal" class devices
do, yet this is for when a struct device is bound to a class.

Someday soon, struct class_device will go away, and then the other
fields in this structure can be removed too.  But this is necessary in
order to get the transition to work properly.

Tested out on a network core patch that converted it to use struct
device instead of struct class_device.


Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 994d3eb..3122bd25 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -151,12 +151,16 @@ struct class {
 
 	struct class_attribute		* class_attrs;
 	struct class_device_attribute	* class_dev_attrs;
+	struct device_attribute		* dev_attrs;
 
 	int	(*uevent)(struct class_device *dev, char **envp,
 			   int num_envp, char *buffer, int buffer_size);
+	int	(*dev_uevent)(struct device *dev, char **envp, int num_envp,
+				char *buffer, int buffer_size);
 
 	void	(*release)(struct class_device *dev);
 	void	(*class_release)(struct class *class);
+	void	(*dev_release)(struct device *dev);
 
 	int	(*suspend)(struct device *, pm_message_t state);
 	int	(*resume)(struct device *);
-- 
cgit v1.1


From a2de48cace5d0993da6cfa28b276ae724dc3569b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 3 Jul 2006 14:31:12 -0700
Subject: Driver core: add device_rename function

The network layer needs this to convert to using struct device instead
of a struct class_device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 3122bd25..3400e09 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -380,6 +380,7 @@ extern int device_add(struct device * dev);
 extern void device_del(struct device * dev);
 extern int device_for_each_child(struct device *, void *,
 		     int (*fn)(struct device *, void *));
+extern int device_rename(struct device *dev, char *new_name);
 
 /*
  * Manual binding of a device to driver. See drivers/base/bus.c
-- 
cgit v1.1


From c205ef4880273d2de4ee5388d4e52227ff688cc4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 7 Aug 2006 22:19:37 -0700
Subject: Driver core: create devices/virtual/ tree

This change creates a devices/virtual/CLASS_NAME tree for struct devices
that belong to a class, yet do not have a "real" struct device for a
parent.  It automatically creates the directories on the fly as needed.


Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 3400e09..bbb0d6b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -149,6 +149,8 @@ struct class {
 	struct list_head	interfaces;
 	struct semaphore	sem;	/* locks both the children and interfaces lists */
 
+	struct kobject		*virtual_dir;
+
 	struct class_attribute		* class_attrs;
 	struct class_device_attribute	* class_dev_attrs;
 	struct device_attribute		* dev_attrs;
@@ -291,7 +293,6 @@ extern struct class_device *class_device_create(struct class *cls,
 					__attribute__((format(printf,5,6)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-
 /* interface for exporting device attributes */
 struct device_attribute {
 	struct attribute	attr;
@@ -400,6 +401,8 @@ extern struct device *device_create(struct class *cls, struct device *parent,
 				    __attribute__((format(printf,4,5)));
 extern void device_destroy(struct class *cls, dev_t devt);
 
+extern int virtual_device_parent(struct device *dev);
+
 /*
  * Platform "fixup" functions - allow the platform to have their say
  * about devices and actions that the general device layer doesn't
-- 
cgit v1.1


From c47ed219ba81632595e9f02e27318151fec16c9e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 13 Sep 2006 15:34:05 +0200
Subject: Class: add support for class interfaces for devices

When moving class_device usage over to device, we need to handle
class_interfaces properly with devices.  This patch adds that support.


Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index bbb0d6b..e0fae0e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -278,6 +278,8 @@ struct class_interface {
 
 	int (*add)	(struct class_device *, struct class_interface *);
 	void (*remove)	(struct class_device *, struct class_interface *);
+	int (*add_dev)		(struct device *, struct class_interface *);
+	void (*remove_dev)	(struct device *, struct class_interface *);
 };
 
 extern int class_interface_register(struct class_interface *);
-- 
cgit v1.1


From 2589f1887b0bf9f08ec3d7f3c5705ccb7c628076 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 19 Sep 2006 09:39:19 -0700
Subject: Driver core: add ability for devices to create and remove bin files

Makes it easier for devices to create and remove binary attribute files
so they don't have to call directly into sysfs.  This is needed to help
with the conversion from struct class_device to struct device.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index e0fae0e..7d447d7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -309,6 +309,10 @@ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
 extern int device_create_file(struct device *device, struct device_attribute * entry);
 extern void device_remove_file(struct device * dev, struct device_attribute * attr);
+extern int __must_check device_create_bin_file(struct device *dev,
+					       struct bin_attribute *attr);
+extern void device_remove_bin_file(struct device *dev,
+				   struct bin_attribute *attr);
 struct device {
 	struct klist		klist_children;
 	struct klist_node	knode_parent;		/* node in sibling list */
-- 
cgit v1.1


From 995982ca79d9262869513948ec7c540f32035491 Mon Sep 17 00:00:00 2001
From: "Randy.Dunlap" <rdunlap@xenotime.net>
Date: Mon, 10 Jul 2006 23:05:25 -0700
Subject: sysfs_remove_bin_file: no return value, dump_stack on error

Make sysfs_remove_bin_file() void.  If it detects an error,
printk the file name and call dump_stack().

sysfs_hash_and_remove() now returns an error code indicating
its success or failure so that sysfs_remove_bin_file() can
know success/failure.

Convert the only driver that checked the return value of
sysfs_remove_bin_file().

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 1ea5d3c..95f6db5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -114,7 +114,7 @@ extern void
 sysfs_remove_link(struct kobject *, const char * name);
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
+void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
 
 int sysfs_create_group(struct kobject *, const struct attribute_group *);
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
-- 
cgit v1.1


From 4a7fb6363f2d1a6c09a10253937f672f3c7929e1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:17 -0700
Subject: add __must_check to device management code

We're getting a lot of crashes in the sysfs/kobject/device/bus/class code and
they're very hard to diagnose.

I'm suspecting that in some cases this is because drivers aren't checking
return values and aren't handling errors correctly.  So the code blithely
blunders on and crashes later in very obscure ways.

There's just no reason to ignore errors which can and do occur.  So the patch
sprinkles __must_check all over these APIs.

Causes 1,513 new warnings.  Heh.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h  | 52 ++++++++++++++++++++++++++-----------------------
 include/linux/kobject.h | 16 ++++++++-------
 include/linux/pci.h     | 34 +++++++++++++++++---------------
 include/linux/sysfs.h   | 19 ++++++++++--------
 4 files changed, 66 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 7d447d7..ad4db72 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -15,6 +15,7 @@
 #include <linux/kobject.h>
 #include <linux/klist.h>
 #include <linux/list.h>
+#include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/pm.h>
@@ -58,7 +59,7 @@ struct bus_type {
 	int (*resume)(struct device * dev);
 };
 
-extern int bus_register(struct bus_type * bus);
+extern int __must_check bus_register(struct bus_type * bus);
 extern void bus_unregister(struct bus_type * bus);
 
 extern void bus_rescan_devices(struct bus_type * bus);
@@ -70,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
 struct device * bus_find_device(struct bus_type *bus, struct device *start,
 				void *data, int (*match)(struct device *, void *));
 
-int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, 
-		     void * data, int (*fn)(struct device_driver *, void *));
-
+int __must_check bus_for_each_drv(struct bus_type *bus,
+		struct device_driver *start, void *data,
+		int (*fn)(struct device_driver *, void *));
 
 /* driverfs interface for exporting bus attributes */
 
@@ -85,7 +86,8 @@ struct bus_attribute {
 #define BUS_ATTR(_name,_mode,_show,_store)	\
 struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int bus_create_file(struct bus_type *, struct bus_attribute *);
+extern int __must_check bus_create_file(struct bus_type *,
+					struct bus_attribute *);
 extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
 
 struct device_driver {
@@ -107,14 +109,13 @@ struct device_driver {
 };
 
 
-extern int driver_register(struct device_driver * drv);
+extern int __must_check driver_register(struct device_driver * drv);
 extern void driver_unregister(struct device_driver * drv);
 
 extern struct device_driver * get_driver(struct device_driver * drv);
 extern void put_driver(struct device_driver * drv);
 extern struct device_driver *driver_find(const char *name, struct bus_type *bus);
 
-
 /* driverfs interface for exporting driver attributes */
 
 struct driver_attribute {
@@ -126,16 +127,17 @@ struct driver_attribute {
 #define DRIVER_ATTR(_name,_mode,_show,_store)	\
 struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int driver_create_file(struct device_driver *, struct driver_attribute *);
+extern int __must_check driver_create_file(struct device_driver *,
+					struct driver_attribute *);
 extern void driver_remove_file(struct device_driver *, struct driver_attribute *);
 
-extern int driver_for_each_device(struct device_driver * drv, struct device * start,
-				  void * data, int (*fn)(struct device *, void *));
+extern int __must_check driver_for_each_device(struct device_driver * drv,
+		struct device *start, void *data,
+		int (*fn)(struct device *, void *));
 struct device * driver_find_device(struct device_driver *drv,
 				   struct device *start, void *data,
 				   int (*match)(struct device *, void *));
 
-
 /*
  * device classes
  */
@@ -168,7 +170,7 @@ struct class {
 	int	(*resume)(struct device *);
 };
 
-extern int class_register(struct class *);
+extern int __must_check class_register(struct class *);
 extern void class_unregister(struct class *);
 
 
@@ -181,7 +183,8 @@ struct class_attribute {
 #define CLASS_ATTR(_name,_mode,_show,_store)			\
 struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store) 
 
-extern int class_create_file(struct class *, const struct class_attribute *);
+extern int __must_check class_create_file(struct class *,
+					const struct class_attribute *);
 extern void class_remove_file(struct class *, const struct class_attribute *);
 
 struct class_device_attribute {
@@ -194,7 +197,7 @@ struct class_device_attribute {
 struct class_device_attribute class_device_attr_##_name = 	\
 	__ATTR(_name,_mode,_show,_store)
 
-extern int class_device_create_file(struct class_device *,
+extern int __must_check class_device_create_file(struct class_device *,
 				    const struct class_device_attribute *);
 
 /**
@@ -254,10 +257,10 @@ class_set_devdata (struct class_device *dev, void *data)
 }
 
 
-extern int class_device_register(struct class_device *);
+extern int __must_check class_device_register(struct class_device *);
 extern void class_device_unregister(struct class_device *);
 extern void class_device_initialize(struct class_device *);
-extern int class_device_add(struct class_device *);
+extern int __must_check class_device_add(struct class_device *);
 extern void class_device_del(struct class_device *);
 
 extern int class_device_rename(struct class_device *, char *);
@@ -267,7 +270,7 @@ extern void class_device_put(struct class_device *);
 
 extern void class_device_remove_file(struct class_device *, 
 				     const struct class_device_attribute *);
-extern int class_device_create_bin_file(struct class_device *,
+extern int __must_check class_device_create_bin_file(struct class_device *,
 					struct bin_attribute *);
 extern void class_device_remove_bin_file(struct class_device *,
 					 struct bin_attribute *);
@@ -282,7 +285,7 @@ struct class_interface {
 	void (*remove_dev)	(struct device *, struct class_interface *);
 };
 
-extern int class_interface_register(struct class_interface *);
+extern int __must_check class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
 extern struct class *class_create(struct module *owner, const char *name);
@@ -307,7 +310,8 @@ struct device_attribute {
 #define DEVICE_ATTR(_name,_mode,_show,_store) \
 struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
 
-extern int device_create_file(struct device *device, struct device_attribute * entry);
+extern int __must_check device_create_file(struct device *device,
+					struct device_attribute * entry);
 extern void device_remove_file(struct device * dev, struct device_attribute * attr);
 extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
@@ -380,12 +384,12 @@ static inline int device_is_registered(struct device *dev)
 /*
  * High level routines for use by the bus drivers
  */
-extern int device_register(struct device * dev);
+extern int __must_check device_register(struct device * dev);
 extern void device_unregister(struct device * dev);
 extern void device_initialize(struct device * dev);
-extern int device_add(struct device * dev);
+extern int __must_check device_add(struct device * dev);
 extern void device_del(struct device * dev);
-extern int device_for_each_child(struct device *, void *,
+extern int __must_check device_for_each_child(struct device *, void *,
 		     int (*fn)(struct device *, void *));
 extern int device_rename(struct device *dev, char *new_name);
 
@@ -395,7 +399,7 @@ extern int device_rename(struct device *dev, char *new_name);
  */
 extern void device_bind_driver(struct device * dev);
 extern void device_release_driver(struct device * dev);
-extern int  device_attach(struct device * dev);
+extern int  __must_check device_attach(struct device * dev);
 extern void driver_attach(struct device_driver * drv);
 extern void device_reprobe(struct device *dev);
 
@@ -433,7 +437,7 @@ extern void device_shutdown(void);
 
 
 /* drivers/base/firmware.c */
-extern int firmware_register(struct subsystem *);
+extern int __must_check firmware_register(struct subsystem *);
 extern void firmware_unregister(struct subsystem *);
 
 /* debugging and troubleshooting/diagnostic helpers. */
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 2d22932..bcd9cd1 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -20,6 +20,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/sysfs.h>
+#include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
 #include <linux/kref.h>
@@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj)
 extern void kobject_init(struct kobject *);
 extern void kobject_cleanup(struct kobject *);
 
-extern int kobject_add(struct kobject *);
+extern int __must_check kobject_add(struct kobject *);
 extern void kobject_del(struct kobject *);
 
-extern int kobject_rename(struct kobject *, const char *new_name);
+extern int __must_check kobject_rename(struct kobject *, const char *new_name);
 
-extern int kobject_register(struct kobject *);
+extern int __must_check kobject_register(struct kobject *);
 extern void kobject_unregister(struct kobject *);
 
 extern struct kobject * kobject_get(struct kobject *);
@@ -128,8 +129,8 @@ struct kset {
 
 
 extern void kset_init(struct kset * k);
-extern int kset_add(struct kset * k);
-extern int kset_register(struct kset * k);
+extern int __must_check kset_add(struct kset * k);
+extern int __must_check kset_register(struct kset * k);
 extern void kset_unregister(struct kset * k);
 
 static inline struct kset * to_kset(struct kobject * kobj)
@@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys;
 	(obj)->subsys.kset.kobj.kset = &(_subsys).kset
 
 extern void subsystem_init(struct subsystem *);
-extern int subsystem_register(struct subsystem *);
+extern int __must_check subsystem_register(struct subsystem *);
 extern void subsystem_unregister(struct subsystem *);
 
 static inline struct subsystem * subsys_get(struct subsystem * s)
@@ -258,7 +259,8 @@ struct subsys_attribute {
 	ssize_t (*store)(struct subsystem *, const char *, size_t); 
 };
 
-extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
+extern int __must_check subsys_create_file(struct subsystem * ,
+					struct subsys_attribute *);
 
 #if defined(CONFIG_HOTPLUG)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9514bbf..3ec7255 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -49,6 +49,7 @@
 #include <linux/types.h>
 #include <linux/ioport.h>
 #include <linux/list.h>
+#include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/device.h>
 
@@ -403,7 +404,7 @@ extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 extern struct list_head pci_devices;	/* list of all devices */
 
 void pcibios_fixup_bus(struct pci_bus *);
-int pcibios_enable_device(struct pci_dev *, int mask);
+int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 char *pcibios_setup (char *str);
 
 /* Used only when drivers/pci/setup.c is used */
@@ -490,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val
 	return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
 }
 
-int pci_enable_device(struct pci_dev *dev);
-int pci_enable_device_bars(struct pci_dev *dev, int mask);
+int __must_check pci_enable_device(struct pci_dev *dev);
+int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask);
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
-int pci_set_mwi(struct pci_dev *dev);
+int __must_check pci_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
 int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
 void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
-int pci_assign_resource(struct pci_dev *dev, int i);
-int pci_assign_resource_fixed(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i);
 void pci_restore_bars(struct pci_dev *dev);
 
 /* ROM control related routines */
@@ -528,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(struct pci_dev *, u8, u8));
 #define HAVE_PCI_REQ_REGIONS	2
-int pci_request_regions(struct pci_dev *, const char *);
+int __must_check pci_request_regions(struct pci_dev *, const char *);
 void pci_release_regions(struct pci_dev *);
-int pci_request_region(struct pci_dev *, int, const char *);
+int __must_check pci_request_region(struct pci_dev *, int, const char *);
 void pci_release_region(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
-			   resource_size_t size, resource_size_t align,
-			   resource_size_t min, unsigned int type_mask,
-			   void (*alignf)(void *, struct resource *,
-					  resource_size_t, resource_size_t),
-			   void *alignf_data);
+int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
+			struct resource *res, resource_size_t size,
+			resource_size_t align, resource_size_t min,
+			unsigned int type_mask,
+			void (*alignf)(void *, struct resource *,
+				resource_size_t, resource_size_t),
+			void *alignf_data);
 void pci_enable_bridges(struct pci_bus *bus);
 
 /* Proper probing supporting hot-pluggable devices */
-int __pci_register_driver(struct pci_driver *, struct module *);
-static inline int pci_register_driver(struct pci_driver *driver)
+int __must_check __pci_register_driver(struct pci_driver *, struct module *);
+static inline int __must_check pci_register_driver(struct pci_driver *driver)
 {
 	return __pci_register_driver(driver, THIS_MODULE);
 }
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 95f6db5..02ad790 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -10,6 +10,7 @@
 #ifndef _SYSFS_H_
 #define _SYSFS_H_
 
+#include <linux/compiler.h>
 #include <asm/atomic.h>
 
 struct kobject;
@@ -86,37 +87,39 @@ struct sysfs_dirent {
 
 #ifdef CONFIG_SYSFS
 
-extern int
+extern int __must_check
 sysfs_create_dir(struct kobject *);
 
 extern void
 sysfs_remove_dir(struct kobject *);
 
-extern int
+extern int __must_check
 sysfs_rename_dir(struct kobject *, const char *new_name);
 
-extern int
+extern int __must_check
 sysfs_create_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_update_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode);
 
 extern void
 sysfs_remove_file(struct kobject *, const struct attribute *);
 
-extern int
+extern int __must_check
 sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name);
 
 extern void
 sysfs_remove_link(struct kobject *, const char * name);
 
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
+int __must_check sysfs_create_bin_file(struct kobject *kobj,
+					struct bin_attribute *attr);
 void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
 
-int sysfs_create_group(struct kobject *, const struct attribute_group *);
+int __must_check sysfs_create_group(struct kobject *,
+					const struct attribute_group *);
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
 void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
-- 
cgit v1.1


From cebc04ba9aeb3a646cc746300421fc0e5aa4f253 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:18 -0700
Subject: add CONFIG_ENABLE_MUST_CHECK

Those 1500 warnings can be a bit of a pain.  Add a config option to shut them
up.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/compiler.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 9b4f110..060b961 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *);
 #define __must_check
 #endif
 
+#ifndef CONFIG_ENABLE_MUST_CHECK
+#undef __must_check
+#define __must_check
+#endif
+
 /*
  * Allow us to avoid 'defined but not used' warnings on functions and data,
  * as well as force them to be emitted to the assembly file.
-- 
cgit v1.1


From f86db396ff455ed586751d21816a1ebd431264e5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:20 -0700
Subject: drivers/base: check errors

Add lots of return-value checking.

<pcornelia.huck@de.ibm.com>: fix bus_rescan_devices()]
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index ad4db72..b3da9a8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -62,7 +62,7 @@ struct bus_type {
 extern int __must_check bus_register(struct bus_type * bus);
 extern void bus_unregister(struct bus_type * bus);
 
-extern void bus_rescan_devices(struct bus_type * bus);
+extern int __must_check bus_rescan_devices(struct bus_type * bus);
 
 /* iterator helpers for buses */
 
@@ -397,11 +397,11 @@ extern int device_rename(struct device *dev, char *new_name);
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
  */
-extern void device_bind_driver(struct device * dev);
+extern int __must_check device_bind_driver(struct device *dev);
 extern void device_release_driver(struct device * dev);
 extern int  __must_check device_attach(struct device * dev);
-extern void driver_attach(struct device_driver * drv);
-extern void device_reprobe(struct device *dev);
+extern int __must_check driver_attach(struct device_driver *drv);
+extern int __must_check device_reprobe(struct device *dev);
 
 /*
  * Easy functions for dynamically creating devices on the fly
-- 
cgit v1.1


From f20a9ead0d005fbeeae3fc21a96f9bf197ac1c1c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 14 Aug 2006 22:43:23 -0700
Subject: sysfs: add proper sysfs_init() prototype

Don't be crufty.  Mark it __must_check too.

Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 02ad790..6d5c43d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -123,6 +123,8 @@ int __must_check sysfs_create_group(struct kobject *,
 void sysfs_remove_group(struct kobject *, const struct attribute_group *);
 void sysfs_notify(struct kobject * k, char *dir, char *attr);
 
+extern int __must_check sysfs_init(void);
+
 #else /* CONFIG_SYSFS */
 
 static inline int sysfs_create_dir(struct kobject * k)
@@ -194,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr)
 {
 }
 
+static inline int __must_check sysfs_init(void)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SYSFS */
 
 #endif /* _SYSFS_H_ */
-- 
cgit v1.1


From d779249ed4cb3b50690de6de8448829d65a1cd08 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 18 Jul 2006 10:59:59 -0700
Subject: Driver Core: add ability for drivers to do a threaded probe

This adds the infrastructure for drivers to do a threaded probe, and
waits at init time for all currently outstanding probes to complete.

A new kernel thread will be created when the probe() function for the
driver is called, if the multithread_probe bit is set in the driver
saying it can support this kind of operation.

I have tested this with USB and PCI, and it works, and shaves off a lot
of time in the boot process, but there are issues with finding root boot
disks, and some USB drivers assume that this can never happen, so it is
currently not enabled for any bus type.  Individual drivers can enable
this right now if they wish, and bus authors can selectivly turn it on
as well, once they determine that their subsystem will work properly
with it.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index b3da9a8..74246ef 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -106,6 +106,8 @@ struct device_driver {
 	void	(*shutdown)	(struct device * dev);
 	int	(*suspend)	(struct device * dev, pm_message_t state);
 	int	(*resume)	(struct device * dev);
+
+	unsigned int multithread_probe:1;
 };
 
 
@@ -115,6 +117,7 @@ extern void driver_unregister(struct device_driver * drv);
 extern struct device_driver * get_driver(struct device_driver * drv);
 extern void put_driver(struct device_driver * drv);
 extern struct device_driver *driver_find(const char *name, struct bus_type *bus);
+extern int driver_probe_done(void);
 
 /* driverfs interface for exporting driver attributes */
 
-- 
cgit v1.1


From f2eaae197f4590c4d96f31b09b0ee9067421a95c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 18 Sep 2006 16:22:34 -0400
Subject: Driver core: Fix potential deadlock in driver core

There is a potential deadlock in the driver core.  It boils down to
the fact that bus_remove_device() calls klist_remove() instead of
klist_del(), thereby waiting until the reference count of the
klist_node in the bus's klist of devices drops to 0.  The refcount
can't reach 0 so long as a modprobe process is trying to bind a new
driver to the device being removed, by calling __driver_attach().  The
problem is that __driver_attach() tries to acquire the device's
parent's semaphore, but the caller of bus_remove_device() is quite
likely to own that semaphore already.

It isn't sufficient just to replace klist_remove() with klist_del().
Doing so runs the risk that the device would remain on the bus's klist
of devices for some time, and so could be bound to another driver even
after it was unregistered.  What's needed is a new way to distinguish
whether or not a device is registered, based on a criterion other than
whether its klist_node is linked into the bus's klist of devices.  That
way driver binding can fail when the device is unregistered, even if
it is still linked into the klist.

This patch (as782) implements the solution, by adding a new bitflag to
indiate when a struct device is registered, by testing the flag before
allowing a driver to bind a device, and by changing the definition of
the device_is_registered() inline.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 74246ef..662e6a1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -329,6 +329,7 @@ struct device {
 
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
+	unsigned		is_registered:1;
 	struct device_attribute uevent_attr;
 	struct device_attribute *devt_attr;
 
@@ -381,7 +382,7 @@ dev_set_drvdata (struct device *dev, void *data)
 
 static inline int device_is_registered(struct device *dev)
 {
-	return klist_node_attached(&dev->knode_bus);
+	return dev->is_registered;
 }
 
 /*
-- 
cgit v1.1


From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 26 Sep 2006 10:52:27 +0200
Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog with sysctl

Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi
watchdog.

Signed-off-by:  Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed91..ecb79ba 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -150,6 +150,7 @@ enum
 	KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */
 	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
 	KERN_MAX_LOCK_DEPTH=74,
+	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 };
 
 
-- 
cgit v1.1


From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Tue, 26 Sep 2006 10:52:27 +0200
Subject: [PATCH] x86: Allow users to force a panic on NMI

To quote Alan Cox:

The default Linux behaviour on an NMI of either memory or unknown is to
continue operation. For many environments such as scientific computing
it is preferable that the box is taken out and the error dealt with than
an uncorrected parity/ECC error get propogated.

A small number of systems do generate NMI's for bizarre random reasons
such as power management so the default is unchanged. In other respects
the new proc/sys entry works like the existing panic controls already in
that directory.

This is separate to the edac support - EDAC allows supported chipsets to
handle ECC errors well, this change allows unsupported cases to at least
panic rather than cause problems further down the line.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/kernel.h | 1 +
 include/linux/sysctl.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4f..1ff9609 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -186,6 +186,7 @@ extern void bust_spinlocks(int yes);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
 extern int panic_timeout;
 extern int panic_on_oops;
+extern int panic_on_unrecovered_nmi;
 extern int tainted;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ecb79ba..4327784 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -151,6 +151,7 @@ enum
 	KERN_COMPAT_LOG=73,	/* int: print compat layer  messages */
 	KERN_MAX_LOCK_DEPTH=74,
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
+	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
 };
 
 
-- 
cgit v1.1


From c08c820508233b424deab3302bc404bbecc6493a Mon Sep 17 00:00:00 2001
From: Vojtech Pavlik <vojtech@suse.cz>
Date: Tue, 26 Sep 2006 10:52:28 +0200
Subject: [PATCH] Add the vgetcpu vsyscall

This patch adds a vgetcpu vsyscall, which depending on the CPU RDTSCP
capability uses either the RDTSCP or CPUID to obtain a CPU and node
numbers and pass them to the program.

AK: Lots of changes over Vojtech's original code:
Better prototype for vgetcpu()
It's better to pass the cpu / node numbers as separate arguments
to avoid mistakes when going from SMP to NUMA.
Also add a fast time stamp based cache using a user supplied
argument to speed things more up.
Use fast method from Chuck Ebbert to retrieve node/cpu from
GDT limit instead of CPUID
Made sure RDTSCP init is always executed after node is known.
Drop printk

Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/getcpu.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 include/linux/getcpu.h

(limited to 'include/linux')

diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
new file mode 100644
index 0000000..031ed37
--- /dev/null
+++ b/include/linux/getcpu.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_GETCPU_H
+#define _LINUX_GETCPU_H 1
+
+/* Cache for getcpu() to speed it up. Results might be upto a jiffie
+   out of date, but will be faster.
+   User programs should not refer to the contents of this structure.
+   It is only a cache for vgetcpu(). It might change in future kernels.
+   The user program must store this information per thread (__thread)
+   If you want 100% accurate information pass NULL instead. */
+struct getcpu_cache {
+	unsigned long t0;
+	unsigned long t1;
+	unsigned long res[4];
+};
+
+#endif
-- 
cgit v1.1


From 3cfc348bf90ffaa777c188652aa297f04eb94de8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:28 +0200
Subject: [PATCH] x86: Add portable getcpu call

For NUMA optimization and some other algorithms it is useful to have a fast
to get the current CPU and node numbers in user space.

x86-64 added a fast way to do this in a vsyscall. This adds a generic
syscall for other architectures to make it a generic portable facility.

I expect some of them will also implement it as a faster vsyscall.

The cache is an optimization for the x86-64 vsyscall optimization. Since
what the syscall returns is an approximation anyways and user space
often wants very fast results it can be cached for some time.  The norma
methods to get this information in user space are relatively slow

The vsyscall is in a better position to manage the cache because it has direct
access to a fast time stamp (jiffies). For the generic syscall optimization
it doesn't help much, but enforce a valid argument to keep programs
portable

I only added an i386 syscall entry for now. Other architectures can follow
as needed.

AK: Also added some cleanups from Andrew Morton

Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/syscalls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 008f04c..3f0f716 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -53,6 +53,7 @@ struct mq_attr;
 struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
+struct getcpu_cache;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid,
 				    size_t __user *len_ptr);
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
+asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
 
 #endif
-- 
cgit v1.1


From 5a1b3999d6cb7ab87f1f3b1700bc91839fd6fa29 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:34 +0200
Subject: [PATCH] x86: Some preparationary cleanup for stack trace

- Remove unused all_contexts parameter
No caller used it
- Move skip argument into the structure (needed for
followon patches)

Cc: mingo@elte.hu

Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/stacktrace.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 9cc81e5..50e2b01 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -5,15 +5,16 @@
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
+	int skip;	/* input argument: How many entries to skip */
+	int all_contexts; /* input argument: if true do than one stack */
 };
 
 extern void save_stack_trace(struct stack_trace *trace,
-			     struct task_struct *task, int all_contexts,
-			     unsigned int skip);
+			     struct task_struct *task);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
 #else
-# define save_stack_trace(trace, task, all, skip)	do { } while (0)
+# define save_stack_trace(trace, task)			do { } while (0)
 # define print_stack_trace(trace)			do { } while (0)
 #endif
 
-- 
cgit v1.1


From d28c4393a7bf558538e9def269c1caeab6ec056f Mon Sep 17 00:00:00 2001
From: "Prasanna S.P" <prasanna@in.ibm.com>
Date: Tue, 26 Sep 2006 10:52:34 +0200
Subject: [PATCH] x86: error_code is not safe for kprobes

This patch moves the entry.S:error_entry to .kprobes.text section,
since code marked unsafe for kprobes jumps directly to entry.S::error_entry,
that must be marked unsafe as well.
This patch also moves all the ".previous.text" asm directives to ".previous"
for kprobes section.

AK: Following a similar i386 patch from Chuck Ebbert
AK: Also merged Jeremy's fix in.

+From: Jeremy Fitzhardinge <jeremy@goop.org>

KPROBE_ENTRY does a .section .kprobes.text, and expects its users to
do a .previous at the end of the function.

Unfortunately, if any code within the function switches sections, for
example .fixup, then the .previous ends up putting all subsequent code
into .fixup.  Worse, any subsequent .fixup code gets intermingled with
the code its supposed to be fixing (which is also in .fixup).  It's
surprising this didn't cause more havok.

The fix is to use .pushsection/.popsection, so this stuff nests
properly.  A further cleanup would be to get rid of all
.section/.previous pairs, since they're inherently fragile.

+From: Chuck Ebbert <76306.1226@compuserve.com>

Because code marked unsafe for kprobes jumps directly to
entry.S::error_code, that must be marked unsafe as well.
The easiest way to do that is to move the page fault entry
point to just before error_code and let it inherit the same
section.

Also moved all the ".previous" asm directives for kprobes
sections to column 1 and removed ".text" from them.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/linkage.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 932021f..6c9873f 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -35,9 +35,13 @@
 #endif
 
 #define KPROBE_ENTRY(name) \
-  .section .kprobes.text, "ax"; \
+  .pushsection .kprobes.text, "ax"; \
   ENTRY(name)
 
+#define KPROBE_END(name) \
+  END(name);		 \
+  .popsection
+
 #ifndef END
 #define END(name) \
   .size name, .-name
-- 
cgit v1.1


From e07e23e1fd3000289fc7ccc6c71879070d3b19e0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 26 Sep 2006 10:52:36 +0200
Subject: [PATCH] non lazy "sleazy" fpu implementation

Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every*
context switch a trap is taken for the first FPU use to restore the FPU
context lazily.  This is of course great for applications that have very
sporadic or no FPU use (since then you avoid doing the expensive
save/restore all the time).  However for very frequent FPU users...  you
take an extra trap every context switch.

The patch below adds a simple heuristic to this code: After 5 consecutive
context switches of FPU use, the lazy behavior is disabled and the context
gets restored every context switch.  If the app indeed uses the FPU, the
trap is avoided.  (the chance of the 6th time slice using FPU after the
previous 5 having done so are quite high obviously).

After 256 switches, this is reset and lazy behavior is returned (until
there are 5 consecutive ones again).  The reason for this is to give apps
that do longer bursts of FPU use still the lazy behavior back after some
time.

[akpm@osdl.org: place new task_struct field next to jit_keyring to save space]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 include/linux/sched.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 34ed0d9..807556c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,6 +865,15 @@ struct task_struct {
 	struct key *thread_keyring;	/* keyring private to this thread */
 	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
+	/*
+	 * fpu_counter contains the number of consecutive context switches
+	 * that the FPU is used. If this is over a threshold, the lazy fpu
+	 * saving becomes unlazy to save the trap. This is an unsigned char
+	 * so that after 256 times the counter wraps and the behavior turns
+	 * lazy again; this to deal with bursty apps that only use FPU for
+	 * a short time
+	 */
+	unsigned char fpu_counter;
 	int oomkilladj; /* OOM kill score adjustment (bit shift). */
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
-- 
cgit v1.1


From 1bb4996bcebca1cde49d964b4e012699ce180e61 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 26 Sep 2006 10:52:37 +0200
Subject: [PATCH] Move compiler check for modules to ia64 only

Apparently IA64 needs it, but i386/x86-64 don't anymore
since gcc 2.95 support was dropped.  Nobody else on linux-arch
requested keeping it generically

Cc: tony.luck@intel.com
Cc: kaos@sgi.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/vermagic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 46919f9..4d0909e 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -24,5 +24,5 @@
 #define VERMAGIC_STRING 						\
 	UTS_RELEASE " "							\
 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
-	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC 		\
-	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
+	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
+
-- 
cgit v1.1


From 575400d1b483fbe9e03c68758059bfaf4e4768d1 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 26 Sep 2006 10:52:38 +0200
Subject: [PATCH] i386: Fix the EDD code misparsing the command line

The EDD code would scan the command line as a fixed array, without
taking account of either whitespace, null-termination, the old
command-line protocol, late overrides early, or the fact that the
command line may not be reachable from INITSEG.

This should fix those problems, and enable us to use a longer command
line.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/edd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/edd.h b/include/linux/edd.h
index 162512b..b2b3e68 100644
--- a/include/linux/edd.h
+++ b/include/linux/edd.h
@@ -52,6 +52,7 @@
 #define EDD_CL_EQUALS   0x3d646465     /* "edd=" */
 #define EDD_CL_OFF      0x666f         /* "of" for off  */
 #define EDD_CL_SKIP     0x6b73         /* "sk" for skipmbr */
+#define EDD_CL_ON       0x6e6f	       /* "on" for on */
 
 #ifndef __ASSEMBLY__
 
-- 
cgit v1.1


From 0a4254058037eb172758961d0a5b94f4320a1425 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 26 Sep 2006 10:52:38 +0200
Subject: [PATCH] Add the canary field to the PDA area and the task struct

This patch adds the per thread cookie field to the task struct and the PDA.
Also it makes sure that the PDA value gets the new cookie value at context
switch, and that a new task gets a new cookie at task creation time.

Signed-off-by: Arjan van Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andi Kleen <ak@suse.de>
CC: Andi Kleen <ak@suse.de>
---
 include/linux/sched.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 807556c..9d4aa7f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -819,6 +819,11 @@ struct task_struct {
 	unsigned did_exec:1;
 	pid_t pid;
 	pid_t tgid;
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	/* Canary value for the -fstack-protector gcc feature */
+	unsigned long stack_canary;
+#endif
 	/* 
 	 * pointers to (original) parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with 
-- 
cgit v1.1


From 3b171672831b9633c2ed8fa94805255cd4d5af19 Mon Sep 17 00:00:00 2001
From: Dmitriy Zavin <dmitriyz@google.com>
Date: Tue, 26 Sep 2006 10:52:42 +0200
Subject: [PATCH] Add 64bit jiffies compares (for use with get_jiffies_64)

The current time_before/time_after macros will fail typechecks
when passed u64 values (as returned by get_jiffies_64()). On 64bit
systems, this will just result in a warning about mismatching types
without explicit casts, but since unsigned long and u64
(unsigned long long) are of same size, it will still work.
On 32bit systems, a long is 32bits, so the value from get_jiffies_64()
will be truncated by the cast and thus lose all the precision gained by
64bit jiffies.

Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 include/linux/jiffies.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 329ebcf..c8d5f20 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -115,6 +115,21 @@ static inline u64 get_jiffies_64(void)
 	 ((long)(a) - (long)(b) >= 0))
 #define time_before_eq(a,b)	time_after_eq(b,a)
 
+/* Same as above, but does so with platform independent 64bit types.
+ * These must be used when utilizing jiffies_64 (i.e. return value of
+ * get_jiffies_64() */
+#define time_after64(a,b)	\
+	(typecheck(__u64, a) &&	\
+	 typecheck(__u64, b) && \
+	 ((__s64)(b) - (__s64)(a) < 0))
+#define time_before64(a,b)	time_after64(b,a)
+
+#define time_after_eq64(a,b)	\
+	(typecheck(__u64, a) && \
+	 typecheck(__u64, b) && \
+	 ((__s64)(a) - (__s64)(b) >= 0))
+#define time_before_eq64(a,b)	time_after_eq64(b,a)
+
 /*
  * Have the 32 bit jiffies value wrap 5 minutes after boot
  * so jiffies wrap bugs show up earlier.
-- 
cgit v1.1


From 632bbfeee4f042c05bc65150b4433a297d3fe387 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Mon, 25 Sep 2006 23:30:53 -0700
Subject: [PATCH] trigger a syntax error if percpu macros are incorrectly used

get_cpu_var()/per_cpu()/__get_cpu_var() arguments must be simple
identifiers.  Otherwise the arch dependent implementations might break.

This patch enforces the correct usage of the macros by producing a syntax
error if the variable is not a simple identifier.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a..f926490 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -11,8 +11,14 @@
 #define PERCPU_ENOUGH_ROOM 32768
 #endif
 
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	preempt_disable();				\
+	&__get_cpu_var(var); }))
 #define put_cpu_var(var) preempt_enable()
 
 #ifdef CONFIG_SMP
-- 
cgit v1.1


From a6ca1b99ed434f3fb41bbed647ed36c0420501e5 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Mon, 25 Sep 2006 23:30:55 -0700
Subject: [PATCH] update to the kernel kmap/kunmap API

Give non-highmem architectures access to the kmap API for the purposes of
overriding (this is what the attached patch does).

The proposal is that we should now require all architectures with coherence
issues to manage data coherence via the kmap/kunmap API.  Thus driver
writers never have to write code like

    kmap(page)
    modify data in page
    flush_kernel_dcache_page(page)
    kunmap(page)

instead, kmap/kunmap will manage the coherence and driver (and filesystem)
writers don't need to worry about how to flush between kmap and kunmap.

For most architectures, the page only needs to be flushed if it was
actually written to *and* there are user mappings of it, so the best
implementation looks to be: clear the page dirty pte bit in the kernel page
tables on kmap and on kunmap, check page->mappings for user maps, and then
the dirty bit, and only flush if it both has user mappings and is dirty.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/highmem.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef..42620e7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -29,6 +29,7 @@ unsigned int nr_free_highpages(void);
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+#ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
 {
 	might_sleep();
@@ -41,6 +42,7 @@ static inline void *kmap(struct page *page)
 #define kunmap_atomic(addr, idx)	do { } while (0)
 #define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
+#endif
 
 #endif /* CONFIG_HIGHMEM */
 
-- 
cgit v1.1


From 725d704ecaca4a43f067092c140d4f3271cf2856 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:30:55 -0700
Subject: [PATCH] mm: VM_BUG_ON

Introduce a VM_BUG_ON, which is turned on with CONFIG_DEBUG_VM.  Use this
in the lightweight, inline refcounting functions; PageLRU and PageActive
checks in vmscan, because they're pretty well confined to vmscan.  And in
page allocate/free fastpaths which can be the hottest parts of the kernel
for kbuilds.

Unlike BUG_ON, VM_BUG_ON must not be used to execute statements with
side-effects, and should not be used outside core mm code.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a..7d20b25 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -278,6 +278,12 @@ struct page {
  */
 #include <linux/page-flags.h>
 
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
 /*
  * Methods to modify the page usage count.
  *
@@ -297,7 +303,7 @@ struct page {
  */
 static inline int put_page_testzero(struct page *page)
 {
-	BUG_ON(atomic_read(&page->_count) == 0);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	return atomic_dec_and_test(&page->_count);
 }
 
@@ -307,6 +313,7 @@ static inline int put_page_testzero(struct page *page)
  */
 static inline int get_page_unless_zero(struct page *page)
 {
+	VM_BUG_ON(PageCompound(page));
 	return atomic_inc_not_zero(&page->_count);
 }
 
@@ -323,6 +330,7 @@ static inline void get_page(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	atomic_inc(&page->_count);
 }
 
-- 
cgit v1.1


From d08b3851da41d0ee60851f2c75b118e1f7a5fc89 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Sep 2006 23:30:57 -0700
Subject: [PATCH] mm: tracking shared dirty pages

Tracking of dirty pages in shared writeable mmap()s.

The idea is simple: write protect clean shared writeable pages, catch the
write-fault, make writeable and set dirty.  On page write-back clean all the
PTE dirty bits and write protect them once again.

The implementation is a tad harder, mainly because the default
backing_dev_info capabilities were too loosely maintained.  Hence it is not
enough to test the backing_dev_info for cap_account_dirty.

The current heuristic is as follows, a VMA is eligible when:
 - its shared writeable
    (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)
 - it is not a 'special' mapping
    (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0
 - the backing_dev_info is cap_account_dirty
    mapping_cap_account_dirty(vma->vm_file->f_mapping)
 - f_op->mmap() didn't change the default page protection

Page from remap_pfn_range() are explicitly excluded because their COW
semantics are already horrid enough (see vm_normal_page() in do_wp_page()) and
because they don't have a backing store anyway.

mprotect() is taught about the new behaviour as well.  However it overrides
the last condition.

Cleaning the pages on write-back is done with page_mkclean() a new rmap call.
It can be called on any page, but is currently only implemented for mapped
pages, if the page is found the be of a VMA that accounts dirty pages it will
also wrprotect the PTE.

Finally, in fs/buffers.c:try_to_free_buffers(); remove clear_page_dirty() from
under ->private_lock.  This seems to be safe, since ->private_lock is used to
serialize access to the buffers, not the page itself.  This is needed because
clear_page_dirty() will call into page_mkclean() and would thereby violate
locking order.

[dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h   | 34 ++++++++++++++++++++++++++++++++++
 include/linux/rmap.h | 14 ++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d20b25..4498414 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -810,6 +811,39 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+/*
+ * Some shared mappigns will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+	unsigned int vm_flags = vma->vm_flags;
+
+	/* If it was private or non-writable, the write bit is already clear */
+	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+		return 0;
+
+	/* The backer wishes to know when pages are first written to? */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		return 1;
+
+	/* The open routine did something to the protections already? */
+	if (pgprot_val(vma->vm_page_prot) !=
+	    pgprot_val(protection_map[vm_flags &
+		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+		return 0;
+
+	/* Specialty mapping? */
+	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+		return 0;
+
+	/* Can the mapping track the dirty pages? */
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
 
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b09..db2c1df 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
  */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
+static inline int page_mkclean(struct page *page)
+{
+	return 0;
+}
+
+
 #endif	/* CONFIG_MMU */
 
 /*
-- 
cgit v1.1


From edc79b2a46ed854595e40edcf3f8b37f9f14aa3f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Sep 2006 23:30:58 -0700
Subject: [PATCH] mm: balance dirty pages

Now that we can detect writers of shared mappings, throttle them.  Avoids OOM
by surprise.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/writeback.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036..56a23a0 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
 			loff_t pos, loff_t count);
 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
 			   loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
 
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
-- 
cgit v1.1


From b221385bc41d6789edde3d2fa0cb20d5045730eb Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 25 Sep 2006 23:31:02 -0700
Subject: [PATCH] mm/: make functions static

This patch makes the following needlessly global functions static:
 - slab.c: kmem_find_general_cachep()
 - swap.c: __page_cache_release()
 - vmalloc.c: __vmalloc_node()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h      | 2 --
 include/linux/slab.h    | 2 --
 include/linux/vmalloc.h | 2 --
 3 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4498414..45678b0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -318,8 +318,6 @@ static inline int get_page_unless_zero(struct page *page)
 	return atomic_inc_not_zero(&page->_count);
 }
 
-extern void FASTCALL(__page_cache_release(struct page *));
-
 static inline int page_count(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b..193c03c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -223,7 +222,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
 /* SLOB allocator routines */
 
 void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
 struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
 	unsigned long,
 	void (*)(void *, struct kmem_cache *, unsigned long),
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363..dee88c6b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
 				pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
-				pgprot_t prot, int node);
 extern void vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
-- 
cgit v1.1


From 2d1a07d487d8b36658404839cdf03a974968cefd Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:03 -0700
Subject: [PATCH] bootmem: remove useless __init in header file

__init in headers is pretty useless because the compiler doesn't check it, and
they get out of sync relatively frequently.  So if you see an __init in a
header file, it's quite unreliable and you need to check the definition
anyway.

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c64..c7124d4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -41,23 +41,23 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
+extern unsigned long bootmem_bootmap_pages (unsigned long);
+extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
+extern void free_bootmem (unsigned long addr, unsigned long size);
+extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem_low(unsigned long size,
 					 unsigned long align,
 					 unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
+extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
 					      unsigned long size,
 					      unsigned long align,
 					      unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
+extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long size, unsigned long align, unsigned long goal,
 		unsigned long limit);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
 	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -67,12 +67,12 @@ extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low((x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+extern unsigned long free_all_bootmem (void);
+extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
+extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
+extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
+extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
+extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
@@ -94,14 +94,14 @@ static inline void *alloc_remap(int nid, unsigned long size)
 extern unsigned long __meminitdata nr_kernel_pages;
 extern unsigned long nr_all_pages;
 
-extern void *__init alloc_large_system_hash(const char *tablename,
-					    unsigned long bucketsize,
-					    unsigned long numentries,
-					    int scale,
-					    int flags,
-					    unsigned int *_hash_shift,
-					    unsigned int *_hash_mask,
-					    unsigned long limit);
+extern void * alloc_large_system_hash(const char *tablename,
+				      unsigned long bucketsize,
+				      unsigned long numentries,
+				      int scale,
+				      int flags,
+				      unsigned int *_hash_shift,
+				      unsigned int *_hash_mask,
+				      unsigned long limit);
 
 #define HASH_HIGHMEM	0x00000001	/* Consider highmem? */
 #define HASH_EARLY	0x00000002	/* Allocating during early boot? */
-- 
cgit v1.1


From 71fb2e8f8753b66b1f4295aa264a2eb4e69381e8 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:05 -0700
Subject: [PATCH] bootmem: remove useless parentheses in bootmem header file

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index c7124d4..fa2523f 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -59,13 +59,13 @@ extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
-	__alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
-	__alloc_bootmem_low((x), PAGE_SIZE, 0)
+	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 extern unsigned long free_all_bootmem (void);
 extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
@@ -75,11 +75,11 @@ extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned lo
 extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
-	__alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
-- 
cgit v1.1


From bb0923a66820718f636736b22ce47372f79e3400 Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:05 -0700
Subject: [PATCH] bootmem: limit to 80 columns width

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index fa2523f..1e8dddf 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -44,18 +44,24 @@ typedef struct bootmem_data {
 extern unsigned long bootmem_bootmap_pages (unsigned long);
 extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
 extern void free_bootmem (unsigned long addr, unsigned long size);
-extern void * __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __alloc_bootmem (unsigned long size,
+			       unsigned long align,
+			       unsigned long goal);
+extern void * __alloc_bootmem_nopanic (unsigned long size,
+				       unsigned long align,
+				       unsigned long goal);
 extern void * __alloc_bootmem_low(unsigned long size,
-					 unsigned long align,
-					 unsigned long goal);
+				  unsigned long align,
+				  unsigned long goal);
 extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
-					      unsigned long size,
-					      unsigned long align,
-					      unsigned long goal);
+				       unsigned long size,
+				       unsigned long align,
+				       unsigned long goal);
 extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
-		unsigned long size, unsigned long align, unsigned long goal,
-		unsigned long limit);
+				   unsigned long size,
+				   unsigned long align,
+				   unsigned long goal,
+				   unsigned long limit);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
@@ -68,10 +74,20 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size);
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 extern unsigned long free_all_bootmem (void);
-extern void * __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
+extern void * __alloc_bootmem_node (pg_data_t *pgdat,
+				    unsigned long size,
+				    unsigned long align,
+				    unsigned long goal);
+extern unsigned long init_bootmem_node (pg_data_t *pgdat,
+					unsigned long freepfn,
+					unsigned long startpfn,
+					unsigned long endpfn);
+extern void reserve_bootmem_node (pg_data_t *pgdat,
+				  unsigned long physaddr,
+				  unsigned long size);
+extern void free_bootmem_node (pg_data_t *pgdat,
+			       unsigned long addr,
+			       unsigned long size);
 extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
-- 
cgit v1.1


From e786e86a542ccc1133f333402526ad00b9c088ae Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:06 -0700
Subject: [PATCH] bootmem: remove useless headers inclusions

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1e8dddf..b2067e4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
 #ifndef _LINUX_BOOTMEM_H
 #define _LINUX_BOOTMEM_H
 
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
 #include <linux/mmzone.h>
+#include <asm/dma.h>
 
 /*
  *  simple boot-time physical memory area allocator.
-- 
cgit v1.1


From f71bf0cac730ccb5ebcdf21747db75ae0445ccde Mon Sep 17 00:00:00 2001
From: Franck Bui-Huu <vagabon.xyz@gmail.com>
Date: Mon, 25 Sep 2006 23:31:08 -0700
Subject: [PATCH] bootmem: miscellaneous coding style fixes

It fixes various coding style issues, specially when spaces are useless.  For
example '*' go next to the function name.

Signed-off-by: Franck Bui-Huu <vagabon.xyz@gmail.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 95 +++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b2067e4..31e9abb 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -38,29 +38,30 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
-extern unsigned long bootmem_bootmap_pages (unsigned long);
-extern unsigned long init_bootmem (unsigned long addr, unsigned long memend);
-extern void free_bootmem (unsigned long addr, unsigned long size);
-extern void * __alloc_bootmem (unsigned long size,
-			       unsigned long align,
-			       unsigned long goal);
-extern void * __alloc_bootmem_nopanic (unsigned long size,
-				       unsigned long align,
-				       unsigned long goal);
-extern void * __alloc_bootmem_low(unsigned long size,
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+				     unsigned long align,
+				     unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+				 unsigned long align,
+				 unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+				      unsigned long size,
+				      unsigned long align,
+				      unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+				  unsigned long size,
 				  unsigned long align,
-				  unsigned long goal);
-extern void * __alloc_bootmem_low_node(pg_data_t *pgdat,
-				       unsigned long size,
-				       unsigned long align,
-				       unsigned long goal);
-extern void * __alloc_bootmem_core(struct bootmem_data *bdata,
-				   unsigned long size,
-				   unsigned long align,
-				   unsigned long goal,
-				   unsigned long limit);
+				  unsigned long goal,
+				  unsigned long limit);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
@@ -70,22 +71,24 @@ extern void reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long free_all_bootmem (void);
-extern void * __alloc_bootmem_node (pg_data_t *pgdat,
-				    unsigned long size,
-				    unsigned long align,
-				    unsigned long goal);
-extern unsigned long init_bootmem_node (pg_data_t *pgdat,
-					unsigned long freepfn,
-					unsigned long startpfn,
-					unsigned long endpfn);
-extern void reserve_bootmem_node (pg_data_t *pgdat,
-				  unsigned long physaddr,
-				  unsigned long size);
-extern void free_bootmem_node (pg_data_t *pgdat,
-			       unsigned long addr,
-			       unsigned long size);
-extern unsigned long free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+				       unsigned long freepfn,
+				       unsigned long startpfn,
+				       unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+				 unsigned long physaddr,
+				 unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+			      unsigned long addr,
+			      unsigned long size);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
@@ -102,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
 {
 	return NULL;
 }
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
 extern unsigned long __meminitdata nr_kernel_pages;
 extern unsigned long nr_all_pages;
 
-extern void * alloc_large_system_hash(const char *tablename,
-				      unsigned long bucketsize,
-				      unsigned long numentries,
-				      int scale,
-				      int flags,
-				      unsigned int *_hash_shift,
-				      unsigned int *_hash_mask,
-				      unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int flags,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask,
+				     unsigned long limit);
 
 #define HASH_HIGHMEM	0x00000001	/* Consider highmem? */
 #define HASH_EARLY	0x00000002	/* Allocating during early boot? */
-- 
cgit v1.1


From c1f60a5a419cc60aff27daffb150f5a3a3a79ef4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:11 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: move HIGHMEM counters into highmem.c/.h

Move totalhigh_pages and nr_free_highpages() into highmem.c/.h

Move the totalhigh_pages definition into highmem.c/.h.  Move the
nr_free_highpages function into highmem.c

[yoichi_yuasa@tripeaks.co.jp: build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/highmem.h | 3 +++
 include/linux/swap.h    | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 42620e7..fd7d12d 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,14 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
 
 #else /* CONFIG_HIGHMEM */
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+#define totalhigh_pages 0
+
 #ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184..34a6bc3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -162,7 +162,6 @@ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
 extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
 extern unsigned int nr_free_pages(void);
-- 
cgit v1.1


From 2f1b6248682f8b39ca3c7e549dfc216d26c4109b Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:13 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: use enum to define zones, reformat and
 comment

Use enum for zones and reformat zones dependent information

Add comments explaning the use of zones and add a zones_t type for zone
numbers.

Line up information that will be #ifdefd by the following patches.

[akpm@osdl.org: comment cleanups]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     |  7 +++---
 include/linux/mmzone.h | 66 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 51 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 45678b0..2db4229 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -470,7 +470,7 @@ void split_page(struct page *page, unsigned int order);
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
 #define ZONETABLE_MASK		((1UL << ZONETABLE_SHIFT) - 1)
 
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
 {
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
@@ -499,11 +499,12 @@ static inline unsigned long page_to_section(struct page *page)
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
 }
 
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
 	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
 }
+
 static inline void set_page_node(struct page *page, unsigned long node)
 {
 	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -515,7 +516,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
 	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
 }
 
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c..03a5a6e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -88,14 +88,53 @@ struct per_cpu_pageset {
 #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
 #endif
 
-#define ZONE_DMA		0
-#define ZONE_DMA32		1
-#define ZONE_NORMAL		2
-#define ZONE_HIGHMEM		3
+enum zone_type {
+	/*
+	 * ZONE_DMA is used when there are devices that are not able
+	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+	 * carve out the portion of memory that is needed for these devices.
+	 * The range is arch specific.
+	 *
+	 * Some examples
+	 *
+	 * Architecture		Limit
+	 * ---------------------------
+	 * parisc, ia64, sparc	<4G
+	 * s390			<2G
+	 * arm26		<48M
+	 * arm			Various
+	 * alpha		Unlimited or 0-16MB.
+	 *
+	 * i386, x86_64 and multiple other arches
+	 * 			<16M.
+	 */
+	ZONE_DMA,
+	/*
+	 * x86_64 needs two ZONE_DMAs because it supports devices that are
+	 * only able to do DMA to the lower 16M but also 32 bit devices that
+	 * can only do DMA areas below 4G.
+	 */
+	ZONE_DMA32,
+	/*
+	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+	 * performed on pages in ZONE_NORMAL if the DMA devices support
+	 * transfers to all addressable memory.
+	 */
+	ZONE_NORMAL,
+	/*
+	 * A memory area that is only addressable by the kernel through
+	 * mapping portions into its own address space. This is for example
+	 * used by i386 to allow the kernel to address the memory beyond
+	 * 900MB. The kernel will set up special mappings (page
+	 * table entries on i386) for each page that the kernel needs to
+	 * access.
+	 */
+	ZONE_HIGHMEM,
 
-#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
+	MAX_NR_ZONES
+};
 
+#define	ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 /*
  * When a memory allocation must conform to specific limitations (such
@@ -126,16 +165,6 @@ struct per_cpu_pageset {
 /* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
 #define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
 
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA	  < 16 MB	ISA DMA capable memory
- * ZONE_DMA32	     0 MB 	Empty
- * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
- * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
- */
-
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		free_pages;
@@ -266,7 +295,6 @@ struct zone {
 	char			*name;
 } ____cacheline_internodealigned_in_smp;
 
-
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -373,12 +401,12 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
 {
 	return (idx == ZONE_HIGHMEM);
 }
 
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
 {
 	return (idx == ZONE_NORMAL);
 }
-- 
cgit v1.1


From fb0e7942bdcbbd2f90e61cb4cfa4fa892a873f8a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:13 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_DMA32 optional

Make ZONE_DMA32 optional

- Add #ifdefs around ZONE_DMA32 specific code and definitions.

- Add CONFIG_ZONE_DMA32 config option and use that for x86_64
  that alone needs this zone.

- Remove the use of CONFIG_DMA_IS_DMA32 and CONFIG_DMA_IS_NORMAL
  for ia64 and fix up the way per node ZVCs are calculated.

- Fall back to prior GFP_ZONEMASK of 0x03 if there is no
  DMA32 zone.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h    |  2 +-
 include/linux/mmzone.h | 16 +++++++++++++---
 include/linux/vmstat.h |  4 +---
 3 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e6084..14610b5 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -13,7 +13,7 @@ struct vm_area_struct;
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
+#ifndef CONFIG_ZONE_DMA32
 #define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
 #elif BITS_PER_LONG < 64
 #define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 03a5a6e..adae3c9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -109,12 +109,14 @@ enum zone_type {
 	 * 			<16M.
 	 */
 	ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
 	/*
 	 * x86_64 needs two ZONE_DMAs because it supports devices that are
 	 * only able to do DMA to the lower 16M but also 32 bit devices that
 	 * can only do DMA areas below 4G.
 	 */
 	ZONE_DMA32,
+#endif
 	/*
 	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
 	 * performed on pages in ZONE_NORMAL if the DMA devices support
@@ -161,9 +163,13 @@ enum zone_type {
  *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
-#define GFP_ZONEMASK	0x07
-/* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
-#define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
+#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
+
+#ifdef CONFIG_ZONE_DMA32
+#define GFP_ZONEMASK		0x07
+#else
+#define GFP_ZONEMASK		0x03
+#endif
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
@@ -429,7 +435,11 @@ static inline int is_normal(struct zone *zone)
 
 static inline int is_dma32(struct zone *zone)
 {
+#ifdef CONFIG_ZONE_DMA32
 	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_dma(struct zone *zone)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b6..9c6e62c 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -124,12 +124,10 @@ static inline unsigned long node_page_state(int node,
 	struct zone *zones = NODE_DATA(node)->node_zones;
 
 	return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
 		zone_page_state(&zones[ZONE_DMA32], item) +
 #endif
 		zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
 #ifdef CONFIG_HIGHMEM
 		zone_page_state(&zones[ZONE_HIGHMEM], item) +
 #endif
-- 
cgit v1.1


From e53ef38d05dd59ed281a35590e4a5b64d8ff4c52 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:14 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: make ZONE_HIGHMEM optional

Make ZONE_HIGHMEM optional

- ifdef out code and definitions related to CONFIG_HIGHMEM

- __GFP_HIGHMEM falls back to normal allocations if there is no
  ZONE_HIGHMEM

- GFP_ZONEMASK becomes 0x01 if there is no DMA32 and no HIGHMEM
  zone.

[jdike@addtoit.com: build fix]
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h    | 18 ++++++++++--------
 include/linux/mmzone.h | 36 +++++++++++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 14610b5..2a2153e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,19 @@ struct vm_area_struct;
 
 /*
  * GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * These may be masked by GFP_ZONEMASK to make allocations with this bit
+ * set fall back to ZONE_NORMAL.
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use the consistently. The definitions here may
+ * be used in bit comparisons.
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
-#ifndef CONFIG_ZONE_DMA32
-#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32	((__force gfp_t)0x04u)
 
 /*
  * Action modifiers - doesn't change the zoning
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index adae3c9..76d33e6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -123,6 +123,7 @@ enum zone_type {
 	 * transfers to all addressable memory.
 	 */
 	ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
 	/*
 	 * A memory area that is only addressable by the kernel through
 	 * mapping portions into its own address space. This is for example
@@ -132,11 +133,10 @@ enum zone_type {
 	 * access.
 	 */
 	ZONE_HIGHMEM,
-
+#endif
 	MAX_NR_ZONES
 };
 
-#define	ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 /*
  * When a memory allocation must conform to specific limitations (such
@@ -163,12 +163,34 @@ enum zone_type {
  *
  * NOTE! Make sure this matches the zones in <linux/gfp.h>
  */
-#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
 
 #ifdef CONFIG_ZONE_DMA32
+
+#ifdef CONFIG_HIGHMEM
+#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
 #define GFP_ZONEMASK		0x07
+#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 #else
+#define GFP_ZONETYPES		((0x07 + 1) / 2 + 1)    /* Loner */
+/* Mask __GFP_HIGHMEM */
+#define GFP_ZONEMASK		0x05
+#define ZONES_SHIFT		2
+#endif
+
+#else
+#ifdef CONFIG_HIGHMEM
+
 #define GFP_ZONEMASK		0x03
+#define ZONES_SHIFT		2
+#define GFP_ZONETYPES		3
+
+#else
+
+#define GFP_ZONEMASK		0x01
+#define ZONES_SHIFT		1
+#define GFP_ZONETYPES		2
+
+#endif
 #endif
 
 struct zone {
@@ -409,7 +431,11 @@ static inline int populated_zone(struct zone *zone)
 
 static inline int is_highmem_idx(enum zone_type idx)
 {
+#ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM);
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal_idx(enum zone_type idx)
@@ -425,7 +451,11 @@ static inline int is_normal_idx(enum zone_type idx)
  */
 static inline int is_highmem(struct zone *zone)
 {
+#ifdef CONFIG_HIGHMEM
 	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal(struct zone *zone)
-- 
cgit v1.1


From 27bf71c2a7e596ed34e9bf2d4a5030321a09a1ad Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:15 -0700
Subject: [PATCH] reduce MAX_NR_ZONES: remove display of counters for
 unconfigured zones

eventcounters: Do not display counters for zones that are not available on an
arch

Do not define or display counters for the DMA32 and the HIGHMEM zone if such
zones were not configured.

[akpm@osdl.org: s390 fix]
[heiko.carstens@de.ibm.com: s390 fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/vmstat.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 9c6e62c..176c7f7 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
  * generated will simply be the increment of a global address.
  */
 
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
 
 enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGALLOC),
-- 
cgit v1.1


From 4e4785bcf0c8503224fa6c17d8e0228de781bff6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:17 -0700
Subject: [PATCH] mempolicies: fix policy_zone check

There is a check in zonelist_policy that compares pieces of the bitmap
obtained from a gfp mask via GFP_ZONETYPES with a zone number in function
zonelist_policy().

The bitmap is an ORed mask of __GFP_DMA, __GFP_DMA32 and __GFP_HIGHMEM.
The policy_zone is a zone number with the possible values of ZONE_DMA,
ZONE_DMA32, ZONE_HIGHMEM and ZONE_NORMAL. These are two different domains
of values.

For some reason seemed to work before the zone reduction patchset (It
definitely works on SGI boxes since we just have one zone and the check
cannot fail).

With the zone reduction patchset this check definitely fails on systems
with two zones if the system actually has memory in both zones.

This is because ZONE_NORMAL is selected using no __GFP flag at
all and thus gfp_zone(gfpmask) == 0. ZONE_DMA is selected when __GFP_DMA
is set. __GFP_DMA is 0x01.  So gfp_zone(gfpmask) == 1.

policy_zone is set to ZONE_NORMAL (==1) if ZONE_NORMAL and ZONE_DMA are
populated.

For ZONE_NORMAL gfp_zone(<no _GFP_DMA>) yields 0 which is <
policy_zone(ZONE_NORMAL) and so policy is not applied to regular memory
allocations!

Instead gfp_zone(__GFP_DMA) == 1 which results in policy being applied
to DMA allocations!

What we realy want in that place is to establish the highest allowable
zone for a given gfp_mask. If the highest zone is higher or equal to the
policy_zone then memory policies need to be applied. We have such
a highest_zone() function in page_alloc.c.

So move the highest_zone() function from mm/page_alloc.c into
include/linux/gfp.h.  On the way we simplify the function and use the new
zone_type that was also introduced with the zone reduction patchset plus we
also specify the right type for the gfp flags parameter.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2a2153e..a0992d3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -85,6 +85,21 @@ static inline int gfp_zone(gfp_t gfp)
 	return zone;
 }
 
+static inline enum zone_type highest_zone(gfp_t flags)
+{
+	if (flags & __GFP_DMA)
+		return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+	if (flags & __GFP_DMA32)
+		return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+	if (flags & __GFP_HIGHMEM)
+		return ZONE_HIGHMEM;
+#endif
+	return ZONE_NORMAL;
+}
+
 /*
  * There is only one page-allocator function, and two main namespaces to
  * it. The alloc_page*() variants return 'struct page *' and as such
-- 
cgit v1.1


From 2f6726e54a9410e2e4cee864947c05e954051916 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:18 -0700
Subject: [PATCH] Apply type enum zone_type

After we have done this we can now do some typing cleanup.

The memory policy layer keeps a policy_zone that specifies
the zone that gets memory policies applied. This variable
can now be of type enum zone_type.

The check_highest_zone function and the build_zonelists funnctionm must
then also take a enum zone_type parameter.

Plus there are a number of loops over zones that also should use
zone_type.

We run into some troubles at some points with functions that need a
zone_type variable to become -1. Fix that up.

[pj@sgi.com: fix set_mempolicy() crash]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mempolicy.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0..09f0f57 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr);
 extern unsigned slab_node(struct mempolicy *policy);
 
-extern int policy_zone;
+extern enum zone_type policy_zone;
 
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
 {
 	if (k > policy_zone)
 		policy_zone = k;
-- 
cgit v1.1


From 19655d3487001d7df0e10e9cbfc27c758b77c2b5 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:19 -0700
Subject: [PATCH] linearly index zone->node_zonelists[]

I wonder why we need this bitmask indexing into zone->node_zonelists[]?

We always start with the highest zone and then include all lower zones
if we build zonelists.

Are there really cases where we need allocation from ZONE_DMA or
ZONE_HIGHMEM but not ZONE_NORMAL? It seems that the current implementation
of highest_zone() makes that already impossible.

If we go linear on the index then gfp_zone() == highest_zone() and a lot
of definitions fall by the wayside.

We can now revert back to the use of gfp_zone() in mempolicy.c ;-)

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h    | 12 +-----------
 include/linux/mmzone.h | 52 +++++---------------------------------------------
 2 files changed, 6 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a0992d3..63ab88a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -12,9 +12,6 @@ struct vm_area_struct;
  *
  * Zone modifiers (see linux/mmzone.h - low three bits)
  *
- * These may be masked by GFP_ZONEMASK to make allocations with this bit
- * set fall back to ZONE_NORMAL.
- *
  * Do not put any conditional on these. If necessary modify the definitions
  * without the underscores and use the consistently. The definitions here may
  * be used in bit comparisons.
@@ -78,14 +75,7 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
 
-static inline int gfp_zone(gfp_t gfp)
-{
-	int zone = GFP_ZONEMASK & (__force int) gfp;
-	BUG_ON(zone >= GFP_ZONETYPES);
-	return zone;
-}
-
-static inline enum zone_type highest_zone(gfp_t flags)
+static inline enum zone_type gfp_zone(gfp_t flags)
 {
 	if (flags & __GFP_DMA)
 		return ZONE_DMA;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 76d33e6..7fe3171 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -137,60 +137,18 @@ enum zone_type {
 	MAX_NR_ZONES
 };
 
-
 /*
  * When a memory allocation must conform to specific limitations (such
  * as being suitable for DMA) the caller will pass in hints to the
  * allocator in the gfp_mask, in the zone modifier bits.  These bits
  * are used to select a priority ordered list of memory zones which
- * match the requested limits.  GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers.  Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists).  Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field.  This allows us to reduce the
- * extent of the zonelists thus saving space.  For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
  */
 
-#ifdef CONFIG_ZONE_DMA32
-
-#ifdef CONFIG_HIGHMEM
-#define GFP_ZONETYPES		((GFP_ZONEMASK + 1) / 2 + 1)    /* Loner */
-#define GFP_ZONEMASK		0x07
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
-#else
-#define GFP_ZONETYPES		((0x07 + 1) / 2 + 1)    /* Loner */
-/* Mask __GFP_HIGHMEM */
-#define GFP_ZONEMASK		0x05
-#define ZONES_SHIFT		2
-#endif
-
-#else
-#ifdef CONFIG_HIGHMEM
-
-#define GFP_ZONEMASK		0x03
-#define ZONES_SHIFT		2
-#define GFP_ZONETYPES		3
-
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
 #else
-
-#define GFP_ZONEMASK		0x01
-#define ZONES_SHIFT		1
-#define GFP_ZONETYPES		2
-
-#endif
+#define ZONES_SHIFT 2
 #endif
 
 struct zone {
@@ -360,7 +318,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[GFP_ZONETYPES];
+	struct zonelist node_zonelists[MAX_NR_ZONES];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
-- 
cgit v1.1


From 8bc719d3cab8414938f9ea6e33b58d8810d18068 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 25 Sep 2006 23:31:20 -0700
Subject: [PATCH] out of memory notifier

Add a notifer chain to the out of memory killer.  If one of the registered
callbacks could release some memory, do not kill the process but return and
retry the allocation that forced the oom killer to run.

The purpose of the notifier is to add a safety net in the presence of
memory ballooners.  If the resource manager inflated the balloon to a size
where memory allocations can not be satisfied anymore, it is better to
deflate the balloon a bit instead of killing processes.

The implementation for the s390 ballooner is included.

[akpm@osdl.org: cleanups]
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 34a6bc3..32db06c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,8 @@
 #include <asm/atomic.h>
 #include <asm/page.h>
 
+struct notifier_block;
+
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
@@ -156,6 +158,8 @@ struct swap_list_t {
 
 /* linux/mm/oom_kill.c */
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
-- 
cgit v1.1


From 7ff6f08295d90ab20d25200ef485ebb45b1b8d71 Mon Sep 17 00:00:00 2001
From: Martin Peschke <mp3@de.ibm.com>
Date: Mon, 25 Sep 2006 23:31:21 -0700
Subject: [PATCH] CPU hotplug compatible alloc_percpu()

This patch splits alloc_percpu() up into two phases.  Likewise for
free_percpu().  This allows clients to limit initial allocations to online
cpu's, and to populate or depopulate per-cpu data at run time as needed:

  struct my_struct *obj;

  /* initial allocation for online cpu's */
  obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL);

  ...

  /* populate per-cpu data for cpu coming online */
  ptr = percpu_populate(obj, sizeof(struct my_struct), GFP_KERNEL, cpu);

  ...

  /* access per-cpu object */
  ptr = percpu_ptr(obj, smp_processor_id());

  ...

  /* depopulate per-cpu data for cpu going offline */
  percpu_depopulate(obj, cpu);

  ...

  /* final removal */
  percpu_free(obj);

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu.h | 79 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f926490..3835a96 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
+
 #include <linux/spinlock.h> /* For preempt_disable() */
 #include <linux/slab.h> /* For kmalloc() */
 #include <linux/smp.h>
 #include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
 #include <asm/percpu.h>
 
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -27,39 +30,77 @@ struct percpu_data {
 	void *ptrs[NR_CPUS];
 };
 
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
 /* 
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu.  Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
  * probably be combined with get_cpu()/put_cpu().
  */ 
-#define per_cpu_ptr(ptr, cpu)                   \
-({                                              \
-        struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
-        (__typeof__(ptr))__p->ptrs[(cpu)];	\
+#define percpu_ptr(ptr, cpu)                              \
+({                                                        \
+        struct percpu_data *__p = __percpu_disguise(ptr); \
+        (__typeof__(ptr))__p->ptrs[(cpu)];	          \
 })
 
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+				  cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
 
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+				    int cpu)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
+	return percpu_ptr(__pdata, cpu);
 }
-static inline void free_percpu(const void *ptr)
-{	
-	kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+					 cpumask_t *mask)
+{
+	return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+	kfree(__pdata);
 }
 
 #endif /* CONFIG_SMP */
 
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type)	((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+	__percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+	__percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size)	percpu_alloc_mask((size), GFP_KERNEL, \
+						  cpu_possible_map)
+#define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr)	percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu)	percpu_ptr((ptr), (cpu))
 
 #endif /* __LINUX_PERCPU_H */
-- 
cgit v1.1


From db37648cd6ce9b828abd6d49aa3d269926ee7b7d Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:31:24 -0700
Subject: [PATCH] mm: non syncing lock_page()

lock_page needs the caller to have a reference on the page->mapping inode
due to sync_page, ergo set_page_dirty_lock is obviously buggy according to
its comments.

Solve it by introducing a new lock_page_nosync which does not do a sync_page.

akpm: unpleasant solution to an unpleasant problem.  If it goes wrong it could
cause great slowdowns while the lock_page() caller waits for kblockd to
perform the unplug.  And if a filesystem has special sync_page() requirements
(none presently do), permanent hangs are possible.

otoh, set_page_dirty_lock() is usually (always?) called against userspace
pages.  They are always up-to-date, so there shouldn't be any pending read I/O
against these pages.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pagemap.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d2..64f9509 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 }
 
 extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
 extern void FASTCALL(unlock_page(struct page *page));
 
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
 static inline void lock_page(struct page *page)
 {
 	might_sleep();
 	if (TestSetPageLocked(page))
 		__lock_page(page);
 }
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+	might_sleep();
+	if (TestSetPageLocked(page))
+		__lock_page_nosync(page);
+}
 	
 /*
  * This is exported only for wait_on_page_locked/wait_on_page_writeback.
-- 
cgit v1.1


From da6052f7b33abe55fbfd7d2213815f58c00a88d4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 25 Sep 2006 23:31:35 -0700
Subject: [PATCH] update some mm/ comments

Let's try to keep mm/ comments more useful and up to date. This is a start.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h         | 68 +++++++++++++++++++++++++++-------------------
 include/linux/page-flags.h | 35 ++++++++++++++----------
 2 files changed, 60 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2db4229..f201877 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -219,7 +219,8 @@ struct inode;
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
  * moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
  */
 struct page {
 	unsigned long flags;		/* Atomic flags, some possibly
@@ -299,8 +300,7 @@ struct page {
  */
 
 /*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
  */
 static inline int put_page_testzero(struct page *page)
 {
@@ -356,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
  * For the non-reserved pages, page_count(page) denotes a reference count.
  *   page_count() == 0 means the page is free. page->lru is then used for
  *   freelist management in the buddy allocator.
- *   page_count() == 1 means the page is used for exactly one purpose
- *   (e.g. a private data page of one process).
+ *   page_count() > 0  means the page has been allocated.
  *
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
  *
- * The other pages (we may call them "process pages") are completely
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
+ *
+ * The other pages (we may call them "pagecache pages") are completely
  * managed by the Linux memory manager: I/O, buffers, swapping etc.
  * The following discussion applies only to them.
  *
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
  *
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space.  Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
  *
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
  *
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. The each user mapping also has a reference to the page.
  *
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.  
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
  * - inode pages may need to be read from disk,
  * - inode pages which have been modified and are MAP_SHARED may need
- *   to be written to disk,
- * - private pages which have been modified may need to be swapped out
- *   to swap space and (later) to be read back into memory.
+ *   to be written back to the inode on disk,
+ * - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ *   modified may need to be swapped out to swap space and (later) to be read
+ *   back into memory.
  */
 
 /*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642..9d7921d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
  * PG_reserved is set for special pages, which can never be swapped out. Some
  * of them might not even exist (eg empty_bad_page)...
  *
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for its own usage.
  *
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
  *
  * PG_uptodate tells whether the page's contents is valid.  When a read
  * completes, the page becomes uptodate, unless a disk I/O error happened.
  *
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table.  This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
  *
  * PG_error is set to indicate that an I/O error occurred on this page.
  *
@@ -42,6 +43,10 @@
  * space, they need to be kmapped separately for doing IO on the pages.  The
  * struct page (these bits with information) are always mapped into kernel
  * address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
  */
 
 /*
@@ -74,7 +79,7 @@
 #define PG_checked		 8	/* kill me in 2.5.<early>. */
 #define PG_arch_1		 9
 #define PG_reserved		10
-#define PG_private		11	/* Has something at ->private */
+#define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
 #define PG_nosave		13	/* Used for system suspend/resume */
@@ -83,7 +88,7 @@
 
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Free, should not be written */
+#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 
 
-- 
cgit v1.1


From dbe5e69d2d6e591996ea2b817b887d03b60bb143 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 25 Sep 2006 23:31:36 -0700
Subject: [PATCH] slab: optimize kmalloc_node the same way as kmalloc

[akpm@osdl.org: export fix]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/slab.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 193c03c..2f6bef6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -202,7 +202,30 @@ extern int slab_is_available(void);
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	if (__builtin_constant_p(size)) {
+		int i = 0;
+#define CACHE(x) \
+		if (size <= x) \
+			goto found; \
+		else \
+			i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		{
+			extern void __you_cannot_kmalloc_that_much(void);
+			__you_cannot_kmalloc_that_much();
+		}
+found:
+		return kmem_cache_alloc_node((flags & GFP_DMA) ?
+			malloc_sizes[i].cs_dmacachep :
+			malloc_sizes[i].cs_cachep, flags, node);
+	}
+	return __kmalloc_node(size, flags, node);
+}
 #else
 static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
 {
-- 
cgit v1.1


From 9b819d204cf602eab1a53a9ec4b8d2ca51e02a1d Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:40 -0700
Subject: [PATCH] Add __GFP_THISNODE to avoid fallback to other nodes and
 ignore cpuset/memory policy restrictions

Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes.  This
flag is essential if a kernel component requires memory to be located on a
certain node.  It will be needed for alloc_pages_node() to force allocation
on the indicated node and for alloc_pages() to force allocation on the
current node.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 63ab88a..0eda5b6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -45,6 +45,7 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -53,7 +54,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
-- 
cgit v1.1


From 980128f223fa3c75e3ebdde650c9f1bcabd4c0a2 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:46 -0700
Subject: [PATCH] Define easier to handle GFP_THISNODE

In many places we will need to use the same combination of flags.  Specify
a single GFP_THISNODE definition for ease of use in gfp.h.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0eda5b6..8b34aab 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -67,6 +67,8 @@ struct vm_area_struct;
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
+#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
 
-- 
cgit v1.1


From 8417bba4b151346ed475fcc923693c9e3be89063 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:51 -0700
Subject: [PATCH] Replace min_unmapped_ratio by min_unmapped_pages in struct
 zone

*_pages is a better description of the role of the variable.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7fe3171..a703527 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -169,7 +169,7 @@ struct zone {
 	/*
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
-	unsigned long		min_unmapped_ratio;
+	unsigned long		min_unmapped_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
-- 
cgit v1.1


From 972d1a7b140569084439a81265a0f15b74e924e0 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:51 -0700
Subject: [PATCH] ZVC: Support NR_SLAB_RECLAIMABLE / NR_SLAB_UNRECLAIMABLE

Remove the atomic counter for slab_reclaim_pages and replace the counter
and NR_SLAB with two ZVC counter that account for unreclaimable and
reclaimable slab pages: NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE.

Change the check in vmscan.c to refer to to NR_SLAB_RECLAIMABLE.  The
intend seems to be to check for slab pages that could be freed.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 3 ++-
 include/linux/slab.h   | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a703527..08c41b9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
-	NR_SLAB,	/* Pages used by slab allocator */
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,	/* used for pagetables */
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2f6bef6..66d6eb7 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -284,8 +284,6 @@ extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sighand_cachep;
 extern kmem_cache_t	*bio_cachep;
 
-extern atomic_t slab_reclaim_pages;
-
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
-- 
cgit v1.1


From 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:52 -0700
Subject: [PATCH] zone_reclaim: dynamic slab reclaim

Currently one can enable slab reclaim by setting an explicit option in
/proc/sys/vm/zone_reclaim_mode.  Slab reclaim is then used as a final
option if the freeing of unmapped file backed pages is not enough to free
enough pages to allow a local allocation.

However, that means that the slab can grow excessively and that most memory
of a node may be used by slabs.  We have had a case where a machine with
46GB of memory was using 40-42GB for slab.  Zone reclaim was effective in
dealing with pagecache pages.  However, slab reclaim was only done during
global reclaim (which is a bit rare on NUMA systems).

This patch implements slab reclaim during zone reclaim.  Zone reclaim
occurs if there is a danger of an off node allocation.  At that point we

1. Shrink the per node page cache if the number of pagecache
   pages is more than min_unmapped_ratio percent of pages in a zone.

2. Shrink the slab cache if the number of the nodes reclaimable slab pages
   (patch depends on earlier one that implements that counter)
   are more than min_slab_ratio (a new /proc/sys/vm tunable).

The shrinking of the slab cache is a bit problematic since it is not node
specific.  So we simply calculate what point in the slab we want to reach
(current per node slab use minus the number of pages that neeed to be
allocated) and then repeately run the global reclaim until that is
unsuccessful or we have reached the limit.  I hope we will have zone based
slab reclaim at some point which will make that easier.

The default for the min_slab_ratio is 5%

Also remove the slab option from /proc/sys/vm/zone_reclaim_mode.

[akpm@osdl.org: cleanups]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 3 +++
 include/linux/swap.h   | 1 +
 include/linux/sysctl.h | 1 +
 3 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 08c41b9..3693f1a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -171,6 +171,7 @@ struct zone {
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
 	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -448,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
 					void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 32db06c..a2f5ad7 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -193,6 +193,7 @@ extern long vm_total_pages;
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed91..eca5557 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
 	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
 };
 
 
-- 
cgit v1.1


From 89fa30242facca249aead2aac03c4c69764f911c Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 25 Sep 2006 23:31:55 -0700
Subject: [PATCH] NUMA: Add zone_to_nid function

There are many places where we need to determine the node of a zone.
Currently we use a difficult to read sequence of pointer dereferencing.
Put that into an inline function and use throughout VM.  Maybe we can find
a way to optimize the lookup in the future.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f201877..856f0ee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -499,12 +499,17 @@ static inline struct zone *page_zone(struct page *page)
 	return zone_table[page_zone_id(page)];
 }
 
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+	return zone->zone_pgdat->node_id;
+}
+
 static inline unsigned long page_to_nid(struct page *page)
 {
 	if (FLAGS_HAS_NODE)
 		return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
 	else
-		return page_zone(page)->zone_pgdat->node_id;
+		return zone_to_nid(page_zone(page));
 }
 static inline unsigned long page_to_section(struct page *page)
 {
-- 
cgit v1.1


From 62bac0185ad3dfef11d9602980445c54d45199c6 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:56 -0700
Subject: [PATCH] selinux: eliminate selinux_task_ctxid

Eliminate selinux_task_ctxid since it duplicates selinux_task_get_sid.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e39..79e4707 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -70,16 +70,6 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
 void selinux_audit_set_callback(int (*callback)(void));
 
 /**
- *	selinux_task_ctxid - determine a context ID for a process.
- *	@tsk: the task object
- *	@ctxid: ID value returned via this
- *
- *	On return, ctxid will contain an ID for the context.  This value
- *	should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
-/**
  *     selinux_ctxid_to_string - map a security context ID to a string
  *     @ctxid: security context ID to be converted.
  *     @ctx: address of context string to be returned
@@ -166,11 +156,6 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
 	return;
 }
 
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
-	*ctxid = 0;
-}
-
 static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
 {
        *ctx = NULL;
-- 
cgit v1.1


From 1a70cd40cb291c25b67ec0da715a49d76719329d Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:57 -0700
Subject: [PATCH] selinux: rename selinux_ctxid_to_string

Rename selinux_ctxid_to_string to selinux_sid_to_string to be
consistent with other interfaces.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 79e4707..df9098d 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -70,8 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
 void selinux_audit_set_callback(int (*callback)(void));
 
 /**
- *     selinux_ctxid_to_string - map a security context ID to a string
- *     @ctxid: security context ID to be converted.
+ *     selinux_sid_to_string - map a security context ID to a string
+ *     @sid: security context ID to be converted.
  *     @ctx: address of context string to be returned
  *     @ctxlen: length of returned context string.
  *
@@ -79,7 +79,7 @@ void selinux_audit_set_callback(int (*callback)(void));
  *     string will be allocated internally, and the caller must call
  *     kfree() on it after use.
  */
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
 
 /**
  *     selinux_get_inode_sid - get the inode's security context ID
@@ -156,7 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
 	return;
 }
 
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
 {
        *ctx = NULL;
        *ctxlen = 0;
-- 
cgit v1.1


From 9a2f44f01a67a6ecca71515af999895b45a2aeb0 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 25 Sep 2006 23:31:58 -0700
Subject: [PATCH] selinux: replace ctxid with sid in selinux_audit_rule_match
 interface

Replace ctxid with sid in selinux_audit_rule_match interface for
consistency with other interfaces.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/selinux.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index df9098d..d1b7ca6 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
 
 /**
  *	selinux_audit_rule_match - determine if a context ID matches a rule.
- *	@ctxid: the context ID to check
+ *	@sid: the context ID to check
  *	@field: the field this rule refers to
  *	@op: the operater the rule uses
  *	@rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
  *	Returns 1 if the context id matches the rule, 0 if it does not, and
  *	-errno on failure.
  */
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                              struct selinux_audit_rule *rule,
                              struct audit_context *actx);
 
@@ -144,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
 	return;
 }
 
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                                            struct selinux_audit_rule *rule,
                                            struct audit_context *actx)
 {
-- 
cgit v1.1


From af8c65b57aaa4ae321af34dbfc5ca7f5625263fe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Sep 2006 23:32:07 -0700
Subject: [PATCH] FRV: permit __do_IRQ() to be dispensed with

Permit __do_IRQ() to be dispensed with based on a configuration option.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d90..48d3cb3 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
  * Monolithic do_IRQ implementation.
  * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
 
 /*
  * Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
 {
 	struct irq_desc *desc = irq_desc + irq;
 
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+	desc->handle_irq(irq, desc, regs);
+#else
 	if (likely(desc->handle_irq))
 		desc->handle_irq(irq, desc, regs);
 	else
 		__do_IRQ(irq, regs);
+#endif
 }
 
 /* Handling of unhandled and spurious interrupts: */
-- 
cgit v1.1


From 5f97f7f9400de47ae837170bb274e90ad3934386 Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <hskinnemoen@atmel.com>
Date: Mon, 25 Sep 2006 23:32:13 -0700
Subject: [PATCH] avr32 architecture

This adds support for the Atmel AVR32 architecture as well as the AT32AP7000
CPU and the AT32STK1000 development board.

AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for
cost-sensitive embedded applications, with particular emphasis on low power
consumption and high code density.  The AVR32 architecture is not binary
compatible with earlier 8-bit AVR architectures.

The AVR32 architecture, including the instruction set, is described by the
AVR32 Architecture Manual, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf

The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture.  It
features a 7-stage pipeline, 16KB instruction and data caches and a full
Memory Management Unit.  It also comes with a large set of integrated
peripherals, many of which are shared with the AT91 ARM-based controllers from
Atmel.

Full data sheet is available from

http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf

while the CPU core implementation including caches and MMU is documented by
the AVR32 AP Technical Reference, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf

Information about the AT32STK1000 development board can be found at

http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918

including a BSP CD image with an earlier version of this patch, development
tools (binaries and source/patches) and a root filesystem image suitable for
booting from SD card.

Alternatively, there's a preliminary "getting started" guide available at
http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links
to the sources and patches you will need in order to set up a cross-compiling
environment for avr32-linux.

This patch, as well as the other patches included with the BSP and the
toolchain patches, is actively supported by Atmel Corporation.

[dmccr@us.ibm.com: Fix more pxx_page macro locations]
[bunk@stusta.de: fix `make defconfig']
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave McCracken <dmccr@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/elf-em.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c..666e0a5 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
+#define EM_AVR32	0x18ad	/* Atmel AVR32 */
 
 /*
  * This is an interim value that we will use until the committee comes
-- 
cgit v1.1


From 9c9b8b388296ad5a306ab238dc677cfe6ff4cb12 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@xensource.com>
Date: Mon, 25 Sep 2006 23:32:26 -0700
Subject: [PATCH] x86: put .note.* sections into a PT_NOTE segment in vmlinux

This patch will pack any .note.* section into a PT_NOTE segment in the output
file.

To do this, we tell ld that we need a PT_NOTE segment.  This requires us to
start explicitly mapping sections to segments, so we also need to explicitly
create PT_LOAD segments for text and data, and map the sections to them
appropriately.  Fortunately, each section will default to its previous
section's segment, so it doesn't take many changes to vmlinux.lds.S.

This only changes i386 for now, but I presume the corresponding changes for
other architectures will be as simple.

This change also adds <linux/elfnote.h>, which defines C and Assembler macros
for actually creating ELF notes.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/elfnote.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 include/linux/elfnote.h

(limited to 'include/linux')

diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 0000000..16f9f8e
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,88 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image.  These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc.  The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name.  The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42).  The
+ * "desc" field is the actual data.  There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME.  When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment.  Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent the output file.
+ *
+ * This file defines macros for both C and assembler use.  Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding.  The 'desc' argument includes
+ * the assembler pseudo op defining the type of the data: .asciz
+ * "hello, world"
+ */
+.macro ELFNOTE name type desc:vararg
+.pushsection ".note.\name"
+  .align 4
+  .long 2f - 1f			/* namesz */
+  .long 4f - 3f			/* descsz */
+  .long \type
+1:.asciz "\name"
+2:.align 4
+3:\desc
+4:.align 4
+.popsection
+.endm
+#else	/* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data.  The size and
+ * type of name and desc depend on the macro arguments.  "name" must
+ * be a literal string, and "desc" must be passed by value.  You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b)	a##b
+#define _ELFNOTE(size, name, unique, type, desc)			\
+	static const struct {						\
+		struct elf##size##_note _nhdr;				\
+		unsigned char _name[sizeof(name)]			\
+		__attribute__((aligned(sizeof(Elf##size##_Word))));	\
+		typeof(desc) _desc					\
+			     __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+	} _ELFNOTE_PASTE(_note_, unique)				\
+		__attribute_used__					\
+		__attribute__((section(".note." name),			\
+			       aligned(sizeof(Elf##size##_Word)),	\
+			       unused)) = {				\
+		{							\
+			sizeof(name),					\
+			sizeof(desc),					\
+			type,						\
+		},							\
+		name,							\
+		desc							\
+	}
+#define ELFNOTE(size, name, type, desc)		\
+	_ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif	/* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
-- 
cgit v1.1


From 5091e746848f74c9a2c0579b4ef8d8cd1a6b135d Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@xensource.com>
Date: Mon, 25 Sep 2006 23:32:28 -0700
Subject: [PATCH] Translate asm version of ELFNOTE macro into preprocessor
 macro

I've come across some problems with the assembly version of the ELFNOTE
macro currently in -mm. (in
x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch)

The first is that older gas does not support :varargs in .macro
definitions (in my testing 2.17 does while 2.15 does not, I don't know
when it became supported). The Changes file says binutils >= 2.12 so I
think we need to avoid using it. There are no other uses in mainline or
-mm. Old gas appears to just ignore it so you get "too many arguments"
type errors.

Secondly it seems that passing strings as arguments to assembler macros
is broken without varargs. It looks like they get unquoted or each
character is treated as a separate argument or something and this causes
all manner of grief. I think this is because of the use of -traditional
when compiling assembly files.

Therefore I have translated the assembler macro into a pre-processor
macro.

I added the desctype as a separate argument instead of including it with
the descdata as the previous version did since -traditional means the
ELFNOTE definition after the #else needs to have the same number of
arguments (I think so anyway, the -traditional CPP semantics are pretty
fscking strange!).

With this patch I am able to define elfnotes in assembly like this with
both old and new assemblers.

	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "linux")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "2.6")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  __PAGE_OFFSET)

Which seems reasonable enough.

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
Acked-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/elfnote.h | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
index 16f9f8e..67396db 100644
--- a/include/linux/elfnote.h
+++ b/include/linux/elfnote.h
@@ -31,22 +31,24 @@
 /*
  * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
  * turn out to be the same size and shape), followed by the name and
- * desc data with appropriate padding.  The 'desc' argument includes
- * the assembler pseudo op defining the type of the data: .asciz
- * "hello, world"
+ * desc data with appropriate padding.  The 'desctype' argument is the
+ * assembler pseudo op defining the type of the data e.g. .asciz while
+ * 'descdata' is the data itself e.g.  "hello, world".
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ *      ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
  */
-.macro ELFNOTE name type desc:vararg
-.pushsection ".note.\name"
-  .align 4
-  .long 2f - 1f			/* namesz */
-  .long 4f - 3f			/* descsz */
-  .long \type
-1:.asciz "\name"
-2:.align 4
-3:\desc
-4:.align 4
-.popsection
-.endm
+#define ELFNOTE(name, type, desctype, descdata)	\
+.pushsection .note.name			;	\
+  .align 4				;	\
+  .long 2f - 1f		/* namesz */	;	\
+  .long 4f - 3f		/* descsz */	;	\
+  .long type				;	\
+1:.asciz "name"				;	\
+2:.align 4				;	\
+3:desctype descdata			;	\
+4:.align 4				;	\
+.popsection				;
 #else	/* !__ASSEMBLER__ */
 #include <linux/elf.h>
 /*
-- 
cgit v1.1


From a3bc0dbc81d36fd38991b4373f6de8e1a507605a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:33 -0700
Subject: [PATCH] smp_call_function_single() cleanup

If we're going to implement smp_call_function_single() on three architecture
with the same prototype then it should have a declaration in a
non-arch-specific header file.

Move it into <linux/smp.h>.

Cc: Stephane Eranian <eranian@hpl.hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/smp.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bc..5164998 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
  */
 int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
 
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+				int retry, int wait);
+
 /*
  * Call a function on all processors
  */
-- 
cgit v1.1


From 930631edd4b1fe2781d9fe90edbe35d89dfc94cc Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 25 Sep 2006 23:32:40 -0700
Subject: [PATCH] add DIV_ROUND_UP()

Add the DIV_ROUND_UP() helper macro: divide `n' by `d', rounding up.

Stolen from the gfs2 tree(!) because the swsusp patches need it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4f..e44a37e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
 #define	KERN_EMERG	"<0>"	/* system is unusable			*/
-- 
cgit v1.1


From ab954160350c91c77ae03740ef90458c3ad5412c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:42 -0700
Subject: [PATCH] swsusp: write speedup

Switch the swsusp writeout code from 4k-at-a-time to 4MB-at-a-time.

Crufty old PIII testbox:
	12.9 MB/s -> 20.9 MB/s

Sony Vaio:
	14.7 MB/s -> 26.5 MB/s

The implementation is crude.  A better one would use larger BIOs, but wouldn't
gain any performance.

The memcpys will be mostly pipelined with the IO and basically come for free.

The ENOMEM path has not been tested.  It should be.

Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2f5ad7..3d434cb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -12,6 +12,8 @@
 
 struct notifier_block;
 
+struct bio;
+
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
@@ -216,7 +218,8 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+				struct bio **bio_chain);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
-- 
cgit v1.1


From 546e0d271941dd1ff6961e2a1f7eac75f1fc277e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 25 Sep 2006 23:32:44 -0700
Subject: [PATCH] swsusp: read speedup

Implement async reads for swsusp resuming.

Crufty old PIII testbox:
	15.7 MB/s -> 20.3 MB/s

Sony Vaio:
	14.6 MB/s -> 33.3 MB/s

I didn't implement the post-resume bio_set_pages_dirty().  I don't really
understand why resume needs to run set_page_dirty() against these pages.

It might be a worry that this code modifies PG_Uptodate, PG_Error and
PG_Locked against the image pages.  Can this possibly affect the resumed-into
kernel?  Hopefully not, if we're atomically restoring its mem_map?

Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Jens Axboe <axboe@suse.de>
Cc: Laurent Riffard <laurent.riffard@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3d434cb..e7c36ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -220,6 +220,7 @@ extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
 				struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
-- 
cgit v1.1


From e3920fb42c8ddfe63befb54d95c0e13eabacea9b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:48 -0700
Subject: [PATCH] Disable CPU hotplug during suspend

The current suspend code has to be run on one CPU, so we use the CPU
hotplug to take the non-boot CPUs offline on SMP machines.  However, we
should also make sure that these CPUs will not be enabled by someone else
after we have disabled them.

The functions disable_nonboot_cpus() and enable_nonboot_cpus() are moved to
kernel/cpu.c, because they now refer to some stuff in there that should
better be static.  Also it's better if disable_nonboot_cpus() returns an
error instead of panicking if something goes wrong, and
enable_nonboot_cpus() has no reason to panic(), because the CPUs may have
been enabled by the userland before it tries to take them online.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cpu.h     | 8 ++++++++
 include/linux/suspend.h | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a..3fef7d6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa..6e8a06c 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -57,14 +57,6 @@ static inline int software_suspend(void)
 }
 #endif /* CONFIG_PM */
 
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
-- 
cgit v1.1


From dcbb5a54f6e3984efa24772394f2225b11495c55 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:51 -0700
Subject: [PATCH] swsusp: clean up suspend header

Remove some things that are no longer used or defined elsewhere from suspend.h
and make the inline version of software_suspend() return the right error code.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 6e8a06c..c11cacf 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,11 +10,11 @@
 #include <linux/pm.h>
 
 /* page backup entry */
-typedef struct pbe {
+struct pbe {
 	unsigned long address;		/* address of the copy */
 	unsigned long orig_address;	/* original address of page */
 	struct pbe *next;
-} suspend_pagedir_t;
+};
 
 #define for_each_pbe(pbe, pblist) \
 	for (pbe = pblist ; pbe ; pbe = pbe->next)
@@ -25,15 +25,6 @@ typedef struct pbe {
 #define for_each_pb_page(pbe, pblist) \
 	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
 
-
-#define SWAP_FILENAME_MAXLENGTH	32
-
-
-extern dev_t swsusp_resume_device;
-   
-/* mm/vmscan.c */
-extern int shrink_mem(void);
-
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
@@ -53,7 +44,7 @@ static inline void pm_restore_console(void) {}
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
-	return -EPERM;
+	return -ENOSYS;
 }
 #endif /* CONFIG_PM */
 
-- 
cgit v1.1


From 940864ddabdb180e02041c4dcd46ba6f9eee732f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:55 -0700
Subject: [PATCH] swsusp: Use memory bitmaps during resume

Make swsusp use memory bitmaps to store its internal information during the
resume phase of the suspend-resume cycle.

If the pfns of saveable pages are saved during the suspend phase instead of
the kernel virtual addresses of these pages, we can use them during the resume
phase directly to set the corresponding bits in a memory bitmap.  Then, this
bitmap is used to mark the page frames corresponding to the pages that were
saveable before the suspend (aka "unsafe" page frames).

Next, we allocate as many page frames as needed to store the entire suspend
image and make sure that there will be some extra free "safe" page frames for
the list of PBEs constructed later.  Subsequently, the image is loaded and, if
possible, the data loaded from it are written into their "original" page
frames (ie.  the ones they had occupied before the suspend).

The image data that cannot be written into their "original" page frames are
loaded into "safe" page frames and their "original" kernel virtual addresses,
as well as the addresses of the "safe" pages containing their copies, are
stored in a list of PBEs.  Finally, the list of PBEs is used to copy the
remaining image data into their "original" page frames (this is done
atomically, by the architecture-dependent parts of swsusp).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index c11cacf..b1237f1 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -16,15 +16,6 @@ struct pbe {
 	struct pbe *next;
 };
 
-#define for_each_pbe(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE      (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP       (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
-- 
cgit v1.1


From c8eb8b4025175f967af0ba8e933f23aa9954dc35 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:56 -0700
Subject: [PATCH] PM: make it possible to disable console suspending

Change suspend_console() so that it waits for all consoles to flush the
remaining messages and make it possible to switch the console suspending off
with the help of a Kconfig option.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Stefan Seyfried <seife@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/console.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf215..76a1807 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
 extern void console_start(struct console *);
 extern int is_console_locked(void);
 
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
 extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
 
 /* Some debug stub to catch some of the obvious races in the VT code */
 #if 1
-- 
cgit v1.1


From c5c6ba4e08ab9c9e390a0f3a7d9a5c332f5cc6ef Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 25 Sep 2006 23:32:58 -0700
Subject: [PATCH] PM: Add pm_trace switch

Add the pm_trace attribute in /sys/power which has to be explicitly set to
one to really enable the "PM tracing" code compiled in when CONFIG_PM_TRACE
is set (which modifies the machine's CMOS clock in unpredictable ways).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/resume-trace.h | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4..81e9299 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
 
 #ifdef CONFIG_PM_TRACE
 
+extern int pm_trace_enabled;
+
 struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(void *tracedata, unsigned int user);
 
 #define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do {				\
-	void *tracedata;				\
-	asm volatile("movl $1f,%0\n"			\
-		".section .tracedata,\"a\"\n"		\
-		"1:\t.word %c1\n"			\
-		"\t.long %c2\n"				\
-		".previous"				\
-		:"=r" (tracedata)			\
-		: "i" (__LINE__), "i" (__FILE__));	\
-	generate_resume_trace(tracedata, user);		\
+#define TRACE_RESUME(user) do {					\
+	if (pm_trace_enabled) {					\
+		void *tracedata;				\
+		asm volatile("movl $1f,%0\n"			\
+			".section .tracedata,\"a\"\n"		\
+			"1:\t.word %c1\n"			\
+			"\t.long %c2\n"				\
+			".previous"				\
+			:"=r" (tracedata)			\
+			: "i" (__LINE__), "i" (__FILE__));	\
+		generate_resume_trace(tracedata, user);		\
+	}							\
 } while (0)
 
 #else
-- 
cgit v1.1


From eb2a2fd91f7c8a53b15063d6f08cf22b9a56cbfb Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Tue, 26 Sep 2006 23:23:45 +0200
Subject: [PATCH] Modularize generic HDLC

This patch enables building of individual WAN protocol support
routines (parts of generic HDLC) as separate modules.
All protocol-private definitions are moved from hdlc.h file
to protocol drivers. User-space interface and interface
between generic HDLC and underlying low-level HDLC drivers
are unchanged.

Signed-off-by: Krzysztof Halasa <khc@pm.waw.pl>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/hdlc.h       | 201 ++++++++++-----------------------------------
 include/linux/hdlc/ioctl.h |  33 ++++++++
 2 files changed, 77 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index d5ebbb2..d4b3339 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -11,95 +11,46 @@
 #ifndef __HDLC_H
 #define __HDLC_H
 
-#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
-
-#define CLOCK_DEFAULT   0	/* Default setting */
-#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
-#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
-#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
-#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
-
-
-#define ENCODING_DEFAULT	0 /* Default setting */
-#define ENCODING_NRZ		1
-#define ENCODING_NRZI		2
-#define ENCODING_FM_MARK	3
-#define ENCODING_FM_SPACE	4
-#define ENCODING_MANCHESTER	5
-
-
-#define PARITY_DEFAULT		0 /* Default setting */
-#define PARITY_NONE		1 /* No parity */
-#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
-#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
-#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
-#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
-#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
-#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
-
-#define LMI_DEFAULT		0 /* Default setting */
-#define LMI_NONE		1 /* No LMI, all PVCs are static */
-#define LMI_ANSI		2 /* ANSI Annex D */
-#define LMI_CCITT		3 /* ITU-T Annex A */
-#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
 
 #define HDLC_MAX_MTU 1500	/* Ethernet 1500 bytes */
+#if 0
 #define HDLC_MAX_MRU (HDLC_MAX_MTU + 10 + 14 + 4) /* for ETH+VLAN over FR */
+#else
+#define HDLC_MAX_MRU 1600 /* as required for FR network */
+#endif
 
 
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
-#include <net/syncppp.h>
 #include <linux/hdlc/ioctl.h>
 
 
-typedef struct {		/* Used in Cisco and PPP mode */
-	u8 address;
-	u8 control;
-	u16 protocol;
-}__attribute__ ((packed)) hdlc_header;
-
-
-
-typedef struct {
-	u32 type;		/* code */
-	u32 par1;
-	u32 par2;
-	u16 rel;		/* reliability */
-	u32 time;
-}__attribute__ ((packed)) cisco_packet;
-#define	CISCO_PACKET_LEN	18
-#define	CISCO_BIG_PACKET_LEN	20
-
-
-
-typedef struct pvc_device_struct {
-	struct net_device *master;
-	struct net_device *main;
-	struct net_device *ether; /* bridged Ethernet interface */
-	struct pvc_device_struct *next;	/* Sorted in ascending DLCI order */
-	int dlci;
-	int open_count;
-
-	struct {
-		unsigned int new: 1;
-		unsigned int active: 1;
-		unsigned int exist: 1;
-		unsigned int deleted: 1;
-		unsigned int fecn: 1;
-		unsigned int becn: 1;
-		unsigned int bandwidth;	/* Cisco LMI reporting only */
-	}state;
-}pvc_device;
-
-
-
-typedef struct hdlc_device_struct {
-	/* To be initialized by hardware driver */
+/* Used by all network devices here, pointed to by netdev_priv(dev) */
+struct hdlc_device_desc {
+	int (*netif_rx)(struct sk_buff *skb);
 	struct net_device_stats stats;
-
+};
+
+/* This structure is a private property of HDLC protocols.
+   Hardware drivers have no interest here */
+
+struct hdlc_proto {
+	int (*open)(struct net_device *dev);
+	void (*close)(struct net_device *dev);
+	void (*start)(struct net_device *dev); /* if open & DCD */
+	void (*stop)(struct net_device *dev); /* if open & !DCD */
+	void (*detach)(struct net_device *dev);
+	int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
+	unsigned short (*type_trans)(struct sk_buff *skb,
+				     struct net_device *dev);
+	struct module *module;
+	struct hdlc_proto *next; /* next protocol in the list */
+};
+
+
+typedef struct hdlc_device {
 	/* used by HDLC layer to take control over HDLC device from hw driver*/
 	int (*attach)(struct net_device *dev,
 		      unsigned short encoding, unsigned short parity);
@@ -107,82 +58,18 @@ typedef struct hdlc_device_struct {
 	/* hardware driver must handle this instead of dev->hard_start_xmit */
 	int (*xmit)(struct sk_buff *skb, struct net_device *dev);
 
-
 	/* Things below are for HDLC layer internal use only */
-	struct {
-		int (*open)(struct net_device *dev);
-		void (*close)(struct net_device *dev);
-
-		/* if open & DCD */
-		void (*start)(struct net_device *dev);
-		/* if open & !DCD */
-		void (*stop)(struct net_device *dev);
-
-		void (*detach)(struct hdlc_device_struct *hdlc);
-		int (*netif_rx)(struct sk_buff *skb);
-		unsigned short (*type_trans)(struct sk_buff *skb,
-					     struct net_device *dev);
-		int id;		/* IF_PROTO_HDLC/CISCO/FR/etc. */
-	}proto;
-
+	const struct hdlc_proto *proto;
 	int carrier;
 	int open;
 	spinlock_t state_lock;
-
-	union {
-		struct {
-			fr_proto settings;
-			pvc_device *first_pvc;
-			int dce_pvc_count;
-
-			struct timer_list timer;
-			unsigned long last_poll;
-			int reliable;
-			int dce_changed;
-			int request;
-			int fullrep_sent;
-			u32 last_errors; /* last errors bit list */
-			u8 n391cnt;
-			u8 txseq; /* TX sequence number */
-			u8 rxseq; /* RX sequence number */
-		}fr;
-
-		struct {
-			cisco_proto settings;
-
-			struct timer_list timer;
-			unsigned long last_poll;
-			int up;
-			int request_sent;
-			u32 txseq; /* TX sequence number */
-			u32 rxseq; /* RX sequence number */
-		}cisco;
-
-		struct {
-			raw_hdlc_proto settings;
-		}raw_hdlc;
-
-		struct {
-			struct ppp_device pppdev;
-			struct ppp_device *syncppp_ptr;
-			int (*old_change_mtu)(struct net_device *dev,
-					      int new_mtu);
-		}ppp;
-	}state;
+	void *state;
 	void *priv;
 }hdlc_device;
 
 
-int hdlc_raw_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_fr_ioctl(struct net_device *dev, struct ifreq *ifr);
-int hdlc_x25_ioctl(struct net_device *dev, struct ifreq *ifr);
-
-
-/* Exported from hdlc.o */
+/* Exported from hdlc module */
 
 /* Called by hardware driver when a user requests HDLC service */
 int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
@@ -191,17 +78,21 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 #define register_hdlc_device(dev)	register_netdev(dev)
 void unregister_hdlc_device(struct net_device *dev);
 
+
+void register_hdlc_protocol(struct hdlc_proto *proto);
+void unregister_hdlc_protocol(struct hdlc_proto *proto);
+
 struct net_device *alloc_hdlcdev(void *priv);
 
-static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev)
+
+static __inline__ struct hdlc_device_desc* dev_to_desc(struct net_device *dev)
 {
 	return netdev_priv(dev);
 }
 
-
-static __inline__ pvc_device* dev_to_pvc(struct net_device *dev)
+static __inline__ hdlc_device* dev_to_hdlc(struct net_device *dev)
 {
-	return (pvc_device*)dev->priv;
+	return netdev_priv(dev) + sizeof(struct hdlc_device_desc);
 }
 
 
@@ -225,18 +116,14 @@ int hdlc_open(struct net_device *dev);
 /* Must be called by hardware driver when HDLC device is being closed */
 void hdlc_close(struct net_device *dev);
 
+int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
+			 int (*rx)(struct sk_buff *skb), size_t size);
 /* May be used by hardware driver to gain control over HDLC device */
-static __inline__ void hdlc_proto_detach(hdlc_device *hdlc)
-{
-	if (hdlc->proto.detach)
-		hdlc->proto.detach(hdlc);
-	hdlc->proto.detach = NULL;
-}
-
+void detach_hdlc_protocol(struct net_device *dev);
 
 static __inline__ struct net_device_stats *hdlc_stats(struct net_device *dev)
 {
-	return &dev_to_hdlc(dev)->stats;
+	return &dev_to_desc(dev)->stats;
 }
 
 
@@ -248,8 +135,8 @@ static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb,
 	skb->mac.raw  = skb->data;
 	skb->dev      = dev;
 
-	if (hdlc->proto.type_trans)
-		return hdlc->proto.type_trans(skb, dev);
+	if (hdlc->proto->type_trans)
+		return hdlc->proto->type_trans(skb, dev);
 	else
 		return htons(ETH_P_HDLC);
 }
diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h
index 78430ba..5839723 100644
--- a/include/linux/hdlc/ioctl.h
+++ b/include/linux/hdlc/ioctl.h
@@ -1,6 +1,39 @@
 #ifndef __HDLC_IOCTL_H__
 #define __HDLC_IOCTL_H__
 
+
+#define GENERIC_HDLC_VERSION 4	/* For synchronization with sethdlc utility */
+
+#define CLOCK_DEFAULT   0	/* Default setting */
+#define CLOCK_EXT	1	/* External TX and RX clock - DTE */
+#define CLOCK_INT	2	/* Internal TX and RX clock - DCE */
+#define CLOCK_TXINT	3	/* Internal TX and external RX clock */
+#define CLOCK_TXFROMRX	4	/* TX clock derived from external RX clock */
+
+
+#define ENCODING_DEFAULT	0 /* Default setting */
+#define ENCODING_NRZ		1
+#define ENCODING_NRZI		2
+#define ENCODING_FM_MARK	3
+#define ENCODING_FM_SPACE	4
+#define ENCODING_MANCHESTER	5
+
+
+#define PARITY_DEFAULT		0 /* Default setting */
+#define PARITY_NONE		1 /* No parity */
+#define PARITY_CRC16_PR0	2 /* CRC16, initial value 0x0000 */
+#define PARITY_CRC16_PR1	3 /* CRC16, initial value 0xFFFF */
+#define PARITY_CRC16_PR0_CCITT	4 /* CRC16, initial 0x0000, ITU-T version */
+#define PARITY_CRC16_PR1_CCITT	5 /* CRC16, initial 0xFFFF, ITU-T version */
+#define PARITY_CRC32_PR0_CCITT	6 /* CRC32, initial value 0x00000000 */
+#define PARITY_CRC32_PR1_CCITT	7 /* CRC32, initial value 0xFFFFFFFF */
+
+#define LMI_DEFAULT		0 /* Default setting */
+#define LMI_NONE		1 /* No LMI, all PVCs are static */
+#define LMI_ANSI		2 /* ANSI Annex D */
+#define LMI_CCITT		3 /* ITU-T Annex A */
+#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
+
 typedef struct { 
 	unsigned int clock_rate; /* bits per second */
 	unsigned int clock_type; /* internal, external, TX-internal etc. */
-- 
cgit v1.1


From 51c3711704b66986373408cbc0540abea43d2380 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 13 Aug 2006 23:33:16 +0200
Subject: i2c-algo-sibyte: Merge into i2c-sibyte

i2c-algo-sibyte: Merge into i2c-sibyte

Merge i2c-algo-sibyte into i2c-sibyte, as this is a complete,
hardware-dependent SMBus implementation and not a reusable algorithm.

Perform some basic coding style cleanups while we're here (mainly
space-based indentation replaced by tabulations.)

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-algo-sibyte.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)
 delete mode 100644 include/linux/i2c-algo-sibyte.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-algo-sibyte.h b/include/linux/i2c-algo-sibyte.h
deleted file mode 100644
index 03914de..0000000
--- a/include/linux/i2c-algo-sibyte.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2001,2002,2003 Broadcom Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-
-#ifndef I2C_ALGO_SIBYTE_H
-#define I2C_ALGO_SIBYTE_H 1
-
-#include <linux/i2c.h>
-
-struct i2c_algo_sibyte_data {
-	void *data;		/* private data */
-        int   bus;		/* which bus */
-        void *reg_base;		/* CSR base */
-};
-
-int i2c_sibyte_add_bus(struct i2c_adapter *, int speed);
-int i2c_sibyte_del_bus(struct i2c_adapter *);
-
-#endif /* I2C_ALGO_SIBYTE_H */
-- 
cgit v1.1


From a0d9c63d3640bd4fc90a408e8334754ef44bcf48 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 27 Aug 2006 11:46:49 +0200
Subject: i2c-algo-bit: Discard the mdelay data struct member

i2c-algo-bit: Discard the mdelay data struct member

The i2c_algo_bit_data structure has an mdelay member, which is not
used by the algorithm code (the code has always been ifdef'd out.)
Let's discard it to save some code and memory.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-algo-bit.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h
index c0e7fab..c8f8df2 100644
--- a/include/linux/i2c-algo-bit.h
+++ b/include/linux/i2c-algo-bit.h
@@ -40,7 +40,6 @@ struct i2c_algo_bit_data {
 	/* local settings */
 	int udelay;		/* half-clock-cycle time in microsecs */
 				/* i.e. clock is (500 / udelay) KHz */
-	int mdelay;		/* in millisecs, unused */
 	int timeout;		/* in jiffies */
 };
 
-- 
cgit v1.1


From 9b4ccb86b4abe644ffd218720da2f942b6a20fc2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 3 Sep 2006 22:22:50 +0200
Subject: i2c-algo-pcf: Discard the mdelay data struct member

i2c-algo-pcf: Discard the mdelay data struct member

Just as i2c-algo-bit, i2c-algo-pcf has an unused mdelay struct member,
which we can get rid of to spare some code and memory.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-algo-pcf.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h
index 18b0adf..9908f3f 100644
--- a/include/linux/i2c-algo-pcf.h
+++ b/include/linux/i2c-algo-pcf.h
@@ -35,7 +35,6 @@ struct i2c_algo_pcf_data {
 
 	/* local settings */
 	int udelay;
-	int mdelay;
 	int timeout;
 };
 
-- 
cgit v1.1


From af71ff690b92894f66ccede27f731150dc10d80d Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Sun, 3 Sep 2006 22:37:11 +0200
Subject: i2c: Let drivers constify i2c_algorithm data

i2c: Let drivers constify i2c_algorithm data

Let drivers constify I2C algorithm method operations tables,
moving them from ".data" to ".rodata".

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index eb0628a..23ad1ee 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -220,7 +220,7 @@ struct i2c_adapter {
 	struct module *owner;
 	unsigned int id;
 	unsigned int class;
-	struct i2c_algorithm *algo;/* the algorithm to access the bus	*/
+	const struct i2c_algorithm *algo; /* the algorithm to access the bus */
 	void *algo_data;
 
 	/* --- administration stuff. */
-- 
cgit v1.1


From 6d3aae9d74221b00e2cbf50a353527e5a71a58ba Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 3 Sep 2006 22:41:08 +0200
Subject: i2c: Drop unimplemented slave functions

i2c: Drop unimplemented slave functions

Drop the function declarations for slave mode support of i2c adapters.
This was never implemented, and by the time it is I bet we will want
something different anyway.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 23ad1ee..9b5d047 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -64,14 +64,6 @@ extern int i2c_master_recv(struct i2c_client *,char* ,int);
  */
 extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num);
 
-/*
- * Some adapter types (i.e. PCF 8584 based ones) may support slave behaviuor. 
- * This is not tested/implemented yet and will change in the future.
- */
-extern int i2c_slave_send(struct i2c_client *,char*,int);
-extern int i2c_slave_recv(struct i2c_client *,char*,int);
-
-
 
 /* This is the very generalized SMBus access routine. You probably do not
    want to use this, though; one of the functions below may be much easier,
@@ -201,10 +193,6 @@ struct i2c_algorithm {
 	                   unsigned short flags, char read_write,
 	                   u8 command, int size, union i2c_smbus_data * data);
 
-	/* --- these optional/future use for some adapter types.*/
-	int (*slave_send)(struct i2c_adapter *,char*,int);
-	int (*slave_recv)(struct i2c_adapter *,char*,int);
-
 	/* --- ioctl like call to set div. parameters. */
 	int (*algo_control)(struct i2c_adapter *, unsigned int, unsigned long);
 
-- 
cgit v1.1


From 46ff34633ed09f36ebc4b5c40ac37e592172df74 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Thu, 31 Aug 2006 01:55:24 -0400
Subject: MSI: Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT

0x08 is the HT capability, while PCI_CAP_ID_HT_IRQCONF would be
the subtype 0x80 that mpic_scan_ht_pic() uses.
Rename PCI_CAP_ID_HT_IRQCONF into PCI_CAP_ID_HT.

And by the way, use it in the ipath driver instead of defining its
own HT_CAPABILITY_ID.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 96930cb..7d0e26c 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -196,7 +196,7 @@
 #define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
 #define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
 #define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
-#define  PCI_CAP_ID_HT_IRQCONF	0x08	/* HyperTransport IRQ Configuration */
+#define  PCI_CAP_ID_HT		0x08	/* HyperTransport */
 #define  PCI_CAP_ID_VNDR	0x09	/* Vendor specific capability */
 #define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
 #define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
-- 
cgit v1.1


From 6397c75cbc4d7dbc3d07278b57c82a47dafb21b5 Mon Sep 17 00:00:00 2001
From: Brice Goglin <brice@myri.com>
Date: Thu, 31 Aug 2006 01:55:32 -0400
Subject: MSI: Blacklist PCI-E chipsets depending on Hypertransport MSI
 capability

Introduce msi_ht_cap_enabled() to check the MSI capability in the
Hypertransport configuration space.
It is used in a generic quirk quirk_msi_ht_cap() to check whether
MSI is enabled on hypertransport chipset, and a nVidia specific quirk
quirk_nvidia_ck804_msi_ht_cap() where two 2 HT MSI mappings have to
be checked.
Both quirks set the PCI_BUS_FLAGS_NO_MSI bus flag when MSI is disabled.

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6a1e098..b9e263a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1411,6 +1411,7 @@
 #define PCI_DEVICE_ID_SERVERWORKS_LE	  0x0009
 #define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
 #define PCI_DEVICE_ID_SERVERWORKS_EPB	  0x0103
+#define PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE	0x0132
 #define PCI_DEVICE_ID_SERVERWORKS_OSB4	  0x0200
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5	  0x0201
 #define PCI_DEVICE_ID_SERVERWORKS_CSB6    0x0203
-- 
cgit v1.1


From 6c2b374d74857e892080ee726184ec1d15e7d4e4 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Date: Mon, 31 Jul 2006 15:21:33 +0800
Subject: PCI-Express AER implemetation: AER core and aerdriver

Patch 3 implements the core part of PCI-Express AER and aerdrv
port service driver.

When a root port service device is probed, the aerdrv will call
request_irq to register irq handler for AER error interrupt.

When a device sends an PCI-Express error message to the root port,
the root port will trigger an interrupt, by either MSI or IO-APIC,
then kernel would run the irq handler. The handler collects root
error status register and schedules a work. The work will call
the core part to process the error based on its type
(Correctable/non-fatal/fatal).

As for Correctable errors, the patch chooses to just clear the correctable
error status register of the device.

As for the non-fatal error, the patch follows generic PCI error handler
rules to call the error callback functions of the endpoint's driver. If
the device is a bridge, the patch chooses to broadcast the error to
downstream devices.

As for the fatal error, the patch resets the pci-express link and
follows generic PCI error handler rules to call the error callback
functions of the endpoint's driver. If the device is a bridge, the patch
chooses to broadcast the error to downstream devices.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/aer.h         | 24 ++++++++++++++++++++++++
 include/linux/pcieport_if.h |  6 ++++++
 2 files changed, 30 insertions(+)
 create mode 100644 include/linux/aer.h

(limited to 'include/linux')

diff --git a/include/linux/aer.h b/include/linux/aer.h
new file mode 100644
index 0000000..402e178
--- /dev/null
+++ b/include/linux/aer.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2006 Intel Corp.
+ *     Tom Long Nguyen (tom.l.nguyen@intel.com)
+ *     Zhang Yanmin (yanmin.zhang@intel.com)
+ */
+
+#ifndef _AER_H_
+#define _AER_H_
+
+#if defined(CONFIG_PCIEAER)
+/* pci-e port driver needs this function to enable aer */
+extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+extern int pci_find_aer_capability(struct pci_dev *dev);
+extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+#else
+#define pci_enable_pcie_error_reporting(dev)		do { } while (0)
+#define pci_find_aer_capability(dev)			do { } while (0)
+#define pci_disable_pcie_error_reporting(dev)		do { } while (0)
+#define pci_cleanup_aer_uncorrect_error_status(dev)	do { } while (0)
+#endif
+
+#endif //_AER_H_
+
diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h
index b44e01a..6cd91e3 100644
--- a/include/linux/pcieport_if.h
+++ b/include/linux/pcieport_if.h
@@ -62,6 +62,12 @@ struct pcie_port_service_driver {
 	int (*suspend) (struct pcie_device *dev, pm_message_t state);
 	int (*resume) (struct pcie_device *dev);
 
+	/* Service Error Recovery Handler */
+	struct pci_error_handlers *err_handler;
+
+	/* Link Reset Capability - AER service driver specific */
+	pci_ers_result_t (*reset_link) (struct pci_dev *dev);
+
 	const struct pcie_port_service_id *id_table;
 	struct device_driver driver;
 };
-- 
cgit v1.1


From b19441af185559118e8247382ea4f2f76ebffc6d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 28 Aug 2006 11:43:25 -0700
Subject: PCI: fix __must_check warnings

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3ec7255..c9bb7be 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -431,7 +431,7 @@ int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn);
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
 unsigned int pci_scan_child_bus(struct pci_bus *bus);
-void pci_bus_add_device(struct pci_dev *dev);
+int __must_check pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
-- 
cgit v1.1


From 50b0075520a0acba9cabab5203bbce918b966d9a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Wed, 16 Aug 2006 17:42:18 +0100
Subject: PCI: Multiprobe sanitizer

There are numerous drivers that can use multithreaded probing but having
some kind of global flag as the way to control this makes migration to
threaded probing hard and since it enables it everywhere and is almost
as likely to cause serious pain as holding a clog dance in a minefield.

If we have a pci_driver multithread_probe flag to inherit you can turn
it on for one driver at a time.

From playing so far however I think we need a different model at the
device layer which serializes until the called probe function says "ok
you can start another one now". That would need some kind of flag and
semaphore plus a helper function.

Anyway in the absence of that this is a starting point to usefully play
with this stuff

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index c9bb7be..549d841 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -356,6 +356,8 @@ struct pci_driver {
 	struct pci_error_handlers *err_handler;
 	struct device_driver	driver;
 	struct pci_dynids dynids;
+
+	int multithread_probe;
 };
 
 #define	to_pci_driver(drv) container_of(drv,struct pci_driver, driver)
-- 
cgit v1.1


From 24f8aa9b464b73e0553f092b747770940ee0ea54 Mon Sep 17 00:00:00 2001
From: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Date: Tue, 12 Sep 2006 10:16:36 -0700
Subject: PCI: add pci_stop_bus_device

This patch adds pci_stop_bus_device() which stops a PCI device (detach
the driver, remove from the global list and so on) and any children.
This is needed for ACPI based PCI-to-PCI bridge hot-remove, and it will
be also needed for ACPI based PCI root bridge hot-remove.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: MUNEDA Takahiro <muneda.takahiro@jp.fujitsu.com>
Signed-off-by: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 549d841..5c3a417 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -441,6 +441,7 @@ extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
 extern void pci_remove_bus_device(struct pci_dev *dev);
+extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
 
 /* Generic PCI functions exported to card drivers */
-- 
cgit v1.1


From d48f1de2d8170814fb64effa320848410c466f95 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 20 Sep 2006 20:56:02 +0100
Subject: [MIPS] Remove EV96100 as previously announced.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/pci_ids.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6a1e098..5c1c698 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1482,9 +1482,6 @@
 #define PCI_DEVICE_ID_MARVELL_GT64260	0x6430
 #define PCI_DEVICE_ID_MARVELL_MV64360	0x6460
 #define PCI_DEVICE_ID_MARVELL_MV64460	0x6480
-#define PCI_DEVICE_ID_MARVELL_GT96100	0x9652
-#define PCI_DEVICE_ID_MARVELL_GT96100A	0x9653
-
 
 #define PCI_VENDOR_ID_V3		0x11b0
 #define PCI_DEVICE_ID_V3_V960		0x0001
-- 
cgit v1.1


From ae6ddcc5f24d6b06ae9231dc128904750a4155e0 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 27 Sep 2006 01:49:27 -0700
Subject: [PATCH] ext3 and jbd cleanup: remove whitespace

Remove whitespace from ext3 and jbd, before we clone ext4.

Signed-off-by: Mingming Cao<cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ext3_jbd.h | 10 ++++-----
 include/linux/jbd.h      | 56 ++++++++++++++++++++++++------------------------
 2 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index c8307c0..ce0e610 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -23,7 +23,7 @@
 
 /* Define the number of blocks we need to account to a transaction to
  * modify one block of data.
- * 
+ *
  * We may have to touch one inode, one bitmap buffer, up to three
  * indirection blocks, the group and superblock summaries, and the data
  * block to complete the transaction.  */
@@ -88,16 +88,16 @@
 #endif
 
 int
-ext3_mark_iloc_dirty(handle_t *handle, 
+ext3_mark_iloc_dirty(handle_t *handle,
 		     struct inode *inode,
 		     struct ext3_iloc *iloc);
 
-/* 
+/*
  * On success, We end up with an outstanding reference count against
- * iloc->bh.  This _must_ be cleaned up later. 
+ * iloc->bh.  This _must_ be cleaned up later.
  */
 
-int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 
+int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
 			struct ext3_iloc *iloc);
 
 int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a04c154..7d84793 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -1,6 +1,6 @@
 /*
  * linux/include/linux/jbd.h
- * 
+ *
  * Written by Stephen C. Tweedie <sct@redhat.com>
  *
  * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
@@ -97,8 +97,8 @@ extern void jbd_slab_free(void *ptr, size_t size);
  * number of outstanding buffers possible at any time.  When the
  * operation completes, any buffer credits not used are credited back to
  * the transaction, so that at all times we know how many buffers the
- * outstanding updates on a transaction might possibly touch. 
- * 
+ * outstanding updates on a transaction might possibly touch.
+ *
  * This is an opaque datatype.
  **/
 typedef struct handle_s		handle_t;	/* Atomic operation type */
@@ -108,7 +108,7 @@ typedef struct handle_s		handle_t;	/* Atomic operation type */
  * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem.
  *
  * journal_t is linked to from the fs superblock structure.
- * 
+ *
  * We use the journal_t to keep track of all outstanding transaction
  * activity on the filesystem, and to manage the state of the log
  * writing process.
@@ -128,7 +128,7 @@ typedef struct journal_s	journal_t;	/* Journal control structure */
  * On-disk structures
  */
 
-/* 
+/*
  * Descriptor block types:
  */
 
@@ -149,8 +149,8 @@ typedef struct journal_header_s
 } journal_header_t;
 
 
-/* 
- * The block tag: used to describe a single buffer in the journal 
+/*
+ * The block tag: used to describe a single buffer in the journal
  */
 typedef struct journal_block_tag_s
 {
@@ -158,9 +158,9 @@ typedef struct journal_block_tag_s
 	__be32		t_flags;	/* See below */
 } journal_block_tag_t;
 
-/* 
+/*
  * The revoke descriptor: used on disk to describe a series of blocks to
- * be revoked from the log 
+ * be revoked from the log
  */
 typedef struct journal_revoke_header_s
 {
@@ -374,10 +374,10 @@ struct jbd_revoke_table_s;
  **/
 
 /* Docbook can't yet cope with the bit fields, but will leave the documentation
- * in so it can be fixed later. 
+ * in so it can be fixed later.
  */
 
-struct handle_s 
+struct handle_s
 {
 	/* Which compound transaction is this update a part of? */
 	transaction_t		*h_transaction;
@@ -435,7 +435,7 @@ struct handle_s
  *
  */
 
-struct transaction_s 
+struct transaction_s
 {
 	/* Pointer to the journal for this transaction. [no locking] */
 	journal_t		*t_journal;
@@ -455,7 +455,7 @@ struct transaction_s
 		T_RUNDOWN,
 		T_FLUSH,
 		T_COMMIT,
-		T_FINISHED 
+		T_FINISHED
 	}			t_state;
 
 	/*
@@ -569,7 +569,7 @@ struct transaction_s
  *     journal_t.
  * @j_flags:  General journaling state flags
  * @j_errno:  Is there an outstanding uncleared error on the journal (from a
- *     prior abort)? 
+ *     prior abort)?
  * @j_sb_buffer: First part of superblock buffer
  * @j_superblock: Second part of superblock buffer
  * @j_format_version: Version of the superblock format
@@ -583,7 +583,7 @@ struct transaction_s
  * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction
  *  to start committing, or for a barrier lock to be released
  * @j_wait_logspace: Wait queue for waiting for checkpointing to complete
- * @j_wait_done_commit: Wait queue for waiting for commit to complete 
+ * @j_wait_done_commit: Wait queue for waiting for commit to complete
  * @j_wait_checkpoint:  Wait queue to trigger checkpointing
  * @j_wait_commit: Wait queue to trigger commit
  * @j_wait_updates: Wait queue to wait for updates to complete
@@ -592,7 +592,7 @@ struct transaction_s
  * @j_tail: Journal tail - identifies the oldest still-used block in the
  *  journal.
  * @j_free: Journal free - how many free blocks are there in the journal?
- * @j_first: The block number of the first usable block 
+ * @j_first: The block number of the first usable block
  * @j_last: The block number one beyond the last usable block
  * @j_dev: Device where we store the journal
  * @j_blocksize: blocksize for the location where we store the journal.
@@ -604,12 +604,12 @@ struct transaction_s
  * @j_list_lock: Protects the buffer lists and internal buffer state.
  * @j_inode: Optional inode where we store the journal.  If present, all journal
  *     block numbers are mapped into this inode via bmap().
- * @j_tail_sequence:  Sequence number of the oldest transaction in the log 
+ * @j_tail_sequence:  Sequence number of the oldest transaction in the log
  * @j_transaction_sequence: Sequence number of the next transaction to grant
  * @j_commit_sequence: Sequence number of the most recently committed
  *  transaction
  * @j_commit_request: Sequence number of the most recent transaction wanting
- *     commit 
+ *     commit
  * @j_uuid: Uuid of client object.
  * @j_task: Pointer to the current commit thread for this journal
  * @j_max_transaction_buffers:  Maximum number of metadata buffers to allow in a
@@ -823,8 +823,8 @@ struct journal_s
 	void *j_private;
 };
 
-/* 
- * Journal flag definitions 
+/*
+ * Journal flag definitions
  */
 #define JFS_UNMOUNT	0x001	/* Journal thread is being destroyed */
 #define JFS_ABORT	0x002	/* Journaling has been aborted for errors. */
@@ -833,7 +833,7 @@ struct journal_s
 #define JFS_LOADED	0x010	/* The journal superblock has been loaded */
 #define JFS_BARRIER	0x020	/* Use IDE barriers */
 
-/* 
+/*
  * Function declarations for the journaling transaction and buffer
  * management
  */
@@ -862,7 +862,7 @@ int __journal_remove_checkpoint(struct journal_head *);
 void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 /* Buffer IO */
-extern int 
+extern int
 journal_write_metadata_buffer(transaction_t	  *transaction,
 			      struct journal_head  *jh_in,
 			      struct journal_head **jh_out,
@@ -890,7 +890,7 @@ static inline handle_t *journal_current_handle(void)
 /* The journaling code user interface:
  *
  * Create and destroy handles
- * Register buffer modifications against the current transaction. 
+ * Register buffer modifications against the current transaction.
  */
 
 extern handle_t *journal_start(journal_t *, int nblocks);
@@ -917,11 +917,11 @@ extern journal_t * journal_init_dev(struct block_device *bdev,
 				int start, int len, int bsize);
 extern journal_t * journal_init_inode (struct inode *);
 extern int	   journal_update_format (journal_t *);
-extern int	   journal_check_used_features 
+extern int	   journal_check_used_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int	   journal_check_available_features 
+extern int	   journal_check_available_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int	   journal_set_features 
+extern int	   journal_set_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern int	   journal_create     (journal_t *);
 extern int	   journal_load       (journal_t *journal);
@@ -1015,7 +1015,7 @@ do {								           \
  * bit, when set, indicates that we have had a fatal error somewhere,
  * either inside the journaling layer or indicated to us by the client
  * (eg. ext3), and that we and should not commit any further
- * transactions.  
+ * transactions.
  */
 
 static inline int is_journal_aborted(journal_t *journal)
@@ -1082,7 +1082,7 @@ static inline int jbd_space_needed(journal_t *journal)
 #define BJ_Reserved	7	/* Buffer is reserved for access by journal */
 #define BJ_Locked	8	/* Locked for I/O during commit */
 #define BJ_Types	9
- 
+
 extern int jbd_blocks_per_page(struct inode *inode);
 
 #ifdef __KERNEL__
-- 
cgit v1.1


From 37ed322290eb6d5cf2ab33915793ed4219eae1d6 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <esandeen@redhat.com>
Date: Wed, 27 Sep 2006 01:49:31 -0700
Subject: [PATCH] JBD: 16T fixes

These are a few places I've found in jbd that look like they may not be
16T-safe, or consistent with the use of unsigned longs for block
containers.  Problems here would be somewhat hard to hit, would require
journal blocks past the 8T boundary, which would not be terribly common.
Still, should fix.

(some of these have come from the ext4 work on jbd as well).

I think there's one more possibility that the wrap() function may not be
safe IF your last block in the journal butts right up against the 232 block
boundary, but that seems like a VERY remote possibility, and I'm not
worrying about it at this point.

Signed-off-by: Eric Sandeen <esandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/jbd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 7d84793..dc26262 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -732,7 +732,7 @@ struct journal_s
 	 */
 	struct block_device	*j_dev;
 	int			j_blocksize;
-	unsigned int		j_blk_offset;
+	unsigned long		j_blk_offset;
 
 	/*
 	 * Device which holds the client fs.  For internal journal this will be
@@ -866,7 +866,7 @@ extern int
 journal_write_metadata_buffer(transaction_t	  *transaction,
 			      struct journal_head  *jh_in,
 			      struct journal_head **jh_out,
-			      int		   blocknr);
+			      unsigned long	   blocknr);
 
 /* Transaction locking */
 extern void		__wait_on_journal (journal_t *);
-- 
cgit v1.1


From e9ad5620bfb901df8a7a2603c88689ededeecaf1 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 27 Sep 2006 01:49:35 -0700
Subject: [PATCH] ext3: More whitespace cleanups

More white space cleanups in preparation of cloning ext4 from ext3.
Removing spaces that precede a tab.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ext3_fs.h   |  2 +-
 include/linux/ext3_fs_i.h |  2 +-
 include/linux/jbd.h       | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 0eed918..369c675 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -473,7 +473,7 @@ struct ext3_super_block {
 	__u8	s_reserved_char_pad;
 	__u16	s_reserved_word_pad;
 	__le32	s_default_mount_opts;
-	__le32	s_first_meta_bg; 	/* First metablock block group */
+	__le32	s_first_meta_bg;	/* First metablock block group */
 	__u32	s_reserved[190];	/* Padding to the end of the block */
 };
 
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 2f18b95..4395e52 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -35,7 +35,7 @@ struct ext3_reserve_window {
 };
 
 struct ext3_reserve_window_node {
-	struct rb_node	 	rsv_node;
+	struct rb_node		rsv_node;
 	__u32			rsv_goal_size;
 	__u32			rsv_alloc_hit;
 	struct ext3_reserve_window	rsv_window;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index dc26262..a6d9daa 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -64,7 +64,7 @@ extern int journal_enable_debug;
 		if ((n) <= journal_enable_debug) {			\
 			printk (KERN_DEBUG "(%s, %d): %s: ",		\
 				__FILE__, __LINE__, __FUNCTION__);	\
-		  	printk (f, ## a);				\
+			printk (f, ## a);				\
 		}							\
 	} while (0)
 #else
@@ -201,9 +201,9 @@ typedef struct journal_superblock_s
 
 /* 0x0024 */
 	/* Remaining fields are only valid in a version-2 superblock */
-	__be32	s_feature_compat; 	/* compatible feature set */
-	__be32	s_feature_incompat; 	/* incompatible feature set */
-	__be32	s_feature_ro_compat; 	/* readonly-compatible feature set */
+	__be32	s_feature_compat;	/* compatible feature set */
+	__be32	s_feature_incompat;	/* incompatible feature set */
+	__be32	s_feature_ro_compat;	/* readonly-compatible feature set */
 /* 0x0030 */
 	__u8	s_uuid[16];		/* 128-bit uuid for journal */
 
@@ -699,7 +699,7 @@ struct journal_s
 	wait_queue_head_t	j_wait_updates;
 
 	/* Semaphore for locking against concurrent checkpoints */
-	struct mutex	 	j_checkpoint_mutex;
+	struct mutex		j_checkpoint_mutex;
 
 	/*
 	 * Journal head: identifies the first unused block in the journal.
-- 
cgit v1.1


From a4e4de36dc446b2193bdc8ebb96a96e44b69dd94 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 27 Sep 2006 01:49:36 -0700
Subject: [PATCH] ext3: Fix sparse warnings

Fixing up some endian-ness warnings in preparation to clone ext4 from ext3.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ext3_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 369c675..cc08f56 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -460,7 +460,7 @@ struct ext3_super_block {
 	 */
 	__u8	s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
 	__u8	s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
-	__u16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
+	__le16	s_reserved_gdt_blocks;	/* Per group desc for online growth */
 	/*
 	 * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
 	 */
-- 
cgit v1.1


From 133d205a18b7a4d8cb52959c5310f6664277cf61 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 27 Sep 2006 01:49:41 -0700
Subject: [PATCH] Make kmem_cache_destroy() return void

un-, de-, -free, -destroy, -exit, etc functions should in general return
void.  Also,

There is very little, say, filesystem driver code can do upon failed
kmem_cache_destroy().  If it will be decided to BUG in this case, BUG
should be put in generic code, instead.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/slab.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 66d6eb7..a96fd93 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -60,7 +60,7 @@ extern void __init kmem_cache_init(void);
 extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
 				       void (*)(void *, kmem_cache_t *, unsigned long),
 				       void (*)(void *, kmem_cache_t *, unsigned long));
-extern int kmem_cache_destroy(kmem_cache_t *);
+extern void kmem_cache_destroy(kmem_cache_t *);
 extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t);
 extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
@@ -249,7 +249,7 @@ struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
 	unsigned long,
 	void (*)(void *, struct kmem_cache *, unsigned long),
 	void (*)(void *, struct kmem_cache *, unsigned long));
-int kmem_cache_destroy(struct kmem_cache *c);
+void kmem_cache_destroy(struct kmem_cache *c);
 void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags);
 void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 void kmem_cache_free(struct kmem_cache *c, void *b);
-- 
cgit v1.1


From c713216deebd95d2b0ab38fef8bb2361c0180c2d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 27 Sep 2006 01:49:43 -0700
Subject: [PATCH] Introduce mechanism for registering active regions of memory

At a basic level, architectures define structures to record where active
ranges of page frames are located.  Once located, the code to calculate zone
sizes and holes in each architecture is very similar.  Some of this zone and
hole sizing code is difficult to read for no good reason.  This set of patches
eliminates the similar-looking architecture-specific code.

The patches introduce a mechanism where architectures register where the
active ranges of page frames are with add_active_range().  When all areas have
been discovered, free_area_init_nodes() is called to initialise the pgdat and
zones.  The zone sizes and holes are then calculated in an architecture
independent manner.

Patch 1 introduces the mechanism for registering and initialising PFN ranges
Patch 2 changes ppc to use the mechanism - 139 arch-specific LOC removed
Patch 3 changes x86 to use the mechanism - 136 arch-specific LOC removed
Patch 4 changes x86_64 to use the mechanism - 74 arch-specific LOC removed
Patch 5 changes ia64 to use the mechanism - 52 arch-specific LOC removed
Patch 6 accounts for mem_map as a memory hole as the pages are not reclaimable.
	It adjusts the watermarks slightly

Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig,
gensparse_defconfig and defconfig.  Bob Picco has also tested and debugged on
IA64.  Jack Steiner successfully boot tested on a mammoth SGI IA64-based
machine.  These were on patches against 2.6.17-rc1 and release 3 of these
patches but there have been no ia64-changes since release 3.

There are differences in the zone sizes for x86_64 as the arch-specific code
for x86_64 accounts the kernel image and the starting mem_maps as memory holes
but the architecture-independent code accounts the memory as present.

The big benefit of this set of patches is a sizable reduction of
architecture-specific code, some of which is very hairy.  There should be a
greater reduction when other architectures use the same mechanisms for zone
and hole sizing but I lack the hardware to test on.

Additional credit;
	Dave Hansen for the initial suggestion and comments on early patches
	Andy Whitcroft for reviewing early versions and catching numerous
		errors
	Tony Luck for testing and debugging on IA64
	Bob Picco for fixing bugs related to pfn registration, reviewing a
		number of patch revisions, providing a number of suggestions
		on future direction and testing heavily
	Jack Steiner and Robin Holt for testing on IA64 and clarifying
		issues related to memory holes
	Yasunori for testing on IA64
	Andi Kleen for reviewing and feeding back about x86_64
	Christian Kujau for providing valuable information related to ACPI
		problems on x86_64 and testing potential fixes

This patch:

Define the structure to represent an active range of page frames within a node
in an architecture independent manner.  Architectures are expected to register
active ranges of PFNs using add_active_range(nid, start_pfn, end_pfn) and call
free_area_init_nodes() passing the PFNs of the end of each zone.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmzone.h | 10 +++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 856f0ee..c0402da 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -937,6 +937,53 @@ extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, pg_data_t *pgdat,
 	unsigned long * zones_size, unsigned long zone_start_pfn, 
 	unsigned long *zholes_size);
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * With CONFIG_ARCH_POPULATES_NODE_MAP set, an architecture may initialise its
+ * zones, allocate the backing mem_map and account for memory holes in a more
+ * architecture independent manner. This is a substitute for creating the
+ * zone_sizes[] and zholes_size[] arrays and passing them to
+ * free_area_init_node()
+ *
+ * An architecture is expected to register range of page frames backed by
+ * physical memory with add_active_range() before calling
+ * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
+ * usage, an architecture is expected to do something like
+ *
+ * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
+ * 							 max_highmem_pfn};
+ * for_each_valid_physical_page_range()
+ * 	add_active_range(node_id, start_pfn, end_pfn)
+ * free_area_init_nodes(max_zone_pfns);
+ *
+ * If the architecture guarantees that there are no holes in the ranges
+ * registered with add_active_range(), free_bootmem_active_regions()
+ * will call free_bootmem_node() for each registered physical page range.
+ * Similarly sparse_memory_present_with_active_regions() calls
+ * memory_present() for each range when SPARSEMEM is enabled.
+ *
+ * See mm/page_alloc.c for more information on each function exposed by
+ * CONFIG_ARCH_POPULATES_NODE_MAP
+ */
+extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+					unsigned long end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+						unsigned long new_end_pfn);
+extern void remove_all_active_ranges(void);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+						unsigned long end_pfn);
+extern void get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern void free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+extern int early_pfn_to_nid(unsigned long pfn);
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3693f1a..7fa1cbe 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -305,6 +305,13 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+	int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -518,7 +525,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+	!defined(CONFIG_ARCH_POPULATES_NODE_MAP)
 #define early_pfn_to_nid(nid)  (0UL)
 #endif
 
-- 
cgit v1.1


From 0e0b864e069c52a7b3e4a7da56e29b03a012fd75 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 27 Sep 2006 01:49:56 -0700
Subject: [PATCH] Account for memmap and optionally the kernel image as holes

The x86_64 code accounted for memmap and some portions of the the DMA zone as
holes.  This was because those areas would never be reclaimed and accounting
for them as memory affects min watermarks.  This patch will account for the
memmap as a memory hole.  Architectures may optionally use set_dma_reserve()
if they wish to account for a portion of memory in ZONE_DMA as a hole.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0402da..22936e1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -984,6 +984,7 @@ extern void sparse_memory_present_with_active_regions(int nid);
 extern int early_pfn_to_nid(unsigned long pfn);
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
-- 
cgit v1.1


From fb01439c5b778d5974a488c5d4fe85e6d0e18a68 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@skynet.ie>
Date: Wed, 27 Sep 2006 01:49:59 -0700
Subject: [PATCH] Allow an arch to expand node boundaries

Arch-independent zone-sizing determines the size of a node
(pgdat->node_spanned_pages) based on the physical memory that was
registered by the architecture.  However, when
CONFIG_MEMORY_HOTPLUG_RESERVE is set, the architecture expects that the
spanned_pages will be much larger and that mem_map will be allocated that
is used lated on memory hot-add.

This patch allows an architecture that sets CONFIG_MEMORY_HOTPLUG_RESERVE
to call push_node_boundaries() which will set the node beginning and end to
at *least* the requested boundary.

Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "Keith Mannthey" <kmannth@gmail.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 22936e1..9d046db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -970,6 +970,8 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
 						unsigned long new_end_pfn);
+extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
+					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
-- 
cgit v1.1


From e129b5c23c2b471d47f1c5d2b8b193fc2034af43 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 27 Sep 2006 01:50:00 -0700
Subject: [PATCH] vm: add per-zone writeout counter

The VM is supposed to minimise the number of pages which get written off the
LRU (for IO scheduling efficiency, and for high reclaim-success rates).  But
we don't actually have a clear way of showing how true this is.

So add `nr_vmscan_write' to /proc/vmstat and /proc/zoneinfo - the number of
pages which have been written by the vm scanner in this zone and globally.

Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7fa1cbe..1b0680c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -58,6 +58,7 @@ enum zone_stat_item {
 	NR_WRITEBACK,
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
+	NR_VMSCAN_WRITE,
 #ifdef CONFIG_NUMA
 	NUMA_HIT,		/* allocated in intended node */
 	NUMA_MISS,		/* allocated in non intended node */
-- 
cgit v1.1


From 5b99cd0effaf846240a15441aec459a592577eaf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 27 Sep 2006 01:50:01 -0700
Subject: [PATCH] own header file for struct page

This moves the definition of struct page from mm.h to its own header file
page-struct.h.  This is a prereq to fix SetPageUptodate which is broken on
s390:

#define SetPageUptodate(_page)
       do {
               struct page *__page = (_page);
               if (!test_and_set_bit(PG_uptodate, &__page->flags))
                       page_test_and_clear_dirty(_page);
       } while (0)

_page gets used twice in this macro which can cause subtle bugs.  Using
__page for the page_test_and_clear_dirty call doesn't work since it causes
yet another problem with the page_test_and_clear_dirty macro as well.

In order to avoid all these problems caused by macros it seems to be a good
idea to get rid of them and convert them to static inline functions.
Because of header file include order it's necessary to have a seperate
header file for the struct page definition.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h       | 62 +-------------------------------------------
 include/linux/mm_types.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmzone.h   |  5 ++++
 3 files changed, 73 insertions(+), 61 deletions(-)
 create mode 100644 include/linux/mm_types.h

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d046db..7477fb5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
 #include <linux/backing-dev.h>
+#include <linux/mm_types.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -215,62 +216,6 @@ struct vm_operations_struct {
 struct mmu_gather;
 struct inode;
 
-/*
- * Each physical page in the system has a struct page associated with
- * it to keep track of whatever it is we are using the page for at the
- * moment. Note that we have no way to track which tasks are using
- * a page, though if it is a pagecache page, rmap structures can tell us
- * who is mapping it.
- */
-struct page {
-	unsigned long flags;		/* Atomic flags, some possibly
-					 * updated asynchronously */
-	atomic_t _count;		/* Usage count, see below. */
-	atomic_t _mapcount;		/* Count of ptes mapped in mms,
-					 * to show when page is mapped
-					 * & limit reverse map searches.
-					 */
-	union {
-	    struct {
-		unsigned long private;		/* Mapping-private opaque data:
-					 	 * usually used for buffer_heads
-						 * if PagePrivate set; used for
-						 * swp_entry_t if PageSwapCache;
-						 * indicates order in the buddy
-						 * system if PG_buddy is set.
-						 */
-		struct address_space *mapping;	/* If low bit clear, points to
-						 * inode address_space, or NULL.
-						 * If page mapped as anonymous
-						 * memory, low bit is set, and
-						 * it points to anon_vma object:
-						 * see PAGE_MAPPING_ANON below.
-						 */
-	    };
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-	    spinlock_t ptl;
-#endif
-	};
-	pgoff_t index;			/* Our offset within mapping. */
-	struct list_head lru;		/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
-					 */
-	/*
-	 * On machines where all RAM is mapped into kernel address space,
-	 * we can simply calculate the virtual address. On machines with
-	 * highmem some memory is mapped into kernel virtual memory
-	 * dynamically, so we need a place to store that address.
-	 * Note that this field could be 16 bits on x86 ... ;)
-	 *
-	 * Architectures with slow multiplication can define
-	 * WANT_PAGE_VIRTUAL in asm/page.h
-	 */
-#if defined(WANT_PAGE_VIRTUAL)
-	void *virtual;			/* Kernel virtual address (NULL if
-					   not kmapped, ie. highmem) */
-#endif /* WANT_PAGE_VIRTUAL */
-};
-
 #define page_private(page)		((page)->private)
 #define set_page_private(page, v)	((page)->private = (v))
 
@@ -546,11 +491,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  */
 #include <linux/vmstat.h>
 
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
-extern struct page *mem_map;
-#endif
-
 static __always_inline void *lowmem_page_address(struct page *page)
 {
 	return __va(page_to_pfn(page) << PAGE_SHIFT);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
new file mode 100644
index 0000000..c3852fd
--- /dev/null
+++ b/include/linux/mm_types.h
@@ -0,0 +1,67 @@
+#ifndef _LINUX_MM_TYPES_H
+#define _LINUX_MM_TYPES_H
+
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct address_space;
+
+/*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
+ */
+struct page {
+	unsigned long flags;		/* Atomic flags, some possibly
+					 * updated asynchronously */
+	atomic_t _count;		/* Usage count, see below. */
+	atomic_t _mapcount;		/* Count of ptes mapped in mms,
+					 * to show when page is mapped
+					 * & limit reverse map searches.
+					 */
+	union {
+	    struct {
+		unsigned long private;		/* Mapping-private opaque data:
+					 	 * usually used for buffer_heads
+						 * if PagePrivate set; used for
+						 * swp_entry_t if PageSwapCache;
+						 * indicates order in the buddy
+						 * system if PG_buddy is set.
+						 */
+		struct address_space *mapping;	/* If low bit clear, points to
+						 * inode address_space, or NULL.
+						 * If page mapped as anonymous
+						 * memory, low bit is set, and
+						 * it points to anon_vma object:
+						 * see PAGE_MAPPING_ANON below.
+						 */
+	    };
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+	    spinlock_t ptl;
+#endif
+	};
+	pgoff_t index;			/* Our offset within mapping. */
+	struct list_head lru;		/* Pageout list, eg. active_list
+					 * protected by zone->lru_lock !
+					 */
+	/*
+	 * On machines where all RAM is mapped into kernel address space,
+	 * we can simply calculate the virtual address. On machines with
+	 * highmem some memory is mapped into kernel virtual memory
+	 * dynamically, so we need a place to store that address.
+	 * Note that this field could be 16 bits on x86 ... ;)
+	 *
+	 * Architectures with slow multiplication can define
+	 * WANT_PAGE_VIRTUAL in asm/page.h
+	 */
+#if defined(WANT_PAGE_VIRTUAL)
+	void *virtual;			/* Kernel virtual address (NULL if
+					   not kmapped, ie. highmem) */
+#endif /* WANT_PAGE_VIRTUAL */
+};
+
+#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b0680c..562cf7a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -314,6 +314,11 @@ struct node_active_region {
 };
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 
+#ifndef CONFIG_DISCONTIGMEM
+/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+extern struct page *mem_map;
+#endif
+
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
  * (mostly NUMA machines?) to denote a higher-level memory zone than the
-- 
cgit v1.1


From 08e0f6a9705376732fd3bc9bf8ba97a6b5211eb1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:06 -0700
Subject: [PATCH] Add NUMA_BUILD definition in kernel.h to avoid #ifdef
 CONFIG_NUMA

The NUMA_BUILD constant is always available and will be set to 1 on
NUMA_BUILDs.  That way checks valid only under CONFIG_NUMA can easily be done
without #ifdef CONFIG_NUMA

F.e.

if (NUMA_BUILD && <numa_condition>) {
...
}

[akpm: not a thing we'd normally do, but CONFIG_NUMA is special: it is
 causing ifdef explosion in core kernel, so let's see if this is a comfortable
 way in whcih to control that]

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4fa373b..4d00988 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -350,4 +350,11 @@ struct sysinfo {
 /* Trap pasters of __FUNCTION__ at compile-time */
 #define __FUNCTION__ (__func__)
 
+/* This helps us to avoid #ifdef CONFIG_NUMA */
+#ifdef CONFIG_NUMA
+#define NUMA_BUILD 1
+#else
+#define NUMA_BUILD 0
+#endif
+
 #endif
-- 
cgit v1.1


From 77f700dab4c05f8ee17584ec869672796d7bcb87 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:07 -0700
Subject: [PATCH] Disable GFP_THISNODE in the non-NUMA case

GFP_THISNODE must be set to 0 in the non numa case otherwise we disable retry
and warnings for failing allocations in the SMP and UP case.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 8b34aab..bf2b6bc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -67,7 +67,12 @@ struct vm_area_struct;
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
+#ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+#else
+#define GFP_THISNODE	0
+#endif
+
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
-- 
cgit v1.1


From d5f541ed6e31518508c688912e7464facf253c87 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 27 Sep 2006 01:50:08 -0700
Subject: [PATCH] Add node to zone for the NUMA case

Add the node in order to optimize zone_to_nid.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 6 +++++-
 include/linux/mmzone.h | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7477fb5..8e433bb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -446,7 +446,11 @@ static inline struct zone *page_zone(struct page *page)
 
 static inline unsigned long zone_to_nid(struct zone *zone)
 {
-	return zone->zone_pgdat->node_id;
+#ifdef CONFIG_NUMA
+	return zone->node;
+#else
+	return 0;
+#endif
 }
 
 static inline unsigned long page_to_nid(struct page *page)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 562cf7a..59855b8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -168,6 +168,7 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	int node;
 	/*
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
-- 
cgit v1.1


From f4b81804a2d1ab341a4613089dc31ecce0800ed8 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Wed, 27 Sep 2006 01:50:10 -0700
Subject: [PATCH] do_no_pfn()

Implement do_no_pfn() for handling mapping of memory without a struct page
backing it.  This avoids creating fake page table entries for regions which
are not backed by real memory.

This feature is used by the MSPEC driver and other users, where it is
highly undesirable to have a struct page sitting behind the page (for
instance if the page is accessed in cached mode via the struct page in
parallel to the the driver accessing it uncached, which can result in data
corruption on some architectures, such as ia64).

This version uses specific NOPFN_{SIGBUS,OOM} return values, rather than
expect all negative pfn values would be an error.  It also bugs on cow
mappings as this would not work with the VM.

[akpm@osdl.org: micro-optimise]
Signed-off-by: Jes Sorensen <jes@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e433bb..22165cb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -199,6 +199,7 @@ struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
+	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
 	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
 
 	/* notification that a previously read-only page is about to become
@@ -594,6 +595,12 @@ static inline int page_mapped(struct page *page)
 #define NOPAGE_OOM	((struct page *) (-1))
 
 /*
+ * Error return values for the *_nopfn functions
+ */
+#define NOPFN_SIGBUS	((unsigned long) -1)
+#define NOPFN_OOM	((unsigned long) -2)
+
+/*
  * Different kinds of faults, as returned by handle_mm_fault().
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
-- 
cgit v1.1


From d24afc57d51b1be41f95521e81399061fa5875a6 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Wed, 27 Sep 2006 01:50:13 -0700
Subject: [PATCH] Mark __remove_vm_area() static

The function is exported but not used from anywhere else.  It's also marked as
"not for driver use" so noone out there should really care.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/vmalloc.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index dee88c6b..ce5f148 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -62,7 +62,6 @@ extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					unsigned long flags, int node);
 extern struct vm_struct *remove_vm_area(void *addr);
-extern struct vm_struct *__remove_vm_area(void *addr);
 extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
 			struct page ***pages);
 extern void unmap_vm_area(struct vm_struct *area);
-- 
cgit v1.1


From 5da6185bca064e35aa73a7c1f27488d2b96434f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:16 -0700
Subject: [PATCH] NOMMU: Set BDI capabilities for /dev/mem and /dev/kmem

Set the backing device info capabilities for /dev/mem and /dev/kmem to
permit direct sharing under no-MMU conditions and full mapping capabilities
under MMU conditions.  Make the BDI used by these available to all directly
mappable character devices.

Also comment the capabilities for /dev/zero.

[akpm@osdl.org: ifdef reductions]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index 2216638..ee5f53f 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -23,5 +23,7 @@ void cdev_del(struct cdev *);
 
 void cd_forget(struct inode *);
 
+extern struct backing_dev_info directly_mappable_cdev_bdi;
+
 #endif
 #endif
-- 
cgit v1.1


From dbf8685c8e21404e3a8ed244bd0219d3c4b89101 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:19 -0700
Subject: [PATCH] NOMMU: Implement /proc/pid/maps for NOMMU

Implement /proc/pid/maps for NOMMU by reading the vm_area_list attached to
current->mm->context.vmlist.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/proc_fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3435ca3..57f70bc 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -268,7 +268,9 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
 struct proc_maps_private {
 	struct pid *pid;
 	struct task_struct *task;
+#ifdef CONFIG_MMU
 	struct vm_area_struct *tail_vma;
+#endif
 };
 
 #endif /* _LINUX_PROC_FS_H */
-- 
cgit v1.1


From f269fdd1829acc5e53bf57b145003e5733133f2b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Sep 2006 01:50:23 -0700
Subject: [PATCH] NOMMU: move the fallback arch_vma_name() to a sensible place

Move the fallback arch_vma_name() to a sensible place (kernel/signal.c).

Currently it's in fs/proc/task_mmu.c, a file that is dependent on both
CONFIG_PROC_FS and CONFIG_MMU being enabled, but it's used from
kernel/signal.c from where it is called unconditionally.

[akpm@osdl.org: build fix]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 22165cb..7b703b6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1131,7 +1131,7 @@ void drop_slab(void);
 extern int randomize_va_space;
 #endif
 
-const char *arch_vma_name(struct vm_area_struct *vma);
+__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
-- 
cgit v1.1


From 7e96287ddc4f42081e18248b6167041c0908004c Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Wed, 27 Sep 2006 01:50:44 -0700
Subject: [PATCH] kdump: introduce "reset_devices" command line option

Resetting the devices during driver initialization can be a costly
operation in terms of time (especially scsi devices).  This option can be
used by drivers to know that user forcibly wants the devices to be reset
during initialization.

This option can be useful while kernel is booting in unreliable
environment.  For ex.  during kdump boot where devices are in unknown
random state and BIOS execution has been skipped.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 6667785..e92b145 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -68,6 +68,7 @@ extern initcall_t __security_initcall_start[], __security_initcall_end[];
 
 /* Defined in init/main.c */
 extern char saved_command_line[];
+extern unsigned int reset_devices;
 
 /* used by init/main.c */
 extern void setup_arch(char **);
-- 
cgit v1.1


From 8e18e2941c53416aa219708e7dcad21fb4bd6794 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:46 -0700
Subject: [PATCH] inode_diet: Replace inode.u.generic_ip with inode.i_private

The following patches reduce the size of the VFS inode structure by 28 bytes
on a UP x86.  (It would be more on an x86_64 system).  This is a 10% reduction
in the inode size on a UP kernel that is configured in a production mode
(i.e., with no spinlock or other debugging functions enabled; if you want to
save memory taken up by in-core inodes, the first thing you should do is
disable the debugging options; they are responsible for a huge amount of bloat
in the VFS inode structure).

This patch:

The filesystem or device-specific pointer in the inode is inside a union,
which is pretty pointless given that all 30+ users of this field have been
using the void pointer.  Get rid of the union and rename it to i_private, with
a comment to explain who is allowed to use the void pointer.  This is just a
cleanup, but it allows us to reuse the union 'u' for something something where
the union will actually be used.

[judith@osdl.org: powerpc build fix]
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Judith Lebzelter <judith@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1d3e601..4f77ec9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -554,9 +554,7 @@ struct inode {
 
 	atomic_t		i_writecount;
 	void			*i_security;
-	union {
-		void		*generic_ip;
-	} u;
+	void			*i_private; /* fs or device private pointer */
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
 #endif
-- 
cgit v1.1


From 4c1541680f8d189d21dd07b053bc12996574646e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:47 -0700
Subject: [PATCH] inode-diet: Move i_pipe into a union

Move the i_pipe pointer into a union that will be shared with i_bdev and
i_cdev.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f77ec9..ca695fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -528,9 +528,10 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
-	/* These three should probably be a union */
 	struct list_head	i_devices;
-	struct pipe_inode_info	*i_pipe;
+	union {
+		struct pipe_inode_info	*i_pipe;
+	};
 	struct block_device	*i_bdev;
 	struct cdev		*i_cdev;
 	int			i_cindex;
-- 
cgit v1.1


From eaf796e7ef6014f208c409b2b14fddcfaafe7e3a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:48 -0700
Subject: [PATCH] inode-diet: Move i_bdev into a union

Move the i_bdev pointer in struct inode into a union.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca695fc..98ff684 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -531,8 +531,8 @@ struct inode {
 	struct list_head	i_devices;
 	union {
 		struct pipe_inode_info	*i_pipe;
+		struct block_device	*i_bdev;
 	};
-	struct block_device	*i_bdev;
 	struct cdev		*i_cdev;
 	int			i_cindex;
 
-- 
cgit v1.1


From 577c4eb09d1034d0739e3135fd2cff50588024be Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:49 -0700
Subject: [PATCH] inode-diet: Move i_cdev into a union

Move the i_cdev pointer in struct inode into a union.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98ff684..192e69b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -532,8 +532,8 @@ struct inode {
 	union {
 		struct pipe_inode_info	*i_pipe;
 		struct block_device	*i_bdev;
+		struct cdev		*i_cdev;
 	};
-	struct cdev		*i_cdev;
 	int			i_cindex;
 
 	__u32			i_generation;
-- 
cgit v1.1


From ba52de123d454b57369f291348266d86f4b35070 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 27 Sep 2006 01:50:49 -0700
Subject: [PATCH] inode-diet: Eliminate i_blksize from the inode structure

This eliminates the i_blksize field from struct inode.  Filesystems that want
to provide a per-inode st_blksize can do so by providing their own getattr
routine instead of using the generic_fillattr() function.

Note that some filesystems were providing pretty much random (and incorrect)
values for i_blksize.

[bunk@stusta.de: cleanup]
[akpm@osdl.org: generic_fillattr() fix]
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h         |  1 -
 include/linux/nfsd/nfsfh.h | 10 ++--------
 include/linux/smb.h        |  1 -
 3 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 192e69b..8f74dfb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -512,7 +512,6 @@ struct inode {
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	unsigned int		i_blkbits;
-	unsigned long		i_blksize;
 	unsigned long		i_version;
 	blkcnt_t		i_blocks;
 	unsigned short          i_bytes;
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index f9edcd2..31a3cb6 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -269,14 +269,8 @@ fill_post_wcc(struct svc_fh *fhp)
 	fhp->fh_post_uid	= inode->i_uid;
 	fhp->fh_post_gid	= inode->i_gid;
 	fhp->fh_post_size       = inode->i_size;
-	if (inode->i_blksize) {
-		fhp->fh_post_blksize    = inode->i_blksize;
-		fhp->fh_post_blocks     = inode->i_blocks;
-	} else {
-		fhp->fh_post_blksize    = BLOCK_SIZE;
-		/* how much do we care for accuracy with MinixFS? */
-		fhp->fh_post_blocks     = (inode->i_size+511) >> 9;
-	}
+	fhp->fh_post_blksize    = BLOCK_SIZE;
+	fhp->fh_post_blocks     = inode->i_blocks;
 	fhp->fh_post_rdev[0]    = htonl((u32)imajor(inode));
 	fhp->fh_post_rdev[1]    = htonl((u32)iminor(inode));
 	fhp->fh_post_atime      = inode->i_atime;
diff --git a/include/linux/smb.h b/include/linux/smb.h
index 6df3b15..f098dff 100644
--- a/include/linux/smb.h
+++ b/include/linux/smb.h
@@ -89,7 +89,6 @@ struct smb_fattr {
 	struct timespec	f_atime;
 	struct timespec f_mtime;
 	struct timespec f_ctime;
-	unsigned long	f_blksize;
 	unsigned long	f_blocks;
 	int		f_unix;
 };
-- 
cgit v1.1


From ebba5f9fcb882306bef7175dee987342ec6fcf2f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 27 Sep 2006 01:50:55 -0700
Subject: [PATCH] consistently use MAX_ERRNO in __syscall_return

Consistently use MAX_ERRNO when checking for errors in __syscall_return().

[ralf@linux-mips.org: build fix]
Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/err.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/err.h b/include/linux/err.h
index cd3b367..1ab1d44 100644
--- a/include/linux/err.h
+++ b/include/linux/err.h
@@ -15,6 +15,8 @@
  */
 #define MAX_ERRNO	4095
 
+#ifndef __ASSEMBLY__
+
 #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
 
 static inline void *ERR_PTR(long error)
@@ -32,4 +34,6 @@ static inline long IS_ERR(const void *ptr)
 	return IS_ERR_VALUE((unsigned long)ptr);
 }
 
+#endif
+
 #endif /* _LINUX_ERR_H */
-- 
cgit v1.1


From 07563c711fbc25389e58ab9c9f0b9de2fce56760 Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.mks.ru>
Date: Wed, 27 Sep 2006 01:50:56 -0700
Subject: [PATCH] EISA bus MODALIAS attributes support

Add modalias attribute support for the almost forgotten now EISA bus and
(at least some) EISA-aware modules.

The modalias entry looks like (for an 3c509 NIC):

 eisa:sTCM5093

and the in-module alias like:

 eisa:sTCM5093*

The patch moves struct eisa_device_id declaration from include/linux/eisa.h
to include/linux/mod_devicetable.h (so that the former now #includes the
latter), adds proper MODULE_DEVICE_TABLE(eisa, ...) statements for all
drivers with EISA IDs I found (some drivers already have that DEVICE_TABLE
declared), and adds recognision of __mod_eisa_device_table to
scripts/mod/file2alias.c so that proper modules.alias will be generated.

There's no support for /lib/modules/$kver/modules.eisamap, as it's not used
by any existing tools, and because with in-kernel modalias mechanism those
maps are obsolete anyway.

The rationale for this patch is:

 a) to make EISA bus to act as other busses with modalias
    support, to unify driver loading

 b) to foget about EISA finally - with this patch, kernel
    (who still supports EISA) will be the only one who knows
    how to choose the necessary drivers for this bus ;)

[akpm@osdl.org: fix the kbuild bit]
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Acked-the-net-bits-by: Jeff Garzik <jeff@garzik.org>
Acked-the-tulip-bit-by: Valerie Henson <val_henson@linux.intel.com>
Cc: James Bottomley <James.Bottomley@steeleye.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/eisa.h            |  8 +-------
 include/linux/mod_devicetable.h | 12 ++++++++++++
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/eisa.h b/include/linux/eisa.h
index 4079242..1ff7c13 100644
--- a/include/linux/eisa.h
+++ b/include/linux/eisa.h
@@ -3,8 +3,8 @@
 
 #include <linux/ioport.h>
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 
-#define EISA_SIG_LEN   8
 #define EISA_MAX_SLOTS 8
 
 #define EISA_MAX_RESOURCES 4
@@ -27,12 +27,6 @@
 #define EISA_CONFIG_ENABLED         1
 #define EISA_CONFIG_FORCED          2
 
-/* The EISA signature, in ASCII form, null terminated */
-struct eisa_device_id {
-	char          sig[EISA_SIG_LEN];
-	unsigned long driver_data;
-};
-
 /* There is not much we can say about an EISA device, apart from
  * signature, slot number, and base address. dma_mask is set by
  * default to parent device mask..*/
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f7ca0b0..e0c393c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -308,4 +308,16 @@ struct input_device_id {
 	kernel_ulong_t driver_info;
 };
 
+/* EISA */
+
+#define EISA_SIG_LEN   8
+
+/* The EISA signature, in ASCII form, null terminated */
+struct eisa_device_id {
+	char          sig[EISA_SIG_LEN];
+	kernel_ulong_t driver_data;
+};
+
+#define EISA_DEVICE_MODALIAS_FMT "eisa:s%s"
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.1


From c18258c6f0848f97e85287f6271c511a092bb784 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 27 Sep 2006 01:51:06 -0700
Subject: [PATCH] pid: Implement transfer_pid and use it to simplify de_thread

In de_thread we move pids from one process to another, a rather ugly case.
The function transfer_pid makes it clear what we are doing, and makes the
action atomic.  This is useful we ever want to atomically traverse the
process group and session lists, in a rcu safe manner.

Even if the atomic properties this change should be a win as transfer_pid
should be less code to execute than executing both attach_pid and
detach_pid, and this should make de_thread slightly smaller as only a
single function call needs to be emitted.  The only downside is that the
code might be slower to execute as the odds are against transfer_pid being
in cache.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 29960b0..93da7e2 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -76,6 +76,8 @@ extern int FASTCALL(attach_pid(struct task_struct *task,
 				enum pid_type type, int nr));
 
 extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
+extern void FASTCALL(transfer_pid(struct task_struct *old,
+				  struct task_struct *new, enum pid_type));
 
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
-- 
cgit v1.1


From 66f37509fc7191df468a8d183374f48b13bacb73 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 27 Sep 2006 01:51:13 -0700
Subject: [PATCH] fs/nfs/: make code static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfs_fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 36f5bcf..98c9b9f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -315,10 +315,6 @@ extern void nfs_end_data_update(struct inode *);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
-extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
-					const struct dentry *dentry,
-					struct nfs_fh *fh,
-					struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
-- 
cgit v1.1


From 1b79e5513d52e8533a08af35a3595dad80c74d1f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 27 Sep 2006 01:51:14 -0700
Subject: [PATCH] add probe_kernel_address()

Add a version of __get_user() which is safe to call inside mmap_sem.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/uaccess.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 391e7ed..a48d7f1 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -19,4 +19,26 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 
 #endif		/* ARCH_HAS_NOCACHE_UACCESS */
 
+/**
+ * probe_kernel_address(): safely attempt to read from a location
+ * @addr: address to read from - its type is type typeof(retval)*
+ * @retval: read into this variable
+ *
+ * Safely read from address @addr into variable @revtal.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ * We ensure that the __get_user() is executed in atomic context so that
+ * do_page_fault() doesn't attempt to take mmap_sem.  This makes
+ * probe_kernel_address() suitable for use within regions where the caller
+ * already holds mmap_sem, or other locks which nest inside mmap_sem.
+ */
+#define probe_kernel_address(addr, retval)		\
+	({						\
+		long ret;				\
+							\
+		inc_preempt_count();			\
+		ret = __get_user(retval, addr);		\
+		dec_preempt_count();			\
+		ret;					\
+	})
+
 #endif		/* __LINUX_UACCESS_H__ */
-- 
cgit v1.1


From 3a16f7b4a75d68364c3278523f51ac141a12758a Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Thu, 29 Jun 2006 12:27:23 -0700
Subject: USB: move <linux/usb_otg.h> to <linux/usb/otg.h>

Move <linux/usb_otg.h> to <linux/usb/otg.h>.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/otg.h | 131 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb_otg.h | 131 ------------------------------------------------
 2 files changed, 131 insertions(+), 131 deletions(-)
 create mode 100644 include/linux/usb/otg.h
 delete mode 100644 include/linux/usb_otg.h

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
new file mode 100644
index 0000000..9897f7a
--- /dev/null
+++ b/include/linux/usb/otg.h
@@ -0,0 +1,131 @@
+// include/linux/usb/otg.h
+
+/*
+ * These APIs may be used between USB controllers.  USB device drivers
+ * (for either host or peripheral roles) don't use these calls; they
+ * continue to use just usb_device and usb_gadget.
+ */
+
+
+/* OTG defines lots of enumeration states before device reset */
+enum usb_otg_state {
+	OTG_STATE_UNDEFINED = 0,
+
+	/* single-role peripheral, and dual-role default-b */
+	OTG_STATE_B_IDLE,
+	OTG_STATE_B_SRP_INIT,
+	OTG_STATE_B_PERIPHERAL,
+
+	/* extra dual-role default-b states */
+	OTG_STATE_B_WAIT_ACON,
+	OTG_STATE_B_HOST,
+
+	/* dual-role default-a */
+	OTG_STATE_A_IDLE,
+	OTG_STATE_A_WAIT_VRISE,
+	OTG_STATE_A_WAIT_BCON,
+	OTG_STATE_A_HOST,
+	OTG_STATE_A_SUSPEND,
+	OTG_STATE_A_PERIPHERAL,
+	OTG_STATE_A_WAIT_VFALL,
+	OTG_STATE_A_VBUS_ERR,
+};
+
+/*
+ * the otg driver needs to interact with both device side and host side
+ * usb controllers.  it decides which controller is active at a given
+ * moment, using the transceiver, ID signal, HNP and sometimes static
+ * configuration information (including "board isn't wired for otg").
+ */
+struct otg_transceiver {
+	struct device		*dev;
+	const char		*label;
+
+	u8			default_a;
+	enum usb_otg_state	state;
+
+	struct usb_bus		*host;
+	struct usb_gadget	*gadget;
+
+	/* to pass extra port status to the root hub */
+	u16			port_status;
+	u16			port_change;
+
+	/* bind/unbind the host controller */
+	int	(*set_host)(struct otg_transceiver *otg,
+				struct usb_bus *host);
+
+	/* bind/unbind the peripheral controller */
+	int	(*set_peripheral)(struct otg_transceiver *otg,
+				struct usb_gadget *gadget);
+
+	/* effective for B devices, ignored for A-peripheral */
+	int	(*set_power)(struct otg_transceiver *otg,
+				unsigned mA);
+
+	/* for non-OTG B devices: set transceiver into suspend mode */
+	int	(*set_suspend)(struct otg_transceiver *otg,
+				int suspend);
+
+	/* for B devices only:  start session with A-Host */
+	int	(*start_srp)(struct otg_transceiver *otg);
+
+	/* start or continue HNP role switch */
+	int	(*start_hnp)(struct otg_transceiver *otg);
+
+};
+
+
+/* for board-specific init logic */
+extern int otg_set_transceiver(struct otg_transceiver *);
+
+
+/* for usb host and peripheral controller drivers */
+extern struct otg_transceiver *otg_get_transceiver(void);
+
+static inline int
+otg_start_hnp(struct otg_transceiver *otg)
+{
+	return otg->start_hnp(otg);
+}
+
+
+/* for HCDs */
+static inline int
+otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
+{
+	return otg->set_host(otg, host);
+}
+
+
+/* for usb peripheral controller drivers */
+static inline int
+otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph)
+{
+	return otg->set_peripheral(otg, periph);
+}
+
+static inline int
+otg_set_power(struct otg_transceiver *otg, unsigned mA)
+{
+	return otg->set_power(otg, mA);
+}
+
+static inline int
+otg_set_suspend(struct otg_transceiver *otg, int suspend)
+{
+	if (otg->set_suspend != NULL)
+		return otg->set_suspend(otg, suspend);
+	else
+		return 0;
+}
+
+static inline int
+otg_start_srp(struct otg_transceiver *otg)
+{
+	return otg->start_srp(otg);
+}
+
+
+/* for OTG controller drivers (and maybe other stuff) */
+extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num);
diff --git a/include/linux/usb_otg.h b/include/linux/usb_otg.h
deleted file mode 100644
index f827f6e..0000000
--- a/include/linux/usb_otg.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// include/linux/usb_otg.h 
-
-/*
- * These APIs may be used between USB controllers.  USB device drivers
- * (for either host or peripheral roles) don't use these calls; they
- * continue to use just usb_device and usb_gadget.
- */
-
-
-/* OTG defines lots of enumeration states before device reset */
-enum usb_otg_state {
-	OTG_STATE_UNDEFINED = 0,
-
-	/* single-role peripheral, and dual-role default-b */
-	OTG_STATE_B_IDLE,
-	OTG_STATE_B_SRP_INIT,
-	OTG_STATE_B_PERIPHERAL,
-
-	/* extra dual-role default-b states */
-	OTG_STATE_B_WAIT_ACON,
-	OTG_STATE_B_HOST,
-
-	/* dual-role default-a */
-	OTG_STATE_A_IDLE,
-	OTG_STATE_A_WAIT_VRISE,
-	OTG_STATE_A_WAIT_BCON,
-	OTG_STATE_A_HOST,
-	OTG_STATE_A_SUSPEND,
-	OTG_STATE_A_PERIPHERAL,
-	OTG_STATE_A_WAIT_VFALL,
-	OTG_STATE_A_VBUS_ERR,
-};
-
-/*
- * the otg driver needs to interact with both device side and host side
- * usb controllers.  it decides which controller is active at a given
- * moment, using the transceiver, ID signal, HNP and sometimes static
- * configuration information (including "board isn't wired for otg").
- */
-struct otg_transceiver {
-	struct device		*dev;
-	const char		*label;
-
-	u8			default_a;
-	enum usb_otg_state	state;
-
-	struct usb_bus		*host;
-	struct usb_gadget	*gadget;
-
-	/* to pass extra port status to the root hub */
-	u16			port_status;
-	u16			port_change;
-
-	/* bind/unbind the host controller */
-	int 	(*set_host)(struct otg_transceiver *otg,
-				struct usb_bus *host);
-
-	/* bind/unbind the peripheral controller */
-	int	(*set_peripheral)(struct otg_transceiver *otg,
-				struct usb_gadget *gadget);
-
-	/* effective for B devices, ignored for A-peripheral */
-	int	(*set_power)(struct otg_transceiver *otg,
-				unsigned mA);
-
-	/* for non-OTG B devices: set transceiver into suspend mode */
-	int	(*set_suspend)(struct otg_transceiver *otg,
-				int suspend);
-
-	/* for B devices only:  start session with A-Host */
-	int	(*start_srp)(struct otg_transceiver *otg);
-
-	/* start or continue HNP role switch */
-	int	(*start_hnp)(struct otg_transceiver *otg);
-
-};
-
-
-/* for board-specific init logic */
-extern int otg_set_transceiver(struct otg_transceiver *);
-
-
-/* for usb host and peripheral controller drivers */
-extern struct otg_transceiver *otg_get_transceiver(void);
-
-static inline int
-otg_start_hnp(struct otg_transceiver *otg)
-{
-	return otg->start_hnp(otg);
-}
-
-
-/* for HCDs */
-static inline int
-otg_set_host(struct otg_transceiver *otg, struct usb_bus *host)
-{
-	return otg->set_host(otg, host);
-}
-
-
-/* for usb peripheral controller drivers */
-static inline int
-otg_set_peripheral(struct otg_transceiver *otg, struct usb_gadget *periph)
-{
-	return otg->set_peripheral(otg, periph);
-}
-
-static inline int
-otg_set_power(struct otg_transceiver *otg, unsigned mA)
-{
-	return otg->set_power(otg, mA);
-}
-
-static inline int
-otg_set_suspend(struct otg_transceiver *otg, int suspend)
-{
-	if (otg->set_suspend != NULL)
-		return otg->set_suspend(otg, suspend);
-	else
-		return 0;
-}
-
-static inline int
-otg_start_srp(struct otg_transceiver *otg)
-{
-	return otg->start_srp(otg);
-}
-
-
-/* for OTG controller drivers (and maybe other stuff) */
-extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num);
-- 
cgit v1.1


From 8bb54ab573ecd1b4fe2ed66416a8d99a86e65316 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 1 Jul 2006 22:08:49 -0400
Subject: usbcore: add usb_device_driver definition

This patch (as732) adds a usb_device_driver structure, for representing
drivers that manage an entire USB device as opposed to just an
interface.  Support routines like usb_register_device_driver,
usb_deregister_device_driver, usb_probe_device, and usb_unbind_device
are also added.

Unlike an earlier version of this patch, the new code is type-safe.  To
accomplish this, the existing struct driver embedded in struct
usb_driver had to be wrapped in an intermediate wrapper.  This enables
the core to tell at runtime whether a particular struct driver belongs
to a device driver or to an interface driver.


Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index d2bd0c8..b4ccce6 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -540,7 +540,17 @@ struct usb_dynids {
 };
 
 /**
- * struct usb_driver - identifies USB driver to usbcore
+ * struct usbdrv_wrap - wrapper for driver-model structure
+ * @driver: The driver-model core driver structure.
+ * @for_devices: Non-zero for device drivers, 0 for interface drivers.
+ */
+struct usbdrv_wrap {
+	struct device_driver driver;
+	int for_devices;
+};
+
+/**
+ * struct usb_driver - identifies USB interface driver to usbcore
  * @name: The driver name should be unique among USB drivers,
  *	and should normally be the same as the module name.
  * @probe: Called to see if the driver is willing to manage a particular
@@ -567,12 +577,12 @@ struct usb_dynids {
  *	or your driver's probe function will never get called.
  * @dynids: used internally to hold the list of dynamically added device
  *	ids for this driver.
- * @driver: the driver model core driver structure.
+ * @drvwrap: Driver-model core structure wrapper.
  * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
  *	added to this driver by preventing the sysfs file from being created.
  *
- * USB drivers must provide a name, probe() and disconnect() methods,
- * and an id_table.  Other driver fields are optional.
+ * USB interface drivers must provide a name, probe() and disconnect()
+ * methods, and an id_table.  Other driver fields are optional.
  *
  * The id_table is used in hotplugging.  It holds a set of descriptors,
  * and specialized data may be associated with each entry.  That table
@@ -606,10 +616,40 @@ struct usb_driver {
 	const struct usb_device_id *id_table;
 
 	struct usb_dynids dynids;
-	struct device_driver driver;
+	struct usbdrv_wrap drvwrap;
 	unsigned int no_dynamic_id:1;
 };
-#define	to_usb_driver(d) container_of(d, struct usb_driver, driver)
+#define	to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver)
+
+/**
+ * struct usb_device_driver - identifies USB device driver to usbcore
+ * @name: The driver name should be unique among USB drivers,
+ *	and should normally be the same as the module name.
+ * @probe: Called to see if the driver is willing to manage a particular
+ *	device.  If it is, probe returns zero and uses dev_set_drvdata()
+ *	to associate driver-specific data with the device.  If unwilling
+ *	to manage the device, return a negative errno value.
+ * @disconnect: Called when the device is no longer accessible, usually
+ *	because it has been (or is being) disconnected or the driver's
+ *	module is being unloaded.
+ * @suspend: Called when the device is going to be suspended by the system.
+ * @resume: Called when the device is being resumed by the system.
+ * @drvwrap: Driver-model core structure wrapper.
+ *
+ * USB drivers must provide all the fields listed above except drvwrap.
+ */
+struct usb_device_driver {
+	const char *name;
+
+	int (*probe) (struct usb_device *udev);
+	void (*disconnect) (struct usb_device *udev);
+
+	int (*suspend) (struct usb_device *udev, pm_message_t message);
+	int (*resume) (struct usb_device *udev);
+	struct usbdrv_wrap drvwrap;
+};
+#define	to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
+		drvwrap.driver)
 
 extern struct bus_type usb_bus_type;
 
@@ -633,13 +673,17 @@ struct usb_class_driver {
  * use these in module_init()/module_exit()
  * and don't forget MODULE_DEVICE_TABLE(usb, ...)
  */
-int usb_register_driver(struct usb_driver *, struct module *);
+extern int usb_register_driver(struct usb_driver *, struct module *);
 static inline int usb_register(struct usb_driver *driver)
 {
 	return usb_register_driver(driver, THIS_MODULE);
 }
 extern void usb_deregister(struct usb_driver *);
 
+extern int usb_register_device_driver(struct usb_device_driver *,
+			struct module *);
+extern void usb_deregister_device_driver(struct usb_device_driver *);
+
 extern int usb_register_dev(struct usb_interface *intf,
 			    struct usb_class_driver *class_driver);
 extern void usb_deregister_dev(struct usb_interface *intf,
-- 
cgit v1.1


From 4d064c080265a41324d108fccc26b72106d43db3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 1 Jul 2006 22:11:44 -0400
Subject: usbcore: track whether interfaces are suspended

Currently we rely on intf->dev.power.power_state.event for tracking
whether intf is suspended.  This is not a reliable technique because
that value is owned by the PM core, not by usbcore.  This patch (as718b)
adds a new flag so that we can accurately tell which interfaces are
suspended and which aren't.

At first one might think these flags aren't needed, since interfaces
will be suspended along with their devices.  It turns out there are a
couple of intermediate situations where that's not quite true, such as
while processing a remote-wakeup request.


Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index b4ccce6..e22f4b3 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -102,6 +102,7 @@ enum usb_interface_condition {
  *	number from the USB core by calling usb_register_dev().
  * @condition: binding state of the interface: not bound, binding
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
+ * @is_active: flag set when the interface is bound and not suspended.
  * @dev: driver model's view of this device
  * @class_dev: driver model's class view of this device.
  *
@@ -142,6 +143,8 @@ struct usb_interface {
 	int minor;			/* minor number this interface is
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
+	unsigned is_active:1;		/* the interface is not suspended */
+
 	struct device dev;		/* interface specific device info */
 	struct class_device *class_dev;
 };
-- 
cgit v1.1


From f2ebf92c9e1930a8f79b7eb49a32122931929014 Mon Sep 17 00:00:00 2001
From: Ben Williamson <ben.williamson@greyinnovation.com>
Date: Tue, 1 Aug 2006 11:28:16 +1000
Subject: USB: gmidi: New USB MIDI Gadget class driver.

This driver is glue between the USB gadget interface
and the ALSA MIDI interface. It allows us to appear
as a MIDI Streaming device to a host system on the
other end of a USB cable.

This includes linux/usb/audio.h and linux/usb/midi.h
containing definitions from the relevant USB specifications
for USB audio and USB MIDI devices.

The following changes have been made since the first RFC
posting:

* Bug fixes to endpoint handling.
* Workaround for USB_REQ_SET_CONFIGURATION handling,
  not understood yet.
* Added SND and SND_RAWMIDI dependencies in Kconfig.
* Moved usb_audio.h and usb_midi.h to usb/*.h
* Added module parameters for ALSA card index and id.
* Added module parameters for USB descriptor IDs and strings.
* Removed some unneeded stuff inherited from zero.c, more to go.
* Provide DECLARE_* macros for the variable-length structs.
* Use kmalloc instead of usb_ep_alloc_buffer.
* Limit source to 80 columns.
* Return actual error code instead of -ENOMEM in a few places.

Signed-off-by: Ben Williamson <ben.williamson@greyinnovation.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/audio.h |  53 ++++++++++++++++++++++
 include/linux/usb/midi.h  | 112 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 include/linux/usb/audio.h
 create mode 100644 include/linux/usb/midi.h

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
new file mode 100644
index 0000000..6bd2359
--- /dev/null
+++ b/include/linux/usb/audio.h
@@ -0,0 +1,53 @@
+/*
+ * <linux/usb/audio.h> -- USB Audio definitions.
+ *
+ * Copyright (C) 2006 Thumtronics Pty Ltd.
+ * Developed for Thumtronics by Grey Innovation
+ * Ben Williamson <ben.williamson@greyinnovation.com>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This file holds USB constants and structures defined
+ * by the USB Device Class Definition for Audio Devices.
+ * Comments below reference relevant sections of that document:
+ *
+ * http://www.usb.org/developers/devclass_docs/audio10.pdf
+ */
+
+#ifndef __LINUX_USB_AUDIO_H
+#define __LINUX_USB_AUDIO_H
+
+#include <linux/types.h>
+
+/* A.2 Audio Interface Subclass Codes */
+#define USB_SUBCLASS_AUDIOCONTROL	0x01
+#define USB_SUBCLASS_AUDIOSTREAMING	0x02
+#define USB_SUBCLASS_MIDISTREAMING	0x03
+
+/* 4.3.2  Class-Specific AC Interface Descriptor */
+struct usb_ac_header_descriptor {
+	__u8  bLength;			// 8+n
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_HEADER
+	__le16 bcdADC;			// 0x0100
+	__le16 wTotalLength;		// includes Unit and Terminal desc.
+	__u8  bInCollection;		// n
+	__u8  baInterfaceNr[];		// [n]
+} __attribute__ ((packed));
+
+#define USB_DT_AC_HEADER_SIZE(n)	(8+(n))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_AC_HEADER_DESCRIPTOR(n) 			\
+struct usb_ac_header_descriptor_##n {				\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__le16 bcdADC;						\
+	__le16 wTotalLength;					\
+	__u8  bInCollection;					\
+	__u8  baInterfaceNr[n];					\
+} __attribute__ ((packed))
+
+#endif
diff --git a/include/linux/usb/midi.h b/include/linux/usb/midi.h
new file mode 100644
index 0000000..11a97d5
--- /dev/null
+++ b/include/linux/usb/midi.h
@@ -0,0 +1,112 @@
+/*
+ * <linux/usb/midi.h> -- USB MIDI definitions.
+ *
+ * Copyright (C) 2006 Thumtronics Pty Ltd.
+ * Developed for Thumtronics by Grey Innovation
+ * Ben Williamson <ben.williamson@greyinnovation.com>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This file holds USB constants and structures defined
+ * by the USB Device Class Definition for MIDI Devices.
+ * Comments below reference relevant sections of that document:
+ *
+ * http://www.usb.org/developers/devclass_docs/midi10.pdf
+ */
+
+#ifndef __LINUX_USB_MIDI_H
+#define __LINUX_USB_MIDI_H
+
+#include <linux/types.h>
+
+/* A.1  MS Class-Specific Interface Descriptor Subtypes */
+#define USB_MS_HEADER		0x01
+#define USB_MS_MIDI_IN_JACK	0x02
+#define USB_MS_MIDI_OUT_JACK	0x03
+#define USB_MS_ELEMENT		0x04
+
+/* A.2  MS Class-Specific Endpoint Descriptor Subtypes */
+#define USB_MS_GENERAL		0x01
+
+/* A.3  MS MIDI IN and OUT Jack Types */
+#define USB_MS_EMBEDDED		0x01
+#define USB_MS_EXTERNAL		0x02
+
+/* 6.1.2.1  Class-Specific MS Interface Header Descriptor */
+struct usb_ms_header_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;
+	__u8  bDescriptorSubtype;
+	__le16 bcdMSC;
+	__le16 wTotalLength;
+} __attribute__ ((packed));
+
+#define USB_DT_MS_HEADER_SIZE	7
+
+/* 6.1.2.2  MIDI IN Jack Descriptor */
+struct usb_midi_in_jack_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_MIDI_IN_JACK
+	__u8  bJackType;		// USB_MS_EMBEDDED/EXTERNAL
+	__u8  bJackID;
+	__u8  iJack;
+} __attribute__ ((packed));
+
+#define USB_DT_MIDI_IN_SIZE	6
+
+struct usb_midi_source_pin {
+	__u8  baSourceID;
+	__u8  baSourcePin;
+} __attribute__ ((packed));
+
+/* 6.1.2.3  MIDI OUT Jack Descriptor */
+struct usb_midi_out_jack_descriptor {
+	__u8  bLength;
+	__u8  bDescriptorType;		// USB_DT_CS_INTERFACE
+	__u8  bDescriptorSubtype;	// USB_MS_MIDI_OUT_JACK
+	__u8  bJackType;		// USB_MS_EMBEDDED/EXTERNAL
+	__u8  bJackID;
+	__u8  bNrInputPins;		// p
+	struct usb_midi_source_pin pins[]; // [p]
+	/*__u8  iJack;  -- ommitted due to variable-sized pins[] */
+} __attribute__ ((packed));
+
+#define USB_DT_MIDI_OUT_SIZE(p)	(7 + 2 * (p))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(p)			\
+struct usb_midi_out_jack_descriptor_##p {			\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bJackType;					\
+	__u8  bJackID;						\
+	__u8  bNrInputPins;					\
+	struct usb_midi_source_pin pins[p];			\
+	__u8  iJack;						\
+} __attribute__ ((packed))
+
+/* 6.2.2  Class-Specific MS Bulk Data Endpoint Descriptor */
+struct usb_ms_endpoint_descriptor {
+	__u8  bLength;			// 4+n
+	__u8  bDescriptorType;		// USB_DT_CS_ENDPOINT
+	__u8  bDescriptorSubtype;	// USB_MS_GENERAL
+	__u8  bNumEmbMIDIJack;		// n
+	__u8  baAssocJackID[];		// [n]
+} __attribute__ ((packed));
+
+#define USB_DT_MS_ENDPOINT_SIZE(n)	(4 + (n))
+
+/* As above, but more useful for defining your own descriptors: */
+#define DECLARE_USB_MS_ENDPOINT_DESCRIPTOR(n)			\
+struct usb_ms_endpoint_descriptor_##n {				\
+	__u8  bLength;						\
+	__u8  bDescriptorType;					\
+	__u8  bDescriptorSubtype;				\
+	__u8  bNumEmbMIDIJack;					\
+	__u8  baAssocJackID[n];					\
+} __attribute__ ((packed))
+
+#endif
-- 
cgit v1.1


From 3d5b2510f6e361e2203e163c03b93d0026de5629 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Sun, 30 Jul 2006 18:43:43 +0200
Subject: USB: making the kernel -Wshadow clean - USB & completion

include/linux/usb.h causes a lot of -Wshadow warnings - fix them.

  include/linux/usb.h:901: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here
  include/linux/usb.h:932: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here
  include/linux/usb.h:967: warning: declaration of 'complete' shadows a global declaration
  include/linux/completion.h:52: warning: shadowed declaration is here


Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e22f4b3..3d5cfa7 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -932,7 +932,7 @@ struct urb
  * @setup_packet: pointer to the setup_packet buffer
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  *
  * Initializes a control urb with the proper information needed to submit
@@ -944,7 +944,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
 					 unsigned char *setup_packet,
 					 void *transfer_buffer,
 					 int buffer_length,
-					 usb_complete_t complete,
+					 usb_complete_t complete_fn,
 					 void *context)
 {
 	spin_lock_init(&urb->lock);
@@ -953,7 +953,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
 	urb->setup_packet = setup_packet;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 }
 
@@ -964,7 +964,7 @@ static inline void usb_fill_control_urb (struct urb *urb,
  * @pipe: the endpoint pipe
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  *
  * Initializes a bulk urb with the proper information needed to submit it
@@ -975,7 +975,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
 				      unsigned int pipe,
 				      void *transfer_buffer,
 				      int buffer_length,
-				      usb_complete_t complete,
+				      usb_complete_t complete_fn,
 				      void *context)
 {
 	spin_lock_init(&urb->lock);
@@ -983,7 +983,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
 	urb->pipe = pipe;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 }
 
@@ -994,7 +994,7 @@ static inline void usb_fill_bulk_urb (struct urb *urb,
  * @pipe: the endpoint pipe
  * @transfer_buffer: pointer to the transfer buffer
  * @buffer_length: length of the transfer buffer
- * @complete: pointer to the usb_complete_t function
+ * @complete_fn: pointer to the usb_complete_t function
  * @context: what to set the urb context to.
  * @interval: what to set the urb interval to, encoded like
  *	the endpoint descriptor's bInterval value.
@@ -1010,7 +1010,7 @@ static inline void usb_fill_int_urb (struct urb *urb,
 				     unsigned int pipe,
 				     void *transfer_buffer,
 				     int buffer_length,
-				     usb_complete_t complete,
+				     usb_complete_t complete_fn,
 				     void *context,
 				     int interval)
 {
@@ -1019,7 +1019,7 @@ static inline void usb_fill_int_urb (struct urb *urb,
 	urb->pipe = pipe;
 	urb->transfer_buffer = transfer_buffer;
 	urb->transfer_buffer_length = buffer_length;
-	urb->complete = complete;
+	urb->complete = complete_fn;
 	urb->context = context;
 	if (dev->speed == USB_SPEED_HIGH)
 		urb->interval = 1 << (interval - 1);
-- 
cgit v1.1


From b7cfaaaf86571732c7728e95a2231a860385463c Mon Sep 17 00:00:00 2001
From: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Date: Wed, 27 Sep 2006 11:58:53 -0700
Subject: USB: New functions to check endpoints info.

These functions makes USB driver's code simpler when dealing with endpoints
by avoiding them from accessing the endpoint's descriptor structure directly
when they only need to know the endpoint's transfer type and/or
direction.

Please, read each functions' documentation in order to know how to use
them.

Signed-off-by: Luiz Fernando N. Capitulino <lcapitulino@mandriva.com.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3d5cfa7..f807479 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -467,6 +467,20 @@ static inline int usb_make_path (struct usb_device *dev, char *buf,
 
 /*-------------------------------------------------------------------------*/
 
+extern int usb_endpoint_dir_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_dir_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_bulk(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_int(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_xfer_isoc(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_bulk_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_bulk_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_int_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_int_out(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_isoc_in(const struct usb_endpoint_descriptor *epd);
+extern int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor *epd);
+
+/*-------------------------------------------------------------------------*/
+
 #define USB_DEVICE_ID_MATCH_DEVICE \
 		(USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT)
 #define USB_DEVICE_ID_MATCH_DEV_RANGE \
-- 
cgit v1.1


From dfe0d3ba20e860d0b9a16c4c6524180b8f93be05 Mon Sep 17 00:00:00 2001
From: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Date: Sun, 13 Aug 2006 17:30:14 -0700
Subject: USB Storage: add rio karma eject support

This changeset from Keith Bennett (via Bob Copeland) moves the Karma
initializer to its own file and adds trapping of the START_STOP command to
enable eject of the device.

Signed-off-by: Keith Bennett <keith@mcs.st-and.ac.uk>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_usual.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index e7fc5fe..2ae76fe 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -108,6 +108,9 @@ enum { US_DO_ALL_FLAGS };
 #ifdef CONFIG_USB_STORAGE_ALAUDA
 #define US_PR_ALAUDA    0xf4		/* Alauda chipsets */
 #endif
+#ifdef CONFIG_USB_STORAGE_KARMA
+#define US_PR_KARMA     0xf5		/* Rio Karma */
+#endif
 
 #define US_PR_DEVICE	0xff		/* Use device's value */
 
-- 
cgit v1.1


From 095bc335360a51623dd8571839bbf465851a7f4b Mon Sep 17 00:00:00 2001
From: "Luiz Fernando N. Capitulino" <lcapitulino@mandriva.com.br>
Date: Sat, 26 Aug 2006 23:48:11 -0300
Subject: USB core: Use const where possible.

This patch marks some USB core's functions parameters as const. This
improves the design (we're saying to the caller that its parameter is
not going to be modified) and may help in compiler's optimisation work.

Signed-off-by: Luiz Fernando N. Capitulino <lcapitulino@mandriva.com.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index f807479..26d8a5f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -387,7 +387,7 @@ extern void usb_put_dev(struct usb_device *dev);
 #define usb_unlock_device(udev)		up(&(udev)->dev.sem)
 #define usb_trylock_device(udev)	down_trylock(&(udev)->dev.sem)
 extern int usb_lock_device_for_reset(struct usb_device *udev,
-		struct usb_interface *iface);
+				     const struct usb_interface *iface);
 
 /* USB port reset for device reinitialization */
 extern int usb_reset_device(struct usb_device *dev);
@@ -426,10 +426,10 @@ const struct usb_device_id *usb_match_id(struct usb_interface *interface,
 
 extern struct usb_interface *usb_find_interface(struct usb_driver *drv,
 		int minor);
-extern struct usb_interface *usb_ifnum_to_if(struct usb_device *dev,
+extern struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev,
 		unsigned ifnum);
 extern struct usb_host_interface *usb_altnum_to_altsetting(
-		struct usb_interface *intf, unsigned int altnum);
+		const struct usb_interface *intf, unsigned int altnum);
 
 
 /**
@@ -1064,14 +1064,14 @@ void usb_buffer_unmap (struct urb *urb);
 #endif
 
 struct scatterlist;
-int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int nents);
+int usb_buffer_map_sg(const struct usb_device *dev, unsigned pipe,
+		      struct scatterlist *sg, int nents);
 #if 0
-void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents);
+void usb_buffer_dmasync_sg(const struct usb_device *dev, unsigned pipe,
+			   struct scatterlist *sg, int n_hw_ents);
 #endif
-void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe,
-		struct scatterlist *sg, int n_hw_ents);
+void usb_buffer_unmap_sg(const struct usb_device *dev, unsigned pipe,
+			 struct scatterlist *sg, int n_hw_ents);
 
 /*-------------------------------------------------------------------*
  *                         SYNCHRONOUS CALL SUPPORT                  *
-- 
cgit v1.1


From 088dc270e1da03744d977cbd9edd4311af142348 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 21 Aug 2006 12:08:19 -0400
Subject: usbcore: help drivers to change device configs

It's generally a bad idea for USB interface drivers to try to change a
device's configuration, and usbcore doesn't provide any way for them
to do it.  However in a few exceptional circumstances it can make
sense.  This patch (as767) adds a roundabout mechanism to help drivers
that may need it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 26d8a5f..f104efa 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1099,6 +1099,9 @@ extern int usb_clear_halt(struct usb_device *dev, int pipe);
 extern int usb_reset_configuration(struct usb_device *dev);
 extern int usb_set_interface(struct usb_device *dev, int ifnum, int alternate);
 
+/* this request isn't really synchronous, but it belongs with the others */
+extern int usb_driver_set_configuration(struct usb_device *udev, int config);
+
 /*
  * timeouts, in milliseconds, used for sending/receiving control messages
  * they typically complete within a few frames (msec) after they're issued
-- 
cgit v1.1


From a6d2bb9ff919b4685bd684620ec7a1ffa8bf2349 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:27:36 -0400
Subject: USB: remove struct usb_operations

All of the currently-supported USB host controller drivers use the HCD
bus-glue framework.  As part of the program for flattening out the glue
layer, this patch (as769) removes the usb_operations structure.  All
function calls now go directly to the HCD routines (slightly renamed
to remain within the "usb_" namespace).

The patch also removes usb_alloc_bus(), because it's not useful in the
HCD framework and it wasn't referenced anywhere.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index f104efa..4709033 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -257,8 +257,6 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 
 /* ----------------------------------------------------------------------- */
 
-struct usb_operations;
-
 /* USB device number allocation bitmap */
 struct usb_devmap {
 	unsigned long devicemap[128 / (8*sizeof(unsigned long))];
@@ -279,7 +277,6 @@ struct usb_bus {
 					 * round-robin allocation */
 
 	struct usb_devmap devmap;	/* device address allocation map */
-	struct usb_operations *op;	/* Operations (specific to the HC) */
 	struct usb_device *root_hub;	/* Root hub */
 	struct list_head bus_list;	/* list of busses */
 	void *hcpriv;                   /* Host Controller private data */
@@ -1051,7 +1048,6 @@ extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags);
 extern int usb_unlink_urb(struct urb *urb);
 extern void usb_kill_urb(struct urb *urb);
 
-#define HAVE_USB_BUFFERS
 void *usb_buffer_alloc (struct usb_device *dev, size_t size,
 	gfp_t mem_flags, dma_addr_t *dma);
 void usb_buffer_free (struct usb_device *dev, size_t size,
-- 
cgit v1.1


From dd990f16a39d4e615c0b70a0ab50b79b32bfb16d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:29:56 -0400
Subject: usbcore: Add flag for whether a host controller uses DMA

This patch (as770b) introduces a new field to usb_bus: a flag
indicating whether or not the host controller uses DMA.  This serves
to encapsulate the computation.  It also means we will have only one
spot to update if the DMA API changes.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4709033..0966175 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -269,6 +269,7 @@ struct usb_bus {
 	struct device *controller;	/* host/master side hardware */
 	int busnum;			/* Bus number (in order of reg) */
 	char *bus_name;			/* stable id (PCI slot_name etc) */
+	u8 uses_dma;			/* Does the host controller use DMA? */
 	u8 otg_port;			/* 0, or number of OTG/HNP port */
 	unsigned is_b_host:1;		/* true during some HNP roleswitches */
 	unsigned b_hnp_enable:1;	/* OTG: did A-Host enable HNP? */
-- 
cgit v1.1


From 1720058343fa43a1a25bfad9e62ea06e7e9743b6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 11:32:52 -0400
Subject: usbcore: trim down usb_bus structure

As part of the ongoing program to flatten out the HCD bus-glue layer,
this patch (as771b) eliminates the hcpriv, release, and kref fields
from struct usb_bus.  hcpriv and release were not being used for
anything worthwhile, and kref has been moved into the enclosing
usb_hcd structure.

Along with those changes, the patch gets rid of usb_bus_get and
usb_bus_put, replacing them with usb_get_hcd and usb_put_hcd.

The one interesting aspect is that the dev_set_drvdata call was
removed from usb_put_hcd, where it clearly doesn't belong.  This means
the driver private data won't get reset to NULL.  It shouldn't cause
any problems, since the private data is undefined when no driver is
bound.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0966175..c663032 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -280,7 +280,6 @@ struct usb_bus {
 	struct usb_devmap devmap;	/* device address allocation map */
 	struct usb_device *root_hub;	/* Root hub */
 	struct list_head bus_list;	/* list of busses */
-	void *hcpriv;                   /* Host Controller private data */
 
 	int bandwidth_allocated;	/* on this bus: how much of the time
 					 * reserved for periodic (intr/iso)
@@ -295,8 +294,6 @@ struct usb_bus {
 	struct dentry *usbfs_dentry;	/* usbfs dentry entry for the bus */
 
 	struct class_device *class_dev;	/* class device for this bus */
-	struct kref kref;		/* reference counting for this bus */
-	void (*release)(struct usb_bus *bus);
 
 #if defined(CONFIG_USB_MON)
 	struct mon_bus *mon_bus;	/* non-null when associated */
-- 
cgit v1.1


From b6956ffa595db97656d5901ca8fec77ef272d41a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 15:46:48 -0400
Subject: usbcore: store each usb_device's level in the tree

This patch (as778) adds a field to struct usb_device to store the
device's level in the USB tree.  In itself this number isn't really
important.  But the overhead is very low, and in a later patch it will
be used for preventing bogus warnings from the lockdep checker.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index c663032..df5c93e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -348,6 +348,7 @@ struct usb_device {
 
 	unsigned short bus_mA;		/* Current available from the bus */
 	u8 portnum;			/* Parent port number (origin 1) */
+	u8 level;			/* Number of USB hub ancestors */
 
 	int have_langid;		/* whether string_langid is valid */
 	int string_langid;		/* language ID for strings */
-- 
cgit v1.1


From 645daaab0b6adc35c1838df2a82f9d729fdb1767 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 30 Aug 2006 15:47:02 -0400
Subject: usbcore: add autosuspend/autoresume infrastructure

This patch (as739) adds the basic infrastructure for USB autosuspend
and autoresume.  The main features are:

	PM usage counters added to struct usb_device and struct
	usb_interface, indicating whether it's okay to autosuspend
	them or they are currently in use.

	Flag added to usb_device indicating whether the current
	suspend/resume operation originated from outside or as an
	autosuspend/autoresume.

	Flag added to usb_driver indicating whether the driver
	supports autosuspend.  If not, no device bound to the driver
	will be autosuspended.

	Mutex added to usb_device for protecting PM operations.
	Unlike the device semaphore, the locking rule for the pm_mutex
	is that you must acquire the locks going _up_ the device tree.

	New routines handling autosuspend/autoresume requests for
	interfaces and devices.

	Suspend and resume requests are propagated up the device tree
	(but not outside the USB subsystem).

	work_struct added to usb_device, for carrying out delayed
	autosuspend requests.

	Autoresume added (and autosuspend prevented) during probe and
	disconnect.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index df5c93e..0da15b0 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -19,6 +19,7 @@
 #include <linux/fs.h>		/* for struct file_operations */
 #include <linux/completion.h>	/* for struct completion */
 #include <linux/sched.h>	/* for current && schedule_timeout */
+#include <linux/mutex.h>	/* for struct mutex */
 
 struct usb_device;
 struct usb_driver;
@@ -103,8 +104,12 @@ enum usb_interface_condition {
  * @condition: binding state of the interface: not bound, binding
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
  * @is_active: flag set when the interface is bound and not suspended.
+ * @needs_remote_wakeup: flag set when the driver requires remote-wakeup
+ *	capability during autosuspend.
  * @dev: driver model's view of this device
  * @class_dev: driver model's class view of this device.
+ * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not
+ *	allowed unless the counter is 0.
  *
  * USB device drivers attach to interfaces on a physical device.  Each
  * interface encapsulates a single high level function, such as feeding
@@ -144,9 +149,11 @@ struct usb_interface {
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
 	unsigned is_active:1;		/* the interface is not suspended */
+	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 
 	struct device dev;		/* interface specific device info */
 	struct class_device *class_dev;
+	int pm_usage_cnt;		/* usage counter for autosuspend */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
 #define	interface_to_usbdev(intf) \
@@ -372,6 +379,15 @@ struct usb_device {
 
 	int maxchild;			/* Number of ports if hub */
 	struct usb_device *children[USB_MAXCHILDREN];
+
+#ifdef CONFIG_PM
+	struct work_struct autosuspend;	/* for delayed autosuspends */
+	struct mutex pm_mutex;		/* protects PM operations */
+	int pm_usage_cnt;		/* usage counter for autosuspend */
+
+	unsigned auto_pm:1;		/* autosuspend/resume in progress */
+	unsigned do_remote_wakeup:1;	/* remote wakeup should be enabled */
+#endif
 };
 #define	to_usb_device(d) container_of(d, struct usb_device, dev)
 
@@ -392,6 +408,17 @@ extern int usb_reset_composite_device(struct usb_device *dev,
 
 extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
+/* USB autosuspend and autoresume */
+#ifdef CONFIG_USB_SUSPEND
+extern int usb_autopm_get_interface(struct usb_interface *intf);
+extern void usb_autopm_put_interface(struct usb_interface *intf);
+
+#else
+#define usb_autopm_get_interface(intf)		0
+#define usb_autopm_put_interface(intf)		do {} while (0)
+#endif
+
+
 /*-------------------------------------------------------------------------*/
 
 /* for drivers using iso endpoints */
@@ -593,6 +620,8 @@ struct usbdrv_wrap {
  * @drvwrap: Driver-model core structure wrapper.
  * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
  *	added to this driver by preventing the sysfs file from being created.
+ * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
+ *	for interfaces bound to this driver.
  *
  * USB interface drivers must provide a name, probe() and disconnect()
  * methods, and an id_table.  Other driver fields are optional.
@@ -631,6 +660,7 @@ struct usb_driver {
 	struct usb_dynids dynids;
 	struct usbdrv_wrap drvwrap;
 	unsigned int no_dynamic_id:1;
+	unsigned int supports_autosuspend:1;
 };
 #define	to_usb_driver(d) container_of(d, struct usb_driver, drvwrap.driver)
 
@@ -648,6 +678,8 @@ struct usb_driver {
  * @suspend: Called when the device is going to be suspended by the system.
  * @resume: Called when the device is being resumed by the system.
  * @drvwrap: Driver-model core structure wrapper.
+ * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
+ *	for devices bound to this driver.
  *
  * USB drivers must provide all the fields listed above except drvwrap.
  */
@@ -660,6 +692,7 @@ struct usb_device_driver {
 	int (*suspend) (struct usb_device *udev, pm_message_t message);
 	int (*resume) (struct usb_device *udev);
 	struct usbdrv_wrap drvwrap;
+	unsigned int supports_autosuspend:1;
 };
 #define	to_usb_device_driver(d) container_of(d, struct usb_device_driver, \
 		drvwrap.driver)
-- 
cgit v1.1