From a2daff6803a384ce065e3681a2affea1da59c5f5 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 31 May 2011 14:09:00 -0700
Subject: fuse: fix non-ANSI void function notation

Fix void function parameter list sparse warning:

fs/fuse/inode.c:74:44: warning: non-ANSI function declaration of function 'fuse_alloc_forget'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cc6ec4b..5354906e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,7 +71,7 @@ struct fuse_mount_data {
 	unsigned blksize;
 };
 
-struct fuse_forget_link *fuse_alloc_forget()
+struct fuse_forget_link *fuse_alloc_forget(void)
 {
 	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
 }
-- 
cgit v1.1


From 4a4c879904aa0cc64629e14a49b64fb3d149bf1a Mon Sep 17 00:00:00 2001
From: Dan Bastone <dan@pwienterprises.com>
Date: Sun, 31 Jul 2011 07:40:49 -0400
Subject: HID: add support for new revision of Apple aluminum keyboard

Add USB device ids for the new revision (MB110LL/B) of Apple's wired aluminum
keyboard.  I have only confirmed that the ANSI version is correct - it is
assumed that the ISO and JIS versions follow the standard numbering convention.

Signed-off-by: Dan Bastone <dan@pwienterprises.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-apple.c | 6 ++++++
 drivers/hid/hid-core.c  | 3 +++
 drivers/hid/hid-ids.h   | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index b85744f..18b3bc6 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
 			APPLE_RDESC_JIS },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+		.driver_data = APPLE_HAS_FN },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
+		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS),
+		.driver_data = APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 1a5cf0c..242353d 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index db63ccf..61c8809 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -109,6 +109,9 @@
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI	0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO	0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS	0x0247
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI	0x024f
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO	0x0250
+#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS	0x0251
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI  0x0239
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO   0x023a
 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS   0x023b
-- 
cgit v1.1


From 18b08c55a9b04c8783420fb6657599ad724459cc Mon Sep 17 00:00:00 2001
From: Deepak Saxena <dsaxena@linaro.org>
Date: Thu, 4 Aug 2011 23:39:58 -0700
Subject: Input: remove CLOCK_TICK_RATE from analog joystick driver

The analog joystick driver is written for x86 systems. This
patch updates it to use the PIT_TICK_RATE value instead of
CLOCK_TICK_RATE as they are equivalent on x86 and we want to
deprecate the latter.

Signed-off-by: Deepak Saxena <dsaxena@linaro.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/joystick/analog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 4afe0a3..c021317 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -139,7 +139,7 @@ struct analog_port {
 #include <asm/i8253.h>
 
 #define GET_TIME(x)	do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)	(cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0)))
+#define DELTA(x,y)	(cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
 #define TIME_NAME	(cpu_has_tsc?"TSC":"PIT")
 static unsigned int get_time_pit(void)
 {
-- 
cgit v1.1


From a3ea14df0e383f44dcb2e61badb71180dbffe526 Mon Sep 17 00:00:00 2001
From: Paul Fox <pgf@laptop.org>
Date: Tue, 26 Jul 2011 16:42:26 +0100
Subject: x86, olpc: Wait for last byte of EC command to be accepted

When executing EC commands, only waiting when there are still
more bytes to write is usually fine. However, if the system
suspends very quickly after a call to olpc_ec_cmd(), the last
data byte may not yet be transferred to the EC, and the command
will not complete.

This solves a bug where the SCI wakeup mask was not correctly
written when going into suspend.

It means that sometimes, on XO-1.5 (but not XO-1), the
devices that were marked as wakeup sources can't wake up
the system. e.g. you ask for wifi wakeups, suspend, but then
incoming wifi frames don't wake up the system as they should.

Signed-off-by: Paul Fox <pgf@laptop.org>
Signed-off-by: Daniel Drake <dsd@laptop.org>
Acked-by: Andres Salomon <dilinger@queued.net>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/platform/olpc/olpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 8b9940e..7cce722 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -161,13 +161,13 @@ restart:
 	if (inbuf && inlen) {
 		/* write data to EC */
 		for (i = 0; i < inlen; i++) {
+			pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
+			outb(inbuf[i], 0x68);
 			if (wait_on_ibf(0x6c, 0)) {
 				printk(KERN_ERR "olpc-ec:  timeout waiting for"
 						" EC accept data!\n");
 				goto err;
 			}
-			pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
-			outb(inbuf[i], 0x68);
 		}
 	}
 	if (outbuf && outlen) {
-- 
cgit v1.1


From 05e33fc20ea5e493a2a1e7f1d04f43cdf89f83ed Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Fri, 5 Aug 2011 09:09:00 -0500
Subject: x86, UV: Remove UV delay in starting slave cpus

Delete the 10 msec delay between the INIT and SIPI when starting
slave cpus. I can find no requirement for this delay. BIOS also
has similar code sequences without the delay.

Removing the delay reduces boot time by 40 sec. Every bit helps.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/20110805140900.GA6774@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3..34b1859 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
 	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
 	    APIC_DM_INIT;
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-	mdelay(10);
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
 	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
-- 
cgit v1.1


From 37fb3a30b46237f23cfdf7ee09d49f9888dd13bf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 8 Aug 2011 16:08:08 +0200
Subject: fuse: fix flock

Commit a9ff4f87 "fuse: support BSD locking semantics" overlooked a
number of issues with supporing flock locks over existing POSIX
locking infrastructure:

  - it's not backward compatible, passing flock(2) calls to userspace
    unconditionally (if userspace sets FUSE_POSIX_LOCKS)

  - it doesn't cater for the fact that flock locks are automatically
    unlocked on file release

  - it doesn't take into account the fact that flock exclusive locks
    (write locks) don't need an fd opened for write.

The last one invalidates the original premise of the patch that flock
locks can be emulated with POSIX locks.

This patch fixes the first two issues.  The last one needs to be fixed
in userspace if the filesystem assumed that a write lock will happen
only on a file operned for write (as in the case of the current fuse
library).

Reported-by: Sebastian Pipping <webmaster@hartwork.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c       | 11 ++++++++++-
 fs/fuse/fuse_i.h     |  8 +++++++-
 fs/fuse/inode.c      |  8 +++++++-
 include/linux/fuse.h |  9 ++++++++-
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82a6646..e327849 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -245,6 +245,12 @@ void fuse_release_common(struct file *file, int opcode)
 	req = ff->reserved_req;
 	fuse_prepare_release(ff, file->f_flags, opcode);
 
+	if (ff->flock) {
+		struct fuse_release_in *inarg = &req->misc.release.in;
+		inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+		inarg->lock_owner = fuse_lock_owner_id(ff->fc,
+						       (fl_owner_t) file);
+	}
 	/* Hold vfsmount and dentry until release is finished */
 	path_get(&file->f_path);
 	req->misc.release.path = file->f_path;
@@ -1547,11 +1553,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-	if (fc->no_lock) {
+	if (fc->no_flock) {
 		err = flock_lock_file_wait(file, fl);
 	} else {
+		struct fuse_file *ff = file->private_data;
+
 		/* emulate flock with POSIX locks */
 		fl->fl_owner = (fl_owner_t) file;
+		ff->flock = true;
 		err = fuse_setlk(file, fl, 1);
 	}
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b788bec..eb8c613 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -135,6 +135,9 @@ struct fuse_file {
 
 	/** Wait queue head for poll */
 	wait_queue_head_t poll_wait;
+
+	/** Has flock been performed on this file? */
+	bool flock:1;
 };
 
 /** One input argument of a request */
@@ -448,7 +451,7 @@ struct fuse_conn {
 	/** Is removexattr not implemented by fs? */
 	unsigned no_removexattr:1;
 
-	/** Are file locking primitives not implemented by fs? */
+	/** Are posix file locking primitives not implemented by fs? */
 	unsigned no_lock:1;
 
 	/** Is access not implemented by fs? */
@@ -472,6 +475,9 @@ struct fuse_conn {
 	/** Don't apply umask to creation modes */
 	unsigned dont_mask:1;
 
+	/** Are BSD file locking primitives not implemented by fs? */
+	unsigned no_flock:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5354906e..f541d63 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->async_read = 1;
 			if (!(arg->flags & FUSE_POSIX_LOCKS))
 				fc->no_lock = 1;
+			if (arg->minor >= 17) {
+				if (!(arg->flags & FUSE_FLOCK_LOCKS))
+					fc->no_flock = 1;
+			}
 			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
 				fc->atomic_o_trunc = 1;
 			if (arg->minor >= 9) {
@@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
+			fc->no_flock = 1;
 		}
 
 		fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
@@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
 	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+		FUSE_FLOCK_LOCKS;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index d464de5..464cff5 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -47,6 +47,9 @@
  *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
  *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
  *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
  */
 
 #ifndef _LINUX_FUSE_H
@@ -78,7 +81,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 16
+#define FUSE_KERNEL_MINOR_VERSION 17
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -153,8 +156,10 @@ struct fuse_file_lock {
 /**
  * INIT request/reply flags
  *
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
  * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
  * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -163,6 +168,7 @@ struct fuse_file_lock {
 #define FUSE_EXPORT_SUPPORT	(1 << 4)
 #define FUSE_BIG_WRITES		(1 << 5)
 #define FUSE_DONT_MASK		(1 << 6)
+#define FUSE_FLOCK_LOCKS	(1 << 10)
 
 /**
  * CUSE INIT request/reply flags
@@ -175,6 +181,7 @@ struct fuse_file_lock {
  * Release flags
  */
 #define FUSE_RELEASE_FLUSH	(1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK	(1 << 1)
 
 /**
  * Getattr flags
-- 
cgit v1.1


From b40cdd56dfa065c0832905e266b39f79419e6914 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Mon, 25 Jul 2011 22:35:34 +0200
Subject: fuse: delete dead .write_begin and .write_end aops

Ever since 'ea9b990 fuse: implement perform_write', the .write_begin
and .write_end aops have been dead code.

Their task - acquiring a page from the page cache, sending out a write
request and releasing the page again - is now done batch-wise to
maximize the number of pages send per userspace request.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c | 70 ----------------------------------------------------------
 1 file changed, 70 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e327849..ab5b84e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -749,18 +749,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
 	return req->misc.write.out.size;
 }
 
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
-{
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-
-	*pagep = grab_cache_page_write_begin(mapping, index, flags);
-	if (!*pagep)
-		return -ENOMEM;
-	return 0;
-}
-
 void fuse_write_update_size(struct inode *inode, loff_t pos)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -773,62 +761,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos)
 	spin_unlock(&fc->lock);
 }
 
-static int fuse_buffered_write(struct file *file, struct inode *inode,
-			       loff_t pos, unsigned count, struct page *page)
-{
-	int err;
-	size_t nres;
-	struct fuse_conn *fc = get_fuse_conn(inode);
-	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-	struct fuse_req *req;
-
-	if (is_bad_inode(inode))
-		return -EIO;
-
-	/*
-	 * Make sure writepages on the same page are not mixed up with
-	 * plain writes.
-	 */
-	fuse_wait_on_page_writeback(inode, page->index);
-
-	req = fuse_get_req(fc);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->in.argpages = 1;
-	req->num_pages = 1;
-	req->pages[0] = page;
-	req->page_offset = offset;
-	nres = fuse_send_write(req, file, pos, count, NULL);
-	err = req->out.h.error;
-	fuse_put_request(fc, req);
-	if (!err && !nres)
-		err = -EIO;
-	if (!err) {
-		pos += nres;
-		fuse_write_update_size(inode, pos);
-		if (count == PAGE_CACHE_SIZE)
-			SetPageUptodate(page);
-	}
-	fuse_invalidate_attr(inode);
-	return err ? err : nres;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
-{
-	struct inode *inode = mapping->host;
-	int res = 0;
-
-	if (copied)
-		res = fuse_buffered_write(file, inode, pos, copied, page);
-
-	unlock_page(page);
-	page_cache_release(page);
-	return res;
-}
-
 static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
 				    struct inode *inode, loff_t pos,
 				    size_t count)
@@ -2181,8 +2113,6 @@ static const struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.writepage	= fuse_writepage,
 	.launder_page	= fuse_launder_page,
-	.write_begin	= fuse_write_begin,
-	.write_end	= fuse_write_end,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.bmap		= fuse_bmap,
-- 
cgit v1.1


From 478e0841b3dce3edc2c67bf0fc51af30f582e9e2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Mon, 25 Jul 2011 22:35:35 +0200
Subject: fuse: mark pages accessed when written to

As fuse does not use the page cache library functions when userspace
writes to a file, it did not benefit from 'c8236db mm: mark page
accessed before we write_end()' that made sure pages are properly
marked accessed when written to.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ab5b84e..7155f49 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/compat.h>
+#include <linux/swap.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -834,6 +835,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		pagefault_enable();
 		flush_dcache_page(page);
 
+		mark_page_accessed(page);
+
 		if (!tmp) {
 			unlock_page(page);
 			page_cache_release(page);
-- 
cgit v1.1


From a88769cde24fcef11219cf99193ee558d1028217 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 27 Jul 2011 10:11:28 -0700
Subject: firmware: fix google/gsmi.c build warning

Modify function parameter type to match expected type.  Fixes a
build warning:

drivers/firmware/google/gsmi.c:473: warning: initialization from incompatible pointer type

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Mike Waychison <mikew@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/firmware/google/gsmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index 68810fd..aa83de9 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -420,7 +420,7 @@ static efi_status_t gsmi_get_next_variable(unsigned long *name_size,
 
 static efi_status_t gsmi_set_variable(efi_char16_t *name,
 				      efi_guid_t *vendor,
-				      unsigned long attr,
+				      u32 attr,
 				      unsigned long data_size,
 				      void *data)
 {
-- 
cgit v1.1


From 7de636fa25c19fc4187f85f8325c50b1b21a6d8b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 27 Jul 2011 12:11:25 -0700
Subject: driver core: fix kernel-doc warning in platform.c

Warning(drivers/base/platform.c:50): No description found for parameter 'pdev'
Warning(drivers/base/platform.c:50): Excess function parameter 'dev' description in 'arch_setup_pdev_archdata'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 0cad9c7..99a5272 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(platform_bus);
 
 /**
  * arch_setup_pdev_archdata - Allow manipulation of archdata before its used
- * @dev: platform device
+ * @pdev: platform device
  *
  * This is called before platform_device_add() such that any pdev_archdata may
  * be setup before the platform_notifier is called.  So if a user needs to
-- 
cgit v1.1


From f9e0b159dbff693bacb64a929e04f442df985b50 Mon Sep 17 00:00:00 2001
From: Arnaud Lacombe <lacombar@gmail.com>
Date: Thu, 21 Jul 2011 13:16:19 -0400
Subject: drivers/base/devtmpfs.c: correct annotation of `setup_done'

This fixes the following section mismatch issue:

WARNING: vmlinux.o(.text+0x1192bf): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done
The function devtmpfsd() references the variable __initdata setup_done.
This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong.

WARNING: vmlinux.o(.text+0x119342): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done
The function devtmpfsd() references the variable __initdata setup_done.
This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong.

Signed-off-by: Arnaud Lacombe <lacombar@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/devtmpfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 33e1bed..a4760e0 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -376,7 +376,7 @@ int devtmpfs_mount(const char *mntdir)
 	return err;
 }
 
-static __initdata DECLARE_COMPLETION(setup_done);
+static DECLARE_COMPLETION(setup_done);
 
 static int handle(const char *name, mode_t mode, struct device *dev)
 {
-- 
cgit v1.1


From b882fc1b03d46e67ffe06133f4f4532db6e8dd0d Mon Sep 17 00:00:00 2001
From: Kukjin Kim <kgene.kim@samsung.com>
Date: Thu, 28 Jul 2011 08:50:38 +0900
Subject: serial: samsung: Fix build error

drivers/tty/serial/samsung.c: In function 's3c24xx_serial_init':
drivers/tty/serial/samsung.c:1237: error: lvalue required as unary '&' operand

Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/samsung.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index afc6294..6edafb5 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1225,15 +1225,19 @@ static const struct dev_pm_ops s3c24xx_serial_pm_ops = {
 	.suspend = s3c24xx_serial_suspend,
 	.resume = s3c24xx_serial_resume,
 };
+#define SERIAL_SAMSUNG_PM_OPS	(&s3c24xx_serial_pm_ops)
+
 #else /* !CONFIG_PM_SLEEP */
-#define s3c24xx_serial_pm_ops	NULL
+
+#define SERIAL_SAMSUNG_PM_OPS	NULL
 #endif /* CONFIG_PM_SLEEP */
 
 int s3c24xx_serial_init(struct platform_driver *drv,
 			struct s3c24xx_uart_info *info)
 {
 	dbg("s3c24xx_serial_init(%p,%p)\n", drv, info);
-	drv->driver.pm = &s3c24xx_serial_pm_ops;
+
+	drv->driver.pm = SERIAL_SAMSUNG_PM_OPS;
 
 	return platform_driver_register(drv);
 }
-- 
cgit v1.1


From cd566c64f50e568c0ac3c13bdd15f523631ce845 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Mon, 8 Aug 2011 23:39:59 -0700
Subject: Input: mma8450 - fix module device table type

The module device table for of_device_id should use "of" type.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/mma8450.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c
index 6c76cf7..0794778 100644
--- a/drivers/input/misc/mma8450.c
+++ b/drivers/input/misc/mma8450.c
@@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = {
 	{ .compatible = "fsl,mma8450", },
 	{ /* sentinel */ }
 };
-MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+MODULE_DEVICE_TABLE(of, mma8450_dt_ids);
 
 static struct i2c_driver mma8450_driver = {
 	.driver = {
-- 
cgit v1.1


From db0b34b07438d92c4c190998c42a502fbf90064e Mon Sep 17 00:00:00 2001
From: "Joshua V. Dillon" <jvdillon@gmail.com>
Date: Mon, 8 Aug 2011 23:45:14 -0700
Subject: Input: bcm5974 - add support for touchpads found in MacBookAir4,2

Added USB device IDs for MacBookAir4,2 trackpad. Device constants were
copied from the MacBookAir3,2 constants. The 4,2 device specification is
reportedly unchanged from the 3,2 predecessor and seems to work well.

Signed-off-by: Joshua V Dillon <jvdillon@gmail.com>
Signed-off-by: Chase Douglas <chase.douglas@canonical.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
index 3126983..48d9ec1 100644
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c
@@ -67,6 +67,10 @@
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI	0x0245
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO	0x0246
 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS	0x0247
+/* MacbookAir4,2 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI	0x024c
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO	0x024d
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS	0x024e
 
 #define BCM5974_DEVICE(prod) {					\
 	.match_flags = (USB_DEVICE_ID_MATCH_DEVICE |		\
@@ -104,6 +108,10 @@ static const struct usb_device_id bcm5974_table[] = {
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
+	/* MacbookAir4,2 */
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
 	/* Terminating entry */
 	{}
 };
@@ -294,6 +302,18 @@ static const struct bcm5974_config bcm5974_config_table[] = {
 		{ DIM_X, DIM_X / SN_COORD, -4415, 5050 },
 		{ DIM_Y, DIM_Y / SN_COORD, -55, 6680 }
 	},
+	{
+		USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI,
+		USB_DEVICE_ID_APPLE_WELLSPRING6_ISO,
+		USB_DEVICE_ID_APPLE_WELLSPRING6_JIS,
+		HAS_INTEGRATED_BUTTON,
+		0x84, sizeof(struct bt_data),
+		0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+		{ DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+		{ DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+		{ DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+		{ DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+	},
 	{}
 };
 
-- 
cgit v1.1


From 5ac04bf190e6f8b17238aef179ebd7f2bdfec919 Mon Sep 17 00:00:00 2001
From: Andiry Xu <andiry.xu@amd.com>
Date: Wed, 3 Aug 2011 16:46:48 +0800
Subject: xHCI: fix port U3 status check condition

Fix the port U3 status check when Clear PORT_SUSPEND Feature.
The port status should be masked with PORT_PLS_MASK to check if it's in
U3 state.

This should be backported to kernels as old as 2.6.37.

Signed-off-by: Andiry Xu <andiry.xu@amd.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-hub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 0be788c..cddcdcca 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -664,7 +664,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 			xhci_dbg(xhci, "PORTSC %04x\n", temp);
 			if (temp & PORT_RESET)
 				goto error;
-			if (temp & XDEV_U3) {
+			if ((temp & PORT_PLS_MASK) == XDEV_U3) {
 				if ((temp & PORT_PE) == 0)
 					goto error;
 
-- 
cgit v1.1


From 8a8ff2f9399b23b968901f585ccb5a70a537c5ae Mon Sep 17 00:00:00 2001
From: Andiry Xu <andiry.xu@amd.com>
Date: Wed, 3 Aug 2011 16:46:49 +0800
Subject: xHCI: report USB2 port in resuming as suspend

When a USB2 port initiate a remote wakeup, software shall ensure that
resume is signaled for at least 20ms, and then write '0' to the PLS field.
According to this, xhci driver do the following things:

1. When receive a remote wakeup event in irq_handler, set the resume_done
   value as jiffies + 20ms, and modify rh_timer to poll root hub status at
   that time;
2. When receive a GetPortStatus request, if the jiffies is after the
   resume_done value, clear the resume signal and resume_done.

However, if usb_port_resume() is called before the rh_timer triggered, it
will indicate the port as Suspend Cleared and skip the clear resume signal
part. The device will fail the usb_get_status request in finish_port_resume(),
and usbcore will try a reset-resume instead. Device will work OK after
reset-resume, but resume_done value is not cleared in this case, and
xhci_bus_suspend() will fail because when it finds a non-zero resume_done
value, it will regard the port as resuming and return -EBUSY.

This causes issue on some platforms that the system fail to suspend
after remote wakeup from suspend by USB2 devices connected to xHCI port.

To fix this issue, report the port status as suspend if the resume is
signaling less that 20ms, and usb_port_resume() will wait 25ms and check
port status again, so xHCI driver can clear the resume signaling and
resume_done value.

This should be backported to kernels as old as 2.6.37.

Signed-off-by: Andiry Xu <andiry.xu@amd.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-hub.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index cddcdcca..1e96d1f 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -463,11 +463,12 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 					&& (temp & PORT_POWER))
 				status |= USB_PORT_STAT_SUSPEND;
 		}
-		if ((temp & PORT_PLS_MASK) == XDEV_RESUME) {
+		if ((temp & PORT_PLS_MASK) == XDEV_RESUME &&
+				!DEV_SUPERSPEED(temp)) {
 			if ((temp & PORT_RESET) || !(temp & PORT_PE))
 				goto error;
-			if (!DEV_SUPERSPEED(temp) && time_after_eq(jiffies,
-						bus_state->resume_done[wIndex])) {
+			if (time_after_eq(jiffies,
+					bus_state->resume_done[wIndex])) {
 				xhci_dbg(xhci, "Resume USB2 port %d\n",
 					wIndex + 1);
 				bus_state->resume_done[wIndex] = 0;
@@ -487,6 +488,14 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 				xhci_ring_device(xhci, slot_id);
 				bus_state->port_c_suspend |= 1 << wIndex;
 				bus_state->suspended_ports &= ~(1 << wIndex);
+			} else {
+				/*
+				 * The resume has been signaling for less than
+				 * 20ms. Report the port status as SUSPEND,
+				 * let the usbcore check port status again
+				 * and clear resume signaling later.
+				 */
+				status |= USB_PORT_STAT_SUSPEND;
 			}
 		}
 		if ((temp & PORT_PLS_MASK) == XDEV_U0
-- 
cgit v1.1


From d13565c12828ce0cd2a3862bf6260164a0653352 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 22 Jul 2011 14:34:34 -0700
Subject: xhci: Fix memory leak during failed enqueue.

When the isochronous transfer support was introduced, and the xHCI driver
switched to using urb->hcpriv to store an "urb_priv" pointer, a couple of
memory leaks were introduced into the URB enqueue function in its error
handling paths.

xhci_urb_enqueue allocates urb_priv, but it doesn't free it if changing
the control endpoint's max packet size fails or the bulk endpoint is in
the middle of allocating or deallocating streams.

xhci_urb_enqueue also doesn't free urb_priv if any of the four endpoint
types' enqueue functions fail.  Instead, it expects those functions to
free urb_priv if an error occurs.  However, the bulk, control, and
interrupt enqueue functions do not free urb_priv if the endpoint ring is
NULL.  It will, however, get freed if prepare_transfer() fails in those
enqueue functions.

Several of the error paths in the isochronous endpoint enqueue function
also fail to free it.  xhci_queue_isoc_tx_prepare() doesn't free urb_priv
if prepare_ring() indicates there is not enough room for all the
isochronous TDs in this URB.  If individual isochronous TDs fail to be
queued (perhaps due to an endpoint state change), urb_priv is also leaked.

This argues that the freeing of urb_priv should be done in the function
that allocated it, xhci_urb_enqueue.

This patch looks rather ugly, but refactoring the code will have to wait
because this patch needs to be backported to stable kernels.

This patch should be backported to kernels as old as 2.6.36.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Andiry Xu <andiry.xu@amd.com>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-ring.c |  5 +----
 drivers/usb/host/xhci.c      | 21 +++++++++++++++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 7113d16..9d3f9dd 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2500,11 +2500,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
 
 	if (td_index == 0) {
 		ret = usb_hcd_link_urb_to_ep(bus_to_hcd(urb->dev->bus), urb);
-		if (unlikely(ret)) {
-			xhci_urb_free_priv(xhci, urb_priv);
-			urb->hcpriv = NULL;
+		if (unlikely(ret))
 			return ret;
-		}
 	}
 
 	td->urb = urb;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 1c4432d..8e84acf 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1085,8 +1085,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 		if (urb->dev->speed == USB_SPEED_FULL) {
 			ret = xhci_check_maxpacket(xhci, slot_id,
 					ep_index, urb);
-			if (ret < 0)
+			if (ret < 0) {
+				xhci_urb_free_priv(xhci, urb_priv);
+				urb->hcpriv = NULL;
 				return ret;
+			}
 		}
 
 		/* We have a spinlock and interrupts disabled, so we must pass
@@ -1097,6 +1100,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 			goto dying;
 		ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
+		if (ret)
+			goto free_priv;
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) {
 		spin_lock_irqsave(&xhci->lock, flags);
@@ -1117,6 +1122,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 			ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
 					slot_id, ep_index);
 		}
+		if (ret)
+			goto free_priv;
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else if (usb_endpoint_xfer_int(&urb->ep->desc)) {
 		spin_lock_irqsave(&xhci->lock, flags);
@@ -1124,6 +1131,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 			goto dying;
 		ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
+		if (ret)
+			goto free_priv;
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else {
 		spin_lock_irqsave(&xhci->lock, flags);
@@ -1131,18 +1140,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 			goto dying;
 		ret = xhci_queue_isoc_tx_prepare(xhci, GFP_ATOMIC, urb,
 				slot_id, ep_index);
+		if (ret)
+			goto free_priv;
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	}
 exit:
 	return ret;
 dying:
-	xhci_urb_free_priv(xhci, urb_priv);
-	urb->hcpriv = NULL;
 	xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for "
 			"non-responsive xHCI host.\n",
 			urb->ep->desc.bEndpointAddress, urb);
+	ret = -ESHUTDOWN;
+free_priv:
+	xhci_urb_free_priv(xhci, urb_priv);
+	urb->hcpriv = NULL;
 	spin_unlock_irqrestore(&xhci->lock, flags);
-	return -ESHUTDOWN;
+	return ret;
 }
 
 /* Get the right ring for the given URB.
-- 
cgit v1.1


From 522989a27c7badb608155b1f1dea3487ed431f74 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 29 Jul 2011 12:44:32 -0700
Subject: xhci: Fix failed enqueue in the middle of isoch TD.

When an isochronous transfer is enqueued, xhci_queue_isoc_tx_prepare()
will ensure that there is enough room on the transfer rings for all of the
isochronous TDs for that URB.  However, when xhci_queue_isoc_tx() is
enqueueing individual isoc TDs, the prepare_transfer() function can fail
if the endpoint state has changed to disabled, error, or some other
unknown state.

With the current code, if Nth TD (not the first TD) fails, the ring is
left in a sorry state.  The partially enqueued TDs are left on the ring,
and the first TRB of the TD is not given back to the hardware.  The
enqueue pointer is left on the TRB after the last successfully enqueued
TD.  This means the ring is basically useless.  Any new transfers will be
enqueued after the failed TDs, which the hardware will never read because
the cycle bit indicates it does not own them.  The ring will fill up with
untransferred TDs, and the endpoint will be basically unusable.

The untransferred TDs will also remain on the TD list.  Since the td_list
is a FIFO, this basically means the ring handler will be waiting on TDs
that will never be completed (or worse, dereference memory that doesn't
exist any more).

Change the code to clean up the isochronous ring after a failed transfer.
If the first TD failed, simply return and allow the xhci_urb_enqueue
function to free the urb_priv.  If the Nth TD failed, first remove the TDs
from the td_list.  Then convert the TRBs that were enqueued into No-op
TRBs.  Make sure to flip the cycle bit on all enqueued TRBs (including any
link TRBs in the middle or between TDs), but leave the cycle bit of the
first TRB (which will show software-owned) intact.  Then move the ring
enqueue pointer back to the first TRB and make sure to change the
xhci_ring's cycle state to what is appropriate for that ring segment.

This ensures that the No-op TRBs will be overwritten by subsequent TDs,
and the hardware will not start executing random TRBs because the cycle
bit was left as hardware-owned.

This bug is unlikely to be hit, but it was something I noticed while
tracking down the watchdog timer issue.  I verified that the fix works by
injecting some errors on the 250th isochronous URB queued, although I
could not verify that the ring is in the correct state because uvcvideo
refused to talk to the device after the first usb_submit_urb() failed.
Ring debugging shows that the ring looks correct, however.

This patch should be backported to kernels as old as 2.6.36.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Andiry Xu <andiry.xu@amd.com>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-ring.c | 50 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 9d3f9dd..f72149b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -514,8 +514,12 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 			(unsigned long long) addr);
 }
 
+/* flip_cycle means flip the cycle bit of all but the first and last TRB.
+ * (The last TRB actually points to the ring enqueue pointer, which is not part
+ * of this TD.)  This is used to remove partially enqueued isoc TDs from a ring.
+ */
 static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
-		struct xhci_td *cur_td)
+		struct xhci_td *cur_td, bool flip_cycle)
 {
 	struct xhci_segment *cur_seg;
 	union xhci_trb *cur_trb;
@@ -528,6 +532,12 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
 			 * leave the pointers intact.
 			 */
 			cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN);
+			/* Flip the cycle bit (link TRBs can't be the first
+			 * or last TRB).
+			 */
+			if (flip_cycle)
+				cur_trb->generic.field[3] ^=
+					cpu_to_le32(TRB_CYCLE);
 			xhci_dbg(xhci, "Cancel (unchain) link TRB\n");
 			xhci_dbg(xhci, "Address = %p (0x%llx dma); "
 					"in seg %p (0x%llx dma)\n",
@@ -541,6 +551,11 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
 			cur_trb->generic.field[2] = 0;
 			/* Preserve only the cycle bit of this TRB */
 			cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE);
+			/* Flip the cycle bit except on the first or last TRB */
+			if (flip_cycle && cur_trb != cur_td->first_trb &&
+					cur_trb != cur_td->last_trb)
+				cur_trb->generic.field[3] ^=
+					cpu_to_le32(TRB_CYCLE);
 			cur_trb->generic.field[3] |= cpu_to_le32(
 				TRB_TYPE(TRB_TR_NOOP));
 			xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) "
@@ -719,7 +734,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 					cur_td->urb->stream_id,
 					cur_td, &deq_state);
 		else
-			td_to_noop(xhci, ep_ring, cur_td);
+			td_to_noop(xhci, ep_ring, cur_td, false);
 remove_finished_td:
 		/*
 		 * The event handler won't see a completion for this TD anymore,
@@ -3223,6 +3238,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	start_trb = &ep_ring->enqueue->generic;
 	start_cycle = ep_ring->cycle_state;
 
+	urb_priv = urb->hcpriv;
 	/* Queue the first TRB, even if it's zero-length */
 	for (i = 0; i < num_tds; i++) {
 		unsigned int total_packet_count;
@@ -3246,12 +3262,13 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
 				urb->stream_id, trbs_per_td, urb, i, mem_flags);
-		if (ret < 0)
-			return ret;
+		if (ret < 0) {
+			if (i == 0)
+				return ret;
+			goto cleanup;
+		}
 
-		urb_priv = urb->hcpriv;
 		td = urb_priv->td[i];
-
 		for (j = 0; j < trbs_per_td; j++) {
 			u32 remainder = 0;
 			field = TRB_TBC(burst_count) | TRB_TLBPC(residue);
@@ -3341,6 +3358,27 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
 			start_cycle, start_trb);
 	return 0;
+cleanup:
+	/* Clean up a partially enqueued isoc transfer. */
+
+	for (i--; i >= 0; i--)
+		list_del(&urb_priv->td[i]->td_list);
+
+	/* Use the first TD as a temporary variable to turn the TDs we've queued
+	 * into No-ops with a software-owned cycle bit. That way the hardware
+	 * won't accidentally start executing bogus TDs when we partially
+	 * overwrite them.  td->first_trb and td->start_seg are already set.
+	 */
+	urb_priv->td[0]->last_trb = ep_ring->enqueue;
+	/* Every TRB except the first & last will have its cycle bit flipped. */
+	td_to_noop(xhci, ep_ring, urb_priv->td[0], true);
+
+	/* Reset the ring enqueue back to the first TRB and its cycle bit. */
+	ep_ring->enqueue = urb_priv->td[0]->first_trb;
+	ep_ring->enq_seg = urb_priv->td[0]->start_seg;
+	ep_ring->cycle_state = start_cycle;
+	usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb);
+	return ret;
 }
 
 /*
-- 
cgit v1.1


From 585df1d90cb07a02ca6c7a7d339e56e46d50dafb Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Tue, 2 Aug 2011 15:43:40 -0700
Subject: xhci: Remove TDs from TD lists when URBs are canceled.

When a driver tries to cancel an URB, and the host controller is dying,
xhci_urb_dequeue will giveback the URB without removing the xhci_tds
that comprise that URB from the td_list or the cancelled_td_list.  This
can cause a race condition between the driver calling URB dequeue and
the stop endpoint command watchdog timer.

If the timer fires on a dying host, and a driver attempts to resubmit
while the watchdog timer has dropped the xhci->lock to giveback a
cancelled URB, URBs may be given back by the xhci_urb_dequeue() function.
At that point, the URB's priv pointer will be freed and set to NULL, but
the TDs will remain on the td_list.  This will cause an oops in
xhci_giveback_urb_in_irq() when the watchdog timer attempts to loop
through the endpoints' td_lists, giving back killed URBs.

Make sure that xhci_urb_dequeue() removes TDs from the TD lists and
canceled TD lists before it gives back the URB.

This patch should be backported to kernels as old as 2.6.36.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Andiry Xu <andiry.xu@amd.com>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-ring.c | 16 ++++++++--------
 drivers/usb/host/xhci.c      |  7 +++++++
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f72149b..b2d654b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -741,7 +741,7 @@ remove_finished_td:
 		 * so remove it from the endpoint ring's TD list.  Keep it in
 		 * the cancelled TD list for URB completion later.
 		 */
-		list_del(&cur_td->td_list);
+		list_del_init(&cur_td->td_list);
 	}
 	last_unlinked_td = cur_td;
 	xhci_stop_watchdog_timer_in_irq(xhci, ep);
@@ -769,7 +769,7 @@ remove_finished_td:
 	do {
 		cur_td = list_entry(ep->cancelled_td_list.next,
 				struct xhci_td, cancelled_td_list);
-		list_del(&cur_td->cancelled_td_list);
+		list_del_init(&cur_td->cancelled_td_list);
 
 		/* Clean up the cancelled URB */
 		/* Doesn't matter what we pass for status, since the core will
@@ -877,9 +877,9 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
 				cur_td = list_first_entry(&ring->td_list,
 						struct xhci_td,
 						td_list);
-				list_del(&cur_td->td_list);
+				list_del_init(&cur_td->td_list);
 				if (!list_empty(&cur_td->cancelled_td_list))
-					list_del(&cur_td->cancelled_td_list);
+					list_del_init(&cur_td->cancelled_td_list);
 				xhci_giveback_urb_in_irq(xhci, cur_td,
 						-ESHUTDOWN, "killed");
 			}
@@ -888,7 +888,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
 						&temp_ep->cancelled_td_list,
 						struct xhci_td,
 						cancelled_td_list);
-				list_del(&cur_td->cancelled_td_list);
+				list_del_init(&cur_td->cancelled_td_list);
 				xhci_giveback_urb_in_irq(xhci, cur_td,
 						-ESHUTDOWN, "killed");
 			}
@@ -1580,10 +1580,10 @@ td_cleanup:
 			else
 				*status = 0;
 		}
-		list_del(&td->td_list);
+		list_del_init(&td->td_list);
 		/* Was this TD slated to be cancelled but completed anyway? */
 		if (!list_empty(&td->cancelled_td_list))
-			list_del(&td->cancelled_td_list);
+			list_del_init(&td->cancelled_td_list);
 
 		urb_priv->td_cnt++;
 		/* Giveback the urb when all the tds are completed */
@@ -3362,7 +3362,7 @@ cleanup:
 	/* Clean up a partially enqueued isoc transfer. */
 
 	for (i--; i >= 0; i--)
-		list_del(&urb_priv->td[i]->td_list);
+		list_del_init(&urb_priv->td[i]->td_list);
 
 	/* Use the first TD as a temporary variable to turn the TDs we've queued
 	 * into No-ops with a software-owned cycle bit. That way the hardware
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 8e84acf..3a0f695 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1252,6 +1252,13 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) {
 		xhci_dbg(xhci, "HW died, freeing TD.\n");
 		urb_priv = urb->hcpriv;
+		for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+			td = urb_priv->td[i];
+			if (!list_empty(&td->td_list))
+				list_del_init(&td->td_list);
+			if (!list_empty(&td->cancelled_td_list))
+				list_del_init(&td->cancelled_td_list);
+		}
 
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
 		spin_unlock_irqrestore(&xhci->lock, flags);
-- 
cgit v1.1


From 30eefc95841ce51c3281876f0b954dd1d3c0bd5f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 10 Aug 2011 11:22:42 -0700
Subject: xen: xen-selfballoon.c needs more header files

Fix build errors (found when CONFIG_SYSFS is not enabled):

drivers/xen/xen-selfballoon.c:446: warning: data definition has no type or storage class
drivers/xen/xen-selfballoon.c:446: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL'
drivers/xen/xen-selfballoon.c:446: warning: parameter names (without types) in function declaration
drivers/xen/xen-selfballoon.c:485: error: expected declaration specifiers or '...' before string constant
drivers/xen/xen-selfballoon.c:485: warning: data definition has no type or storage class
drivers/xen/xen-selfballoon.c:485: warning: type defaults to 'int' in declaration of 'MODULE_LICENSE'
drivers/xen/xen-selfballoon.c:485: warning: function declaration isn't a prototype

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/xen-selfballoon.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 1b4afd8..6ea852e 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -70,6 +70,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/module.h>
 #include <linux/workqueue.h>
 #include <xen/balloon.h>
 #include <xen/tmem.h>
-- 
cgit v1.1


From d9b830fa444c1f4955d0ee88f5af2aa24d2c7837 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 11 Aug 2011 09:19:29 -0700
Subject: Input: mpu3050 - correct call to input_free_device

input_free_device() should be used if input_register_device() was not called
yet or if it failed.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/mpu3050.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
index b95fac1..f71dc72 100644
--- a/drivers/input/misc/mpu3050.c
+++ b/drivers/input/misc/mpu3050.c
@@ -282,7 +282,7 @@ err_free_irq:
 err_pm_set_suspended:
 	pm_runtime_set_suspended(&client->dev);
 err_free_mem:
-	input_unregister_device(idev);
+	input_free_device(idev);
 	kfree(sensor);
 	return error;
 }
-- 
cgit v1.1


From 22f83205e59c97c2460ad8e4bd6e71268cb2f37f Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 11 Aug 2011 09:22:45 -0700
Subject: Input: tegra-kbc - correct call to input_free_device

If kzalloc for kbc fails, then we have NULL pointer dereference while
calling input_free_device(kbc->idev) in the error handling.
So it is safer to always use the original name, input_dev.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/tegra-kbc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index f270447..a5a7791 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -702,7 +702,7 @@ err_iounmap:
 err_free_mem_region:
 	release_mem_region(res->start, resource_size(res));
 err_free_mem:
-	input_free_device(kbc->idev);
+	input_free_device(input_dev);
 	kfree(kbc);
 
 	return err;
-- 
cgit v1.1


From 4b1bfb7d2d125af6653d6c2305356b2677f79dc6 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 10 Aug 2011 15:32:22 +0200
Subject: rt2x00: fix crash in rt2800usb_write_tx_desc

Patch should fix this oops:

BUG: unable to handle kernel NULL pointer dereference at 000000a0
IP: [<f8e06078>] rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb]
*pdpt = 000000002408c001 *pde = 0000000024079067 *pte = 0000000000000000
Oops: 0000 [#1] SMP
EIP: 0060:[<f8e06078>] EFLAGS: 00010282 CPU: 0
EIP is at rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb]
EAX: 00000035 EBX: ef2bef10 ECX: 00000000 EDX: d40958a0
ESI: ef1865f8 EDI: ef1865f8 EBP: d4095878 ESP: d409585c
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Call Trace:
 [<f8da5e85>] rt2x00queue_write_tx_frame+0x155/0x300 [rt2x00lib]
 [<f8da424c>] rt2x00mac_tx+0x7c/0x370 [rt2x00lib]
 [<c04882b2>] ? mark_held_locks+0x62/0x90
 [<c081f645>] ? _raw_spin_unlock_irqrestore+0x35/0x60
 [<c04884ba>] ? trace_hardirqs_on_caller+0x5a/0x170
 [<c04885db>] ? trace_hardirqs_on+0xb/0x10
 [<f8d618ac>] __ieee80211_tx+0x5c/0x1e0 [mac80211]
 [<f8d631fc>] ieee80211_tx+0xbc/0xe0 [mac80211]
 [<f8d63163>] ? ieee80211_tx+0x23/0xe0 [mac80211]
 [<f8d632e1>] ieee80211_xmit+0xc1/0x200 [mac80211]
 [<f8d63220>] ? ieee80211_tx+0xe0/0xe0 [mac80211]
 [<c0487d45>] ? lock_release_holdtime+0x35/0x1b0
 [<f8d63986>] ? ieee80211_subif_start_xmit+0x446/0x5f0 [mac80211]
 [<f8d637dd>] ieee80211_subif_start_xmit+0x29d/0x5f0 [mac80211]
 [<f8d63924>] ? ieee80211_subif_start_xmit+0x3e4/0x5f0 [mac80211]
 [<c0760188>] ? sock_setsockopt+0x6a8/0x6f0
 [<c0760000>] ? sock_setsockopt+0x520/0x6f0
 [<c076daef>] dev_hard_start_xmit+0x2ef/0x650

Oops might happen because we perform parallel putting new entries in a
queue (rt2x00queue_write_tx_frame()) and removing entries after
finishing transmitting (rt2800usb_work_txdone()). There are cases when
_txdone may process an entry that was not fully send and nullify
entry->skb .

To fix check in _txdone if entry has flags that indicate pending
transmission and wait until flags get cleared.

Reported-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: stable@kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800usb.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 9395631..2cb25ea 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
 	int wcid, ack, pid;
 	int tx_wcid, tx_ack, tx_pid;
 
+	if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+	    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+		WARNING(entry->queue->rt2x00dev,
+			"Data pending for entry %u in queue %u\n",
+			entry->entry_idx, entry->queue->qid);
+		cond_resched();
+		return false;
+	}
+
 	wcid	= rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
 	ack	= rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
 	pid	= rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
@@ -558,8 +567,10 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 		while (!rt2x00queue_empty(queue)) {
 			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
 
-			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+			    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags))
 				break;
+
 			if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags))
 				rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE);
 			else if (rt2x00queue_status_timeout(entry))
-- 
cgit v1.1


From df71c9cfceea801e7e26e2c74241758ef9c042e5 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 10 Aug 2011 15:32:23 +0200
Subject: rt2x00: fix order of entry flags modification

In rt2800usb_work_txdone we check flags in order:

- ENTRY_OWNER_DEVICE_DATA
- ENTRY_DATA_STATUS_PENDING
- ENTRY_DATA_IO_FAILED

Modify flags in separate order in rt2x00usb_interrupt_txdone, to avoid
processing entries in _txdone with wrong flags or skip processing
ready entries.

Reported-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: stable@kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2x00usb.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index b6b4542..7fbb55c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	struct queue_entry *entry = (struct queue_entry *)urb->context;
 	struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
 
-	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+	if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
 		return;
-
-	if (rt2x00dev->ops->lib->tx_dma_done)
-		rt2x00dev->ops->lib->tx_dma_done(entry);
-
-	/*
-	 * Report the frame as DMA done
-	 */
-	rt2x00lib_dmadone(entry);
-
 	/*
 	 * Check if the frame was correctly uploaded
 	 */
 	if (urb->status)
 		set_bit(ENTRY_DATA_IO_FAILED, &entry->flags);
+	/*
+	 * Report the frame as DMA done
+	 */
+	rt2x00lib_dmadone(entry);
 
+	if (rt2x00dev->ops->lib->tx_dma_done)
+		rt2x00dev->ops->lib->tx_dma_done(entry);
 	/*
 	 * Schedule the delayed work for reading the TX status
 	 * from the device.
-- 
cgit v1.1


From 674db1344443204b6ce3293f2df8fd1b7665deea Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 10 Aug 2011 15:32:24 +0200
Subject: rt2x00: fix crash in rt2800usb_get_txwi

Patch should fix this oops:

BUG: unable to handle kernel NULL pointer dereference at 000000a0
IP: [<f81b30c9>] rt2800usb_get_txwi+0x19/0x70 [rt2800usb]
*pdpt = 0000000000000000 *pde = f000ff53f000ff53
Oops: 0000 [#1] SMP
Pid: 198, comm: kworker/u:3 Tainted: G        W   3.0.0-wl+ #9 LENOVO 6369CTO/6369CTO
EIP: 0060:[<f81b30c9>] EFLAGS: 00010283 CPU: 1
EIP is at rt2800usb_get_txwi+0x19/0x70 [rt2800usb]
EAX: 00000000 EBX: f465e140 ECX: f4494960 EDX: ef24c5f8
ESI: 810f21f5 EDI: f1da9960 EBP: f4581e80 ESP: f4581e70
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process kworker/u:3 (pid: 198, ti=f4580000 task=f4494960 task.ti=f4580000)
Call Trace:
 [<f804790f>] rt2800_txdone_entry+0x2f/0xf0 [rt2800lib]
 [<c045110d>] ? warn_slowpath_common+0x7d/0xa0
 [<f81b3a38>] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb]
 [<f81b3a38>] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb]
 [<f81b3a13>] rt2800usb_work_txdone+0x263/0x360 [rt2800usb]
 [<c046a8d6>] process_one_work+0x186/0x440
 [<c046a85a>] ? process_one_work+0x10a/0x440
 [<f81b37b0>] ? rt2800usb_probe_hw+0x120/0x120 [rt2800usb]
 [<c046c283>] worker_thread+0x133/0x310
 [<c04885db>] ? trace_hardirqs_on+0xb/0x10
 [<c046c150>] ? manage_workers+0x1e0/0x1e0
 [<c047054c>] kthread+0x7c/0x90
 [<c04704d0>] ? __init_kthread_worker+0x60/0x60
 [<c0826b42>] kernel_thread_helper+0x6/0x1

Oops might happen because we check rt2x00queue_empty(queue) twice,
but this condition can change and we can process entry in
rt2800_txdone_entry(), which was already processed by
rt2800usb_txdone_entry_check() -> rt2x00lib_txdone_noinfo() and
has nullify entry->skb .

Reported-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: stable@kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800usb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 2cb25ea..dbf501c 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -538,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
 			if (rt2800usb_txdone_entry_check(entry, reg))
 				break;
+			entry = NULL;
 		}
 
-		if (!entry || rt2x00queue_empty(queue))
-			break;
-
-		rt2800_txdone_entry(entry, reg);
+		if (entry)
+			rt2800_txdone_entry(entry, reg);
 	}
 }
 
-- 
cgit v1.1


From 567b20e02bc1b2bcd69e8bfc022dec3da3fefb89 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Tue, 12 Jul 2011 19:05:29 +0800
Subject: usb: gadget: s3c2410_udc: fix unterminated platform_device_id table

platform_device_id structures need a NULL terminating
entry, add it.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/s3c2410_udc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c
index 85c1b0d..8d31848 100644
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -2060,6 +2060,7 @@ static int s3c2410_udc_resume(struct platform_device *pdev)
 static const struct platform_device_id s3c_udc_ids[] = {
 	{ "s3c2410-usbgadget", },
 	{ "s3c2440-usbgadget", },
+	{ }
 };
 MODULE_DEVICE_TABLE(platform, s3c_udc_ids);
 
-- 
cgit v1.1


From aba1350fdac7cb52f86e6818addb26d03b3ef9bc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 19 Jul 2011 21:47:01 +0200
Subject: usb: gadget: fusb300: remove #if 0 block

The code in this block is unused and the Author is fine with removing:

| These functions were used to debug unstable hw fifo while developing
| fusb300.  It's much more stable now.
| So these functions can be removed.

Cc: "Wendy Yuan-Hsin Chen" <yhchen@faraday-tech.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/fusb300_udc.c | 101 ---------------------------------------
 1 file changed, 101 deletions(-)

diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c
index 24a9243..4ec888f 100644
--- a/drivers/usb/gadget/fusb300_udc.c
+++ b/drivers/usb/gadget/fusb300_udc.c
@@ -609,107 +609,6 @@ void fusb300_rdcxf(struct fusb300 *fusb300,
 	}
 }
 
-#if 0
-static void fusb300_dbg_fifo(struct fusb300_ep *ep,
-				u8 entry, u16 length)
-{
-	u32 reg;
-	u32 i = 0;
-	u32 j = 0;
-
-	reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM);
-	reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) |
-		FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG);
-	reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) |
-		FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG);
-	iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM);
-
-	for (i = 0; i < (length >> 2); i++) {
-		if (i * 4 == 1024)
-			break;
-		reg = ioread32(ep->fusb300->reg +
-			FUSB300_OFFSET_BUFDBG_START + i * 4);
-		printk(KERN_DEBUG"  0x%-8x", reg);
-		j++;
-		if ((j % 4)  == 0)
-			printk(KERN_DEBUG "\n");
-	}
-
-	if (length % 4) {
-		reg = ioread32(ep->fusb300->reg +
-			FUSB300_OFFSET_BUFDBG_START + i * 4);
-		printk(KERN_DEBUG "  0x%x\n", reg);
-	}
-
-	if ((j % 4)  != 0)
-		printk(KERN_DEBUG "\n");
-
-	fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM,
-		FUSB300_GTM_TST_FIFO_DEG);
-}
-
-static void fusb300_cmp_dbg_fifo(struct fusb300_ep *ep,
-				u8 entry, u16 length, u8 *golden)
-{
-	u32 reg;
-	u32 i = 0;
-	u32 golden_value;
-	u8 *tmp;
-
-	tmp = golden;
-
-	printk(KERN_DEBUG "fusb300_cmp_dbg_fifo (entry %d) : start\n", entry);
-
-	reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM);
-	reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) |
-		FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG);
-	reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) |
-		FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG);
-	iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM);
-
-	for (i = 0; i < (length >> 2); i++) {
-		if (i * 4 == 1024)
-			break;
-		golden_value = *tmp | *(tmp + 1) << 8 |
-				*(tmp + 2) << 16 | *(tmp + 3) << 24;
-
-		reg = ioread32(ep->fusb300->reg +
-			FUSB300_OFFSET_BUFDBG_START + i*4);
-
-		if (reg != golden_value) {
-			printk(KERN_DEBUG "0x%x  :  ", (u32)(ep->fusb300->reg +
-				FUSB300_OFFSET_BUFDBG_START + i*4));
-			printk(KERN_DEBUG "    golden = 0x%x, reg = 0x%x\n",
-				golden_value, reg);
-		}
-		tmp += 4;
-	}
-
-	switch (length % 4) {
-	case 1:
-		golden_value = *tmp;
-	case 2:
-		golden_value = *tmp | *(tmp + 1) << 8;
-	case 3:
-		golden_value = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16;
-	default:
-		break;
-
-	reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_BUFDBG_START + i*4);
-	if (reg != golden_value) {
-		printk(KERN_DEBUG "0x%x:", (u32)(ep->fusb300->reg +
-			FUSB300_OFFSET_BUFDBG_START + i*4));
-		printk(KERN_DEBUG "  golden = 0x%x, reg = 0x%x\n",
-			golden_value, reg);
-	}
-	}
-
-	printk(KERN_DEBUG "fusb300_cmp_dbg_fifo : end\n");
-	fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM,
-		FUSB300_GTM_TST_FIFO_DEG);
-}
-#endif
-
 static void fusb300_rdfifo(struct fusb300_ep *ep,
 			  struct fusb300_request *req,
 			  u32 length)
-- 
cgit v1.1


From 6a22158c596c1531b143c884d479285ef90608d1 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 19 Jul 2011 20:21:52 +0200
Subject: usb: gadget: composite: fix bMaxPacketSize for SuperSpeed

For bMaxPacketSize0 we usually take what is specified in ep0->maxpacket.
This is fine in most cases, however on SuperSpeed bMaxPacketSize0
specifies the exponent instead of the actual size in bytes. The only
valid value on SS is 9 which denotes 512 bytes.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 5ef8779..aef4741 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1079,10 +1079,12 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 			cdev->desc.bMaxPacketSize0 =
 				cdev->gadget->ep0->maxpacket;
 			if (gadget_is_superspeed(gadget)) {
-				if (gadget->speed >= USB_SPEED_SUPER)
+				if (gadget->speed >= USB_SPEED_SUPER) {
 					cdev->desc.bcdUSB = cpu_to_le16(0x0300);
-				else
+					cdev->desc.bMaxPacketSize0 = 9;
+				} else {
 					cdev->desc.bcdUSB = cpu_to_le16(0x0210);
+				}
 			}
 
 			value = min(w_length, (u16) sizeof cdev->desc);
-- 
cgit v1.1


From 74c6f3a42a5af424dd954916a5e69d00271b943a Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@gentoo.org>
Date: Sun, 17 Jul 2011 18:28:00 +0300
Subject: usb: musb: tusb6010_omap: fix build failure: error: 'musb' undeclared

  CC      drivers/usb/musb/tusb6010_omap.o
drivers/usb/musb/tusb6010_omap.c: In function 'tusb_omap_use_shared_dmareq':
drivers/usb/musb/tusb6010_omap.c:92: error: 'musb' undeclared (first use in this function)
drivers/usb/musb/tusb6010_omap.c:92: error: (Each undeclared identifier is reported only once
drivers/usb/musb/tusb6010_omap.c:92: error: for each function it appears in.)

Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/tusb6010_omap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index c784e6c..07c8a73 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -89,7 +89,7 @@ static inline int tusb_omap_use_shared_dmareq(struct tusb_omap_dma_ch *chdat)
 	u32		reg = musb_readl(chdat->tbase, TUSB_DMA_EP_MAP);
 
 	if (reg != 0) {
-		dev_dbg(musb->controller, "ep%i dmareq0 is busy for ep%i\n",
+		dev_dbg(chdat->musb->controller, "ep%i dmareq0 is busy for ep%i\n",
 			chdat->epnum, reg & 0xf);
 		return -EAGAIN;
 	}
-- 
cgit v1.1


From 26e5c3e227d15a44402e1c9ab817fe48142b4b99 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Mon, 18 Jul 2011 18:38:47 +0530
Subject: usb: musb: fix Kconfig

After 622859634 (usb: musb: drop a gigantic amount of ifdeferry):

 - USB_GADGET_MUSB_HDRC is no longer selectable because it
   depends on the removed USB_MUSB_PERIPHERAL and USB_MUSB_OTG
   options

 - The Kconfig comment still says "Enable Host or Gadget support
   to see Inventra options", even though you now need to enable
   both of them to see Inventra options.

Fix the dependency and drop the anyway unnecessary comment.

Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig | 2 +-
 drivers/usb/musb/Kconfig   | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 44b6b40..5a084b9 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -310,7 +310,7 @@ config USB_PXA_U2O
 # musb builds in ../musb along with host support
 config USB_GADGET_MUSB_HDRC
 	tristate "Inventra HDRC USB Peripheral (TI, ADI, ...)"
-	depends on USB_MUSB_HDRC && (USB_MUSB_PERIPHERAL || USB_MUSB_OTG)
+	depends on USB_MUSB_HDRC
 	select USB_GADGET_DUALSPEED
 	help
 	  This OTG-capable silicon IP is used in dual designs including
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 6192b45..fc34b8b 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -3,9 +3,6 @@
 # for silicon based on Mentor Graphics INVENTRA designs
 #
 
-comment "Enable Host or Gadget support to see Inventra options"
-	depends on !USB && USB_GADGET=n
-
 # (M)HDRC = (Multipoint) Highspeed Dual-Role Controller
 config USB_MUSB_HDRC
 	depends on USB && USB_GADGET
-- 
cgit v1.1


From 71964b9a0c06f2804be3b6ff47fab07a9468ecb4 Mon Sep 17 00:00:00 2001
From: Sebastian Bauer <mail@sebastianbauer.info>
Date: Thu, 21 Jul 2011 15:40:07 +0200
Subject: usb: gadget: hid: don't STALL when processing a HID Descriptor
 request

This is a patch to fix an issue with the HID gadget which, at the moment,
returns STALL on a HID descriptor request. Essentially, the patch changes
the hid gadget such that a request for the HID descriptor is handled by
copying the descriptor into the response buffer, rather than falling
through the default case, in which the request is answered by a STALL.

Signed-off-by: Sebastian Bauer <mail@sebastianbauer.info>
Acked-by: Peter Korsgaard <peter.korsgaard@barco.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_hid.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c
index 403a48b..83a266b 100644
--- a/drivers/usb/gadget/f_hid.c
+++ b/drivers/usb/gadget/f_hid.c
@@ -367,6 +367,13 @@ static int hidg_setup(struct usb_function *f,
 	case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8
 		  | USB_REQ_GET_DESCRIPTOR):
 		switch (value >> 8) {
+		case HID_DT_HID:
+			VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n");
+			length = min_t(unsigned short, length,
+						   hidg_desc.bLength);
+			memcpy(req->buf, &hidg_desc, length);
+			goto respond;
+			break;
 		case HID_DT_REPORT:
 			VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n");
 			length = min_t(unsigned short, length,
-- 
cgit v1.1


From 15154962f777e4ab38adb7641ccae92194c9a96b Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Thu, 28 Jul 2011 22:59:53 +0800
Subject: usb: host: ehci-omap: fix .remove and failure handling path of
 .probe(v1)

Obviously, disabling & put regulator and iounmap(hcd->regs)
are missed in .remove and failure handling path of .probe,
so add them.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Keshava Munegowda <Keshava_mgowda@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/host/ehci-omap.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c
index 55a57c2..4524032 100644
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -98,6 +98,18 @@ static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port)
 	}
 }
 
+static void disable_put_regulator(
+		struct ehci_hcd_omap_platform_data *pdata)
+{
+	int i;
+
+	for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) {
+		if (pdata->regulator[i]) {
+			regulator_disable(pdata->regulator[i]);
+			regulator_put(pdata->regulator[i]);
+		}
+	}
+}
 
 /* configure so an HC device and id are always provided */
 /* always called with process context; sleeping is OK */
@@ -231,9 +243,11 @@ err_add_hcd:
 	omap_usbhs_disable(dev);
 
 err_enable:
+	disable_put_regulator(pdata);
 	usb_put_hcd(hcd);
 
 err_io:
+	iounmap(regs);
 	return ret;
 }
 
@@ -253,6 +267,8 @@ static int ehci_hcd_omap_remove(struct platform_device *pdev)
 
 	usb_remove_hcd(hcd);
 	omap_usbhs_disable(dev);
+	disable_put_regulator(dev->platform_data);
+	iounmap(hcd->regs);
 	usb_put_hcd(hcd);
 	return 0;
 }
-- 
cgit v1.1


From 93e098a8fc02c579875e64001f7a511b7e75a16c Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 20 Jul 2011 17:09:34 -0700
Subject: usb: musb: fix oops on musb_gadget_pullup

an 'unhandled fault' is causes when a gadget driver calls
usb_gadget_connect() while the USB cable isn't plugged into
the OTG port.

the fault is caused by an access to MUSB's memory space
while its clock is turned off due to pm_runtime kicking
in.

in order to fix the fault, we enclose musb_gadget_pullup()
with pm_runtime_get_sync() ... pm_runtime_put() calls to
be sure we will always reach that path with clock turned on.

[ balbi@ti.com : simplified commit log; removed few things
	which didn't belong there ]

Cc: stable@kernel.org
Reported-by: Zach Pfeffer <zach.pfeffer@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/musb_gadget.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index b67a062..8c41a2e 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1698,6 +1698,8 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 
 	is_on = !!is_on;
 
+	pm_runtime_get_sync(musb->controller);
+
 	/* NOTE: this assumes we are sensing vbus; we'd rather
 	 * not pullup unless the B-session is active.
 	 */
@@ -1707,6 +1709,9 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 		musb_pullup(musb, is_on);
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
+
+	pm_runtime_put(musb->controller);
+
 	return 0;
 }
 
-- 
cgit v1.1


From d366d39bab562545ccb4a5931d62d0fd9e6a8ffc Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Tue, 2 Aug 2011 17:33:39 +0200
Subject: usb: musb: ux500: set dma config for both src and dst

The dma driver requires both src and dst to be set.
This fix is needed in order to run gadget mass storage.
Patch is verified on snowball.

Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Mian Yousaf Kaukab <mian.yousaf.kaukab@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/ux500_dma.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c
index cecace4..2313475 100644
--- a/drivers/usb/musb/ux500_dma.c
+++ b/drivers/usb/musb/ux500_dma.c
@@ -133,15 +133,13 @@ static bool ux500_configure_channel(struct dma_channel *channel,
 					DMA_SLAVE_BUSWIDTH_4_BYTES;
 
 	slave_conf.direction = direction;
-	if (direction == DMA_FROM_DEVICE) {
-		slave_conf.src_addr = usb_fifo_addr;
-		slave_conf.src_addr_width = addr_width;
-		slave_conf.src_maxburst = 16;
-	} else {
-		slave_conf.dst_addr = usb_fifo_addr;
-		slave_conf.dst_addr_width = addr_width;
-		slave_conf.dst_maxburst = 16;
-	}
+	slave_conf.src_addr = usb_fifo_addr;
+	slave_conf.src_addr_width = addr_width;
+	slave_conf.src_maxburst = 16;
+	slave_conf.dst_addr = usb_fifo_addr;
+	slave_conf.dst_addr_width = addr_width;
+	slave_conf.dst_maxburst = 16;
+
 	dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG,
 					     (unsigned long) &slave_conf);
 
-- 
cgit v1.1


From afbd0749c0507d5fea980b3bfa76efc43af83d60 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Wed, 3 Aug 2011 14:22:17 +0200
Subject: usb: musb: ux500: replace missing DBG with dev_dbg

ux500_dma.c fail to compile becase DBG has been removed from musb_debug.
Use dev_dbg for all prints.

Cc: stable@vger.kernel.org
Signed-off-by: Per Forlin <per.forlin@linaro.org>
Acked-by: Mian Yousaf Kaukab<mian.yousaf.kaukab@stericsson.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/ux500_dma.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c
index 2313475..ef4333f 100644
--- a/drivers/usb/musb/ux500_dma.c
+++ b/drivers/usb/musb/ux500_dma.c
@@ -65,7 +65,8 @@ static void ux500_tx_work(struct work_struct *data)
 	struct musb *musb = hw_ep->musb;
 	unsigned long flags;
 
-	DBG(4, "DMA tx transfer done on hw_ep=%d\n", hw_ep->epnum);
+	dev_dbg(musb->controller, "DMA tx transfer done on hw_ep=%d\n",
+		hw_ep->epnum);
 
 	spin_lock_irqsave(&musb->lock, flags);
 	ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -84,7 +85,8 @@ static void ux500_rx_work(struct work_struct *data)
 	struct musb *musb = hw_ep->musb;
 	unsigned long flags;
 
-	DBG(4, "DMA rx transfer done on hw_ep=%d\n", hw_ep->epnum);
+	dev_dbg(musb->controller, "DMA rx transfer done on hw_ep=%d\n",
+		hw_ep->epnum);
 
 	spin_lock_irqsave(&musb->lock, flags);
 	ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -116,9 +118,11 @@ static bool ux500_configure_channel(struct dma_channel *channel,
 	enum dma_slave_buswidth addr_width;
 	dma_addr_t usb_fifo_addr = (MUSB_FIFO_OFFSET(hw_ep->epnum) +
 					ux500_channel->controller->phy_base);
+	struct musb *musb = ux500_channel->controller->private_data;
 
-	DBG(4, "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
-			packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
+	dev_dbg(musb->controller,
+		"packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
+		packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
 
 	ux500_channel->cur_len = len;
 
@@ -164,6 +168,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
 	struct ux500_dma_controller *controller = container_of(c,
 			struct ux500_dma_controller, controller);
 	struct ux500_dma_channel *ux500_channel = NULL;
+	struct musb *musb = controller->private_data;
 	u8 ch_num = hw_ep->epnum - 1;
 	u32 max_ch;
 
@@ -190,7 +195,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
 	ux500_channel->hw_ep = hw_ep;
 	ux500_channel->is_allocated = 1;
 
-	DBG(7, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
+	dev_dbg(musb->controller, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
 		hw_ep->epnum, is_tx, ch_num);
 
 	return &(ux500_channel->channel);
@@ -199,8 +204,9 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
 static void ux500_dma_channel_release(struct dma_channel *channel)
 {
 	struct ux500_dma_channel *ux500_channel = channel->private_data;
+	struct musb *musb = ux500_channel->controller->private_data;
 
-	DBG(7, "channel=%d\n", ux500_channel->ch_num);
+	dev_dbg(musb->controller, "channel=%d\n", ux500_channel->ch_num);
 
 	if (ux500_channel->is_allocated) {
 		ux500_channel->is_allocated = 0;
@@ -250,8 +256,8 @@ static int ux500_dma_channel_abort(struct dma_channel *channel)
 	void __iomem *epio = musb->endpoints[ux500_channel->hw_ep->epnum].regs;
 	u16 csr;
 
-	DBG(4, "channel=%d, is_tx=%d\n", ux500_channel->ch_num,
-						ux500_channel->is_tx);
+	dev_dbg(musb->controller, "channel=%d, is_tx=%d\n",
+		ux500_channel->ch_num, ux500_channel->is_tx);
 
 	if (channel->status == MUSB_DMA_STATUS_BUSY) {
 		if (ux500_channel->is_tx) {
-- 
cgit v1.1


From f847a79ab3c1faca3022061045cd22e4678c1b1c Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@linaro.org>
Date: Wed, 3 Aug 2011 15:39:15 +0200
Subject: usb: musb: cppi: fix build errors due to DBG and missing musb
 variable

Replace DBG with dev_dbg and fix invalid access of musb->controller.
With this patch cppi_dma builds successfully.

Cc: <stable@kernel.org>
Signed-off-by: Per Forlin <per.forlin@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/cppi_dma.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c
index 149f3f3..318fb4e 100644
--- a/drivers/usb/musb/cppi_dma.c
+++ b/drivers/usb/musb/cppi_dma.c
@@ -226,8 +226,10 @@ static int cppi_controller_stop(struct dma_controller *c)
 	struct cppi		*controller;
 	void __iomem		*tibase;
 	int			i;
+	struct musb		*musb;
 
 	controller = container_of(c, struct cppi, controller);
+	musb = controller->musb;
 
 	tibase = controller->tibase;
 	/* DISABLE INDIVIDUAL CHANNEL Interrupts */
@@ -289,9 +291,11 @@ cppi_channel_allocate(struct dma_controller *c,
 	u8			index;
 	struct cppi_channel	*cppi_ch;
 	void __iomem		*tibase;
+	struct musb		*musb;
 
 	controller = container_of(c, struct cppi, controller);
 	tibase = controller->tibase;
+	musb = controller->musb;
 
 	/* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */
 	index = ep->epnum - 1;
@@ -339,7 +343,8 @@ static void cppi_channel_release(struct dma_channel *channel)
 	c = container_of(channel, struct cppi_channel, channel);
 	tibase = c->controller->tibase;
 	if (!c->hw_ep)
-		dev_dbg(musb->controller, "releasing idle DMA channel %p\n", c);
+		dev_dbg(c->controller->musb->controller,
+			"releasing idle DMA channel %p\n", c);
 	else if (!c->transmit)
 		core_rxirq_enable(tibase, c->index + 1);
 
@@ -357,10 +362,11 @@ cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
 
 	musb_ep_select(base, c->index + 1);
 
-	DBG(level, "RX DMA%d%s: %d left, csr %04x, "
-			"%08x H%08x S%08x C%08x, "
-			"B%08x L%08x %08x .. %08x"
-			"\n",
+	dev_dbg(c->controller->musb->controller,
+		"RX DMA%d%s: %d left, csr %04x, "
+		"%08x H%08x S%08x C%08x, "
+		"B%08x L%08x %08x .. %08x"
+		"\n",
 		c->index, tag,
 		musb_readl(c->controller->tibase,
 			DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index),
@@ -387,10 +393,11 @@ cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
 
 	musb_ep_select(base, c->index + 1);
 
-	DBG(level, "TX DMA%d%s: csr %04x, "
-			"H%08x S%08x C%08x %08x, "
-			"F%08x L%08x .. %08x"
-			"\n",
+	dev_dbg(c->controller->musb->controller,
+		"TX DMA%d%s: csr %04x, "
+		"H%08x S%08x C%08x %08x, "
+		"F%08x L%08x .. %08x"
+		"\n",
 		c->index, tag,
 		musb_readw(c->hw_ep->regs, MUSB_TXCSR),
 
@@ -1022,6 +1029,7 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
 	int				i;
 	dma_addr_t			safe2ack;
 	void __iomem			*regs = rx->hw_ep->regs;
+	struct musb			*musb = cppi->musb;
 
 	cppi_dump_rx(6, rx, "/K");
 
-- 
cgit v1.1


From cf6808cb09e72fa7ee8713bf48219a2eb98da91b Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 3 Aug 2011 21:41:26 -0700
Subject: usb: gadget: renesas_usbhs: fix DMA build by including dma-mapping.h

Include dma-mapping.h to fix build of the renesas_usbhs driver

| CC      drivers/usb/renesas_usbhs/mod_gadget.o
| drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_map':
| drivers/usb/renesas_usbhs/mod_gadget.c:190: error: implicit declaration of function 'dma_map_single'
| drivers/usb/renesas_usbhs/mod_gadget.c:192: error: implicit declaration of function 'dma_sync_single_for_device'
| drivers/usb/renesas_usbhs/mod_gadget.c:196: error: implicit declaration of function 'dma_mapping_error'
| drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_unmap':
| drivers/usb/renesas_usbhs/mod_gadget.c:217: error: implicit declaration of function 'dma_unmap_single'
| drivers/usb/renesas_usbhs/mod_gadget.c:219: error: implicit declaration of function 'dma_sync_single_for_cpu'
| make[5]: *** [drivers/usb/renesas_usbhs/mod_gadget.o] Error 1
| make[4]: *** [drivers/usb/renesas_usbhs] Error 2

Reported-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/renesas_usbhs/mod_gadget.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index ba79dbf..e7101dc 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -14,6 +14,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  *
  */
+#include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-- 
cgit v1.1


From 240a16e2cd831cb25361b1d1797bd04e8faf8b4f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Fri, 5 Aug 2011 13:29:49 +0300
Subject: usb: musb: tusb6010: fix compilation

earlier commits have broken compilation of
tusb6010 glue layer, fix it.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/musb_core.h     | 12 ++++++++----
 drivers/usb/musb/musb_regs.h     |  6 ++++--
 drivers/usb/musb/tusb6010.c      |  1 +
 drivers/usb/musb/tusb6010_omap.c |  1 +
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 668eeef..b3c065a 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -172,7 +172,8 @@ enum musb_g_ep0_state {
 #endif
 
 /* TUSB mapping: "flat" plus ep0 special cases */
-#if	defined(CONFIG_USB_MUSB_TUSB6010)
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 #define musb_ep_select(_mbase, _epnum) \
 	musb_writeb((_mbase), MUSB_INDEX, (_epnum))
 #define	MUSB_EP_OFFSET			MUSB_TUSB_OFFSET
@@ -241,7 +242,8 @@ struct musb_hw_ep {
 	void __iomem		*fifo;
 	void __iomem		*regs;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 	void __iomem		*conf;
 #endif
 
@@ -258,7 +260,8 @@ struct musb_hw_ep {
 	struct dma_channel	*tx_channel;
 	struct dma_channel	*rx_channel;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 	/* TUSB has "asynchronous" and "synchronous" dma modes */
 	dma_addr_t		fifo_async;
 	dma_addr_t		fifo_sync;
@@ -356,7 +359,8 @@ struct musb {
 	void __iomem		*ctrl_base;
 	void __iomem		*mregs;
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 	dma_addr_t		async;
 	dma_addr_t		sync;
 	void __iomem		*sync_va;
diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h
index 8241070..03f2655 100644
--- a/drivers/usb/musb/musb_regs.h
+++ b/drivers/usb/musb/musb_regs.h
@@ -234,7 +234,8 @@
 #define MUSB_TESTMODE		0x0F	/* 8 bit */
 
 /* Get offset for a given FIFO from musb->mregs */
-#ifdef	CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||	\
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 #define MUSB_FIFO_OFFSET(epnum)	(0x200 + ((epnum) * 0x20))
 #else
 #define MUSB_FIFO_OFFSET(epnum)	(0x20 + ((epnum) * 4))
@@ -295,7 +296,8 @@
 #define MUSB_FLAT_OFFSET(_epnum, _offset)	\
 	(0x100 + (0x10*(_epnum)) + (_offset))
 
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||	\
+	defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 /* TUSB6010 EP0 configuration register is special */
 #define MUSB_TUSB_OFFSET(_epnum, _offset)	\
 	(0x10 + _offset)
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 9eec41f..ec14801 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/prefetch.h>
 #include <linux/usb.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c
index 07c8a73..b67b4bc 100644
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -20,6 +20,7 @@
 #include <plat/mux.h>
 
 #include "musb_core.h"
+#include "tusb6010.h"
 
 #define to_chdat(c)		((struct tusb_omap_dma_ch *)(c)->private_data)
 
-- 
cgit v1.1


From ad50c1b20ff27780afbb8bcd3d153393d360419e Mon Sep 17 00:00:00 2001
From: Bob Liu <lliubbo@gmail.com>
Date: Fri, 5 Aug 2011 17:33:05 +0800
Subject: usb: musb: blackfin: include prefetch head file

After the prefetch/list.h restructure, drivers need to explicitly include
linux/prefetch.h in order to use the prefetch() function.  Otherwise, the
current driver fails to build:
drivers/usb/musb/blackfin.c: In function 'musb_write_fifo':
drivers/usb/musb/blackfin.c:43: error: implicit declaration of function
'prefetch'

Signed-off-by: Bob Liu <lliubbo@gmail.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/blackfin.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index ae8c396..5e7cfba 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/cacheflush.h>
 
-- 
cgit v1.1


From bb8070c29ca87ac0aa24e04a1207cc932f62258f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 5 Aug 2011 22:14:46 +0200
Subject: usb: gadget: f_phonet: unlock in error case

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_phonet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 8f8d3f6..8f3eab1 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -434,6 +434,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 			    config_ep_by_speed(gadget, f, fp->out_ep)) {
 				fp->in_ep->desc = NULL;
 				fp->out_ep->desc = NULL;
+				spin_unlock(&port->lock);
 				return -EINVAL;
 			}
 			usb_ep_enable(fp->out_ep);
-- 
cgit v1.1


From 6193d6997c90535af8f8491fc0019f785a3322b0 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 10 Aug 2011 11:01:57 +0200
Subject: usb: musb: gadget: fix error path

In case one "forgot" to load the receiver i.e. doing
|modprobe omap2430
|modprobe musb_hdrc

he ends up with:

|musb-hdrc: version 6.0, ?dma?, otg (peripheral+host)
|HS USB OTG: no transceiver configured
|musb-hdrc musb-hdrc: musb_init_controller failed with status -19
|(NULL device *): gadget not registered.
|Unable to handle kernel NULL pointer dereference at virtual address 0000001c
|Internal error: Oops: 17 [#1] SMP
|[<c011383c>] (sysfs_find_dirent+0x4/0x60) from [<c01138c0>] (sysfs_get_dirent+0x28/0x78)
|[<c01138c0>] (sysfs_get_dirent+0x28/0x78) from [<c0115b78>] (sysfs_unmerge_group+0x1c/0x90)
|[<c0115b78>] (sysfs_unmerge_group+0x1c/0x90) from [<c0179ba4>] (dpm_sysfs_remove+0x14/0x3c)
|[<c0179ba4>] (dpm_sysfs_remove+0x14/0x3c) from [<c01742f8>] (device_del+0x40/0x1b4)
|[<c01742f8>] (device_del+0x40/0x1b4) from [<c0174478>] (device_unregister+0xc/0x18)
|[<c0174478>] (device_unregister+0xc/0x18) from [<bf0489b4>] (musb_free+0x24/0x88 [musb_hdrc])
|[<bf0489b4>] (musb_free+0x24/0x88 [musb_hdrc]) from [<bf057d18>] (musb_probe+0xb50/0xe3c [musb_hdrc])
|[<bf057d18>] (musb_probe+0xb50/0xe3c [musb_hdrc]) from [<c01779c4>] (platform_drv_probe+0x1c/0x24)

The problem is that musb_free() tries to figure out what was
initializued and what wasn't and clean up only the initialized part.
This works well for usb_del_gadget_udc() but device_unregister() can't
deal with it. Therefore we rely on the fact the we always have a parent
device and only then remove the device.
I broke this in 0f91349 ("usb: gadget: convert all users to the new udc
infrastructure")

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/musb_gadget.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 8c41a2e..e818203 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1856,6 +1856,7 @@ int __init musb_gadget_setup(struct musb *musb)
 
 	return 0;
 err:
+	musb->g.dev.parent = NULL;
 	device_unregister(&musb->g.dev);
 	return status;
 }
@@ -1863,7 +1864,8 @@ err:
 void musb_gadget_cleanup(struct musb *musb)
 {
 	usb_del_gadget_udc(&musb->g);
-	device_unregister(&musb->g.dev);
+	if (musb->g.dev.parent)
+		device_unregister(&musb->g.dev);
 }
 
 /*
-- 
cgit v1.1


From f8afdf481f0fef5e170c6c928cec42879d505654 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 12 Aug 2011 13:31:30 -0400
Subject: drivers/net/wireless/wl12xx: add missing kfree

In each case, the freed data should be freed in the error handling code
as
well.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@exists@
local idexpression x;
statement S,S1;
expression E;
identifier fl;
expression *ptr != NULL;
@@

x = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...kfree(x)...+> }
     when any
     when != true x == NULL
x->fl
...>
(
if (x == NULL) S1
|
if (...) { ... when != x
               when forall
(
 return \(0\|<+...x...+>\|ptr\);
|
* return ...;
)
}
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Luciano Coelho <coelho@ti.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/wl12xx/acx.c      | 6 +-----
 drivers/net/wireless/wl12xx/testmode.c | 5 ++++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c
index 7e33f1f..34f6ab5 100644
--- a/drivers/net/wireless/wl12xx/acx.c
+++ b/drivers/net/wireless/wl12xx/acx.c
@@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth)
 	auth->sleep_auth = sleep_auth;
 
 	ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth));
-	if (ret < 0)
-		return ret;
 
 out:
 	kfree(auth);
@@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl)
 
 	ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD,
 				   detection, sizeof(*detection));
-	if (ret < 0) {
+	if (ret < 0)
 		wl1271_warning("failed to set cca threshold: %d", ret);
-		return ret;
-	}
 
 out:
 	kfree(detection);
diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c
index 5d5e1ef..88add68 100644
--- a/drivers/net/wireless/wl12xx/testmode.c
+++ b/drivers/net/wireless/wl12xx/testmode.c
@@ -139,12 +139,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
 
 	if (ret < 0) {
 		wl1271_warning("testmode cmd interrogate failed: %d", ret);
+		kfree(cmd);
 		return ret;
 	}
 
 	skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd));
-	if (!skb)
+	if (!skb) {
+		kfree(cmd);
 		return -ENOMEM;
+	}
 
 	NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd);
 
-- 
cgit v1.1


From 06f8e2d6754dc631732415b741b5aa58a0f7133f Mon Sep 17 00:00:00 2001
From: Alex Elder <aelder@sgi.com>
Date: Fri, 12 Aug 2011 13:57:55 -0500
Subject: xfs: don't expect xfs headers to be in subdirectories

Fix up some #include directives in preparation for moving a few
header files out of xfs source subdirectories.

Note that "xfs_linux.h" also got its quoting convention for included
files switched.

Signed-off-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/Makefile                 |  2 +-
 fs/xfs/linux-2.6/xfs_linux.h    | 27 +++++++++++++--------------
 fs/xfs/linux-2.6/xfs_quotaops.c |  2 +-
 fs/xfs/linux-2.6/xfs_trace.c    |  4 ++--
 fs/xfs/xfs.h                    |  3 ++-
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 75bb316..b100cf4 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,7 +16,7 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-ccflags-y := -I$(src) -I$(src)/linux-2.6
+ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support
 ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 XFS_LINUX := linux-2.6
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index d42f814..1e8a45e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -32,13 +32,12 @@
 # define XFS_BIG_INUMS	0
 #endif
 
-#include <xfs_types.h>
+#include "xfs_types.h"
 
-#include <kmem.h>
-#include <mrlock.h>
-#include <time.h>
-
-#include <support/uuid.h>
+#include "kmem.h"
+#include "mrlock.h"
+#include "time.h"
+#include "uuid.h"
 
 #include <linux/semaphore.h>
 #include <linux/mm.h>
@@ -78,14 +77,14 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
-#include <xfs_vnode.h>
-#include <xfs_stats.h>
-#include <xfs_sysctl.h>
-#include <xfs_iops.h>
-#include <xfs_aops.h>
-#include <xfs_super.h>
-#include <xfs_buf.h>
-#include <xfs_message.h>
+#include "xfs_vnode.h"
+#include "xfs_stats.h"
+#include "xfs_sysctl.h"
+#include "xfs_iops.h"
+#include "xfs_aops.h"
+#include "xfs_super.h"
+#include "xfs_buf.h"
+#include "xfs_message.h"
 
 #ifdef __BIG_ENDIAN
 #define XFS_NATIVE_HOST 1
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 29b9d64..7e76f53 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -25,7 +25,7 @@
 #include "xfs_trans.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
-#include "quota/xfs_qm.h"
+#include "xfs_qm.h"
 #include <linux/quota.h>
 
 
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 88d25d4..9010ce8 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -43,8 +43,8 @@
 #include "xfs_quota.h"
 #include "xfs_iomap.h"
 #include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 #include "xfs_log_recover.h"
 #include "xfs_inode_item.h"
 
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 53ec3ea..d8b11b7 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,5 +24,6 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
-#include <linux-2.6/xfs_linux.h>
+#include "xfs_linux.h"
+
 #endif	/* __XFS_H__ */
-- 
cgit v1.1


From c59d87c460767bc35dafd490139d3cfe78fb8da4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Aug 2011 16:21:35 -0500
Subject: xfs: remove subdirectories

Use the move from Linux 2.6 to Linux 3.x as an excuse to kill the
annoying subdirectories in the XFS source code.  Besides the large
amount of file rename the only changes are to the Makefile, a few
files including headers with the subdirectory prefix, and the binary
sysctl compat code that includes a header under fs/xfs/ from
kernel/.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/Makefile                 |  117 +-
 fs/xfs/kmem.c                   |  132 +++
 fs/xfs/kmem.h                   |  124 ++
 fs/xfs/linux-2.6/kmem.c         |  132 ---
 fs/xfs/linux-2.6/kmem.h         |  124 --
 fs/xfs/linux-2.6/mrlock.h       |   90 --
 fs/xfs/linux-2.6/time.h         |   36 -
 fs/xfs/linux-2.6/xfs_acl.c      |  420 -------
 fs/xfs/linux-2.6/xfs_aops.c     | 1499 ------------------------
 fs/xfs/linux-2.6/xfs_aops.h     |   68 --
 fs/xfs/linux-2.6/xfs_buf.c      | 1876 ------------------------------
 fs/xfs/linux-2.6/xfs_buf.h      |  326 ------
 fs/xfs/linux-2.6/xfs_discard.c  |  222 ----
 fs/xfs/linux-2.6/xfs_discard.h  |   10 -
 fs/xfs/linux-2.6/xfs_export.c   |  250 ----
 fs/xfs/linux-2.6/xfs_export.h   |   72 --
 fs/xfs/linux-2.6/xfs_file.c     | 1096 ------------------
 fs/xfs/linux-2.6/xfs_fs_subr.c  |   96 --
 fs/xfs/linux-2.6/xfs_globals.c  |   43 -
 fs/xfs/linux-2.6/xfs_ioctl.c    | 1556 -------------------------
 fs/xfs/linux-2.6/xfs_ioctl.h    |   85 --
 fs/xfs/linux-2.6/xfs_ioctl32.c  |  672 -----------
 fs/xfs/linux-2.6/xfs_ioctl32.h  |  237 ----
 fs/xfs/linux-2.6/xfs_iops.c     | 1210 --------------------
 fs/xfs/linux-2.6/xfs_iops.h     |   30 -
 fs/xfs/linux-2.6/xfs_linux.h    |  309 -----
 fs/xfs/linux-2.6/xfs_message.c  |  108 --
 fs/xfs/linux-2.6/xfs_message.h  |   39 -
 fs/xfs/linux-2.6/xfs_quotaops.c |  139 ---
 fs/xfs/linux-2.6/xfs_stats.c    |  122 --
 fs/xfs/linux-2.6/xfs_stats.h    |  223 ----
 fs/xfs/linux-2.6/xfs_super.c    | 1773 ----------------------------
 fs/xfs/linux-2.6/xfs_super.h    |   87 --
 fs/xfs/linux-2.6/xfs_sync.c     | 1065 -----------------
 fs/xfs/linux-2.6/xfs_sync.h     |   51 -
 fs/xfs/linux-2.6/xfs_sysctl.c   |  252 ----
 fs/xfs/linux-2.6/xfs_sysctl.h   |  102 --
 fs/xfs/linux-2.6/xfs_trace.c    |   56 -
 fs/xfs/linux-2.6/xfs_trace.h    | 1746 ----------------------------
 fs/xfs/linux-2.6/xfs_vnode.h    |   64 --
 fs/xfs/linux-2.6/xfs_xattr.c    |  241 ----
 fs/xfs/mrlock.h                 |   90 ++
 fs/xfs/quota/xfs_dquot.c        | 1454 -----------------------
 fs/xfs/quota/xfs_dquot.h        |  137 ---
 fs/xfs/quota/xfs_dquot_item.c   |  529 ---------
 fs/xfs/quota/xfs_dquot_item.h   |   48 -
 fs/xfs/quota/xfs_qm.c           | 2416 ---------------------------------------
 fs/xfs/quota/xfs_qm.h           |  166 ---
 fs/xfs/quota/xfs_qm_bhv.c       |  176 ---
 fs/xfs/quota/xfs_qm_stats.c     |  105 --
 fs/xfs/quota/xfs_qm_stats.h     |   53 -
 fs/xfs/quota/xfs_qm_syscalls.c  |  906 ---------------
 fs/xfs/quota/xfs_quota_priv.h   |   53 -
 fs/xfs/quota/xfs_trans_dquot.c  |  890 --------------
 fs/xfs/support/uuid.c           |   63 -
 fs/xfs/support/uuid.h           |   29 -
 fs/xfs/time.h                   |   36 +
 fs/xfs/uuid.c                   |   63 +
 fs/xfs/uuid.h                   |   29 +
 fs/xfs/xfs_acl.c                |  420 +++++++
 fs/xfs/xfs_aops.c               | 1499 ++++++++++++++++++++++++
 fs/xfs/xfs_aops.h               |   68 ++
 fs/xfs/xfs_buf.c                | 1876 ++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf.h                |  326 ++++++
 fs/xfs/xfs_discard.c            |  222 ++++
 fs/xfs/xfs_discard.h            |   10 +
 fs/xfs/xfs_dquot.c              | 1454 +++++++++++++++++++++++
 fs/xfs/xfs_dquot.h              |  137 +++
 fs/xfs/xfs_dquot_item.c         |  529 +++++++++
 fs/xfs/xfs_dquot_item.h         |   48 +
 fs/xfs/xfs_export.c             |  250 ++++
 fs/xfs/xfs_export.h             |   72 ++
 fs/xfs/xfs_file.c               | 1096 ++++++++++++++++++
 fs/xfs/xfs_fs_subr.c            |   96 ++
 fs/xfs/xfs_globals.c            |   43 +
 fs/xfs/xfs_ioctl.c              | 1556 +++++++++++++++++++++++++
 fs/xfs/xfs_ioctl.h              |   85 ++
 fs/xfs/xfs_ioctl32.c            |  672 +++++++++++
 fs/xfs/xfs_ioctl32.h            |  237 ++++
 fs/xfs/xfs_iops.c               | 1210 ++++++++++++++++++++
 fs/xfs/xfs_iops.h               |   30 +
 fs/xfs/xfs_linux.h              |  309 +++++
 fs/xfs/xfs_message.c            |  108 ++
 fs/xfs/xfs_message.h            |   39 +
 fs/xfs/xfs_qm.c                 | 2416 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_qm.h                 |  166 +++
 fs/xfs/xfs_qm_bhv.c             |  176 +++
 fs/xfs/xfs_qm_stats.c           |  105 ++
 fs/xfs/xfs_qm_stats.h           |   53 +
 fs/xfs/xfs_qm_syscalls.c        |  906 +++++++++++++++
 fs/xfs/xfs_quota_priv.h         |   53 +
 fs/xfs/xfs_quotaops.c           |  139 +++
 fs/xfs/xfs_stats.c              |  122 ++
 fs/xfs/xfs_stats.h              |  223 ++++
 fs/xfs/xfs_super.c              | 1773 ++++++++++++++++++++++++++++
 fs/xfs/xfs_super.h              |   87 ++
 fs/xfs/xfs_sync.c               | 1065 +++++++++++++++++
 fs/xfs/xfs_sync.h               |   51 +
 fs/xfs/xfs_sysctl.c             |  252 ++++
 fs/xfs/xfs_sysctl.h             |  102 ++
 fs/xfs/xfs_trace.c              |   56 +
 fs/xfs/xfs_trace.h              | 1746 ++++++++++++++++++++++++++++
 fs/xfs/xfs_trans_dquot.c        |  890 ++++++++++++++
 fs/xfs/xfs_vnode.h              |   64 ++
 fs/xfs/xfs_xattr.c              |  241 ++++
 kernel/sysctl_binary.c          |    2 +-
 kernel/sysctl_check.c           |    2 +-
 107 files changed, 23610 insertions(+), 23615 deletions(-)
 create mode 100644 fs/xfs/kmem.c
 create mode 100644 fs/xfs/kmem.h
 delete mode 100644 fs/xfs/linux-2.6/kmem.c
 delete mode 100644 fs/xfs/linux-2.6/kmem.h
 delete mode 100644 fs/xfs/linux-2.6/mrlock.h
 delete mode 100644 fs/xfs/linux-2.6/time.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_acl.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_aops.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_aops.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_buf.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_buf.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_discard.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_discard.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_export.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_export.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_file.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_fs_subr.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_globals.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_iops.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_iops.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_linux.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_message.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_message.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_quotaops.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_stats.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_stats.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_super.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_super.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_sync.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_sync.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_trace.c
 delete mode 100644 fs/xfs/linux-2.6/xfs_trace.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_vnode.h
 delete mode 100644 fs/xfs/linux-2.6/xfs_xattr.c
 create mode 100644 fs/xfs/mrlock.h
 delete mode 100644 fs/xfs/quota/xfs_dquot.c
 delete mode 100644 fs/xfs/quota/xfs_dquot.h
 delete mode 100644 fs/xfs/quota/xfs_dquot_item.c
 delete mode 100644 fs/xfs/quota/xfs_dquot_item.h
 delete mode 100644 fs/xfs/quota/xfs_qm.c
 delete mode 100644 fs/xfs/quota/xfs_qm.h
 delete mode 100644 fs/xfs/quota/xfs_qm_bhv.c
 delete mode 100644 fs/xfs/quota/xfs_qm_stats.c
 delete mode 100644 fs/xfs/quota/xfs_qm_stats.h
 delete mode 100644 fs/xfs/quota/xfs_qm_syscalls.c
 delete mode 100644 fs/xfs/quota/xfs_quota_priv.h
 delete mode 100644 fs/xfs/quota/xfs_trans_dquot.c
 delete mode 100644 fs/xfs/support/uuid.c
 delete mode 100644 fs/xfs/support/uuid.h
 create mode 100644 fs/xfs/time.h
 create mode 100644 fs/xfs/uuid.c
 create mode 100644 fs/xfs/uuid.h
 create mode 100644 fs/xfs/xfs_acl.c
 create mode 100644 fs/xfs/xfs_aops.c
 create mode 100644 fs/xfs/xfs_aops.h
 create mode 100644 fs/xfs/xfs_buf.c
 create mode 100644 fs/xfs/xfs_buf.h
 create mode 100644 fs/xfs/xfs_discard.c
 create mode 100644 fs/xfs/xfs_discard.h
 create mode 100644 fs/xfs/xfs_dquot.c
 create mode 100644 fs/xfs/xfs_dquot.h
 create mode 100644 fs/xfs/xfs_dquot_item.c
 create mode 100644 fs/xfs/xfs_dquot_item.h
 create mode 100644 fs/xfs/xfs_export.c
 create mode 100644 fs/xfs/xfs_export.h
 create mode 100644 fs/xfs/xfs_file.c
 create mode 100644 fs/xfs/xfs_fs_subr.c
 create mode 100644 fs/xfs/xfs_globals.c
 create mode 100644 fs/xfs/xfs_ioctl.c
 create mode 100644 fs/xfs/xfs_ioctl.h
 create mode 100644 fs/xfs/xfs_ioctl32.c
 create mode 100644 fs/xfs/xfs_ioctl32.h
 create mode 100644 fs/xfs/xfs_iops.c
 create mode 100644 fs/xfs/xfs_iops.h
 create mode 100644 fs/xfs/xfs_linux.h
 create mode 100644 fs/xfs/xfs_message.c
 create mode 100644 fs/xfs/xfs_message.h
 create mode 100644 fs/xfs/xfs_qm.c
 create mode 100644 fs/xfs/xfs_qm.h
 create mode 100644 fs/xfs/xfs_qm_bhv.c
 create mode 100644 fs/xfs/xfs_qm_stats.c
 create mode 100644 fs/xfs/xfs_qm_stats.h
 create mode 100644 fs/xfs/xfs_qm_syscalls.c
 create mode 100644 fs/xfs/xfs_quota_priv.h
 create mode 100644 fs/xfs/xfs_quotaops.c
 create mode 100644 fs/xfs/xfs_stats.c
 create mode 100644 fs/xfs/xfs_stats.h
 create mode 100644 fs/xfs/xfs_super.c
 create mode 100644 fs/xfs/xfs_super.h
 create mode 100644 fs/xfs/xfs_sync.c
 create mode 100644 fs/xfs/xfs_sync.h
 create mode 100644 fs/xfs/xfs_sysctl.c
 create mode 100644 fs/xfs/xfs_sysctl.h
 create mode 100644 fs/xfs/xfs_trace.c
 create mode 100644 fs/xfs/xfs_trace.h
 create mode 100644 fs/xfs/xfs_trans_dquot.c
 create mode 100644 fs/xfs/xfs_vnode.h
 create mode 100644 fs/xfs/xfs_xattr.c

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index b100cf4..ffce328 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,44 +16,51 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
-ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support
 ccflags-$(CONFIG_XFS_DEBUG) += -g
 
-XFS_LINUX := linux-2.6
-
 obj-$(CONFIG_XFS_FS)		+= xfs.o
 
-xfs-y				+= linux-2.6/xfs_trace.o
-
-xfs-$(CONFIG_XFS_QUOTA)		+= $(addprefix quota/, \
-				   xfs_dquot.o \
-				   xfs_dquot_item.o \
-				   xfs_trans_dquot.o \
-				   xfs_qm_syscalls.o \
-				   xfs_qm_bhv.o \
-				   xfs_qm.o)
-xfs-$(CONFIG_XFS_QUOTA)		+= linux-2.6/xfs_quotaops.o
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)	+= $(XFS_LINUX)/xfs_acl.o
-xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)		+= $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)		+= $(XFS_LINUX)/xfs_ioctl32.o
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y				+= xfs_trace.o
 
+# highlevel code
+xfs-y				+= xfs_aops.o \
+				   xfs_bit.o \
+				   xfs_buf.o \
+				   xfs_dfrag.o \
+				   xfs_discard.o \
+				   xfs_error.o \
+				   xfs_export.o \
+				   xfs_file.o \
+				   xfs_filestream.o \
+				   xfs_fsops.o \
+				   xfs_fs_subr.o \
+				   xfs_globals.o \
+				   xfs_iget.o \
+				   xfs_ioctl.o \
+				   xfs_iomap.o \
+				   xfs_iops.o \
+				   xfs_itable.o \
+				   xfs_message.o \
+				   xfs_mru_cache.o \
+				   xfs_super.o \
+				   xfs_sync.o \
+				   xfs_xattr.o \
+				   xfs_rename.o \
+				   xfs_rw.o \
+				   xfs_utils.o \
+				   xfs_vnodeops.o \
+				   kmem.o \
+				   uuid.o
 
+# code shared with libxfs
 xfs-y				+= xfs_alloc.o \
 				   xfs_alloc_btree.o \
 				   xfs_attr.o \
 				   xfs_attr_leaf.o \
-				   xfs_bit.o \
 				   xfs_bmap.o \
 				   xfs_bmap_btree.o \
 				   xfs_btree.o \
-				   xfs_buf_item.o \
 				   xfs_da_btree.o \
 				   xfs_dir2.o \
 				   xfs_dir2_block.o \
@@ -61,49 +68,37 @@ xfs-y				+= xfs_alloc.o \
 				   xfs_dir2_leaf.o \
 				   xfs_dir2_node.o \
 				   xfs_dir2_sf.o \
-				   xfs_error.o \
-				   xfs_extfree_item.o \
-				   xfs_filestream.o \
-				   xfs_fsops.o \
 				   xfs_ialloc.o \
 				   xfs_ialloc_btree.o \
-				   xfs_iget.o \
 				   xfs_inode.o \
-				   xfs_inode_item.o \
-				   xfs_iomap.o \
-				   xfs_itable.o \
-				   xfs_dfrag.o \
-				   xfs_log.o \
-				   xfs_log_cil.o \
 				   xfs_log_recover.o \
 				   xfs_mount.o \
-				   xfs_mru_cache.o \
-				   xfs_rename.o \
-				   xfs_trans.o \
+				   xfs_trans.o
+
+# low-level transaction/log code
+xfs-y				+= xfs_log.o \
+				   xfs_log_cil.o \
+				   xfs_buf_item.o \
+				   xfs_extfree_item.o \
+				   xfs_inode_item.o \
 				   xfs_trans_ail.o \
 				   xfs_trans_buf.o \
 				   xfs_trans_extfree.o \
 				   xfs_trans_inode.o \
-				   xfs_utils.o \
-				   xfs_vnodeops.o \
-				   xfs_rw.o
-
-# Objects in linux/
-xfs-y				+= $(addprefix $(XFS_LINUX)/, \
-				   kmem.o \
-				   xfs_aops.o \
-				   xfs_buf.o \
-				   xfs_discard.o \
-				   xfs_export.o \
-				   xfs_file.o \
-				   xfs_fs_subr.o \
-				   xfs_globals.o \
-				   xfs_ioctl.o \
-				   xfs_iops.o \
-				   xfs_message.o \
-				   xfs_super.o \
-				   xfs_sync.o \
-				   xfs_xattr.o)
 
-# Objects in support/
-xfs-y				+= support/uuid.o
+# optional features
+xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \
+				   xfs_dquot_item.o \
+				   xfs_trans_dquot.o \
+				   xfs_qm_syscalls.o \
+				   xfs_qm_bhv.o \
+				   xfs_qm.o \
+				   xfs_quotaops.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)		+= xfs_qm_stats.o
+endif
+xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
+xfs-$(CONFIG_PROC_FS)		+= xfs_stats.o
+xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)		+= xfs_ioctl32.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
new file mode 100644
index 0000000..a907de5
--- /dev/null
+++ b/fs/xfs/kmem.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include "time.h"
+#include "kmem.h"
+#include "xfs_message.h"
+
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+	void		*ptr;
+	size_t		kmsize = maxsize;
+
+	while (!(ptr = kmem_zalloc_large(kmsize))) {
+		if ((kmsize >>= 1) <= minsize)
+			kmsize = minsize;
+	}
+	if (ptr)
+		*size = kmsize;
+	return ptr;
+}
+
+void *
+kmem_alloc(size_t size, unsigned int __nocast flags)
+{
+	int	retries = 0;
+	gfp_t	lflags = kmem_flags_convert(flags);
+	void	*ptr;
+
+	do {
+		ptr = kmalloc(size, lflags);
+		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+			return ptr;
+		if (!(++retries % 100))
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
+					__func__, lflags);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
+	} while (1);
+}
+
+void *
+kmem_zalloc(size_t size, unsigned int __nocast flags)
+{
+	void	*ptr;
+
+	ptr = kmem_alloc(size, flags);
+	if (ptr)
+		memset((char *)ptr, 0, (int)size);
+	return ptr;
+}
+
+void
+kmem_free(const void *ptr)
+{
+	if (!is_vmalloc_addr(ptr)) {
+		kfree(ptr);
+	} else {
+		vfree(ptr);
+	}
+}
+
+void *
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
+	     unsigned int __nocast flags)
+{
+	void	*new;
+
+	new = kmem_alloc(newsize, flags);
+	if (ptr) {
+		if (new)
+			memcpy(new, ptr,
+				((oldsize < newsize) ? oldsize : newsize));
+		kmem_free(ptr);
+	}
+	return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+	int	retries = 0;
+	gfp_t	lflags = kmem_flags_convert(flags);
+	void	*ptr;
+
+	do {
+		ptr = kmem_cache_alloc(zone, lflags);
+		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+			return ptr;
+		if (!(++retries % 100))
+			xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
+					__func__, lflags);
+		congestion_wait(BLK_RW_ASYNC, HZ/50);
+	} while (1);
+}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+	void	*ptr;
+
+	ptr = kmem_zone_alloc(zone, flags);
+	if (ptr)
+		memset((char *)ptr, 0, kmem_cache_size(zone));
+	return ptr;
+}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
new file mode 100644
index 0000000..f7c8f7a
--- /dev/null
+++ b/fs/xfs/kmem.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+/*
+ * General memory allocation interfaces
+ */
+
+#define KM_SLEEP	0x0001u
+#define KM_NOSLEEP	0x0002u
+#define KM_NOFS		0x0004u
+#define KM_MAYFAIL	0x0008u
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions.  We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(unsigned int __nocast flags)
+{
+	gfp_t	lflags;
+
+	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+
+	if (flags & KM_NOSLEEP) {
+		lflags = GFP_ATOMIC | __GFP_NOWARN;
+	} else {
+		lflags = GFP_KERNEL | __GFP_NOWARN;
+		if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+			lflags &= ~__GFP_FS;
+	}
+	return lflags;
+}
+
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void  kmem_free(const void *);
+
+static inline void *kmem_zalloc_large(size_t size)
+{
+	void *ptr;
+
+	ptr = vmalloc(size);
+	if (ptr)
+		memset(ptr, 0, size);
+	return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+	vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN	SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM	SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD	SLAB_MEM_SPREAD
+
+#define kmem_zone	kmem_cache
+#define kmem_zone_t	struct kmem_cache
+
+static inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+	return kmem_cache_create(zone_name, size, 0, 0, NULL);
+}
+
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+		     void (*construct)(void *))
+{
+	return kmem_cache_create(zone_name, size, 0, flags, construct);
+}
+
+static inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+	kmem_cache_free(zone, ptr);
+}
+
+static inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+	if (zone)
+		kmem_cache_destroy(zone);
+}
+
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+
+static inline int
+kmem_shake_allow(gfp_t gfp_mask)
+{
+	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
deleted file mode 100644
index a907de5..0000000
--- a/fs/xfs/linux-2.6/kmem.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include "time.h"
-#include "kmem.h"
-#include "xfs_message.h"
-
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-	void		*ptr;
-	size_t		kmsize = maxsize;
-
-	while (!(ptr = kmem_zalloc_large(kmsize))) {
-		if ((kmsize >>= 1) <= minsize)
-			kmsize = minsize;
-	}
-	if (ptr)
-		*size = kmsize;
-	return ptr;
-}
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
-	int	retries = 0;
-	gfp_t	lflags = kmem_flags_convert(flags);
-	void	*ptr;
-
-	do {
-		ptr = kmalloc(size, lflags);
-		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-			return ptr;
-		if (!(++retries % 100))
-			xfs_err(NULL,
-		"possible memory allocation deadlock in %s (mode:0x%x)",
-					__func__, lflags);
-		congestion_wait(BLK_RW_ASYNC, HZ/50);
-	} while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
-	void	*ptr;
-
-	ptr = kmem_alloc(size, flags);
-	if (ptr)
-		memset((char *)ptr, 0, (int)size);
-	return ptr;
-}
-
-void
-kmem_free(const void *ptr)
-{
-	if (!is_vmalloc_addr(ptr)) {
-		kfree(ptr);
-	} else {
-		vfree(ptr);
-	}
-}
-
-void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
-	     unsigned int __nocast flags)
-{
-	void	*new;
-
-	new = kmem_alloc(newsize, flags);
-	if (ptr) {
-		if (new)
-			memcpy(new, ptr,
-				((oldsize < newsize) ? oldsize : newsize));
-		kmem_free(ptr);
-	}
-	return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-	int	retries = 0;
-	gfp_t	lflags = kmem_flags_convert(flags);
-	void	*ptr;
-
-	do {
-		ptr = kmem_cache_alloc(zone, lflags);
-		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-			return ptr;
-		if (!(++retries % 100))
-			xfs_err(NULL,
-		"possible memory allocation deadlock in %s (mode:0x%x)",
-					__func__, lflags);
-		congestion_wait(BLK_RW_ASYNC, HZ/50);
-	} while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-	void	*ptr;
-
-	ptr = kmem_zone_alloc(zone, flags);
-	if (ptr)
-		memset((char *)ptr, 0, kmem_cache_size(zone));
-	return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
deleted file mode 100644
index f7c8f7a..0000000
--- a/fs/xfs/linux-2.6/kmem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-#define KM_SLEEP	0x0001u
-#define KM_NOSLEEP	0x0002u
-#define KM_NOFS		0x0004u
-#define KM_MAYFAIL	0x0008u
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions.  We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(unsigned int __nocast flags)
-{
-	gfp_t	lflags;
-
-	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
-
-	if (flags & KM_NOSLEEP) {
-		lflags = GFP_ATOMIC | __GFP_NOWARN;
-	} else {
-		lflags = GFP_KERNEL | __GFP_NOWARN;
-		if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
-			lflags &= ~__GFP_FS;
-	}
-	return lflags;
-}
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(const void *);
-
-static inline void *kmem_zalloc_large(size_t size)
-{
-	void *ptr;
-
-	ptr = vmalloc(size);
-	if (ptr)
-		memset(ptr, 0, size);
-	return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
-	vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
-/*
- * Zone interfaces
- */
-
-#define KM_ZONE_HWALIGN	SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM	SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD	SLAB_MEM_SPREAD
-
-#define kmem_zone	kmem_cache
-#define kmem_zone_t	struct kmem_cache
-
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
-	return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-		     void (*construct)(void *))
-{
-	return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
-	kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
-	if (zone)
-		kmem_cache_destroy(zone);
-}
-
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-	return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
deleted file mode 100644
index ff6a198..0000000
--- a/fs/xfs/linux-2.6/mrlock.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_MRLOCK_H__
-#define __XFS_SUPPORT_MRLOCK_H__
-
-#include <linux/rwsem.h>
-
-typedef struct {
-	struct rw_semaphore	mr_lock;
-#ifdef DEBUG
-	int			mr_writer;
-#endif
-} mrlock_t;
-
-#ifdef DEBUG
-#define mrinit(mrp, name)	\
-	do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
-#else
-#define mrinit(mrp, name)	\
-	do { init_rwsem(&(mrp)->mr_lock); } while (0)
-#endif
-
-#define mrlock_init(mrp, t,n,s)	mrinit(mrp, n)
-#define mrfree(mrp)		do { } while (0)
-
-static inline void mraccess_nested(mrlock_t *mrp, int subclass)
-{
-	down_read_nested(&mrp->mr_lock, subclass);
-}
-
-static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
-{
-	down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
-	mrp->mr_writer = 1;
-#endif
-}
-
-static inline int mrtryaccess(mrlock_t *mrp)
-{
-	return down_read_trylock(&mrp->mr_lock);
-}
-
-static inline int mrtryupdate(mrlock_t *mrp)
-{
-	if (!down_write_trylock(&mrp->mr_lock))
-		return 0;
-#ifdef DEBUG
-	mrp->mr_writer = 1;
-#endif
-	return 1;
-}
-
-static inline void mrunlock_excl(mrlock_t *mrp)
-{
-#ifdef DEBUG
-	mrp->mr_writer = 0;
-#endif
-	up_write(&mrp->mr_lock);
-}
-
-static inline void mrunlock_shared(mrlock_t *mrp)
-{
-	up_read(&mrp->mr_lock);
-}
-
-static inline void mrdemote(mrlock_t *mrp)
-{
-#ifdef DEBUG
-	mrp->mr_writer = 0;
-#endif
-	downgrade_write(&mrp->mr_lock);
-}
-
-#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
deleted file mode 100644
index 387e695..0000000
--- a/fs/xfs/linux-2.6/time.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-	schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-	*tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
deleted file mode 100644
index b6c4b37..0000000
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-
-
-/*
- * Locking scheme:
- *  - all ACL updates are protected by inode->i_mutex, which is taken before
- *    calling into this file.
- */
-
-STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
-{
-	struct posix_acl_entry *acl_e;
-	struct posix_acl *acl;
-	struct xfs_acl_entry *ace;
-	int count, i;
-
-	count = be32_to_cpu(aclp->acl_cnt);
-
-	acl = posix_acl_alloc(count, GFP_KERNEL);
-	if (!acl)
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0; i < count; i++) {
-		acl_e = &acl->a_entries[i];
-		ace = &aclp->acl_entry[i];
-
-		/*
-		 * The tag is 32 bits on disk and 16 bits in core.
-		 *
-		 * Because every access to it goes through the core
-		 * format first this is not a problem.
-		 */
-		acl_e->e_tag = be32_to_cpu(ace->ae_tag);
-		acl_e->e_perm = be16_to_cpu(ace->ae_perm);
-
-		switch (acl_e->e_tag) {
-		case ACL_USER:
-		case ACL_GROUP:
-			acl_e->e_id = be32_to_cpu(ace->ae_id);
-			break;
-		case ACL_USER_OBJ:
-		case ACL_GROUP_OBJ:
-		case ACL_MASK:
-		case ACL_OTHER:
-			acl_e->e_id = ACL_UNDEFINED_ID;
-			break;
-		default:
-			goto fail;
-		}
-	}
-	return acl;
-
-fail:
-	posix_acl_release(acl);
-	return ERR_PTR(-EINVAL);
-}
-
-STATIC void
-xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
-{
-	const struct posix_acl_entry *acl_e;
-	struct xfs_acl_entry *ace;
-	int i;
-
-	aclp->acl_cnt = cpu_to_be32(acl->a_count);
-	for (i = 0; i < acl->a_count; i++) {
-		ace = &aclp->acl_entry[i];
-		acl_e = &acl->a_entries[i];
-
-		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
-		ace->ae_id = cpu_to_be32(acl_e->e_id);
-		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
-	}
-}
-
-struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
-{
-	struct xfs_inode *ip = XFS_I(inode);
-	struct posix_acl *acl;
-	struct xfs_acl *xfs_acl;
-	int len = sizeof(struct xfs_acl);
-	unsigned char *ea_name;
-	int error;
-
-	acl = get_cached_acl(inode, type);
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
-	trace_xfs_get_acl(ip);
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		ea_name = SGI_ACL_FILE;
-		break;
-	case ACL_TYPE_DEFAULT:
-		ea_name = SGI_ACL_DEFAULT;
-		break;
-	default:
-		BUG();
-	}
-
-	/*
-	 * If we have a cached ACLs value just return it, not need to
-	 * go out to the disk.
-	 */
-
-	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-	if (!xfs_acl)
-		return ERR_PTR(-ENOMEM);
-
-	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
-							&len, ATTR_ROOT);
-	if (error) {
-		/*
-		 * If the attribute doesn't exist make sure we have a negative
-		 * cache entry, for any other error assume it is transient and
-		 * leave the cache entry as ACL_NOT_CACHED.
-		 */
-		if (error == -ENOATTR) {
-			acl = NULL;
-			goto out_update_cache;
-		}
-		goto out;
-	}
-
-	acl = xfs_acl_from_disk(xfs_acl);
-	if (IS_ERR(acl))
-		goto out;
-
- out_update_cache:
-	set_cached_acl(inode, type, acl);
- out:
-	kfree(xfs_acl);
-	return acl;
-}
-
-STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-	struct xfs_inode *ip = XFS_I(inode);
-	unsigned char *ea_name;
-	int error;
-
-	if (S_ISLNK(inode->i_mode))
-		return -EOPNOTSUPP;
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		ea_name = SGI_ACL_FILE;
-		break;
-	case ACL_TYPE_DEFAULT:
-		if (!S_ISDIR(inode->i_mode))
-			return acl ? -EACCES : 0;
-		ea_name = SGI_ACL_DEFAULT;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (acl) {
-		struct xfs_acl *xfs_acl;
-		int len;
-
-		xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-		if (!xfs_acl)
-			return -ENOMEM;
-
-		xfs_acl_to_disk(xfs_acl, acl);
-		len = sizeof(struct xfs_acl) -
-			(sizeof(struct xfs_acl_entry) *
-			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
-
-		error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
-				len, ATTR_ROOT);
-
-		kfree(xfs_acl);
-	} else {
-		/*
-		 * A NULL ACL argument means we want to remove the ACL.
-		 */
-		error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
-
-		/*
-		 * If the attribute didn't exist to start with that's fine.
-		 */
-		if (error == -ENOATTR)
-			error = 0;
-	}
-
-	if (!error)
-		set_cached_acl(inode, type, acl);
-	return error;
-}
-
-static int
-xfs_set_mode(struct inode *inode, umode_t mode)
-{
-	int error = 0;
-
-	if (mode != inode->i_mode) {
-		struct iattr iattr;
-
-		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-		iattr.ia_mode = mode;
-		iattr.ia_ctime = current_fs_time(inode->i_sb);
-
-		error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-	}
-
-	return error;
-}
-
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
-	int len = sizeof(struct xfs_acl);
-
-	return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
-			    ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
-	return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
-	if (!S_ISDIR(inode->i_mode))
-		return 0;
-	return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
-int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
-{
-	umode_t mode = inode->i_mode;
-	int error = 0, inherit = 0;
-
-	if (S_ISDIR(inode->i_mode)) {
-		error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
-		if (error)
-			goto out;
-	}
-
-	error = posix_acl_create(&acl, GFP_KERNEL, &mode);
-	if (error < 0)
-		return error;
-
-	/*
-	 * If posix_acl_create returns a positive value we need to
-	 * inherit a permission that can't be represented using the Unix
-	 * mode bits and we actually need to set an ACL.
-	 */
-	if (error > 0)
-		inherit = 1;
-
-	error = xfs_set_mode(inode, mode);
-	if (error)
-		goto out;
-
-	if (inherit)
-		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-
-out:
-	posix_acl_release(acl);
-	return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
-	struct posix_acl *acl;
-	int error;
-
-	if (S_ISLNK(inode->i_mode))
-		return -EOPNOTSUPP;
-
-	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
-		return PTR_ERR(acl);
-
-	error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
-	if (error)
-		return error;
-
-	error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-	posix_acl_release(acl);
-	return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
-		void *value, size_t size, int type)
-{
-	struct posix_acl *acl;
-	int error;
-
-	acl = xfs_get_acl(dentry->d_inode, type);
-	if (IS_ERR(acl))
-		return PTR_ERR(acl);
-	if (acl == NULL)
-		return -ENODATA;
-
-	error = posix_acl_to_xattr(acl, value, size);
-	posix_acl_release(acl);
-
-	return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags, int type)
-{
-	struct inode *inode = dentry->d_inode;
-	struct posix_acl *acl = NULL;
-	int error = 0;
-
-	if (flags & XATTR_CREATE)
-		return -EINVAL;
-	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-		return value ? -EACCES : 0;
-	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
-		return -EPERM;
-
-	if (!value)
-		goto set_acl;
-
-	acl = posix_acl_from_xattr(value, size);
-	if (!acl) {
-		/*
-		 * acl_set_file(3) may request that we set default ACLs with
-		 * zero length -- defend (gracefully) against that here.
-		 */
-		goto out;
-	}
-	if (IS_ERR(acl)) {
-		error = PTR_ERR(acl);
-		goto out;
-	}
-
-	error = posix_acl_valid(acl);
-	if (error)
-		goto out_release;
-
-	error = -EINVAL;
-	if (acl->a_count > XFS_ACL_MAX_ENTRIES)
-		goto out_release;
-
-	if (type == ACL_TYPE_ACCESS) {
-		umode_t mode = inode->i_mode;
-		error = posix_acl_equiv_mode(acl, &mode);
-
-		if (error <= 0) {
-			posix_acl_release(acl);
-			acl = NULL;
-
-			if (error < 0)
-				return error;
-		}
-
-		error = xfs_set_mode(inode, mode);
-		if (error)
-			goto out_release;
-	}
-
- set_acl:
-	error = xfs_set_acl(inode, type, acl);
- out_release:
-	posix_acl_release(acl);
- out:
-	return error;
-}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
-	.prefix	= POSIX_ACL_XATTR_ACCESS,
-	.flags	= ACL_TYPE_ACCESS,
-	.get	= xfs_xattr_acl_get,
-	.set	= xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
-	.prefix	= POSIX_ACL_XATTR_DEFAULT,
-	.flags	= ACL_TYPE_DEFAULT,
-	.get	= xfs_xattr_acl_get,
-	.set	= xfs_xattr_acl_set,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
deleted file mode 100644
index 63e971e..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_bmap.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-
-/*
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC		37
-#define to_ioend_wq(v)	(&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t xfs_ioend_wq[NVSYNC];
-
-void __init
-xfs_ioend_init(void)
-{
-	int i;
-
-	for (i = 0; i < NVSYNC; i++)
-		init_waitqueue_head(&xfs_ioend_wq[i]);
-}
-
-void
-xfs_ioend_wait(
-	xfs_inode_t	*ip)
-{
-	wait_queue_head_t *wq = to_ioend_wq(ip);
-
-	wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-STATIC void
-xfs_ioend_wake(
-	xfs_inode_t	*ip)
-{
-	if (atomic_dec_and_test(&ip->i_iocount))
-		wake_up(to_ioend_wq(ip));
-}
-
-void
-xfs_count_page_state(
-	struct page		*page,
-	int			*delalloc,
-	int			*unwritten)
-{
-	struct buffer_head	*bh, *head;
-
-	*delalloc = *unwritten = 0;
-
-	bh = head = page_buffers(page);
-	do {
-		if (buffer_unwritten(bh))
-			(*unwritten) = 1;
-		else if (buffer_delay(bh))
-			(*delalloc) = 1;
-	} while ((bh = bh->b_this_page) != head);
-}
-
-STATIC struct block_device *
-xfs_find_bdev_for_inode(
-	struct inode		*inode)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (XFS_IS_REALTIME_INODE(ip))
-		return mp->m_rtdev_targp->bt_bdev;
-	else
-		return mp->m_ddev_targp->bt_bdev;
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory.  Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
-	xfs_ioend_t		*ioend)
-{
-	struct buffer_head	*bh, *next;
-	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
-
-	for (bh = ioend->io_buffer_head; bh; bh = next) {
-		next = bh->b_private;
-		bh->b_end_io(bh, !ioend->io_error);
-	}
-
-	/*
-	 * Volume managers supporting multiple paths can send back ENODEV
-	 * when the final path disappears.  In this case continuing to fill
-	 * the page cache with dirty data which cannot be written out is
-	 * evil, so prevent that.
-	 */
-	if (unlikely(ioend->io_error == -ENODEV)) {
-		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
-				      __FILE__, __LINE__);
-	}
-
-	xfs_ioend_wake(ip);
-	mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-	xfs_ioend_t		*ioend)
-{
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
-	xfs_fsize_t		isize;
-	xfs_fsize_t		bsize;
-
-	bsize = ioend->io_offset + ioend->io_size;
-	isize = MAX(ip->i_size, ip->i_new_size);
-	isize = MIN(isize, bsize);
-	return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
- * Update on-disk file size now that data has been written to disk.  The
- * current in-memory file size is i_size.  If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated.  If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
- */
-STATIC int
-xfs_setfilesize(
-	xfs_ioend_t		*ioend)
-{
-	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
-	xfs_fsize_t		isize;
-
-	if (unlikely(ioend->io_error))
-		return 0;
-
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-		return EAGAIN;
-
-	isize = xfs_ioend_new_eof(ioend);
-	if (isize) {
-		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
-		ip->i_d.di_size = isize;
-		xfs_mark_inode_dirty(ip);
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
-}
-
-/*
- * Schedule IO completion handling on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend(
-	struct xfs_ioend	*ioend)
-{
-	if (atomic_dec_and_test(&ioend->io_remaining)) {
-		if (ioend->io_type == IO_UNWRITTEN)
-			queue_work(xfsconvertd_workqueue, &ioend->io_work);
-		else
-			queue_work(xfsdatad_workqueue, &ioend->io_work);
-	}
-}
-
-/*
- * IO write completion.
- */
-STATIC void
-xfs_end_io(
-	struct work_struct *work)
-{
-	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
-	struct xfs_inode *ip = XFS_I(ioend->io_inode);
-	int		error = 0;
-
-	/*
-	 * For unwritten extents we need to issue transactions to convert a
-	 * range to normal written extens after the data I/O has finished.
-	 */
-	if (ioend->io_type == IO_UNWRITTEN &&
-	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-
-		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-						 ioend->io_size);
-		if (error)
-			ioend->io_error = error;
-	}
-
-	/*
-	 * We might have to update the on-disk file size after extending
-	 * writes.
-	 */
-	error = xfs_setfilesize(ioend);
-	ASSERT(!error || error == EAGAIN);
-
-	/*
-	 * If we didn't complete processing of the ioend, requeue it to the
-	 * tail of the workqueue for another attempt later. Otherwise destroy
-	 * it.
-	 */
-	if (error == EAGAIN) {
-		atomic_inc(&ioend->io_remaining);
-		xfs_finish_ioend(ioend);
-		/* ensure we don't spin on blocked ioends */
-		delay(1);
-	} else {
-		if (ioend->io_iocb)
-			aio_complete(ioend->io_iocb, ioend->io_result, 0);
-		xfs_destroy_ioend(ioend);
-	}
-}
-
-/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
-	struct xfs_ioend	*ioend)
-{
-	if (atomic_dec_and_test(&ioend->io_remaining))
-		xfs_end_io(&ioend->io_work);
-}
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-	struct inode		*inode,
-	unsigned int		type)
-{
-	xfs_ioend_t		*ioend;
-
-	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
-	/*
-	 * Set the count to 1 initially, which will prevent an I/O
-	 * completion callback from happening before we have started
-	 * all the I/O from calling the completion routine too early.
-	 */
-	atomic_set(&ioend->io_remaining, 1);
-	ioend->io_error = 0;
-	ioend->io_list = NULL;
-	ioend->io_type = type;
-	ioend->io_inode = inode;
-	ioend->io_buffer_head = NULL;
-	ioend->io_buffer_tail = NULL;
-	atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
-	ioend->io_offset = 0;
-	ioend->io_size = 0;
-	ioend->io_iocb = NULL;
-	ioend->io_result = 0;
-
-	INIT_WORK(&ioend->io_work, xfs_end_io);
-	return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
-	struct inode		*inode,
-	loff_t			offset,
-	struct xfs_bmbt_irec	*imap,
-	int			type,
-	int			nonblocking)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	ssize_t			count = 1 << inode->i_blkbits;
-	xfs_fileoff_t		offset_fsb, end_fsb;
-	int			error = 0;
-	int			bmapi_flags = XFS_BMAPI_ENTIRE;
-	int			nimaps = 1;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -XFS_ERROR(EIO);
-
-	if (type == IO_UNWRITTEN)
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-		if (nonblocking)
-			return -XFS_ERROR(EAGAIN);
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-	}
-
-	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
-	       (ip->i_df.if_flags & XFS_IFEXTENTS));
-	ASSERT(offset <= mp->m_maxioffset);
-
-	if (offset + count > mp->m_maxioffset)
-		count = mp->m_maxioffset - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-			  bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (error)
-		return -XFS_ERROR(error);
-
-	if (type == IO_DELALLOC &&
-	    (!nimaps || isnullstartblock(imap->br_startblock))) {
-		error = xfs_iomap_write_allocate(ip, offset, count, imap);
-		if (!error)
-			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-		return -XFS_ERROR(error);
-	}
-
-#ifdef DEBUG
-	if (type == IO_UNWRITTEN) {
-		ASSERT(nimaps);
-		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-	}
-#endif
-	if (nimaps)
-		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
-	return 0;
-}
-
-STATIC int
-xfs_imap_valid(
-	struct inode		*inode,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	offset >>= inode->i_blkbits;
-
-	return offset >= imap->br_startoff &&
-		offset < imap->br_startoff + imap->br_blockcount;
-}
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-	struct bio		*bio,
-	int			error)
-{
-	xfs_ioend_t		*ioend = bio->bi_private;
-
-	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
-	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
-
-	/* Toss bio and pass work off to an xfsdatad thread */
-	bio->bi_private = NULL;
-	bio->bi_end_io = NULL;
-	bio_put(bio);
-
-	xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-	struct writeback_control *wbc,
-	xfs_ioend_t		*ioend,
-	struct bio		*bio)
-{
-	atomic_inc(&ioend->io_remaining);
-	bio->bi_private = ioend;
-	bio->bi_end_io = xfs_end_bio;
-
-	/*
-	 * If the I/O is beyond EOF we mark the inode dirty immediately
-	 * but don't update the inode size until I/O completion.
-	 */
-	if (xfs_ioend_new_eof(ioend))
-		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
-	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-	struct buffer_head	*bh)
-{
-	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
-	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
-
-	ASSERT(bio->bi_private == NULL);
-	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio->bi_bdev = bh->b_bdev;
-	return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
-	struct buffer_head	*bh)
-{
-	ASSERT(buffer_mapped(bh));
-	ASSERT(buffer_locked(bh));
-	ASSERT(!buffer_delay(bh));
-	ASSERT(!buffer_unwritten(bh));
-
-	mark_buffer_async_write(bh);
-	set_buffer_uptodate(bh);
-	clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
-	struct page		*page,
-	int			clear_dirty,
-	int			buffers)
-{
-	ASSERT(PageLocked(page));
-	ASSERT(!PageWriteback(page));
-	if (clear_dirty)
-		clear_page_dirty_for_io(page);
-	set_page_writeback(page);
-	unlock_page(page);
-	/* If no buffers on the page are to be written, finish it here */
-	if (!buffers)
-		end_page_writeback(page);
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
-	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
- */
-STATIC void
-xfs_submit_ioend(
-	struct writeback_control *wbc,
-	xfs_ioend_t		*ioend)
-{
-	xfs_ioend_t		*head = ioend;
-	xfs_ioend_t		*next;
-	struct buffer_head	*bh;
-	struct bio		*bio;
-	sector_t		lastblock = 0;
-
-	/* Pass 1 - start writeback */
-	do {
-		next = ioend->io_list;
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-			xfs_start_buffer_writeback(bh);
-	} while ((ioend = next) != NULL);
-
-	/* Pass 2 - submit I/O */
-	ioend = head;
-	do {
-		next = ioend->io_list;
-		bio = NULL;
-
-		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-			if (!bio) {
- retry:
-				bio = xfs_alloc_ioend_bio(bh);
-			} else if (bh->b_blocknr != lastblock + 1) {
-				xfs_submit_ioend_bio(wbc, ioend, bio);
-				goto retry;
-			}
-
-			if (bio_add_buffer(bio, bh) != bh->b_size) {
-				xfs_submit_ioend_bio(wbc, ioend, bio);
-				goto retry;
-			}
-
-			lastblock = bh->b_blocknr;
-		}
-		if (bio)
-			xfs_submit_ioend_bio(wbc, ioend, bio);
-		xfs_finish_ioend(ioend);
-	} while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-	xfs_ioend_t		*ioend)
-{
-	xfs_ioend_t		*next;
-	struct buffer_head	*bh, *next_bh;
-
-	do {
-		next = ioend->io_list;
-		bh = ioend->io_buffer_head;
-		do {
-			next_bh = bh->b_private;
-			clear_buffer_async_write(bh);
-			unlock_buffer(bh);
-		} while ((bh = next_bh) != NULL);
-
-		xfs_ioend_wake(XFS_I(ioend->io_inode));
-		mempool_free(ioend, xfs_ioend_pool);
-	} while ((ioend = next) != NULL);
-}
-
-/*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
- */
-STATIC void
-xfs_add_to_ioend(
-	struct inode		*inode,
-	struct buffer_head	*bh,
-	xfs_off_t		offset,
-	unsigned int		type,
-	xfs_ioend_t		**result,
-	int			need_ioend)
-{
-	xfs_ioend_t		*ioend = *result;
-
-	if (!ioend || need_ioend || type != ioend->io_type) {
-		xfs_ioend_t	*previous = *result;
-
-		ioend = xfs_alloc_ioend(inode, type);
-		ioend->io_offset = offset;
-		ioend->io_buffer_head = bh;
-		ioend->io_buffer_tail = bh;
-		if (previous)
-			previous->io_list = ioend;
-		*result = ioend;
-	} else {
-		ioend->io_buffer_tail->b_private = bh;
-		ioend->io_buffer_tail = bh;
-	}
-
-	bh->b_private = NULL;
-	ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_buffer(
-	struct inode		*inode,
-	struct buffer_head	*bh,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	sector_t		bn;
-	struct xfs_mount	*m = XFS_I(inode)->i_mount;
-	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
-	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
-	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-	      ((offset - iomap_offset) >> inode->i_blkbits);
-
-	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
-	bh->b_blocknr = bn;
-	set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
-	struct inode		*inode,
-	struct buffer_head	*bh,
-	struct xfs_bmbt_irec	*imap,
-	xfs_off_t		offset)
-{
-	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-	xfs_map_buffer(inode, bh, imap, offset);
-	set_buffer_mapped(bh);
-	clear_buffer_delay(bh);
-	clear_buffer_unwritten(bh);
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
-	struct page		*page,
-	unsigned int		type)
-{
-	if (PageWriteback(page))
-		return 0;
-
-	if (page->mapping && page_has_buffers(page)) {
-		struct buffer_head	*bh, *head;
-		int			acceptable = 0;
-
-		bh = head = page_buffers(page);
-		do {
-			if (buffer_unwritten(bh))
-				acceptable = (type == IO_UNWRITTEN);
-			else if (buffer_delay(bh))
-				acceptable = (type == IO_DELALLOC);
-			else if (buffer_dirty(bh) && buffer_mapped(bh))
-				acceptable = (type == IO_OVERWRITE);
-			else
-				break;
-		} while ((bh = bh->b_this_page) != head);
-
-		if (acceptable)
-			return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			tindex,
-	struct xfs_bmbt_irec	*imap,
-	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc)
-{
-	struct buffer_head	*bh, *head;
-	xfs_off_t		end_offset;
-	unsigned long		p_offset;
-	unsigned int		type;
-	int			len, page_dirty;
-	int			count = 0, done = 0, uptodate = 1;
- 	xfs_off_t		offset = page_offset(page);
-
-	if (page->index != tindex)
-		goto fail;
-	if (!trylock_page(page))
-		goto fail;
-	if (PageWriteback(page))
-		goto fail_unlock_page;
-	if (page->mapping != inode->i_mapping)
-		goto fail_unlock_page;
-	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
-		goto fail_unlock_page;
-
-	/*
-	 * page_dirty is initially a count of buffers on the page before
-	 * EOF and is decremented as we move each into a cleanable state.
-	 *
-	 * Derivation:
-	 *
-	 * End offset is the highest offset that this page should represent.
-	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-	 * hence give us the correct page_dirty count. On any other page,
-	 * it will be zero and in that case we need page_dirty to be the
-	 * count of buffers on the page.
-	 */
-	end_offset = min_t(unsigned long long,
-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-			i_size_read(inode));
-
-	len = 1 << inode->i_blkbits;
-	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-					PAGE_CACHE_SIZE);
-	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-	page_dirty = p_offset / len;
-
-	bh = head = page_buffers(page);
-	do {
-		if (offset >= end_offset)
-			break;
-		if (!buffer_uptodate(bh))
-			uptodate = 0;
-		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-			done = 1;
-			continue;
-		}
-
-		if (buffer_unwritten(bh) || buffer_delay(bh) ||
-		    buffer_mapped(bh)) {
-			if (buffer_unwritten(bh))
-				type = IO_UNWRITTEN;
-			else if (buffer_delay(bh))
-				type = IO_DELALLOC;
-			else
-				type = IO_OVERWRITE;
-
-			if (!xfs_imap_valid(inode, imap, offset)) {
-				done = 1;
-				continue;
-			}
-
-			lock_buffer(bh);
-			if (type != IO_OVERWRITE)
-				xfs_map_at_offset(inode, bh, imap, offset);
-			xfs_add_to_ioend(inode, bh, offset, type,
-					 ioendp, done);
-
-			page_dirty--;
-			count++;
-		} else {
-			done = 1;
-		}
-	} while (offset += len, (bh = bh->b_this_page) != head);
-
-	if (uptodate && bh == head)
-		SetPageUptodate(page);
-
-	if (count) {
-		if (--wbc->nr_to_write <= 0 &&
-		    wbc->sync_mode == WB_SYNC_NONE)
-			done = 1;
-	}
-	xfs_start_page_writeback(page, !page_dirty, count);
-
-	return done;
- fail_unlock_page:
-	unlock_page(page);
- fail:
-	return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-	struct inode		*inode,
-	pgoff_t			tindex,
-	struct xfs_bmbt_irec	*imap,
-	xfs_ioend_t		**ioendp,
-	struct writeback_control *wbc,
-	pgoff_t			tlast)
-{
-	struct pagevec		pvec;
-	int			done = 0, i;
-
-	pagevec_init(&pvec, 0);
-	while (!done && tindex <= tlast) {
-		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-			break;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-					imap, ioendp, wbc);
-			if (done)
-				break;
-		}
-
-		pagevec_release(&pvec);
-		cond_resched();
-	}
-}
-
-STATIC void
-xfs_vm_invalidatepage(
-	struct page		*page,
-	unsigned long		offset)
-{
-	trace_xfs_invalidatepage(page->mapping->host, page, offset);
-	block_invalidatepage(page, offset);
-}
-
-/*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
- *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
- * reservation (typically why we see a ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
- */
-STATIC void
-xfs_aops_discard_page(
-	struct page		*page)
-{
-	struct inode		*inode = page->mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct buffer_head	*bh, *head;
-	loff_t			offset = page_offset(page);
-
-	if (!xfs_is_delayed_page(page, IO_DELALLOC))
-		goto out_invalidate;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		goto out_invalidate;
-
-	xfs_alert(ip->i_mount,
-		"page discard on page %p, inode 0x%llx, offset %llu.",
-			page, ip->i_ino, offset);
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	bh = head = page_buffers(page);
-	do {
-		int		error;
-		xfs_fileoff_t	start_fsb;
-
-		if (!buffer_delay(bh))
-			goto next_buffer;
-
-		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"page discard unable to remove delalloc mapping.");
-			}
-			break;
-		}
-next_buffer:
-		offset += 1 << inode->i_blkbits;
-
-	} while ((bh = bh->b_this_page) != head);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_invalidate:
-	xfs_vm_invalidatepage(page, 0);
-	return;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
- */
-STATIC int
-xfs_vm_writepage(
-	struct page		*page,
-	struct writeback_control *wbc)
-{
-	struct inode		*inode = page->mapping->host;
-	struct buffer_head	*bh, *head;
-	struct xfs_bmbt_irec	imap;
-	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
-	loff_t			offset;
-	unsigned int		type;
-	__uint64_t              end_offset;
-	pgoff_t                 end_index, last_index;
-	ssize_t			len;
-	int			err, imap_valid = 0, uptodate = 1;
-	int			count = 0;
-	int			nonblocking = 0;
-
-	trace_xfs_writepage(inode, page, 0);
-
-	ASSERT(page_has_buffers(page));
-
-	/*
-	 * Refuse to write the page out if we are called from reclaim context.
-	 *
-	 * This avoids stack overflows when called from deeply used stacks in
-	 * random callers for direct reclaim or memcg reclaim.  We explicitly
-	 * allow reclaim from kswapd as the stack usage there is relatively low.
-	 *
-	 * This should really be done by the core VM, but until that happens
-	 * filesystems like XFS, btrfs and ext4 have to take care of this
-	 * by themselves.
-	 */
-	if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-		goto redirty;
-
-	/*
-	 * Given that we do not allow direct reclaim to call us, we should
-	 * never be called while in a filesystem transaction.
-	 */
-	if (WARN_ON(current->flags & PF_FSTRANS))
-		goto redirty;
-
-	/* Is this page beyond the end of the file? */
-	offset = i_size_read(inode);
-	end_index = offset >> PAGE_CACHE_SHIFT;
-	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index >= end_index) {
-		if ((page->index >= end_index + 1) ||
-		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-			unlock_page(page);
-			return 0;
-		}
-	}
-
-	end_offset = min_t(unsigned long long,
-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-			offset);
-	len = 1 << inode->i_blkbits;
-
-	bh = head = page_buffers(page);
-	offset = page_offset(page);
-	type = IO_OVERWRITE;
-
-	if (wbc->sync_mode == WB_SYNC_NONE)
-		nonblocking = 1;
-
-	do {
-		int new_ioend = 0;
-
-		if (offset >= end_offset)
-			break;
-		if (!buffer_uptodate(bh))
-			uptodate = 0;
-
-		/*
-		 * set_page_dirty dirties all buffers in a page, independent
-		 * of their state.  The dirty state however is entirely
-		 * meaningless for holes (!mapped && uptodate), so skip
-		 * buffers covering holes here.
-		 */
-		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-			imap_valid = 0;
-			continue;
-		}
-
-		if (buffer_unwritten(bh)) {
-			if (type != IO_UNWRITTEN) {
-				type = IO_UNWRITTEN;
-				imap_valid = 0;
-			}
-		} else if (buffer_delay(bh)) {
-			if (type != IO_DELALLOC) {
-				type = IO_DELALLOC;
-				imap_valid = 0;
-			}
-		} else if (buffer_uptodate(bh)) {
-			if (type != IO_OVERWRITE) {
-				type = IO_OVERWRITE;
-				imap_valid = 0;
-			}
-		} else {
-			if (PageUptodate(page)) {
-				ASSERT(buffer_mapped(bh));
-				imap_valid = 0;
-			}
-			continue;
-		}
-
-		if (imap_valid)
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-		if (!imap_valid) {
-			/*
-			 * If we didn't have a valid mapping then we need to
-			 * put the new mapping into a separate ioend structure.
-			 * This ensures non-contiguous extents always have
-			 * separate ioends, which is particularly important
-			 * for unwritten extent conversion at I/O completion
-			 * time.
-			 */
-			new_ioend = 1;
-			err = xfs_map_blocks(inode, offset, &imap, type,
-					     nonblocking);
-			if (err)
-				goto error;
-			imap_valid = xfs_imap_valid(inode, &imap, offset);
-		}
-		if (imap_valid) {
-			lock_buffer(bh);
-			if (type != IO_OVERWRITE)
-				xfs_map_at_offset(inode, bh, &imap, offset);
-			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-					 new_ioend);
-			count++;
-		}
-
-		if (!iohead)
-			iohead = ioend;
-
-	} while (offset += len, ((bh = bh->b_this_page) != head));
-
-	if (uptodate && bh == head)
-		SetPageUptodate(page);
-
-	xfs_start_page_writeback(page, 1, count);
-
-	if (ioend && imap_valid) {
-		xfs_off_t		end_index;
-
-		end_index = imap.br_startoff + imap.br_blockcount;
-
-		/* to bytes */
-		end_index <<= inode->i_blkbits;
-
-		/* to pages */
-		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-		/* check against file size */
-		if (end_index > last_index)
-			end_index = last_index;
-
-		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-				  wbc, end_index);
-	}
-
-	if (iohead)
-		xfs_submit_ioend(wbc, iohead);
-
-	return 0;
-
-error:
-	if (iohead)
-		xfs_cancel_ioend(iohead);
-
-	if (err == -EAGAIN)
-		goto redirty;
-
-	xfs_aops_discard_page(page);
-	ClearPageUptodate(page);
-	unlock_page(page);
-	return err;
-
-redirty:
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
-STATIC int
-xfs_vm_writepages(
-	struct address_space	*mapping,
-	struct writeback_control *wbc)
-{
-	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-	return generic_writepages(mapping, wbc);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
-STATIC int
-xfs_vm_releasepage(
-	struct page		*page,
-	gfp_t			gfp_mask)
-{
-	int			delalloc, unwritten;
-
-	trace_xfs_releasepage(page->mapping->host, page, 0);
-
-	xfs_count_page_state(page, &delalloc, &unwritten);
-
-	if (WARN_ON(delalloc))
-		return 0;
-	if (WARN_ON(unwritten))
-		return 0;
-
-	return try_to_free_buffers(page);
-}
-
-STATIC int
-__xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create,
-	int			direct)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		offset_fsb, end_fsb;
-	int			error = 0;
-	int			lockmode = 0;
-	struct xfs_bmbt_irec	imap;
-	int			nimaps = 1;
-	xfs_off_t		offset;
-	ssize_t			size;
-	int			new = 0;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -XFS_ERROR(EIO);
-
-	offset = (xfs_off_t)iblock << inode->i_blkbits;
-	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
-	size = bh_result->b_size;
-
-	if (!create && direct && offset >= i_size_read(inode))
-		return 0;
-
-	if (create) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_map_shared(ip);
-	}
-
-	ASSERT(offset <= mp->m_maxioffset);
-	if (offset + size > mp->m_maxioffset)
-		size = mp->m_maxioffset - offset;
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
-	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-			  XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
-	if (error)
-		goto out_unlock;
-
-	if (create &&
-	    (!nimaps ||
-	     (imap.br_startblock == HOLESTARTBLOCK ||
-	      imap.br_startblock == DELAYSTARTBLOCK))) {
-		if (direct) {
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-		} else {
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-		}
-		if (error)
-			goto out_unlock;
-
-		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
-	} else if (nimaps) {
-		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
-	} else {
-		trace_xfs_get_blocks_notfound(ip, offset, size);
-		goto out_unlock;
-	}
-	xfs_iunlock(ip, lockmode);
-
-	if (imap.br_startblock != HOLESTARTBLOCK &&
-	    imap.br_startblock != DELAYSTARTBLOCK) {
-		/*
-		 * For unwritten extents do not report a disk address on
-		 * the read case (treat as if we're reading into a hole).
-		 */
-		if (create || !ISUNWRITTEN(&imap))
-			xfs_map_buffer(inode, bh_result, &imap, offset);
-		if (create && ISUNWRITTEN(&imap)) {
-			if (direct)
-				bh_result->b_private = inode;
-			set_buffer_unwritten(bh_result);
-		}
-	}
-
-	/*
-	 * If this is a realtime file, data may be on a different device.
-	 * to that pointed to from the buffer_head b_bdev currently.
-	 */
-	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-	/*
-	 * If we previously allocated a block out beyond eof and we are now
-	 * coming back to use it then we will need to flag it as new even if it
-	 * has a disk address.
-	 *
-	 * With sub-block writes into unwritten extents we also need to mark
-	 * the buffer as new so that the unwritten parts of the buffer gets
-	 * correctly zeroed.
-	 */
-	if (create &&
-	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-	     (offset >= i_size_read(inode)) ||
-	     (new || ISUNWRITTEN(&imap))))
-		set_buffer_new(bh_result);
-
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
-
-	/*
-	 * If this is O_DIRECT or the mpage code calling tell them how large
-	 * the mapping is, so that we can avoid repeated get_blocks calls.
-	 */
-	if (direct || size > (1 << inode->i_blkbits)) {
-		xfs_off_t		mapping_size;
-
-		mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-		mapping_size <<= inode->i_blkbits;
-
-		ASSERT(mapping_size > 0);
-		if (mapping_size > size)
-			mapping_size = size;
-		if (mapping_size > LONG_MAX)
-			mapping_size = LONG_MAX;
-
-		bh_result->b_size = mapping_size;
-	}
-
-	return 0;
-
-out_unlock:
-	xfs_iunlock(ip, lockmode);
-	return -error;
-}
-
-int
-xfs_get_blocks(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
-}
-
-STATIC int
-xfs_get_blocks_direct(
-	struct inode		*inode,
-	sector_t		iblock,
-	struct buffer_head	*bh_result,
-	int			create)
-{
-	return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions.  In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
- */
-STATIC void
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
-	loff_t			offset,
-	ssize_t			size,
-	void			*private,
-	int			ret,
-	bool			is_async)
-{
-	struct xfs_ioend	*ioend = iocb->private;
-
-	/*
-	 * blockdev_direct_IO can return an error even after the I/O
-	 * completion handler was called.  Thus we need to protect
-	 * against double-freeing.
-	 */
-	iocb->private = NULL;
-
-	ioend->io_offset = offset;
-	ioend->io_size = size;
-	if (private && size > 0)
-		ioend->io_type = IO_UNWRITTEN;
-
-	if (is_async) {
-		/*
-		 * If we are converting an unwritten extent we need to delay
-		 * the AIO completion until after the unwrittent extent
-		 * conversion has completed, otherwise do it ASAP.
-		 */
-		if (ioend->io_type == IO_UNWRITTEN) {
-			ioend->io_iocb = iocb;
-			ioend->io_result = ret;
-		} else {
-			aio_complete(iocb, ret, 0);
-		}
-		xfs_finish_ioend(ioend);
-	} else {
-		xfs_finish_ioend_sync(ioend);
-	}
-
-	/* XXX: probably should move into the real I/O completion handler */
-	inode_dio_done(ioend->io_inode);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-	int			rw,
-	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	loff_t			offset,
-	unsigned long		nr_segs)
-{
-	struct inode		*inode = iocb->ki_filp->f_mapping->host;
-	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
-	ssize_t			ret;
-
-	if (rw & WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
-
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
-					    xfs_get_blocks_direct,
-					    xfs_end_io_direct_write, NULL, 0);
-		if (ret != -EIOCBQUEUED && iocb->private)
-			xfs_destroy_ioend(iocb->private);
-	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
-					    xfs_get_blocks_direct,
-					    NULL, NULL, 0);
-	}
-
-	return ret;
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct address_space	*mapping,
-	loff_t			to)
-{
-	struct inode		*inode = mapping->host;
-
-	if (to > inode->i_size) {
-		/*
-		 * punch out the delalloc blocks we have already allocated. We
-		 * don't call xfs_setattr() to do this as we may be in the
-		 * middle of a multi-iovec write and so the vfs inode->i_size
-		 * will not match the xfs ip->i_size and so it will zero too
-		 * much. Hence we jus truncate the page cache to zero what is
-		 * necessary and punch the delalloc blocks directly.
-		 */
-		struct xfs_inode	*ip = XFS_I(inode);
-		xfs_fileoff_t		start_fsb;
-		xfs_fileoff_t		end_fsb;
-		int			error;
-
-		truncate_pagecache(inode, to, inode->i_size);
-
-		/*
-		 * Check if there are any blocks that are outside of i_size
-		 * that need to be trimmed back.
-		 */
-		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-		if (end_fsb <= start_fsb)
-			return;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-							end_fsb - start_fsb);
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"xfs_vm_write_failed: unable to clean up ino %lld",
-						ip->i_ino);
-			}
-		}
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-}
-
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	int			ret;
-
-	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-				pagep, xfs_get_blocks);
-	if (unlikely(ret))
-		xfs_vm_write_failed(mapping, pos + len);
-	return ret;
-}
-
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len))
-		xfs_vm_write_failed(mapping, pos + len);
-	return ret;
-}
-
-STATIC sector_t
-xfs_vm_bmap(
-	struct address_space	*mapping,
-	sector_t		block)
-{
-	struct inode		*inode = (struct inode *)mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-
-	trace_xfs_vm_bmap(XFS_I(inode));
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	return generic_block_bmap(mapping, block, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpage(
-	struct file		*unused,
-	struct page		*page)
-{
-	return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
-	struct file		*unused,
-	struct address_space	*mapping,
-	struct list_head	*pages,
-	unsigned		nr_pages)
-{
-	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-const struct address_space_operations xfs_address_space_operations = {
-	.readpage		= xfs_vm_readpage,
-	.readpages		= xfs_vm_readpages,
-	.writepage		= xfs_vm_writepage,
-	.writepages		= xfs_vm_writepages,
-	.releasepage		= xfs_vm_releasepage,
-	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
-	.bmap			= xfs_vm_bmap,
-	.direct_IO		= xfs_vm_direct_IO,
-	.migratepage		= buffer_migrate_page,
-	.is_partially_uptodate  = block_is_partially_uptodate,
-	.error_remove_page	= generic_error_remove_page,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
deleted file mode 100644
index 71f721e..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_AOPS_H__
-#define __XFS_AOPS_H__
-
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
-extern mempool_t *xfs_ioend_pool;
-
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-	IO_DIRECT = 0,	/* special case for direct I/O ioends */
-	IO_DELALLOC,	/* mapping covers delalloc region */
-	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
-	IO_OVERWRITE,	/* mapping covers already allocated extent */
-};
-
-#define XFS_IO_TYPES \
-	{ 0,			"" }, \
-	{ IO_DELALLOC,		"delalloc" }, \
-	{ IO_UNWRITTEN,		"unwritten" }, \
-	{ IO_OVERWRITE,		"overwrite" }
-
-/*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
- */
-typedef struct xfs_ioend {
-	struct xfs_ioend	*io_list;	/* next ioend in chain */
-	unsigned int		io_type;	/* delalloc / unwritten */
-	int			io_error;	/* I/O error code */
-	atomic_t		io_remaining;	/* hold count */
-	struct inode		*io_inode;	/* file being written to */
-	struct buffer_head	*io_buffer_head;/* buffer linked list head */
-	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
-	size_t			io_size;	/* size of the extent */
-	xfs_off_t		io_offset;	/* offset in the file */
-	struct work_struct	io_work;	/* xfsdatad work queue */
-	struct kiocb		*io_iocb;
-	int			io_result;
-} xfs_ioend_t;
-
-extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
-
-extern void xfs_ioend_init(void);
-extern void xfs_ioend_wait(struct xfs_inode *);
-
-extern void xfs_count_page_state(struct page *, int *, int *);
-
-#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
deleted file mode 100644
index c57836d..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ /dev/null
@@ -1,1876 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
-#include <linux/backing-dev.h>
-#include <linux/freezer.h>
-
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_trace.h"
-
-static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp)	do { } while (0)
-# define XB_CLEAR_OWNER(bp)	do { } while (0)
-# define XB_GET_OWNER(bp)	do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
-	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
-	 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
-	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
-	kmem_zone_free(xfs_buf_zone, (bp));
-
-static inline int
-xfs_buf_is_vmapped(
-	struct xfs_buf	*bp)
-{
-	/*
-	 * Return true if the buffer is vmapped.
-	 *
-	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
-	 * code is clever enough to know it doesn't have to map a single page,
-	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
-	 */
-	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
-}
-
-static inline int
-xfs_buf_vmap_len(
-	struct xfs_buf	*bp)
-{
-	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
-}
-
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
-	struct xfs_buf	*bp)
-{
-	struct xfs_buftarg *btp = bp->b_target;
-
-	spin_lock(&btp->bt_lru_lock);
-	if (list_empty(&bp->b_lru)) {
-		atomic_inc(&bp->b_hold);
-		list_add_tail(&bp->b_lru, &btp->bt_lru);
-		btp->bt_lru_nr++;
-	}
-	spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are not
- * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
- * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
- * bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
-	struct xfs_buf	*bp)
-{
-	struct xfs_buftarg *btp = bp->b_target;
-
-	if (list_empty(&bp->b_lru))
-		return;
-
-	spin_lock(&btp->bt_lru_lock);
-	if (!list_empty(&bp->b_lru)) {
-		list_del_init(&bp->b_lru);
-		btp->bt_lru_nr--;
-	}
-	spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * When we mark a buffer stale, we remove the buffer from the LRU and clear the
- * b_lru_ref count so that the buffer is freed immediately when the buffer
- * reference count falls to zero. If the buffer is already on the LRU, we need
- * to remove the reference that LRU holds on the buffer.
- *
- * This prevents build-up of stale buffers on the LRU.
- */
-void
-xfs_buf_stale(
-	struct xfs_buf	*bp)
-{
-	bp->b_flags |= XBF_STALE;
-	atomic_set(&(bp)->b_lru_ref, 0);
-	if (!list_empty(&bp->b_lru)) {
-		struct xfs_buftarg *btp = bp->b_target;
-
-		spin_lock(&btp->bt_lru_lock);
-		if (!list_empty(&bp->b_lru)) {
-			list_del_init(&bp->b_lru);
-			btp->bt_lru_nr--;
-			atomic_dec(&bp->b_hold);
-		}
-		spin_unlock(&btp->bt_lru_lock);
-	}
-	ASSERT(atomic_read(&bp->b_hold) >= 1);
-}
-
-STATIC void
-_xfs_buf_initialize(
-	xfs_buf_t		*bp,
-	xfs_buftarg_t		*target,
-	xfs_off_t		range_base,
-	size_t			range_length,
-	xfs_buf_flags_t		flags)
-{
-	/*
-	 * We don't want certain flags to appear in b_flags.
-	 */
-	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
-	memset(bp, 0, sizeof(xfs_buf_t));
-	atomic_set(&bp->b_hold, 1);
-	atomic_set(&bp->b_lru_ref, 1);
-	init_completion(&bp->b_iowait);
-	INIT_LIST_HEAD(&bp->b_lru);
-	INIT_LIST_HEAD(&bp->b_list);
-	RB_CLEAR_NODE(&bp->b_rbnode);
-	sema_init(&bp->b_sema, 0); /* held, no waiters */
-	XB_SET_OWNER(bp);
-	bp->b_target = target;
-	bp->b_file_offset = range_base;
-	/*
-	 * Set buffer_length and count_desired to the same value initially.
-	 * I/O routines should use count_desired, which will be the same in
-	 * most cases but may be reset (e.g. XFS recovery).
-	 */
-	bp->b_buffer_length = bp->b_count_desired = range_length;
-	bp->b_flags = flags;
-	bp->b_bn = XFS_BUF_DADDR_NULL;
-	atomic_set(&bp->b_pin_count, 0);
-	init_waitqueue_head(&bp->b_waiters);
-
-	XFS_STATS_INC(xb_create);
-
-	trace_xfs_buf_init(bp, _RET_IP_);
-}
-
-/*
- *	Allocate a page array capable of holding a specified number
- *	of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-	xfs_buf_t		*bp,
-	int			page_count,
-	xfs_buf_flags_t		flags)
-{
-	/* Make sure that we have a page list */
-	if (bp->b_pages == NULL) {
-		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-		bp->b_page_count = page_count;
-		if (page_count <= XB_PAGES) {
-			bp->b_pages = bp->b_page_array;
-		} else {
-			bp->b_pages = kmem_alloc(sizeof(struct page *) *
-					page_count, xb_to_km(flags));
-			if (bp->b_pages == NULL)
-				return -ENOMEM;
-		}
-		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-	}
-	return 0;
-}
-
-/*
- *	Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-	xfs_buf_t	*bp)
-{
-	if (bp->b_pages != bp->b_page_array) {
-		kmem_free(bp->b_pages);
-		bp->b_pages = NULL;
-	}
-}
-
-/*
- *	Releases the specified buffer.
- *
- * 	The modification state of any associated pages is left unchanged.
- * 	The buffer most not be on any hash - use xfs_buf_rele instead for
- * 	hashed and refcounted buffers
- */
-void
-xfs_buf_free(
-	xfs_buf_t		*bp)
-{
-	trace_xfs_buf_free(bp, _RET_IP_);
-
-	ASSERT(list_empty(&bp->b_lru));
-
-	if (bp->b_flags & _XBF_PAGES) {
-		uint		i;
-
-		if (xfs_buf_is_vmapped(bp))
-			vm_unmap_ram(bp->b_addr - bp->b_offset,
-					bp->b_page_count);
-
-		for (i = 0; i < bp->b_page_count; i++) {
-			struct page	*page = bp->b_pages[i];
-
-			__free_page(page);
-		}
-	} else if (bp->b_flags & _XBF_KMEM)
-		kmem_free(bp->b_addr);
-	_xfs_buf_free_pages(bp);
-	xfs_buf_deallocate(bp);
-}
-
-/*
- * Allocates all the pages for buffer in question and builds it's page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
-	xfs_buf_t		*bp,
-	uint			flags)
-{
-	size_t			size = bp->b_count_desired;
-	size_t			nbytes, offset;
-	gfp_t			gfp_mask = xb_to_gfp(flags);
-	unsigned short		page_count, i;
-	xfs_off_t		end;
-	int			error;
-
-	/*
-	 * for buffers that are contained within a single page, just allocate
-	 * the memory from the heap - there's no need for the complexity of
-	 * page arrays to keep allocation down to order 0.
-	 */
-	if (bp->b_buffer_length < PAGE_SIZE) {
-		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
-		if (!bp->b_addr) {
-			/* low memory - use alloc_page loop instead */
-			goto use_alloc_page;
-		}
-
-		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
-								PAGE_MASK) !=
-		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
-			/* b_addr spans two pages - use alloc_page instead */
-			kmem_free(bp->b_addr);
-			bp->b_addr = NULL;
-			goto use_alloc_page;
-		}
-		bp->b_offset = offset_in_page(bp->b_addr);
-		bp->b_pages = bp->b_page_array;
-		bp->b_pages[0] = virt_to_page(bp->b_addr);
-		bp->b_page_count = 1;
-		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
-		return 0;
-	}
-
-use_alloc_page:
-	end = bp->b_file_offset + bp->b_buffer_length;
-	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-	error = _xfs_buf_get_pages(bp, page_count, flags);
-	if (unlikely(error))
-		return error;
-
-	offset = bp->b_offset;
-	bp->b_flags |= _XBF_PAGES;
-
-	for (i = 0; i < bp->b_page_count; i++) {
-		struct page	*page;
-		uint		retries = 0;
-retry:
-		page = alloc_page(gfp_mask);
-		if (unlikely(page == NULL)) {
-			if (flags & XBF_READ_AHEAD) {
-				bp->b_page_count = i;
-				error = ENOMEM;
-				goto out_free_pages;
-			}
-
-			/*
-			 * This could deadlock.
-			 *
-			 * But until all the XFS lowlevel code is revamped to
-			 * handle buffer allocation failures we can't do much.
-			 */
-			if (!(++retries % 100))
-				xfs_err(NULL,
-		"possible memory allocation deadlock in %s (mode:0x%x)",
-					__func__, gfp_mask);
-
-			XFS_STATS_INC(xb_page_retries);
-			congestion_wait(BLK_RW_ASYNC, HZ/50);
-			goto retry;
-		}
-
-		XFS_STATS_INC(xb_page_found);
-
-		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-		size -= nbytes;
-		bp->b_pages[i] = page;
-		offset = 0;
-	}
-	return 0;
-
-out_free_pages:
-	for (i = 0; i < bp->b_page_count; i++)
-		__free_page(bp->b_pages[i]);
-	return error;
-}
-
-/*
- *	Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
-	xfs_buf_t		*bp,
-	uint			flags)
-{
-	ASSERT(bp->b_flags & _XBF_PAGES);
-	if (bp->b_page_count == 1) {
-		/* A single page buffer is always mappable */
-		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-		bp->b_flags |= XBF_MAPPED;
-	} else if (flags & XBF_MAPPED) {
-		int retried = 0;
-
-		do {
-			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-						-1, PAGE_KERNEL);
-			if (bp->b_addr)
-				break;
-			vm_unmap_aliases();
-		} while (retried++ <= 1);
-
-		if (!bp->b_addr)
-			return -ENOMEM;
-		bp->b_addr += bp->b_offset;
-		bp->b_flags |= XBF_MAPPED;
-	}
-
-	return 0;
-}
-
-/*
- *	Finding and Reading Buffers
- */
-
-/*
- *	Look up, and creates if absent, a lockable buffer for
- *	a given range of an inode.  The buffer is returned
- *	locked.	 If other overlapping buffers exist, they are
- *	released before the new buffer is created and locked,
- *	which may imply that this call will block until those buffers
- *	are unlocked.  No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
-	xfs_buftarg_t		*btp,	/* block device target		*/
-	xfs_off_t		ioff,	/* starting offset of range	*/
-	size_t			isize,	/* length of range		*/
-	xfs_buf_flags_t		flags,
-	xfs_buf_t		*new_bp)
-{
-	xfs_off_t		range_base;
-	size_t			range_length;
-	struct xfs_perag	*pag;
-	struct rb_node		**rbp;
-	struct rb_node		*parent;
-	xfs_buf_t		*bp;
-
-	range_base = (ioff << BBSHIFT);
-	range_length = (isize << BBSHIFT);
-
-	/* Check for IOs smaller than the sector size / not sector aligned */
-	ASSERT(!(range_length < (1 << btp->bt_sshift)));
-	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
-	/* get tree root */
-	pag = xfs_perag_get(btp->bt_mount,
-				xfs_daddr_to_agno(btp->bt_mount, ioff));
-
-	/* walk tree */
-	spin_lock(&pag->pag_buf_lock);
-	rbp = &pag->pag_buf_tree.rb_node;
-	parent = NULL;
-	bp = NULL;
-	while (*rbp) {
-		parent = *rbp;
-		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
-		if (range_base < bp->b_file_offset)
-			rbp = &(*rbp)->rb_left;
-		else if (range_base > bp->b_file_offset)
-			rbp = &(*rbp)->rb_right;
-		else {
-			/*
-			 * found a block offset match. If the range doesn't
-			 * match, the only way this is allowed is if the buffer
-			 * in the cache is stale and the transaction that made
-			 * it stale has not yet committed. i.e. we are
-			 * reallocating a busy extent. Skip this buffer and
-			 * continue searching to the right for an exact match.
-			 */
-			if (bp->b_buffer_length != range_length) {
-				ASSERT(bp->b_flags & XBF_STALE);
-				rbp = &(*rbp)->rb_right;
-				continue;
-			}
-			atomic_inc(&bp->b_hold);
-			goto found;
-		}
-	}
-
-	/* No match found */
-	if (new_bp) {
-		_xfs_buf_initialize(new_bp, btp, range_base,
-				range_length, flags);
-		rb_link_node(&new_bp->b_rbnode, parent, rbp);
-		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
-		/* the buffer keeps the perag reference until it is freed */
-		new_bp->b_pag = pag;
-		spin_unlock(&pag->pag_buf_lock);
-	} else {
-		XFS_STATS_INC(xb_miss_locked);
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
-	}
-	return new_bp;
-
-found:
-	spin_unlock(&pag->pag_buf_lock);
-	xfs_perag_put(pag);
-
-	if (!xfs_buf_trylock(bp)) {
-		if (flags & XBF_TRYLOCK) {
-			xfs_buf_rele(bp);
-			XFS_STATS_INC(xb_busy_locked);
-			return NULL;
-		}
-		xfs_buf_lock(bp);
-		XFS_STATS_INC(xb_get_locked_waited);
-	}
-
-	/*
-	 * if the buffer is stale, clear all the external state associated with
-	 * it. We need to keep flags such as how we allocated the buffer memory
-	 * intact here.
-	 */
-	if (bp->b_flags & XBF_STALE) {
-		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-		bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
-	}
-
-	trace_xfs_buf_find(bp, flags, _RET_IP_);
-	XFS_STATS_INC(xb_get_locked);
-	return bp;
-}
-
-/*
- *	Assembles a buffer covering the specified range.
- *	Storage in memory for all portions of the buffer will be allocated,
- *	although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get(
-	xfs_buftarg_t		*target,/* target for buffer		*/
-	xfs_off_t		ioff,	/* starting offset of range	*/
-	size_t			isize,	/* length of range		*/
-	xfs_buf_flags_t		flags)
-{
-	xfs_buf_t		*bp, *new_bp;
-	int			error = 0;
-
-	new_bp = xfs_buf_allocate(flags);
-	if (unlikely(!new_bp))
-		return NULL;
-
-	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-	if (bp == new_bp) {
-		error = xfs_buf_allocate_memory(bp, flags);
-		if (error)
-			goto no_buffer;
-	} else {
-		xfs_buf_deallocate(new_bp);
-		if (unlikely(bp == NULL))
-			return NULL;
-	}
-
-	if (!(bp->b_flags & XBF_MAPPED)) {
-		error = _xfs_buf_map_pages(bp, flags);
-		if (unlikely(error)) {
-			xfs_warn(target->bt_mount,
-				"%s: failed to map pages\n", __func__);
-			goto no_buffer;
-		}
-	}
-
-	XFS_STATS_INC(xb_get);
-
-	/*
-	 * Always fill in the block number now, the mapped cases can do
-	 * their own overlay of this later.
-	 */
-	bp->b_bn = ioff;
-	bp->b_count_desired = bp->b_buffer_length;
-
-	trace_xfs_buf_get(bp, flags, _RET_IP_);
-	return bp;
-
- no_buffer:
-	if (flags & (XBF_LOCK | XBF_TRYLOCK))
-		xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-	return NULL;
-}
-
-STATIC int
-_xfs_buf_read(
-	xfs_buf_t		*bp,
-	xfs_buf_flags_t		flags)
-{
-	int			status;
-
-	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-
-	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
-	status = xfs_buf_iorequest(bp);
-	if (status || bp->b_error || (flags & XBF_ASYNC))
-		return status;
-	return xfs_buf_iowait(bp);
-}
-
-xfs_buf_t *
-xfs_buf_read(
-	xfs_buftarg_t		*target,
-	xfs_off_t		ioff,
-	size_t			isize,
-	xfs_buf_flags_t		flags)
-{
-	xfs_buf_t		*bp;
-
-	flags |= XBF_READ;
-
-	bp = xfs_buf_get(target, ioff, isize, flags);
-	if (bp) {
-		trace_xfs_buf_read(bp, flags, _RET_IP_);
-
-		if (!XFS_BUF_ISDONE(bp)) {
-			XFS_STATS_INC(xb_get_read);
-			_xfs_buf_read(bp, flags);
-		} else if (flags & XBF_ASYNC) {
-			/*
-			 * Read ahead call which is already satisfied,
-			 * drop the buffer
-			 */
-			goto no_buffer;
-		} else {
-			/* We do not want read in the flags */
-			bp->b_flags &= ~XBF_READ;
-		}
-	}
-
-	return bp;
-
- no_buffer:
-	if (flags & (XBF_LOCK | XBF_TRYLOCK))
-		xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-	return NULL;
-}
-
-/*
- *	If we are not low on memory then do the readahead in a deadlock
- *	safe manner.
- */
-void
-xfs_buf_readahead(
-	xfs_buftarg_t		*target,
-	xfs_off_t		ioff,
-	size_t			isize)
-{
-	if (bdi_read_congested(target->bt_bdi))
-		return;
-
-	xfs_buf_read(target, ioff, isize,
-		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
-}
-
-/*
- * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
- */
-struct xfs_buf *
-xfs_buf_read_uncached(
-	struct xfs_mount	*mp,
-	struct xfs_buftarg	*target,
-	xfs_daddr_t		daddr,
-	size_t			length,
-	int			flags)
-{
-	xfs_buf_t		*bp;
-	int			error;
-
-	bp = xfs_buf_get_uncached(target, length, flags);
-	if (!bp)
-		return NULL;
-
-	/* set up the buffer for a read IO */
-	XFS_BUF_SET_ADDR(bp, daddr);
-	XFS_BUF_READ(bp);
-
-	xfsbdstrat(mp, bp);
-	error = xfs_buf_iowait(bp);
-	if (error || bp->b_error) {
-		xfs_buf_relse(bp);
-		return NULL;
-	}
-	return bp;
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
-	size_t			len,
-	xfs_buftarg_t		*target)
-{
-	xfs_buf_t		*bp;
-
-	bp = xfs_buf_allocate(0);
-	if (bp)
-		_xfs_buf_initialize(bp, target, 0, len, 0);
-	return bp;
-}
-
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
-	struct xfs_buf		*bp,
-	size_t			len)
-{
-	if (bp->b_pages)
-		_xfs_buf_free_pages(bp);
-
-	bp->b_pages = NULL;
-	bp->b_page_count = 0;
-	bp->b_addr = NULL;
-	bp->b_file_offset = 0;
-	bp->b_buffer_length = bp->b_count_desired = len;
-	bp->b_bn = XFS_BUF_DADDR_NULL;
-	bp->b_flags &= ~XBF_MAPPED;
-}
-
-static inline struct page *
-mem_to_page(
-	void			*addr)
-{
-	if ((!is_vmalloc_addr(addr))) {
-		return virt_to_page(addr);
-	} else {
-		return vmalloc_to_page(addr);
-	}
-}
-
-int
-xfs_buf_associate_memory(
-	xfs_buf_t		*bp,
-	void			*mem,
-	size_t			len)
-{
-	int			rval;
-	int			i = 0;
-	unsigned long		pageaddr;
-	unsigned long		offset;
-	size_t			buflen;
-	int			page_count;
-
-	pageaddr = (unsigned long)mem & PAGE_MASK;
-	offset = (unsigned long)mem - pageaddr;
-	buflen = PAGE_ALIGN(len + offset);
-	page_count = buflen >> PAGE_SHIFT;
-
-	/* Free any previous set of page pointers */
-	if (bp->b_pages)
-		_xfs_buf_free_pages(bp);
-
-	bp->b_pages = NULL;
-	bp->b_addr = mem;
-
-	rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
-	if (rval)
-		return rval;
-
-	bp->b_offset = offset;
-
-	for (i = 0; i < bp->b_page_count; i++) {
-		bp->b_pages[i] = mem_to_page((void *)pageaddr);
-		pageaddr += PAGE_SIZE;
-	}
-
-	bp->b_count_desired = len;
-	bp->b_buffer_length = buflen;
-	bp->b_flags |= XBF_MAPPED;
-
-	return 0;
-}
-
-xfs_buf_t *
-xfs_buf_get_uncached(
-	struct xfs_buftarg	*target,
-	size_t			len,
-	int			flags)
-{
-	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
-	int			error, i;
-	xfs_buf_t		*bp;
-
-	bp = xfs_buf_allocate(0);
-	if (unlikely(bp == NULL))
-		goto fail;
-	_xfs_buf_initialize(bp, target, 0, len, 0);
-
-	error = _xfs_buf_get_pages(bp, page_count, 0);
-	if (error)
-		goto fail_free_buf;
-
-	for (i = 0; i < page_count; i++) {
-		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i])
-			goto fail_free_mem;
-	}
-	bp->b_flags |= _XBF_PAGES;
-
-	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
-	if (unlikely(error)) {
-		xfs_warn(target->bt_mount,
-			"%s: failed to map pages\n", __func__);
-		goto fail_free_mem;
-	}
-
-	trace_xfs_buf_get_uncached(bp, _RET_IP_);
-	return bp;
-
- fail_free_mem:
-	while (--i >= 0)
-		__free_page(bp->b_pages[i]);
-	_xfs_buf_free_pages(bp);
- fail_free_buf:
-	xfs_buf_deallocate(bp);
- fail:
-	return NULL;
-}
-
-/*
- *	Increment reference count on buffer, to hold the buffer concurrently
- *	with another thread which may release (free) the buffer asynchronously.
- *	Must hold the buffer already to call this function.
- */
-void
-xfs_buf_hold(
-	xfs_buf_t		*bp)
-{
-	trace_xfs_buf_hold(bp, _RET_IP_);
-	atomic_inc(&bp->b_hold);
-}
-
-/*
- *	Releases a hold on the specified buffer.  If the
- *	the hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
-	xfs_buf_t		*bp)
-{
-	struct xfs_perag	*pag = bp->b_pag;
-
-	trace_xfs_buf_rele(bp, _RET_IP_);
-
-	if (!pag) {
-		ASSERT(list_empty(&bp->b_lru));
-		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
-		if (atomic_dec_and_test(&bp->b_hold))
-			xfs_buf_free(bp);
-		return;
-	}
-
-	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
-	ASSERT(atomic_read(&bp->b_hold) > 0);
-	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-		if (!(bp->b_flags & XBF_STALE) &&
-			   atomic_read(&bp->b_lru_ref)) {
-			xfs_buf_lru_add(bp);
-			spin_unlock(&pag->pag_buf_lock);
-		} else {
-			xfs_buf_lru_del(bp);
-			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
-			spin_unlock(&pag->pag_buf_lock);
-			xfs_perag_put(pag);
-			xfs_buf_free(bp);
-		}
-	}
-}
-
-
-/*
- *	Lock a buffer object, if it is not already locked.
- *
- *	If we come across a stale, pinned, locked buffer, we know that we are
- *	being asked to lock a buffer that has been reallocated. Because it is
- *	pinned, we know that the log has not been pushed to disk and hence it
- *	will still be locked.  Rather than continuing to have trylock attempts
- *	fail until someone else pushes the log, push it ourselves before
- *	returning.  This means that the xfsaild will not get stuck trying
- *	to push on stale inode buffers.
- */
-int
-xfs_buf_trylock(
-	struct xfs_buf		*bp)
-{
-	int			locked;
-
-	locked = down_trylock(&bp->b_sema) == 0;
-	if (locked)
-		XB_SET_OWNER(bp);
-	else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-		xfs_log_force(bp->b_target->bt_mount, 0);
-
-	trace_xfs_buf_trylock(bp, _RET_IP_);
-	return locked;
-}
-
-/*
- *	Lock a buffer object.
- *
- *	If we come across a stale, pinned, locked buffer, we know that we
- *	are being asked to lock a buffer that has been reallocated. Because
- *	it is pinned, we know that the log has not been pushed to disk and
- *	hence it will still be locked. Rather than sleeping until someone
- *	else pushes the log, push it ourselves before trying to get the lock.
- */
-void
-xfs_buf_lock(
-	struct xfs_buf		*bp)
-{
-	trace_xfs_buf_lock(bp, _RET_IP_);
-
-	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-		xfs_log_force(bp->b_target->bt_mount, 0);
-	down(&bp->b_sema);
-	XB_SET_OWNER(bp);
-
-	trace_xfs_buf_lock_done(bp, _RET_IP_);
-}
-
-/*
- *	Releases the lock on the buffer object.
- *	If the buffer is marked delwri but is not queued, do so before we
- *	unlock the buffer as we need to set flags correctly.  We also need to
- *	take a reference for the delwri queue because the unlocker is going to
- *	drop their's and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
-	struct xfs_buf		*bp)
-{
-	if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-		atomic_inc(&bp->b_hold);
-		bp->b_flags |= XBF_ASYNC;
-		xfs_buf_delwri_queue(bp, 0);
-	}
-
-	XB_CLEAR_OWNER(bp);
-	up(&bp->b_sema);
-
-	trace_xfs_buf_unlock(bp, _RET_IP_);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
-	xfs_buf_t		*bp)
-{
-	DECLARE_WAITQUEUE	(wait, current);
-
-	if (atomic_read(&bp->b_pin_count) == 0)
-		return;
-
-	add_wait_queue(&bp->b_waiters, &wait);
-	for (;;) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&bp->b_pin_count) == 0)
-			break;
-		io_schedule();
-	}
-	remove_wait_queue(&bp->b_waiters, &wait);
-	set_current_state(TASK_RUNNING);
-}
-
-/*
- *	Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
-	struct work_struct	*work)
-{
-	xfs_buf_t		*bp =
-		container_of(work, xfs_buf_t, b_iodone_work);
-
-	if (bp->b_iodone)
-		(*(bp->b_iodone))(bp);
-	else if (bp->b_flags & XBF_ASYNC)
-		xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
-	xfs_buf_t		*bp,
-	int			schedule)
-{
-	trace_xfs_buf_iodone(bp, _RET_IP_);
-
-	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-	if (bp->b_error == 0)
-		bp->b_flags |= XBF_DONE;
-
-	if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
-		if (schedule) {
-			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-		} else {
-			xfs_buf_iodone_work(&bp->b_iodone_work);
-		}
-	} else {
-		complete(&bp->b_iowait);
-	}
-}
-
-void
-xfs_buf_ioerror(
-	xfs_buf_t		*bp,
-	int			error)
-{
-	ASSERT(error >= 0 && error <= 0xffff);
-	bp->b_error = (unsigned short)error;
-	trace_xfs_buf_ioerror(bp, error, _RET_IP_);
-}
-
-int
-xfs_bwrite(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
-{
-	int			error;
-
-	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
-
-	xfs_buf_delwri_dequeue(bp);
-	xfs_bdstrat_cb(bp);
-
-	error = xfs_buf_iowait(bp);
-	if (error)
-		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-	xfs_buf_relse(bp);
-	return error;
-}
-
-void
-xfs_bdwrite(
-	void			*mp,
-	struct xfs_buf		*bp)
-{
-	trace_xfs_buf_bdwrite(bp, _RET_IP_);
-
-	bp->b_flags &= ~XBF_READ;
-	bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-
-	xfs_buf_delwri_queue(bp, 1);
-}
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-	xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-	/*
-	 * No need to wait until the buffer is unpinned, we aren't flushing it.
-	 */
-	xfs_buf_ioerror(bp, EIO);
-
-	/*
-	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDELAYWRITE(bp);
-	XFS_BUF_UNDONE(bp);
-	XFS_BUF_STALE(bp);
-
-	xfs_buf_ioend(bp, 0);
-
-	return EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-STATIC int
-xfs_bioerror_relse(
-	struct xfs_buf	*bp)
-{
-	int64_t		fl = bp->b_flags;
-	/*
-	 * No need to wait until the buffer is unpinned.
-	 * We aren't flushing it.
-	 *
-	 * chunkhold expects B_DONE to be set, whether
-	 * we actually finish the I/O or not. We don't want to
-	 * change that interface.
-	 */
-	XFS_BUF_UNREAD(bp);
-	XFS_BUF_UNDELAYWRITE(bp);
-	XFS_BUF_DONE(bp);
-	XFS_BUF_STALE(bp);
-	bp->b_iodone = NULL;
-	if (!(fl & XBF_ASYNC)) {
-		/*
-		 * Mark b_error and B_ERROR _both_.
-		 * Lot's of chunkcache code assumes that.
-		 * There's no reason to mark error for
-		 * ASYNC buffers.
-		 */
-		xfs_buf_ioerror(bp, EIO);
-		XFS_BUF_FINISH_IOWAIT(bp);
-	} else {
-		xfs_buf_relse(bp);
-	}
-
-	return EIO;
-}
-
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
-xfs_bdstrat_cb(
-	struct xfs_buf	*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		/*
-		 * Metadata write that didn't get logged but
-		 * written delayed anyway. These aren't associated
-		 * with a transaction, and can be ignored.
-		 */
-		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-			return xfs_bioerror_relse(bp);
-		else
-			return xfs_bioerror(bp);
-	}
-
-	xfs_buf_iorequest(bp);
-	return 0;
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		xfs_bioerror_relse(bp);
-		return;
-	}
-
-	xfs_buf_iorequest(bp);
-}
-
-STATIC void
-_xfs_buf_ioend(
-	xfs_buf_t		*bp,
-	int			schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
-xfs_buf_bio_end_io(
-	struct bio		*bio,
-	int			error)
-{
-	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private;
-
-	xfs_buf_ioerror(bp, -error);
-
-	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
-		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
-
-	_xfs_buf_ioend(bp, 1);
-	bio_put(bio);
-}
-
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t		*bp)
-{
-	int			rw, map_i, total_nr_pages, nr_pages;
-	struct bio		*bio;
-	int			offset = bp->b_offset;
-	int			size = bp->b_count_desired;
-	sector_t		sector = bp->b_bn;
-
-	total_nr_pages = bp->b_page_count;
-	map_i = 0;
-
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
-	}
-
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
-
-next_chunk:
-	atomic_inc(&bp->b_io_remaining);
-	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
-	if (nr_pages > total_nr_pages)
-		nr_pages = total_nr_pages;
-
-	bio = bio_alloc(GFP_NOIO, nr_pages);
-	bio->bi_bdev = bp->b_target->bt_bdev;
-	bio->bi_sector = sector;
-	bio->bi_end_io = xfs_buf_bio_end_io;
-	bio->bi_private = bp;
-
-
-	for (; size && nr_pages; nr_pages--, map_i++) {
-		int	rbytes, nbytes = PAGE_SIZE - offset;
-
-		if (nbytes > size)
-			nbytes = size;
-
-		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-		if (rbytes < nbytes)
-			break;
-
-		offset = 0;
-		sector += nbytes >> BBSHIFT;
-		size -= nbytes;
-		total_nr_pages--;
-	}
-
-	if (likely(bio->bi_size)) {
-		if (xfs_buf_is_vmapped(bp)) {
-			flush_kernel_vmap_range(bp->b_addr,
-						xfs_buf_vmap_len(bp));
-		}
-		submit_bio(rw, bio);
-		if (size)
-			goto next_chunk;
-	} else {
-		xfs_buf_ioerror(bp, EIO);
-		bio_put(bio);
-	}
-}
-
-int
-xfs_buf_iorequest(
-	xfs_buf_t		*bp)
-{
-	trace_xfs_buf_iorequest(bp, _RET_IP_);
-
-	if (bp->b_flags & XBF_DELWRI) {
-		xfs_buf_delwri_queue(bp, 1);
-		return 0;
-	}
-
-	if (bp->b_flags & XBF_WRITE) {
-		xfs_buf_wait_unpin(bp);
-	}
-
-	xfs_buf_hold(bp);
-
-	/* Set the count to 1 initially, this will stop an I/O
-	 * completion callout which happens before we have started
-	 * all the I/O from calling xfs_buf_ioend too early.
-	 */
-	atomic_set(&bp->b_io_remaining, 1);
-	_xfs_buf_ioapply(bp);
-	_xfs_buf_ioend(bp, 0);
-
-	xfs_buf_rele(bp);
-	return 0;
-}
-
-/*
- *	Waits for I/O to complete on the buffer supplied.
- *	It returns immediately if no I/O is pending.
- *	It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
-	xfs_buf_t		*bp)
-{
-	trace_xfs_buf_iowait(bp, _RET_IP_);
-
-	wait_for_completion(&bp->b_iowait);
-
-	trace_xfs_buf_iowait_done(bp, _RET_IP_);
-	return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
-	xfs_buf_t		*bp,
-	size_t			offset)
-{
-	struct page		*page;
-
-	if (bp->b_flags & XBF_MAPPED)
-		return bp->b_addr + offset;
-
-	offset += bp->b_offset;
-	page = bp->b_pages[offset >> PAGE_SHIFT];
-	return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
-}
-
-/*
- *	Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
-	xfs_buf_t		*bp,	/* buffer to process		*/
-	size_t			boff,	/* starting buffer offset	*/
-	size_t			bsize,	/* length to copy		*/
-	void			*data,	/* data address			*/
-	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
-{
-	size_t			bend, cpoff, csize;
-	struct page		*page;
-
-	bend = boff + bsize;
-	while (boff < bend) {
-		page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-		cpoff = xfs_buf_poff(boff + bp->b_offset);
-		csize = min_t(size_t,
-			      PAGE_SIZE-cpoff, bp->b_count_desired-boff);
-
-		ASSERT(((csize + cpoff) <= PAGE_SIZE));
-
-		switch (mode) {
-		case XBRW_ZERO:
-			memset(page_address(page) + cpoff, 0, csize);
-			break;
-		case XBRW_READ:
-			memcpy(data, page_address(page) + cpoff, csize);
-			break;
-		case XBRW_WRITE:
-			memcpy(page_address(page) + cpoff, data, csize);
-		}
-
-		boff += csize;
-		data += csize;
-	}
-}
-
-/*
- *	Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but have not yet
- * returned. These buffers will have an elevated hold count, so wait on those
- * while freeing all the buffers only held by the LRU.
- */
-void
-xfs_wait_buftarg(
-	struct xfs_buftarg	*btp)
-{
-	struct xfs_buf		*bp;
-
-restart:
-	spin_lock(&btp->bt_lru_lock);
-	while (!list_empty(&btp->bt_lru)) {
-		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-		if (atomic_read(&bp->b_hold) > 1) {
-			spin_unlock(&btp->bt_lru_lock);
-			delay(100);
-			goto restart;
-		}
-		/*
-		 * clear the LRU reference count so the bufer doesn't get
-		 * ignored in xfs_buf_rele().
-		 */
-		atomic_set(&bp->b_lru_ref, 0);
-		spin_unlock(&btp->bt_lru_lock);
-		xfs_buf_rele(bp);
-		spin_lock(&btp->bt_lru_lock);
-	}
-	spin_unlock(&btp->bt_lru_lock);
-}
-
-int
-xfs_buftarg_shrink(
-	struct shrinker		*shrink,
-	struct shrink_control	*sc)
-{
-	struct xfs_buftarg	*btp = container_of(shrink,
-					struct xfs_buftarg, bt_shrinker);
-	struct xfs_buf		*bp;
-	int nr_to_scan = sc->nr_to_scan;
-	LIST_HEAD(dispose);
-
-	if (!nr_to_scan)
-		return btp->bt_lru_nr;
-
-	spin_lock(&btp->bt_lru_lock);
-	while (!list_empty(&btp->bt_lru)) {
-		if (nr_to_scan-- <= 0)
-			break;
-
-		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
-		/*
-		 * Decrement the b_lru_ref count unless the value is already
-		 * zero. If the value is already zero, we need to reclaim the
-		 * buffer, otherwise it gets another trip through the LRU.
-		 */
-		if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-			list_move_tail(&bp->b_lru, &btp->bt_lru);
-			continue;
-		}
-
-		/*
-		 * remove the buffer from the LRU now to avoid needing another
-		 * lock round trip inside xfs_buf_rele().
-		 */
-		list_move(&bp->b_lru, &dispose);
-		btp->bt_lru_nr--;
-	}
-	spin_unlock(&btp->bt_lru_lock);
-
-	while (!list_empty(&dispose)) {
-		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
-		list_del_init(&bp->b_lru);
-		xfs_buf_rele(bp);
-	}
-
-	return btp->bt_lru_nr;
-}
-
-void
-xfs_free_buftarg(
-	struct xfs_mount	*mp,
-	struct xfs_buftarg	*btp)
-{
-	unregister_shrinker(&btp->bt_shrinker);
-
-	xfs_flush_buftarg(btp, 1);
-	if (mp->m_flags & XFS_MOUNT_BARRIER)
-		xfs_blkdev_issue_flush(btp);
-
-	kthread_stop(btp->bt_task);
-	kmem_free(btp);
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
-	xfs_buftarg_t		*btp,
-	unsigned int		blocksize,
-	unsigned int		sectorsize,
-	int			verbose)
-{
-	btp->bt_bsize = blocksize;
-	btp->bt_sshift = ffs(sectorsize) - 1;
-	btp->bt_smask = sectorsize - 1;
-
-	if (set_blocksize(btp->bt_bdev, sectorsize)) {
-		xfs_warn(btp->bt_mount,
-			"Cannot set_blocksize to %u on device %s\n",
-			sectorsize, xfs_buf_target_name(btp));
-		return EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- *	When allocating the initial buffer target we have not yet
- *	read in the superblock, so don't know what sized sectors
- *	are being used is at this early stage.  Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
-	xfs_buftarg_t		*btp,
-	struct block_device	*bdev)
-{
-	return xfs_setsize_buftarg_flags(btp,
-			PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
-	xfs_buftarg_t		*btp,
-	unsigned int		blocksize,
-	unsigned int		sectorsize)
-{
-	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
-	xfs_buftarg_t		*btp,
-	const char		*fsname)
-{
-	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-	spin_lock_init(&btp->bt_delwrite_lock);
-	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task))
-		return PTR_ERR(btp->bt_task);
-	return 0;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
-	struct xfs_mount	*mp,
-	struct block_device	*bdev,
-	int			external,
-	const char		*fsname)
-{
-	xfs_buftarg_t		*btp;
-
-	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
-	btp->bt_mount = mp;
-	btp->bt_dev =  bdev->bd_dev;
-	btp->bt_bdev = bdev;
-	btp->bt_bdi = blk_get_backing_dev_info(bdev);
-	if (!btp->bt_bdi)
-		goto error;
-
-	INIT_LIST_HEAD(&btp->bt_lru);
-	spin_lock_init(&btp->bt_lru_lock);
-	if (xfs_setsize_buftarg_early(btp, bdev))
-		goto error;
-	if (xfs_alloc_delwrite_queue(btp, fsname))
-		goto error;
-	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
-	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&btp->bt_shrinker);
-	return btp;
-
-error:
-	kmem_free(btp);
-	return NULL;
-}
-
-
-/*
- *	Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
-	xfs_buf_t		*bp,
-	int			unlock)
-{
-	struct list_head	*dwq = &bp->b_target->bt_delwrite_queue;
-	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;
-
-	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
-	ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
-	spin_lock(dwlk);
-	/* If already in the queue, dequeue and place at tail */
-	if (!list_empty(&bp->b_list)) {
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		if (unlock)
-			atomic_dec(&bp->b_hold);
-		list_del(&bp->b_list);
-	}
-
-	if (list_empty(dwq)) {
-		/* start xfsbufd as it is about to have something to do */
-		wake_up_process(bp->b_target->bt_task);
-	}
-
-	bp->b_flags |= _XBF_DELWRI_Q;
-	list_add_tail(&bp->b_list, dwq);
-	bp->b_queuetime = jiffies;
-	spin_unlock(dwlk);
-
-	if (unlock)
-		xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
-	xfs_buf_t		*bp)
-{
-	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;
-	int			dequeued = 0;
-
-	spin_lock(dwlk);
-	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-		list_del_init(&bp->b_list);
-		dequeued = 1;
-	}
-	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-	spin_unlock(dwlk);
-
-	if (dequeued)
-		xfs_buf_rele(bp);
-
-	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time the
- * xfsbufd sees it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-	struct xfs_buf	*bp)
-{
-	struct xfs_buftarg *btp = bp->b_target;
-	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
-	ASSERT(bp->b_flags & XBF_DELWRI);
-	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
-	/*
-	 * Check the buffer age before locking the delayed write queue as we
-	 * don't need to promote buffers that are already past the flush age.
-	 */
-	if (bp->b_queuetime < jiffies - age)
-		return;
-	bp->b_queuetime = jiffies - age;
-	spin_lock(&btp->bt_delwrite_lock);
-	list_move(&bp->b_list, &btp->bt_delwrite_queue);
-	spin_unlock(&btp->bt_delwrite_lock);
-}
-
-STATIC void
-xfs_buf_runall_queues(
-	struct workqueue_struct	*queue)
-{
-	flush_workqueue(queue);
-}
-
-/*
- * Move as many buffers as specified to the supplied list
- * idicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-	xfs_buftarg_t	*target,
-	struct list_head *list,
-	unsigned long	age)
-{
-	xfs_buf_t	*bp, *n;
-	struct list_head *dwq = &target->bt_delwrite_queue;
-	spinlock_t	*dwlk = &target->bt_delwrite_lock;
-	int		skipped = 0;
-	int		force;
-
-	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	INIT_LIST_HEAD(list);
-	spin_lock(dwlk);
-	list_for_each_entry_safe(bp, n, dwq, b_list) {
-		ASSERT(bp->b_flags & XBF_DELWRI);
-
-		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
-			if (!force &&
-			    time_before(jiffies, bp->b_queuetime + age)) {
-				xfs_buf_unlock(bp);
-				break;
-			}
-
-			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-			bp->b_flags |= XBF_WRITE;
-			list_move_tail(&bp->b_list, list);
-			trace_xfs_buf_delwri_split(bp, _RET_IP_);
-		} else
-			skipped++;
-	}
-	spin_unlock(dwlk);
-
-	return skipped;
-
-}
-
-/*
- * Compare function is more complex than it needs to be because
- * the return value is only 32 bits and we are doing comparisons
- * on 64 bit values
- */
-static int
-xfs_buf_cmp(
-	void		*priv,
-	struct list_head *a,
-	struct list_head *b)
-{
-	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
-	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
-	xfs_daddr_t		diff;
-
-	diff = ap->b_bn - bp->b_bn;
-	if (diff < 0)
-		return -1;
-	if (diff > 0)
-		return 1;
-	return 0;
-}
-
-STATIC int
-xfsbufd(
-	void		*data)
-{
-	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-
-	current->flags |= PF_MEMALLOC;
-
-	set_freezable();
-
-	do {
-		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		struct list_head tmp;
-		struct blk_plug plug;
-
-		if (unlikely(freezing(current))) {
-			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-			refrigerator();
-		} else {
-			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-		}
-
-		/* sleep for a long time if there is nothing to do. */
-		if (list_empty(&target->bt_delwrite_queue))
-			tout = MAX_SCHEDULE_TIMEOUT;
-		schedule_timeout_interruptible(tout);
-
-		xfs_buf_delwri_split(target, &tmp, age);
-		list_sort(NULL, &tmp, xfs_buf_cmp);
-
-		blk_start_plug(&plug);
-		while (!list_empty(&tmp)) {
-			struct xfs_buf *bp;
-			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
-			list_del_init(&bp->b_list);
-			xfs_bdstrat_cb(bp);
-		}
-		blk_finish_plug(&plug);
-	} while (!kthread_should_stop());
-
-	return 0;
-}
-
-/*
- *	Go through all incore buffers, and release buffers if they belong to
- *	the given device. This is used in filesystem error handling to
- *	preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
-	xfs_buftarg_t	*target,
-	int		wait)
-{
-	xfs_buf_t	*bp;
-	int		pincount = 0;
-	LIST_HEAD(tmp_list);
-	LIST_HEAD(wait_list);
-	struct blk_plug plug;
-
-	xfs_buf_runall_queues(xfsconvertd_workqueue);
-	xfs_buf_runall_queues(xfsdatad_workqueue);
-	xfs_buf_runall_queues(xfslogd_workqueue);
-
-	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
-
-	/*
-	 * Dropped the delayed write list lock, now walk the temporary list.
-	 * All I/O is issued async and then if we need to wait for completion
-	 * we do that after issuing all the IO.
-	 */
-	list_sort(NULL, &tmp_list, xfs_buf_cmp);
-
-	blk_start_plug(&plug);
-	while (!list_empty(&tmp_list)) {
-		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-		ASSERT(target == bp->b_target);
-		list_del_init(&bp->b_list);
-		if (wait) {
-			bp->b_flags &= ~XBF_ASYNC;
-			list_add(&bp->b_list, &wait_list);
-		}
-		xfs_bdstrat_cb(bp);
-	}
-	blk_finish_plug(&plug);
-
-	if (wait) {
-		/* Wait for IO to complete. */
-		while (!list_empty(&wait_list)) {
-			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-			list_del_init(&bp->b_list);
-			xfs_buf_iowait(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
-	return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
-	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-						KM_ZONE_HWALIGN, NULL);
-	if (!xfs_buf_zone)
-		goto out;
-
-	xfslogd_workqueue = alloc_workqueue("xfslogd",
-					WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
-	if (!xfslogd_workqueue)
-		goto out_free_buf_zone;
-
-	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-	if (!xfsdatad_workqueue)
-		goto out_destroy_xfslogd_workqueue;
-
-	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-						WQ_MEM_RECLAIM, 1);
-	if (!xfsconvertd_workqueue)
-		goto out_destroy_xfsdatad_workqueue;
-
-	return 0;
-
- out_destroy_xfsdatad_workqueue:
-	destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-	destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
-	kmem_zone_destroy(xfs_buf_zone);
- out:
-	return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
-	destroy_workqueue(xfsconvertd_workqueue);
-	destroy_workqueue(xfsdatad_workqueue);
-	destroy_workqueue(xfslogd_workqueue);
-	kmem_zone_destroy(xfs_buf_zone);
-}
-
-#ifdef CONFIG_KDB_MODULES
-struct list_head *
-xfs_get_buftarg_list(void)
-{
-	return &xfs_buftarg_list;
-}
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
deleted file mode 100644
index 620972b..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- *	Base types
- */
-
-#define XFS_BUF_DADDR_NULL	((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
-	XBRW_READ = 1,			/* transfer into target memory */
-	XBRW_WRITE = 2,			/* transfer from target memory */
-	XBRW_ZERO = 3,			/* Zero target memory */
-} xfs_buf_rw_t;
-
-#define XBF_READ	(1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD	(1 << 2) /* asynchronous read-ahead */
-#define XBF_MAPPED	(1 << 3) /* buffer mapped (b_addr valid) */
-#define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI	(1 << 6) /* buffer has dirty pages */
-#define XBF_STALE	(1 << 7) /* buffer has been staled, do not find it */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA		(1 << 11)/* force cache write through mode */
-#define XBF_FLUSH	(1 << 12)/* flush the disk cache before a write */
-
-/* flags used only as arguments to access routines */
-#define XBF_LOCK	(1 << 15)/* lock requested */
-#define XBF_TRYLOCK	(1 << 16)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK	(1 << 17)/* do not block in current thread */
-
-/* flags used only internally */
-#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q	(1 << 22)/* buffer on delwri queue */
-
-typedef unsigned int xfs_buf_flags_t;
-
-#define XFS_BUF_FLAGS \
-	{ XBF_READ,		"READ" }, \
-	{ XBF_WRITE,		"WRITE" }, \
-	{ XBF_READ_AHEAD,	"READ_AHEAD" }, \
-	{ XBF_MAPPED,		"MAPPED" }, \
-	{ XBF_ASYNC,		"ASYNC" }, \
-	{ XBF_DONE,		"DONE" }, \
-	{ XBF_DELWRI,		"DELWRI" }, \
-	{ XBF_STALE,		"STALE" }, \
-	{ XBF_SYNCIO,		"SYNCIO" }, \
-	{ XBF_FUA,		"FUA" }, \
-	{ XBF_FLUSH,		"FLUSH" }, \
-	{ XBF_LOCK,		"LOCK" },  	/* should never be set */\
-	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* ditto */\
-	{ XBF_DONT_BLOCK,	"DONT_BLOCK" },	/* ditto */\
-	{ _XBF_PAGES,		"PAGES" }, \
-	{ _XBF_KMEM,		"KMEM" }, \
-	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
-
-typedef enum {
-	XBT_FORCE_SLEEP = 0,
-	XBT_FORCE_FLUSH = 1,
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_buftarg {
-	dev_t			bt_dev;
-	struct block_device	*bt_bdev;
-	struct backing_dev_info	*bt_bdi;
-	struct xfs_mount	*bt_mount;
-	unsigned int		bt_bsize;
-	unsigned int		bt_sshift;
-	size_t			bt_smask;
-
-	/* per device delwri queue */
-	struct task_struct	*bt_task;
-	struct list_head	bt_delwrite_queue;
-	spinlock_t		bt_delwrite_lock;
-	unsigned long		bt_flags;
-
-	/* LRU control structures */
-	struct shrinker		bt_shrinker;
-	struct list_head	bt_lru;
-	spinlock_t		bt_lru_lock;
-	unsigned int		bt_lru_nr;
-} xfs_buftarg_t;
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-
-#define XB_PAGES	2
-
-typedef struct xfs_buf {
-	/*
-	 * first cacheline holds all the fields needed for an uncontended cache
-	 * hit to be fully processed. The semaphore straddles the cacheline
-	 * boundary, but the counter and lock sits on the first cacheline,
-	 * which is the only bit that is touched if we hit the semaphore
-	 * fast-path on locking.
-	 */
-	struct rb_node		b_rbnode;	/* rbtree node */
-	xfs_off_t		b_file_offset;	/* offset in file */
-	size_t			b_buffer_length;/* size of buffer in bytes */
-	atomic_t		b_hold;		/* reference count */
-	atomic_t		b_lru_ref;	/* lru reclaim ref count */
-	xfs_buf_flags_t		b_flags;	/* status flags */
-	struct semaphore	b_sema;		/* semaphore for lockables */
-
-	struct list_head	b_lru;		/* lru list */
-	wait_queue_head_t	b_waiters;	/* unpin waiters */
-	struct list_head	b_list;
-	struct xfs_perag	*b_pag;		/* contains rbtree root */
-	xfs_buftarg_t		*b_target;	/* buffer target (device) */
-	xfs_daddr_t		b_bn;		/* block number for I/O */
-	size_t			b_count_desired;/* desired transfer size */
-	void			*b_addr;	/* virtual address of buffer */
-	struct work_struct	b_iodone_work;
-	xfs_buf_iodone_t	b_iodone;	/* I/O completion function */
-	struct completion	b_iowait;	/* queue for I/O waiters */
-	void			*b_fspriv;
-	struct xfs_trans	*b_transp;
-	struct page		**b_pages;	/* array of page pointers */
-	struct page		*b_page_array[XB_PAGES]; /* inline pages */
-	unsigned long		b_queuetime;	/* time buffer was queued */
-	atomic_t		b_pin_count;	/* pin count */
-	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
-	unsigned int		b_page_count;	/* size of page array */
-	unsigned int		b_offset;	/* page offset in first page */
-	unsigned short		b_error;	/* error code on I/O */
-#ifdef XFS_BUF_LOCK_TRACKING
-	int			b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-				xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-	_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
-				xfs_buf_flags_t);
-extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
-				xfs_buf_flags_t);
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
-extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
-				struct xfs_buftarg *target,
-				xfs_daddr_t daddr, size_t length, int flags);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-#define xfs_buf_islocked(bp) \
-	((bp)->b_sema.count <= 0)
-
-/* Buffer Read and Write Routines */
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern void xfs_buf_ioend(xfs_buf_t *,	int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-				xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
-	return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void xfs_buf_delwri_promote(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-static inline const char *
-xfs_buf_target_name(struct xfs_buftarg *target)
-{
-	static char __b[BDEVNAME_SIZE];
-
-	return bdevname(target->bt_bdev, __b);
-}
-
-
-#define XFS_BUF_ZEROFLAGS(bp) \
-	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
-			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_STALE(bp)	xfs_buf_stale(bp);
-#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)
-#define XFS_BUF_SUPER_STALE(bp)	do {				\
-					XFS_BUF_STALE(bp);	\
-					xfs_buf_delwri_dequeue(bp);	\
-					XFS_BUF_DONE(bp);	\
-				} while (0)
-
-#define XFS_BUF_DELAYWRITE(bp)		((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp)	xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp)	((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)	((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)	((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)	((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)	((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_ADDR(bp)		((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp)		((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off)	((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp)		((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt)	((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)		((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)	((bp)->b_buffer_length = (cnt))
-
-static inline void
-xfs_buf_set_ref(
-	struct xfs_buf	*bp,
-	int		lru_ref)
-{
-	atomic_set(&bp->b_lru_ref, lru_ref);
-}
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)	xfs_buf_set_ref(bp, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)		do { } while (0)
-
-static inline int xfs_buf_ispinned(struct xfs_buf *bp)
-{
-	return atomic_read(&bp->b_pin_count);
-}
-
-#define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait);
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
-	xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-}
-
-/*
- *	Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-			struct block_device *, int, const char *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#ifdef CONFIG_KDB_MODULES
-extern struct list_head *xfs_get_buftarg_list(void);
-#endif
-
-#define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
-
-#define xfs_binval(buftarg)		xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg)		xfs_flush_buftarg(buftarg, 1)
-
-#endif	/* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
deleted file mode 100644
index 244e797..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_discard.h"
-#include "xfs_trace.h"
-
-STATIC int
-xfs_trim_extents(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		agno,
-	xfs_fsblock_t		start,
-	xfs_fsblock_t		len,
-	xfs_fsblock_t		minlen,
-	__uint64_t		*blocks_trimmed)
-{
-	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
-	struct xfs_btree_cur	*cur;
-	struct xfs_buf		*agbp;
-	struct xfs_perag	*pag;
-	int			error;
-	int			i;
-
-	pag = xfs_perag_get(mp, agno);
-
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-	if (error || !agbp)
-		goto out_put_perag;
-
-	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
-	/*
-	 * Force out the log.  This means any transactions that might have freed
-	 * space before we took the AGF buffer lock are now on disk, and the
-	 * volatile disk cache is flushed.
-	 */
-	xfs_log_force(mp, XFS_LOG_SYNC);
-
-	/*
-	 * Look up the longest btree in the AGF and start with it.
-	 */
-	error = xfs_alloc_lookup_le(cur, 0,
-				    XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
-	if (error)
-		goto out_del_cursor;
-
-	/*
-	 * Loop until we are done with all extents that are large
-	 * enough to be worth discarding.
-	 */
-	while (i) {
-		xfs_agblock_t fbno;
-		xfs_extlen_t flen;
-
-		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
-		if (error)
-			goto out_del_cursor;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
-		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
-
-		/*
-		 * Too small?  Give up.
-		 */
-		if (flen < minlen) {
-			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-			goto out_del_cursor;
-		}
-
-		/*
-		 * If the extent is entirely outside of the range we are
-		 * supposed to discard skip it.  Do not bother to trim
-		 * down partially overlapping ranges for now.
-		 */
-		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
-		    XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
-			trace_xfs_discard_exclude(mp, agno, fbno, flen);
-			goto next_extent;
-		}
-
-		/*
-		 * If any blocks in the range are still busy, skip the
-		 * discard and try again the next time.
-		 */
-		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
-			trace_xfs_discard_busy(mp, agno, fbno, flen);
-			goto next_extent;
-		}
-
-		trace_xfs_discard_extent(mp, agno, fbno, flen);
-		error = -blkdev_issue_discard(bdev,
-				XFS_AGB_TO_DADDR(mp, agno, fbno),
-				XFS_FSB_TO_BB(mp, flen),
-				GFP_NOFS, 0);
-		if (error)
-			goto out_del_cursor;
-		*blocks_trimmed += flen;
-
-next_extent:
-		error = xfs_btree_decrement(cur, 0, &i);
-		if (error)
-			goto out_del_cursor;
-	}
-
-out_del_cursor:
-	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-	xfs_buf_relse(agbp);
-out_put_perag:
-	xfs_perag_put(pag);
-	return error;
-}
-
-int
-xfs_ioc_trim(
-	struct xfs_mount		*mp,
-	struct fstrim_range __user	*urange)
-{
-	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
-	unsigned int		granularity = q->limits.discard_granularity;
-	struct fstrim_range	range;
-	xfs_fsblock_t		start, len, minlen;
-	xfs_agnumber_t		start_agno, end_agno, agno;
-	__uint64_t		blocks_trimmed = 0;
-	int			error, last_error = 0;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-	if (!blk_queue_discard(q))
-		return -XFS_ERROR(EOPNOTSUPP);
-	if (copy_from_user(&range, urange, sizeof(range)))
-		return -XFS_ERROR(EFAULT);
-
-	/*
-	 * Truncating down the len isn't actually quite correct, but using
-	 * XFS_B_TO_FSB would mean we trivially get overflows for values
-	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
-	 * used by the fstrim application.  In the end it really doesn't
-	 * matter as trimming blocks is an advisory interface.
-	 */
-	start = XFS_B_TO_FSBT(mp, range.start);
-	len = XFS_B_TO_FSBT(mp, range.len);
-	minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
-
-	start_agno = XFS_FSB_TO_AGNO(mp, start);
-	if (start_agno >= mp->m_sb.sb_agcount)
-		return -XFS_ERROR(EINVAL);
-
-	end_agno = XFS_FSB_TO_AGNO(mp, start + len);
-	if (end_agno >= mp->m_sb.sb_agcount)
-		end_agno = mp->m_sb.sb_agcount - 1;
-
-	for (agno = start_agno; agno <= end_agno; agno++) {
-		error = -xfs_trim_extents(mp, agno, start, len, minlen,
-					  &blocks_trimmed);
-		if (error)
-			last_error = error;
-	}
-
-	if (last_error)
-		return last_error;
-
-	range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
-	if (copy_to_user(urange, &range, sizeof(range)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-int
-xfs_discard_extents(
-	struct xfs_mount	*mp,
-	struct list_head	*list)
-{
-	struct xfs_busy_extent	*busyp;
-	int			error = 0;
-
-	list_for_each_entry(busyp, list, list) {
-		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
-					 busyp->length);
-
-		error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
-				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
-				XFS_FSB_TO_BB(mp, busyp->length),
-				GFP_NOFS, 0);
-		if (error && error != EOPNOTSUPP) {
-			xfs_info(mp,
-	 "discard failed for extent [0x%llu,%u], error %d",
-				 (unsigned long long)busyp->bno,
-				 busyp->length,
-				 error);
-			return error;
-		}
-	}
-
-	return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
deleted file mode 100644
index 344879a..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef XFS_DISCARD_H
-#define XFS_DISCARD_H 1
-
-struct fstrim_range;
-struct list_head;
-
-extern int	xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int	xfs_discard_extents(struct xfs_mount *, struct list_head *);
-
-#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
deleted file mode 100644
index 75e5d32..0000000
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-/*
- * Note that we only accept fileids which are long enough rather than allow
- * the parent generation number to default to zero.  XFS considers zero a
- * valid generation number not an invalid/wildcard value.
- */
-static int xfs_fileid_length(int fileid_type)
-{
-	switch (fileid_type) {
-	case FILEID_INO32_GEN:
-		return 2;
-	case FILEID_INO32_GEN_PARENT:
-		return 4;
-	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-		return 3;
-	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-		return 6;
-	}
-	return 255; /* invalid */
-}
-
-STATIC int
-xfs_fs_encode_fh(
-	struct dentry		*dentry,
-	__u32			*fh,
-	int			*max_len,
-	int			connectable)
-{
-	struct fid		*fid = (struct fid *)fh;
-	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fh;
-	struct inode		*inode = dentry->d_inode;
-	int			fileid_type;
-	int			len;
-
-	/* Directories don't need their parent encoded, they have ".." */
-	if (S_ISDIR(inode->i_mode) || !connectable)
-		fileid_type = FILEID_INO32_GEN;
-	else
-		fileid_type = FILEID_INO32_GEN_PARENT;
-
-	/*
-	 * If the the filesystem may contain 64bit inode numbers, we need
-	 * to use larger file handles that can represent them.
-	 *
-	 * While we only allocate inodes that do not fit into 32 bits any
-	 * large enough filesystem may contain them, thus the slightly
-	 * confusing looking conditional below.
-	 */
-	if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
-	    (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
-		fileid_type |= XFS_FILEID_TYPE_64FLAG;
-
-	/*
-	 * Only encode if there is enough space given.  In practice
-	 * this means we can't export a filesystem with 64bit inodes
-	 * over NFSv2 with the subtree_check export option; the other
-	 * seven combinations work.  The real answer is "don't use v2".
-	 */
-	len = xfs_fileid_length(fileid_type);
-	if (*max_len < len) {
-		*max_len = len;
-		return 255;
-	}
-	*max_len = len;
-
-	switch (fileid_type) {
-	case FILEID_INO32_GEN_PARENT:
-		spin_lock(&dentry->d_lock);
-		fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
-		fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
-		spin_unlock(&dentry->d_lock);
-		/*FALLTHRU*/
-	case FILEID_INO32_GEN:
-		fid->i32.ino = inode->i_ino;
-		fid->i32.gen = inode->i_generation;
-		break;
-	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-		spin_lock(&dentry->d_lock);
-		fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
-		fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
-		spin_unlock(&dentry->d_lock);
-		/*FALLTHRU*/
-	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-		fid64->ino = inode->i_ino;
-		fid64->gen = inode->i_generation;
-		break;
-	}
-
-	return fileid_type;
-}
-
-STATIC struct inode *
-xfs_nfs_get_inode(
-	struct super_block	*sb,
-	u64			ino,
-	u32			generation)
- {
- 	xfs_mount_t		*mp = XFS_M(sb);
-	xfs_inode_t		*ip;
-	int			error;
-
-	/*
-	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
-	 */
-	if (ino == 0)
-		return ERR_PTR(-ESTALE);
-
-	/*
-	 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
-	 * fine and not an indication of a corrupted filesystem as clients can
-	 * send invalid file handles and we have to handle it gracefully..
-	 */
-	error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
-	if (error) {
-		/*
-		 * EINVAL means the inode cluster doesn't exist anymore.
-		 * This implies the filehandle is stale, so we should
-		 * translate it here.
-		 * We don't use ESTALE directly down the chain to not
-		 * confuse applications using bulkstat that expect EINVAL.
-		 */
-		if (error == EINVAL || error == ENOENT)
-			error = ESTALE;
-		return ERR_PTR(-error);
-	}
-
-	if (ip->i_d.di_gen != generation) {
-		IRELE(ip);
-		return ERR_PTR(-ESTALE);
-	}
-
-	return VFS_I(ip);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-		 int fh_len, int fileid_type)
-{
-	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fid;
-	struct inode		*inode = NULL;
-
-	if (fh_len < xfs_fileid_length(fileid_type))
-		return NULL;
-
-	switch (fileid_type) {
-	case FILEID_INO32_GEN_PARENT:
-	case FILEID_INO32_GEN:
-		inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
-		break;
-	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-		inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
-		break;
-	}
-
-	return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
-		 int fh_len, int fileid_type)
-{
-	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fid;
-	struct inode		*inode = NULL;
-
-	switch (fileid_type) {
-	case FILEID_INO32_GEN_PARENT:
-		inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
-					      fid->i32.parent_gen);
-		break;
-	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-		inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
-					      fid64->parent_gen);
-		break;
-	}
-
-	return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_get_parent(
-	struct dentry		*child)
-{
-	int			error;
-	struct xfs_inode	*cip;
-
-	error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
-	if (unlikely(error))
-		return ERR_PTR(-error);
-
-	return d_obtain_alias(VFS_I(cip));
-}
-
-STATIC int
-xfs_fs_nfs_commit_metadata(
-	struct inode		*inode)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	int			error = 0;
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip)) {
-		error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
-				XFS_LOG_SYNC, NULL);
-	}
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	return error;
-}
-
-const struct export_operations xfs_export_operations = {
-	.encode_fh		= xfs_fs_encode_fh,
-	.fh_to_dentry		= xfs_fs_fh_to_dentry,
-	.fh_to_parent		= xfs_fs_fh_to_parent,
-	.get_parent		= xfs_fs_get_parent,
-	.commit_metadata	= xfs_fs_nfs_commit_metadata,
-};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
deleted file mode 100644
index 3272b6a..0000000
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_EXPORT_H__
-#define __XFS_EXPORT_H__
-
-/*
- * Common defines for code related to exporting XFS filesystems over NFS.
- *
- * The NFS fileid goes out on the wire as an array of
- * 32bit unsigned ints in host order.  There are 5 possible
- * formats.
- *
- * (1)	fileid_type=0x00
- *	(no fileid data; handled by the generic code)
- *
- * (2)	fileid_type=0x01
- *	inode-num
- *	generation
- *
- * (3)	fileid_type=0x02
- *	inode-num
- *	generation
- *	parent-inode-num
- *	parent-generation
- *
- * (4)	fileid_type=0x81
- *	inode-num-lo32
- *	inode-num-hi32
- *	generation
- *
- * (5)	fileid_type=0x82
- *	inode-num-lo32
- *	inode-num-hi32
- *	generation
- *	parent-inode-num-lo32
- *	parent-inode-num-hi32
- *	parent-generation
- *
- * Note, the NFS filehandle also includes an fsid portion which
- * may have an inode number in it.  That number is hardcoded to
- * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesystem with 64bit
- * inodes you should either export the mountpoint (rather than
- * a subdirectory) or use the "fsid" export option.
- */
-
-struct xfs_fid64 {
-	u64 ino;
-	u32 gen;
-	u64 parent_ino;
-	u32 parent_gen;
-} __attribute__((packed));
-
-/* This flag goes on the wire.  Don't play with it. */
-#define XFS_FILEID_TYPE_64FLAG	0x80	/* NFS fileid has 64bit inodes */
-
-#endif	/* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
deleted file mode 100644
index 7f7b424..0000000
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_da_btree.h"
-#include "xfs_ioctl.h"
-#include "xfs_trace.h"
-
-#include <linux/dcache.h>
-#include <linux/falloc.h>
-
-static const struct vm_operations_struct xfs_file_vm_ops;
-
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	if (type & XFS_IOLOCK_EXCL)
-		mutex_lock(&VFS_I(ip)->i_mutex);
-	xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_iunlock(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-static inline void
-xfs_rw_ilock_demote(
-	struct xfs_inode	*ip,
-	int			type)
-{
-	xfs_ilock_demote(ip, type);
-	if (type & XFS_IOLOCK_EXCL)
-		mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-/*
- *	xfs_iozero
- *
- *	xfs_iozero clears the specified range of buffer supplied,
- *	and marks all the affected blocks as valid and modified.  If
- *	an affected block is not allocated, it will be allocated.  If
- *	an affected block is not completely overwritten, and is not
- *	valid before the operation, it will be read from disk before
- *	being partially zeroed.
- */
-STATIC int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
-{
-	struct page		*page;
-	struct address_space	*mapping;
-	int			status;
-
-	mapping = VFS_I(ip)->i_mapping;
-	do {
-		unsigned offset, bytes;
-		void *fsdata;
-
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		status = pagecache_write_begin(NULL, mapping, pos, bytes,
-					AOP_FLAG_UNINTERRUPTIBLE,
-					&page, &fsdata);
-		if (status)
-			break;
-
-		zero_user(page, offset, bytes);
-
-		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
-					page, fsdata);
-		WARN_ON(status <= 0); /* can't return less than zero! */
-		pos += bytes;
-		count -= bytes;
-		status = 0;
-	} while (count);
-
-	return (-status);
-}
-
-STATIC int
-xfs_file_fsync(
-	struct file		*file,
-	loff_t			start,
-	loff_t			end,
-	int			datasync)
-{
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	int			error = 0;
-	int			log_flushed = 0;
-
-	trace_xfs_file_fsync(ip);
-
-	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (error)
-		return error;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -XFS_ERROR(EIO);
-
-	xfs_iflags_clear(ip, XFS_ITRUNCATED);
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	xfs_ioend_wait(ip);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-	if (mp->m_flags & XFS_MOUNT_BARRIER) {
-		/*
-		 * If we have an RT and/or log subvolume we need to make sure
-		 * to flush the write cache the device used for file data
-		 * first.  This is to ensure newly written file data make
-		 * it to disk before logging the new inode size in case of
-		 * an extending write.
-		 */
-		if (XFS_IS_REALTIME_INODE(ip))
-			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-		else if (mp->m_logdev_targp != mp->m_ddev_targp)
-			xfs_blkdev_issue_flush(mp->m_ddev_targp);
-	}
-
-	/*
-	 * We always need to make sure that the required inode state is safe on
-	 * disk.  The inode might be clean but we still might need to force the
-	 * log because of committed transactions that haven't hit the disk yet.
-	 * Likewise, there could be unflushed non-transactional changes to the
-	 * inode core that have to go to disk and this requires us to issue
-	 * a synchronous transaction to capture these changes correctly.
-	 *
-	 * This code relies on the assumption that if the i_update_core field
-	 * of the inode is clear and the inode is unpinned then it is clean
-	 * and no action is required.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-	/*
-	 * First check if the VFS inode is marked dirty.  All the dirtying
-	 * of non-transactional updates no goes through mark_inode_dirty*,
-	 * which allows us to distinguish beteeen pure timestamp updates
-	 * and i_size updates which need to be caught for fdatasync.
-	 * After that also theck for the dirty state in the XFS inode, which
-	 * might gets cleared when the inode gets written out via the AIL
-	 * or xfs_iflush_cluster.
-	 */
-	if (((inode->i_state & I_DIRTY_DATASYNC) ||
-	    ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-	    ip->i_update_core) {
-		/*
-		 * Kick off a transaction to log the inode core to get the
-		 * updates.  The sync transaction will also force the log.
-		 */
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-		error = xfs_trans_reserve(tp, 0,
-				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			return -error;
-		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-		/*
-		 * Note - it's possible that we might have pushed ourselves out
-		 * of the way during trans_reserve which would flush the inode.
-		 * But there's no guarantee that the inode buffer has actually
-		 * gone out yet (it's delwri).	Plus the buffer could be pinned
-		 * anyway if it's part of an inode in another recent
-		 * transaction.	 So we play it safe and fire off the
-		 * transaction anyway.
-		 */
-		xfs_trans_ijoin(tp, ip);
-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-		xfs_trans_set_sync(tp);
-		error = _xfs_trans_commit(tp, 0, &log_flushed);
-
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	} else {
-		/*
-		 * Timestamps/size haven't changed since last inode flush or
-		 * inode transaction commit.  That means either nothing got
-		 * written or a transaction committed which caught the updates.
-		 * If the latter happened and the transaction hasn't hit the
-		 * disk yet, the inode will be still be pinned.  If it is,
-		 * force the log.
-		 */
-		if (xfs_ipincount(ip)) {
-			error = _xfs_log_force_lsn(mp,
-					ip->i_itemp->ili_last_lsn,
-					XFS_LOG_SYNC, &log_flushed);
-		}
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	}
-
-	/*
-	 * If we only have a single device, and the log force about was
-	 * a no-op we might have to flush the data device cache here.
-	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
-	 * an already allocated file and thus do not have any metadata to
-	 * commit.
-	 */
-	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-	    mp->m_logdev_targp == mp->m_ddev_targp &&
-	    !XFS_IS_REALTIME_INODE(ip) &&
-	    !log_flushed)
-		xfs_blkdev_issue_flush(mp->m_ddev_targp);
-
-	return -error;
-}
-
-STATIC ssize_t
-xfs_file_aio_read(
-	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	struct file		*file = iocb->ki_filp;
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	size_t			size = 0;
-	ssize_t			ret = 0;
-	int			ioflags = 0;
-	xfs_fsize_t		n;
-	unsigned long		seg;
-
-	XFS_STATS_INC(xs_read_calls);
-
-	BUG_ON(iocb->ki_pos != pos);
-
-	if (unlikely(file->f_flags & O_DIRECT))
-		ioflags |= IO_ISDIRECT;
-	if (file->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	/* START copy & waste from filemap.c */
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *iv = &iovp[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		size += iv->iov_len;
-		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-			return XFS_ERROR(-EINVAL);
-	}
-	/* END copy & waste from filemap.c */
-
-	if (unlikely(ioflags & IO_ISDIRECT)) {
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp;
-		if ((iocb->ki_pos & target->bt_smask) ||
-		    (size & target->bt_smask)) {
-			if (iocb->ki_pos == ip->i_size)
-				return 0;
-			return -XFS_ERROR(EINVAL);
-		}
-	}
-
-	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
-	if (n <= 0 || size == 0)
-		return 0;
-
-	if (n < size)
-		size = n;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	if (unlikely(ioflags & IO_ISDIRECT)) {
-		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
-		if (inode->i_mapping->nrpages) {
-			ret = -xfs_flushinval_pages(ip,
-					(iocb->ki_pos & PAGE_CACHE_MASK),
-					-1, FI_REMAPF_LOCKED);
-			if (ret) {
-				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-				return ret;
-			}
-		}
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-	} else
-		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
-	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_read_bytes, ret);
-
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-	return ret;
-}
-
-STATIC ssize_t
-xfs_file_splice_read(
-	struct file		*infilp,
-	loff_t			*ppos,
-	struct pipe_inode_info	*pipe,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
-	int			ioflags = 0;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_read_calls);
-
-	if (infilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
-	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_read_bytes, ret);
-
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-	return ret;
-}
-
-STATIC void
-xfs_aio_write_isize_update(
-	struct inode	*inode,
-	loff_t		*ppos,
-	ssize_t		bytes_written)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fsize_t		isize = i_size_read(inode);
-
-	if (bytes_written > 0)
-		XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
-	if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
-					*ppos > isize))
-		*ppos = isize;
-
-	if (*ppos > ip->i_size) {
-		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-		if (*ppos > ip->i_size)
-			ip->i_size = *ppos;
-		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred.  In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
-	struct xfs_inode	*ip)
-{
-	if (ip->i_new_size) {
-		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-		ip->i_new_size = 0;
-		if (ip->i_d.di_size > ip->i_size)
-			ip->i_d.di_size = ip->i_size;
-		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-}
-
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-	struct pipe_inode_info	*pipe,
-	struct file		*outfilp,
-	loff_t			*ppos,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct inode		*inode = outfilp->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fsize_t		new_size;
-	int			ioflags = 0;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_write_calls);
-
-	if (outfilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	new_size = *ppos + count;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (new_size > ip->i_size)
-		ip->i_new_size = new_size;
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-
-	xfs_aio_write_isize_update(inode, ppos, ret);
-	xfs_aio_write_newsize_update(ip);
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF.  We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	xfs_inode_t	*ip,
-	xfs_fsize_t	offset,
-	xfs_fsize_t	isize)
-{
-	xfs_fileoff_t	last_fsb;
-	xfs_mount_t	*mp = ip->i_mount;
-	int		nimaps;
-	int		zero_offset;
-	int		zero_len;
-	int		error = 0;
-	xfs_bmbt_irec_t	imap;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	if (zero_offset == 0) {
-		/*
-		 * There are no extra bytes in the last block on disk to
-		 * zero, so return.
-		 */
-		return 0;
-	}
-
-	last_fsb = XFS_B_TO_FSBT(mp, isize);
-	nimaps = 1;
-	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
-			  &nimaps, NULL);
-	if (error) {
-		return error;
-	}
-	ASSERT(nimaps > 0);
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK) {
-		return 0;
-	}
-	/*
-	 * Zero the part of the last block beyond the EOF, and write it
-	 * out sync.  We need to drop the ilock while we do this so we
-	 * don't deadlock when the buffer cache calls back to us.
-	 */
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	error = xfs_iozero(ip, isize, zero_len);
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF.  This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file.  This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated.  If fill is set,
- * then any holes in the range are filled and zeroed.  If not, the holes
- * are left alone as holes.
- */
-
-int					/* error (positive) */
-xfs_zero_eof(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,		/* starting I/O offset */
-	xfs_fsize_t	isize)		/* current inode size */
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_fileoff_t	start_zero_fsb;
-	xfs_fileoff_t	end_zero_fsb;
-	xfs_fileoff_t	zero_count_fsb;
-	xfs_fileoff_t	last_fsb;
-	xfs_fileoff_t	zero_off;
-	xfs_fsize_t	zero_len;
-	int		nimaps;
-	int		error = 0;
-	xfs_bmbt_irec_t	imap;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-	ASSERT(offset > isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	error = xfs_zero_last_block(ip, offset, isize);
-	if (error) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-		return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old
-	 * where blocks needing to be zeroed may exist.  To get the
-	 * block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back
-	 * to a block boundary.  We subtract 1 in case the size is
-	 * exactly on a block boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-		error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
-				  0, NULL, 0, &imap, &nimaps, NULL);
-		if (error) {
-			ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-			return error;
-		}
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			/*
-			 * This loop handles initializing pages that were
-			 * partially initialized by the code below this
-			 * loop. It basically zeroes the part of the page
-			 * that sits on a hole and sets the page as P_HOLE
-			 * and calls remapf if it is a mapped file.
-			 */
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 * Drop the inode lock while we're doing the I/O.
-		 * We'll still have the iolock to protect us.
-		 */
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error) {
-			goto out_lock;
-		}
-
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-	}
-
-	return 0;
-
-out_lock:
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	ASSERT(error >= 0);
-	return error;
-}
-
-/*
- * Common pre-write limit and setup checks.
- *
- * Returns with iolock held according to @iolock.
- */
-STATIC ssize_t
-xfs_file_aio_write_checks(
-	struct file		*file,
-	loff_t			*pos,
-	size_t			*count,
-	int			*iolock)
-{
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fsize_t		new_size;
-	int			error = 0;
-
-	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
-	if (error) {
-		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
-		*iolock = 0;
-		return error;
-	}
-
-	new_size = *pos + *count;
-	if (new_size > ip->i_size)
-		ip->i_new_size = new_size;
-
-	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-		file_update_time(file);
-
-	/*
-	 * If the offset is beyond the size of the file, we need to zero any
-	 * blocks that fall between the existing EOF and the start of this
-	 * write.
-	 */
-	if (*pos > ip->i_size)
-		error = -xfs_zero_eof(ip, *pos, ip->i_size);
-
-	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	/*
-	 * If we're writing the file then make sure to clear the setuid and
-	 * setgid bits if the process is not being run by root.  This keeps
-	 * people from modifying setuid and setgid binaries.
-	 */
-	return file_remove_suid(file);
-
-}
-
-/*
- * xfs_file_dio_aio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the tricky to
- * follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. xfs_ioend_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
- */
-STATIC ssize_t
-xfs_file_dio_aio_write(
-	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos,
-	size_t			ocount,
-	int			*iolock)
-{
-	struct file		*file = iocb->ki_filp;
-	struct address_space	*mapping = file->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	ssize_t			ret = 0;
-	size_t			count = ocount;
-	int			unaligned_io = 0;
-	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
-					mp->m_rtdev_targp : mp->m_ddev_targp;
-
-	*iolock = 0;
-	if ((pos & target->bt_smask) || (count & target->bt_smask))
-		return -XFS_ERROR(EINVAL);
-
-	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
-		unaligned_io = 1;
-
-	if (unaligned_io || mapping->nrpages || pos > ip->i_size)
-		*iolock = XFS_IOLOCK_EXCL;
-	else
-		*iolock = XFS_IOLOCK_SHARED;
-	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-	if (ret)
-		return ret;
-
-	if (mapping->nrpages) {
-		WARN_ON(*iolock != XFS_IOLOCK_EXCL);
-		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
-							FI_REMAPF_LOCKED);
-		if (ret)
-			return ret;
-	}
-
-	/*
-	 * If we are doing unaligned IO, wait for all other IO to drain,
-	 * otherwise demote the lock if we had to flush cached pages
-	 */
-	if (unaligned_io)
-		xfs_ioend_wait(ip);
-	else if (*iolock == XFS_IOLOCK_EXCL) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		*iolock = XFS_IOLOCK_SHARED;
-	}
-
-	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_direct_write(iocb, iovp,
-			&nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-	/* No fallback to buffered IO on errors for XFS. */
-	ASSERT(ret < 0 || ret == count);
-	return ret;
-}
-
-STATIC ssize_t
-xfs_file_buffered_aio_write(
-	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos,
-	size_t			ocount,
-	int			*iolock)
-{
-	struct file		*file = iocb->ki_filp;
-	struct address_space	*mapping = file->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret;
-	int			enospc = 0;
-	size_t			count = ocount;
-
-	*iolock = XFS_IOLOCK_EXCL;
-	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-	if (ret)
-		return ret;
-
-	/* We can write back this queue in page reclaim */
-	current->backing_dev_info = mapping->backing_dev_info;
-
-write_retry:
-	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-			pos, &iocb->ki_pos, count, ret);
-	/*
-	 * if we just got an ENOSPC, flush the inode now we aren't holding any
-	 * page locks and retry *once*
-	 */
-	if (ret == -ENOSPC && !enospc) {
-		ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
-		if (ret)
-			return ret;
-		enospc = 1;
-		goto write_retry;
-	}
-	current->backing_dev_info = NULL;
-	return ret;
-}
-
-STATIC ssize_t
-xfs_file_aio_write(
-	struct kiocb		*iocb,
-	const struct iovec	*iovp,
-	unsigned long		nr_segs,
-	loff_t			pos)
-{
-	struct file		*file = iocb->ki_filp;
-	struct address_space	*mapping = file->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	ssize_t			ret;
-	int			iolock;
-	size_t			ocount = 0;
-
-	XFS_STATS_INC(xs_write_calls);
-
-	BUG_ON(iocb->ki_pos != pos);
-
-	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-	if (ret)
-		return ret;
-
-	if (ocount == 0)
-		return 0;
-
-	xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	if (unlikely(file->f_flags & O_DIRECT))
-		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
-						ocount, &iolock);
-	else
-		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-						ocount, &iolock);
-
-	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
-
-	if (ret <= 0)
-		goto out_unlock;
-
-	/* Handle various SYNC-type writes */
-	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-		loff_t end = pos + ret - 1;
-		int error;
-
-		xfs_rw_iunlock(ip, iolock);
-		error = xfs_file_fsync(file, pos, end,
-				      (file->f_flags & __O_SYNC) ? 0 : 1);
-		xfs_rw_ilock(ip, iolock);
-		if (error)
-			ret = error;
-	}
-
-out_unlock:
-	xfs_aio_write_newsize_update(ip);
-	xfs_rw_iunlock(ip, iolock);
-	return ret;
-}
-
-STATIC long
-xfs_file_fallocate(
-	struct file	*file,
-	int		mode,
-	loff_t		offset,
-	loff_t		len)
-{
-	struct inode	*inode = file->f_path.dentry->d_inode;
-	long		error;
-	loff_t		new_size = 0;
-	xfs_flock64_t	bf;
-	xfs_inode_t	*ip = XFS_I(inode);
-	int		cmd = XFS_IOC_RESVSP;
-	int		attr_flags = XFS_ATTR_NOLOCK;
-
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-		return -EOPNOTSUPP;
-
-	bf.l_whence = 0;
-	bf.l_start = offset;
-	bf.l_len = len;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	if (mode & FALLOC_FL_PUNCH_HOLE)
-		cmd = XFS_IOC_UNRESVSP;
-
-	/* check the new inode size is valid before allocating */
-	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-	    offset + len > i_size_read(inode)) {
-		new_size = offset + len;
-		error = inode_newsize_ok(inode, new_size);
-		if (error)
-			goto out_unlock;
-	}
-
-	if (file->f_flags & O_DSYNC)
-		attr_flags |= XFS_ATTR_SYNC;
-
-	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
-	if (error)
-		goto out_unlock;
-
-	/* Change file size if needed */
-	if (new_size) {
-		struct iattr iattr;
-
-		iattr.ia_valid = ATTR_SIZE;
-		iattr.ia_size = new_size;
-		error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
-	}
-
-out_unlock:
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return error;
-}
-
-
-STATIC int
-xfs_file_open(
-	struct inode	*inode,
-	struct file	*file)
-{
-	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
-		return -EFBIG;
-	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
-		return -EIO;
-	return 0;
-}
-
-STATIC int
-xfs_dir_open(
-	struct inode	*inode,
-	struct file	*file)
-{
-	struct xfs_inode *ip = XFS_I(inode);
-	int		mode;
-	int		error;
-
-	error = xfs_file_open(inode, file);
-	if (error)
-		return error;
-
-	/*
-	 * If there are any blocks, read-ahead block 0 as we're almost
-	 * certain to have the next operation be a read there.
-	 */
-	mode = xfs_ilock_map_shared(ip);
-	if (ip->i_d.di_nextents > 0)
-		xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
-	xfs_iunlock(ip, mode);
-	return 0;
-}
-
-STATIC int
-xfs_file_release(
-	struct inode	*inode,
-	struct file	*filp)
-{
-	return -xfs_release(XFS_I(inode));
-}
-
-STATIC int
-xfs_file_readdir(
-	struct file	*filp,
-	void		*dirent,
-	filldir_t	filldir)
-{
-	struct inode	*inode = filp->f_path.dentry->d_inode;
-	xfs_inode_t	*ip = XFS_I(inode);
-	int		error;
-	size_t		bufsize;
-
-	/*
-	 * The Linux API doesn't pass down the total size of the buffer
-	 * we read into down to the filesystem.  With the filldir concept
-	 * it's not needed for correct information, but the XFS dir2 leaf
-	 * code wants an estimate of the buffer size to calculate it's
-	 * readahead window and size the buffers used for mapping to
-	 * physical blocks.
-	 *
-	 * Try to give it an estimate that's good enough, maybe at some
-	 * point we can change the ->readdir prototype to include the
-	 * buffer size.  For now we use the current glibc buffer size.
-	 */
-	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
-
-	error = xfs_readdir(ip, dirent, bufsize,
-				(xfs_off_t *)&filp->f_pos, filldir);
-	if (error)
-		return -error;
-	return 0;
-}
-
-STATIC int
-xfs_file_mmap(
-	struct file	*filp,
-	struct vm_area_struct *vma)
-{
-	vma->vm_ops = &xfs_file_vm_ops;
-	vma->vm_flags |= VM_CAN_NONLINEAR;
-
-	file_accessed(filp);
-	return 0;
-}
-
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-	struct vm_area_struct	*vma,
-	struct vm_fault		*vmf)
-{
-	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-const struct file_operations xfs_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= xfs_file_aio_read,
-	.aio_write	= xfs_file_aio_write,
-	.splice_read	= xfs_file_splice_read,
-	.splice_write	= xfs_file_splice_write,
-	.unlocked_ioctl	= xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= xfs_file_compat_ioctl,
-#endif
-	.mmap		= xfs_file_mmap,
-	.open		= xfs_file_open,
-	.release	= xfs_file_release,
-	.fsync		= xfs_file_fsync,
-	.fallocate	= xfs_file_fallocate,
-};
-
-const struct file_operations xfs_dir_file_operations = {
-	.open		= xfs_dir_open,
-	.read		= generic_read_dir,
-	.readdir	= xfs_file_readdir,
-	.llseek		= generic_file_llseek,
-	.unlocked_ioctl	= xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= xfs_file_compat_ioctl,
-#endif
-	.fsync		= xfs_file_fsync,
-};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-	.fault		= filemap_fault,
-	.page_mkwrite	= xfs_vm_page_mkwrite,
-};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
deleted file mode 100644
index ed88ed1..0000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trace.h"
-
-/*
- * note: all filemap functions return negative error codes. These
- * need to be inverted before returning to the xfs core functions.
- */
-void
-xfs_tosspages(
-	xfs_inode_t	*ip,
-	xfs_off_t	first,
-	xfs_off_t	last,
-	int		fiopt)
-{
-	/* can't toss partial tail pages, so mask them out */
-	last &= ~(PAGE_SIZE - 1);
-	truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-}
-
-int
-xfs_flushinval_pages(
-	xfs_inode_t	*ip,
-	xfs_off_t	first,
-	xfs_off_t	last,
-	int		fiopt)
-{
-	struct address_space *mapping = VFS_I(ip)->i_mapping;
-	int		ret = 0;
-
-	trace_xfs_pagecache_inval(ip, first, last);
-
-	xfs_iflags_clear(ip, XFS_ITRUNCATED);
-	ret = filemap_write_and_wait_range(mapping, first,
-				last == -1 ? LLONG_MAX : last);
-	if (!ret)
-		truncate_inode_pages_range(mapping, first, last);
-	return -ret;
-}
-
-int
-xfs_flush_pages(
-	xfs_inode_t	*ip,
-	xfs_off_t	first,
-	xfs_off_t	last,
-	uint64_t	flags,
-	int		fiopt)
-{
-	struct address_space *mapping = VFS_I(ip)->i_mapping;
-	int		ret = 0;
-	int		ret2;
-
-	xfs_iflags_clear(ip, XFS_ITRUNCATED);
-	ret = -filemap_fdatawrite_range(mapping, first,
-				last == -1 ? LLONG_MAX : last);
-	if (flags & XBF_ASYNC)
-		return ret;
-	ret2 = xfs_wait_on_pages(ip, first, last);
-	if (!ret)
-		ret = ret2;
-	return ret;
-}
-
-int
-xfs_wait_on_pages(
-	xfs_inode_t	*ip,
-	xfs_off_t	first,
-	xfs_off_t	last)
-{
-	struct address_space *mapping = VFS_I(ip)->i_mapping;
-
-	if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
-		return -filemap_fdatawait_range(mapping, first,
-					last == -1 ? ip->i_size - 1 : last);
-	}
-	return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
deleted file mode 100644
index 76e81cf..0000000
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sysctl.h"
-
-/*
- * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
- * other XFS code uses these values.  Times are measured in centisecs (i.e.
- * 100ths of a second).
- */
-xfs_param_t xfs_params = {
-			  /*	MIN		DFLT		MAX	*/
-	.sgid_inherit	= {	0,		0,		1	},
-	.symlink_mode	= {	0,		0,		1	},
-	.panic_mask	= {	0,		0,		255	},
-	.error_level	= {	0,		3,		11	},
-	.syncd_timer	= {	1*100,		30*100,		7200*100},
-	.stats_clear	= {	0,		0,		1	},
-	.inherit_sync	= {	0,		1,		1	},
-	.inherit_nodump	= {	0,		1,		1	},
-	.inherit_noatim = {	0,		1,		1	},
-	.xfs_buf_timer	= {	100/2,		1*100,		30*100	},
-	.xfs_buf_age	= {	1*100,		15*100,		7200*100},
-	.inherit_nosym	= {	0,		0,		1	},
-	.rotorstep	= {	1,		1,		255	},
-	.inherit_nodfrg	= {	0,		1,		1	},
-	.fstrm_timer	= {	1,		30*100,		3600*100},
-};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
deleted file mode 100644
index f7ce7de..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
-#include "xfs_discard.h"
-#include "xfs_quota.h"
-#include "xfs_inode_item.h"
-#include "xfs_export.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
-
-/*
- * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
- * a file or fs handle.
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- *    returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- *    returns full handle for a FD opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- *    returns full handle for a path
- */
-int
-xfs_find_handle(
-	unsigned int		cmd,
-	xfs_fsop_handlereq_t	*hreq)
-{
-	int			hsize;
-	xfs_handle_t		handle;
-	struct inode		*inode;
-	struct file		*file = NULL;
-	struct path		path;
-	int			error;
-	struct xfs_inode	*ip;
-
-	if (cmd == XFS_IOC_FD_TO_HANDLE) {
-		file = fget(hreq->fd);
-		if (!file)
-			return -EBADF;
-		inode = file->f_path.dentry->d_inode;
-	} else {
-		error = user_lpath((const char __user *)hreq->path, &path);
-		if (error)
-			return error;
-		inode = path.dentry->d_inode;
-	}
-	ip = XFS_I(inode);
-
-	/*
-	 * We can only generate handles for inodes residing on a XFS filesystem,
-	 * and only for regular files, directories or symbolic links.
-	 */
-	error = -EINVAL;
-	if (inode->i_sb->s_magic != XFS_SB_MAGIC)
-		goto out_put;
-
-	error = -EBADF;
-	if (!S_ISREG(inode->i_mode) &&
-	    !S_ISDIR(inode->i_mode) &&
-	    !S_ISLNK(inode->i_mode))
-		goto out_put;
-
-
-	memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
-
-	if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
-		/*
-		 * This handle only contains an fsid, zero the rest.
-		 */
-		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
-		hsize = sizeof(xfs_fsid_t);
-	} else {
-		int		lock_mode;
-
-		lock_mode = xfs_ilock_map_shared(ip);
-		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
-					sizeof(handle.ha_fid.fid_len);
-		handle.ha_fid.fid_pad = 0;
-		handle.ha_fid.fid_gen = ip->i_d.di_gen;
-		handle.ha_fid.fid_ino = ip->i_ino;
-		xfs_iunlock_map_shared(ip, lock_mode);
-
-		hsize = XFS_HSIZE(handle);
-	}
-
-	error = -EFAULT;
-	if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
-		goto out_put;
-
-	error = 0;
-
- out_put:
-	if (cmd == XFS_IOC_FD_TO_HANDLE)
-		fput(file);
-	else
-		path_put(&path);
-	return error;
-}
-
-/*
- * No need to do permission checks on the various pathname components
- * as the handle operations are privileged.
- */
-STATIC int
-xfs_handle_acceptable(
-	void			*context,
-	struct dentry		*dentry)
-{
-	return 1;
-}
-
-/*
- * Convert userspace handle data into a dentry.
- */
-struct dentry *
-xfs_handle_to_dentry(
-	struct file		*parfilp,
-	void __user		*uhandle,
-	u32			hlen)
-{
-	xfs_handle_t		handle;
-	struct xfs_fid64	fid;
-
-	/*
-	 * Only allow handle opens under a directory.
-	 */
-	if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
-		return ERR_PTR(-ENOTDIR);
-
-	if (hlen != sizeof(xfs_handle_t))
-		return ERR_PTR(-EINVAL);
-	if (copy_from_user(&handle, uhandle, hlen))
-		return ERR_PTR(-EFAULT);
-	if (handle.ha_fid.fid_len !=
-	    sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
-		return ERR_PTR(-EINVAL);
-
-	memset(&fid, 0, sizeof(struct fid));
-	fid.ino = handle.ha_fid.fid_ino;
-	fid.gen = handle.ha_fid.fid_gen;
-
-	return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
-			FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
-			xfs_handle_acceptable, NULL);
-}
-
-STATIC struct dentry *
-xfs_handlereq_to_dentry(
-	struct file		*parfilp,
-	xfs_fsop_handlereq_t	*hreq)
-{
-	return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
-}
-
-int
-xfs_open_by_handle(
-	struct file		*parfilp,
-	xfs_fsop_handlereq_t	*hreq)
-{
-	const struct cred	*cred = current_cred();
-	int			error;
-	int			fd;
-	int			permflag;
-	struct file		*filp;
-	struct inode		*inode;
-	struct dentry		*dentry;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-
-	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	inode = dentry->d_inode;
-
-	/* Restrict xfs_open_by_handle to directories & regular files. */
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
-		error = -XFS_ERROR(EPERM);
-		goto out_dput;
-	}
-
-#if BITS_PER_LONG != 32
-	hreq->oflags |= O_LARGEFILE;
-#endif
-
-	/* Put open permission in namei format. */
-	permflag = hreq->oflags;
-	if ((permflag+1) & O_ACCMODE)
-		permflag++;
-	if (permflag & O_TRUNC)
-		permflag |= 2;
-
-	if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
-	    (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
-		error = -XFS_ERROR(EPERM);
-		goto out_dput;
-	}
-
-	if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
-		error = -XFS_ERROR(EACCES);
-		goto out_dput;
-	}
-
-	/* Can't write directories. */
-	if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
-		error = -XFS_ERROR(EISDIR);
-		goto out_dput;
-	}
-
-	fd = get_unused_fd();
-	if (fd < 0) {
-		error = fd;
-		goto out_dput;
-	}
-
-	filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
-			   hreq->oflags, cred);
-	if (IS_ERR(filp)) {
-		put_unused_fd(fd);
-		return PTR_ERR(filp);
-	}
-
-	if (S_ISREG(inode->i_mode)) {
-		filp->f_flags |= O_NOATIME;
-		filp->f_mode |= FMODE_NOCMTIME;
-	}
-
-	fd_install(fd, filp);
-	return fd;
-
- out_dput:
-	dput(dentry);
-	return error;
-}
-
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-	char __user		*buffer,
-	int			buflen,
-	const char		*link)
-{
-        int len;
-
-	len = PTR_ERR(link);
-	if (IS_ERR(link))
-		goto out;
-
-	len = strlen(link);
-	if (len > (unsigned) buflen)
-		len = buflen;
-	if (copy_to_user(buffer, link, len))
-		len = -EFAULT;
- out:
-	return len;
-}
-
-
-int
-xfs_readlink_by_handle(
-	struct file		*parfilp,
-	xfs_fsop_handlereq_t	*hreq)
-{
-	struct dentry		*dentry;
-	__u32			olen;
-	void			*link;
-	int			error;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-
-	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	/* Restrict this handle operation to symlinks only. */
-	if (!S_ISLNK(dentry->d_inode->i_mode)) {
-		error = -XFS_ERROR(EINVAL);
-		goto out_dput;
-	}
-
-	if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
-		error = -XFS_ERROR(EFAULT);
-		goto out_dput;
-	}
-
-	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-	if (!link) {
-		error = -XFS_ERROR(ENOMEM);
-		goto out_dput;
-	}
-
-	error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-	if (error)
-		goto out_kfree;
-	error = do_readlink(hreq->ohandle, olen, link);
-	if (error)
-		goto out_kfree;
-
- out_kfree:
-	kfree(link);
- out_dput:
-	dput(dentry);
-	return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
-	struct file		*parfilp,
-	void			__user *arg)
-{
-	int			error;
-	struct fsdmidata	fsd;
-	xfs_fsop_setdm_handlereq_t dmhreq;
-	struct dentry		*dentry;
-
-	if (!capable(CAP_MKNOD))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-		error = -XFS_ERROR(EPERM);
-		goto out;
-	}
-
-	if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
-		error = -XFS_ERROR(EFAULT);
-		goto out;
-	}
-
-	error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-				 fsd.fsd_dmstate);
-
- out:
-	dput(dentry);
-	return error;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
-	struct file		*parfilp,
-	void			__user *arg)
-{
-	int			error = -ENOMEM;
-	attrlist_cursor_kern_t	*cursor;
-	xfs_fsop_attrlist_handlereq_t al_hreq;
-	struct dentry		*dentry;
-	char			*kbuf;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-	if (al_hreq.buflen > XATTR_LIST_MAX)
-		return -XFS_ERROR(EINVAL);
-
-	/*
-	 * Reject flags, only allow namespaces.
-	 */
-	if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-		return -XFS_ERROR(EINVAL);
-
-	dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
-	if (!kbuf)
-		goto out_dput;
-
-	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-					al_hreq.flags, cursor);
-	if (error)
-		goto out_kfree;
-
-	if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
-		error = -EFAULT;
-
- out_kfree:
-	kfree(kbuf);
- out_dput:
-	dput(dentry);
-	return error;
-}
-
-int
-xfs_attrmulti_attr_get(
-	struct inode		*inode,
-	unsigned char		*name,
-	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags)
-{
-	unsigned char		*kbuf;
-	int			error = EFAULT;
-
-	if (*len > XATTR_SIZE_MAX)
-		return EINVAL;
-	kbuf = kmalloc(*len, GFP_KERNEL);
-	if (!kbuf)
-		return ENOMEM;
-
-	error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
-	if (error)
-		goto out_kfree;
-
-	if (copy_to_user(ubuf, kbuf, *len))
-		error = EFAULT;
-
- out_kfree:
-	kfree(kbuf);
-	return error;
-}
-
-int
-xfs_attrmulti_attr_set(
-	struct inode		*inode,
-	unsigned char		*name,
-	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags)
-{
-	unsigned char		*kbuf;
-	int			error = EFAULT;
-
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return EPERM;
-	if (len > XATTR_SIZE_MAX)
-		return EINVAL;
-
-	kbuf = memdup_user(ubuf, len);
-	if (IS_ERR(kbuf))
-		return PTR_ERR(kbuf);
-
-	error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
-	return error;
-}
-
-int
-xfs_attrmulti_attr_remove(
-	struct inode		*inode,
-	unsigned char		*name,
-	__uint32_t		flags)
-{
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-		return EPERM;
-	return xfs_attr_remove(XFS_I(inode), name, flags);
-}
-
-STATIC int
-xfs_attrmulti_by_handle(
-	struct file		*parfilp,
-	void			__user *arg)
-{
-	int			error;
-	xfs_attr_multiop_t	*ops;
-	xfs_fsop_attrmulti_handlereq_t am_hreq;
-	struct dentry		*dentry;
-	unsigned int		i, size;
-	unsigned char		*attr_name;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	/* overflow check */
-	if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
-		return -E2BIG;
-
-	dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	error = E2BIG;
-	size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
-	if (!size || size > 16 * PAGE_SIZE)
-		goto out_dput;
-
-	ops = memdup_user(am_hreq.ops, size);
-	if (IS_ERR(ops)) {
-		error = PTR_ERR(ops);
-		goto out_dput;
-	}
-
-	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-	if (!attr_name)
-		goto out_kfree_ops;
-
-	error = 0;
-	for (i = 0; i < am_hreq.opcount; i++) {
-		ops[i].am_error = strncpy_from_user((char *)attr_name,
-				ops[i].am_attrname, MAXNAMELEN);
-		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-			error = -ERANGE;
-		if (ops[i].am_error < 0)
-			break;
-
-		switch (ops[i].am_opcode) {
-		case ATTR_OP_GET:
-			ops[i].am_error = xfs_attrmulti_attr_get(
-					dentry->d_inode, attr_name,
-					ops[i].am_attrvalue, &ops[i].am_length,
-					ops[i].am_flags);
-			break;
-		case ATTR_OP_SET:
-			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-			if (ops[i].am_error)
-				break;
-			ops[i].am_error = xfs_attrmulti_attr_set(
-					dentry->d_inode, attr_name,
-					ops[i].am_attrvalue, ops[i].am_length,
-					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
-			break;
-		case ATTR_OP_REMOVE:
-			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-			if (ops[i].am_error)
-				break;
-			ops[i].am_error = xfs_attrmulti_attr_remove(
-					dentry->d_inode, attr_name,
-					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
-			break;
-		default:
-			ops[i].am_error = EINVAL;
-		}
-	}
-
-	if (copy_to_user(am_hreq.ops, ops, size))
-		error = XFS_ERROR(EFAULT);
-
-	kfree(attr_name);
- out_kfree_ops:
-	kfree(ops);
- out_dput:
-	dput(dentry);
-	return -error;
-}
-
-int
-xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
-	struct file		*filp,
-	int			ioflags,
-	unsigned int		cmd,
-	xfs_flock64_t		*bf)
-{
-	int			attr_flags = 0;
-	int			error;
-
-	/*
-	 * Only allow the sys admin to reserve space unless
-	 * unwritten extents are enabled.
-	 */
-	if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
-	    !capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-
-	if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
-		return -XFS_ERROR(EPERM);
-
-	if (!(filp->f_mode & FMODE_WRITE))
-		return -XFS_ERROR(EBADF);
-
-	if (!S_ISREG(inode->i_mode))
-		return -XFS_ERROR(EINVAL);
-
-	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		attr_flags |= XFS_ATTR_NONBLOCK;
-
-	if (filp->f_flags & O_DSYNC)
-		attr_flags |= XFS_ATTR_SYNC;
-
-	if (ioflags & IO_INVIS)
-		attr_flags |= XFS_ATTR_DMI;
-
-	error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
-	return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
-	xfs_mount_t		*mp,
-	unsigned int		cmd,
-	void			__user *arg)
-{
-	xfs_fsop_bulkreq_t	bulkreq;
-	int			count;	/* # of records returned */
-	xfs_ino_t		inlast;	/* last inode number */
-	int			done;
-	int			error;
-
-	/* done = 1 if there are more stats to get and if bulkstat */
-	/* should be called again (unused here, but used in dmapi) */
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -XFS_ERROR(EIO);
-
-	if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-		return -XFS_ERROR(EFAULT);
-
-	if ((count = bulkreq.icount) <= 0)
-		return -XFS_ERROR(EINVAL);
-
-	if (bulkreq.ubuffer == NULL)
-		return -XFS_ERROR(EINVAL);
-
-	if (cmd == XFS_IOC_FSINUMBERS)
-		error = xfs_inumbers(mp, &inlast, &count,
-					bulkreq.ubuffer, xfs_inumbers_fmt);
-	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-		error = xfs_bulkstat_single(mp, &inlast,
-						bulkreq.ubuffer, &done);
-	else	/* XFS_IOC_FSBULKSTAT */
-		error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-				     sizeof(xfs_bstat_t), bulkreq.ubuffer,
-				     &done);
-
-	if (error)
-		return -error;
-
-	if (bulkreq.ocount != NULL) {
-		if (copy_to_user(bulkreq.lastip, &inlast,
-						sizeof(xfs_ino_t)))
-			return -XFS_ERROR(EFAULT);
-
-		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-			return -XFS_ERROR(EFAULT);
-	}
-
-	return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-	xfs_mount_t		*mp,
-	void			__user *arg)
-{
-	xfs_fsop_geom_t         fsgeo;
-	int			error;
-
-	error = xfs_fs_geometry(mp, &fsgeo, 3);
-	if (error)
-		return -error;
-
-	/*
-	 * Caller should have passed an argument of type
-	 * xfs_fsop_geom_v1_t.  This is a proper subset of the
-	 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
-	 */
-	if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
-	xfs_mount_t		*mp,
-	void			__user *arg)
-{
-	xfs_fsop_geom_t		fsgeo;
-	int			error;
-
-	error = xfs_fs_geometry(mp, &fsgeo, 4);
-	if (error)
-		return -error;
-
-	if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
-	unsigned int	flags,
-	unsigned int	start)
-{
-	unsigned int	xflags = start;
-
-	if (flags & FS_IMMUTABLE_FL)
-		xflags |= XFS_XFLAG_IMMUTABLE;
-	else
-		xflags &= ~XFS_XFLAG_IMMUTABLE;
-	if (flags & FS_APPEND_FL)
-		xflags |= XFS_XFLAG_APPEND;
-	else
-		xflags &= ~XFS_XFLAG_APPEND;
-	if (flags & FS_SYNC_FL)
-		xflags |= XFS_XFLAG_SYNC;
-	else
-		xflags &= ~XFS_XFLAG_SYNC;
-	if (flags & FS_NOATIME_FL)
-		xflags |= XFS_XFLAG_NOATIME;
-	else
-		xflags &= ~XFS_XFLAG_NOATIME;
-	if (flags & FS_NODUMP_FL)
-		xflags |= XFS_XFLAG_NODUMP;
-	else
-		xflags &= ~XFS_XFLAG_NODUMP;
-
-	return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
-	__uint16_t	di_flags)
-{
-	unsigned int	flags = 0;
-
-	if (di_flags & XFS_DIFLAG_IMMUTABLE)
-		flags |= FS_IMMUTABLE_FL;
-	if (di_flags & XFS_DIFLAG_APPEND)
-		flags |= FS_APPEND_FL;
-	if (di_flags & XFS_DIFLAG_SYNC)
-		flags |= FS_SYNC_FL;
-	if (di_flags & XFS_DIFLAG_NOATIME)
-		flags |= FS_NOATIME_FL;
-	if (di_flags & XFS_DIFLAG_NODUMP)
-		flags |= FS_NODUMP_FL;
-	return flags;
-}
-
-STATIC int
-xfs_ioc_fsgetxattr(
-	xfs_inode_t		*ip,
-	int			attr,
-	void			__user *arg)
-{
-	struct fsxattr		fa;
-
-	memset(&fa, 0, sizeof(struct fsxattr));
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	fa.fsx_xflags = xfs_ip2xflags(ip);
-	fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-	fa.fsx_projid = xfs_get_projid(ip);
-
-	if (attr) {
-		if (ip->i_afp) {
-			if (ip->i_afp->if_flags & XFS_IFEXTENTS)
-				fa.fsx_nextents = ip->i_afp->if_bytes /
-							sizeof(xfs_bmbt_rec_t);
-			else
-				fa.fsx_nextents = ip->i_d.di_anextents;
-		} else
-			fa.fsx_nextents = 0;
-	} else {
-		if (ip->i_df.if_flags & XFS_IFEXTENTS)
-			fa.fsx_nextents = ip->i_df.if_bytes /
-						sizeof(xfs_bmbt_rec_t);
-		else
-			fa.fsx_nextents = ip->i_d.di_nextents;
-	}
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (copy_to_user(arg, &fa, sizeof(fa)))
-		return -EFAULT;
-	return 0;
-}
-
-STATIC void
-xfs_set_diflags(
-	struct xfs_inode	*ip,
-	unsigned int		xflags)
-{
-	unsigned int		di_flags;
-
-	/* can't set PREALLOC this way, just preserve it */
-	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-	if (xflags & XFS_XFLAG_IMMUTABLE)
-		di_flags |= XFS_DIFLAG_IMMUTABLE;
-	if (xflags & XFS_XFLAG_APPEND)
-		di_flags |= XFS_DIFLAG_APPEND;
-	if (xflags & XFS_XFLAG_SYNC)
-		di_flags |= XFS_DIFLAG_SYNC;
-	if (xflags & XFS_XFLAG_NOATIME)
-		di_flags |= XFS_DIFLAG_NOATIME;
-	if (xflags & XFS_XFLAG_NODUMP)
-		di_flags |= XFS_DIFLAG_NODUMP;
-	if (xflags & XFS_XFLAG_PROJINHERIT)
-		di_flags |= XFS_DIFLAG_PROJINHERIT;
-	if (xflags & XFS_XFLAG_NODEFRAG)
-		di_flags |= XFS_DIFLAG_NODEFRAG;
-	if (xflags & XFS_XFLAG_FILESTREAM)
-		di_flags |= XFS_DIFLAG_FILESTREAM;
-	if (S_ISDIR(ip->i_d.di_mode)) {
-		if (xflags & XFS_XFLAG_RTINHERIT)
-			di_flags |= XFS_DIFLAG_RTINHERIT;
-		if (xflags & XFS_XFLAG_NOSYMLINKS)
-			di_flags |= XFS_DIFLAG_NOSYMLINKS;
-		if (xflags & XFS_XFLAG_EXTSZINHERIT)
-			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-	} else if (S_ISREG(ip->i_d.di_mode)) {
-		if (xflags & XFS_XFLAG_REALTIME)
-			di_flags |= XFS_DIFLAG_REALTIME;
-		if (xflags & XFS_XFLAG_EXTSIZE)
-			di_flags |= XFS_DIFLAG_EXTSIZE;
-	}
-
-	ip->i_d.di_flags = di_flags;
-}
-
-STATIC void
-xfs_diflags_to_linux(
-	struct xfs_inode	*ip)
-{
-	struct inode		*inode = VFS_I(ip);
-	unsigned int		xflags = xfs_ip2xflags(ip);
-
-	if (xflags & XFS_XFLAG_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-	if (xflags & XFS_XFLAG_APPEND)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
-	if (xflags & XFS_XFLAG_SYNC)
-		inode->i_flags |= S_SYNC;
-	else
-		inode->i_flags &= ~S_SYNC;
-	if (xflags & XFS_XFLAG_NOATIME)
-		inode->i_flags |= S_NOATIME;
-	else
-		inode->i_flags &= ~S_NOATIME;
-}
-
-#define FSX_PROJID	1
-#define FSX_EXTSIZE	2
-#define FSX_XFLAGS	4
-#define FSX_NONBLOCK	8
-
-STATIC int
-xfs_ioctl_setattr(
-	xfs_inode_t		*ip,
-	struct fsxattr		*fa,
-	int			mask)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	unsigned int		lock_flags = 0;
-	struct xfs_dquot	*udqp = NULL;
-	struct xfs_dquot	*gdqp = NULL;
-	struct xfs_dquot	*olddquot = NULL;
-	int			code;
-
-	trace_xfs_ioctl_setattr(ip);
-
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return XFS_ERROR(EROFS);
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	/*
-	 * Disallow 32bit project ids when projid32bit feature is not enabled.
-	 */
-	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
-			!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
-		return XFS_ERROR(EINVAL);
-
-	/*
-	 * If disk quotas is on, we make sure that the dquots do exist on disk,
-	 * before we start any other transactions. Trying to do this later
-	 * is messy. We don't care to take a readlock to look at the ids
-	 * in inode here, because we can't hold it across the trans_reserve.
-	 * If the IDs do change before we take the ilock, we're covered
-	 * because the i_*dquot fields will get updated anyway.
-	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
-					 ip->i_d.di_gid, fa->fsx_projid,
-					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
-		if (code)
-			return code;
-	}
-
-	/*
-	 * For the other attributes, we acquire the inode lock and
-	 * first do an error checking pass.
-	 */
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-	if (code)
-		goto error_return;
-
-	lock_flags = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lock_flags);
-
-	/*
-	 * CAP_FOWNER overrides the following restrictions:
-	 *
-	 * The user ID of the calling process must be equal
-	 * to the file owner ID, except in cases where the
-	 * CAP_FSETID capability is applicable.
-	 */
-	if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
-		code = XFS_ERROR(EPERM);
-		goto error_return;
-	}
-
-	/*
-	 * Do a quota reservation only if projid is actually going to change.
-	 */
-	if (mask & FSX_PROJID) {
-		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    XFS_IS_PQUOTA_ON(mp) &&
-		    xfs_get_projid(ip) != fa->fsx_projid) {
-			ASSERT(tp);
-			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-						capable(CAP_FOWNER) ?
-						XFS_QMOPT_FORCE_RES : 0);
-			if (code)	/* out of quota */
-				goto error_return;
-		}
-	}
-
-	if (mask & FSX_EXTSIZE) {
-		/*
-		 * Can't change extent size if any extents are allocated.
-		 */
-		if (ip->i_d.di_nextents &&
-		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-		     fa->fsx_extsize)) {
-			code = XFS_ERROR(EINVAL);	/* EFBIG? */
-			goto error_return;
-		}
-
-		/*
-		 * Extent size must be a multiple of the appropriate block
-		 * size, if set at all. It must also be smaller than the
-		 * maximum extent size supported by the filesystem.
-		 *
-		 * Also, for non-realtime files, limit the extent size hint to
-		 * half the size of the AGs in the filesystem so alignment
-		 * doesn't result in extents larger than an AG.
-		 */
-		if (fa->fsx_extsize != 0) {
-			xfs_extlen_t    size;
-			xfs_fsblock_t   extsize_fsb;
-
-			extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
-			if (extsize_fsb > MAXEXTLEN) {
-				code = XFS_ERROR(EINVAL);
-				goto error_return;
-			}
-
-			if (XFS_IS_REALTIME_INODE(ip) ||
-			    ((mask & FSX_XFLAGS) &&
-			    (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
-				size = mp->m_sb.sb_rextsize <<
-				       mp->m_sb.sb_blocklog;
-			} else {
-				size = mp->m_sb.sb_blocksize;
-				if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
-					code = XFS_ERROR(EINVAL);
-					goto error_return;
-				}
-			}
-
-			if (fa->fsx_extsize % size) {
-				code = XFS_ERROR(EINVAL);
-				goto error_return;
-			}
-		}
-	}
-
-
-	if (mask & FSX_XFLAGS) {
-		/*
-		 * Can't change realtime flag if any extents are allocated.
-		 */
-		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-		    (XFS_IS_REALTIME_INODE(ip)) !=
-		    (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-			code = XFS_ERROR(EINVAL);	/* EFBIG? */
-			goto error_return;
-		}
-
-		/*
-		 * If realtime flag is set then must have realtime data.
-		 */
-		if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-			if ((mp->m_sb.sb_rblocks == 0) ||
-			    (mp->m_sb.sb_rextsize == 0) ||
-			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-				code = XFS_ERROR(EINVAL);
-				goto error_return;
-			}
-		}
-
-		/*
-		 * Can't modify an immutable/append-only file unless
-		 * we have appropriate permission.
-		 */
-		if ((ip->i_d.di_flags &
-				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-		     (fa->fsx_xflags &
-				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-		    !capable(CAP_LINUX_IMMUTABLE)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-	}
-
-	xfs_trans_ijoin(tp, ip);
-
-	/*
-	 * Change file ownership.  Must be the owner or privileged.
-	 */
-	if (mask & FSX_PROJID) {
-		/*
-		 * CAP_FSETID overrides the following restrictions:
-		 *
-		 * The set-user-ID and set-group-ID bits of a file will be
-		 * cleared upon successful return from chown()
-		 */
-		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !capable(CAP_FSETID))
-			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-		/*
-		 * Change the ownerships and register quota modifications
-		 * in the transaction.
-		 */
-		if (xfs_get_projid(ip) != fa->fsx_projid) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
-				olddquot = xfs_qm_vop_chown(tp, ip,
-							&ip->i_gdquot, gdqp);
-			}
-			xfs_set_projid(ip, fa->fsx_projid);
-
-			/*
-			 * We may have to rev the inode as well as
-			 * the superblock version number since projids didn't
-			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-			 */
-			if (ip->i_d.di_version == 1)
-				xfs_bump_ino_vers2(tp, ip);
-		}
-
-	}
-
-	if (mask & FSX_EXTSIZE)
-		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
-	if (mask & FSX_XFLAGS) {
-		xfs_set_diflags(ip, fa->fsx_xflags);
-		xfs_diflags_to_linux(ip);
-	}
-
-	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	XFS_STATS_INC(xs_ig_attrchg);
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * transaction goes to disk before returning to the user.
-	 * This is slightly sub-optimal in that truncates require
-	 * two sync transactions instead of one for wsync filesystems.
-	 * One for the truncate and one for the timestamps since we
-	 * don't want to change the timestamps unless we're sure the
-	 * truncate worked.  Truncates are less than 1% of the laddis
-	 * mix so this probably isn't worth the trouble to optimize.
-	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-	code = xfs_trans_commit(tp, 0);
-	xfs_iunlock(ip, lock_flags);
-
-	/*
-	 * Release any dquot(s) the inode had kept before chown.
-	 */
-	xfs_qm_dqrele(olddquot);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-
-	return code;
-
- error_return:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-	xfs_trans_cancel(tp, 0);
-	if (lock_flags)
-		xfs_iunlock(ip, lock_flags);
-	return code;
-}
-
-STATIC int
-xfs_ioc_fssetxattr(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	void			__user *arg)
-{
-	struct fsxattr		fa;
-	unsigned int		mask;
-
-	if (copy_from_user(&fa, arg, sizeof(fa)))
-		return -EFAULT;
-
-	mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
-	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		mask |= FSX_NONBLOCK;
-
-	return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_ioc_getxflags(
-	xfs_inode_t		*ip,
-	void			__user *arg)
-{
-	unsigned int		flags;
-
-	flags = xfs_di2lxflags(ip->i_d.di_flags);
-	if (copy_to_user(arg, &flags, sizeof(flags)))
-		return -EFAULT;
-	return 0;
-}
-
-STATIC int
-xfs_ioc_setxflags(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	void			__user *arg)
-{
-	struct fsxattr		fa;
-	unsigned int		flags;
-	unsigned int		mask;
-
-	if (copy_from_user(&flags, arg, sizeof(flags)))
-		return -EFAULT;
-
-	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-		      FS_NOATIME_FL | FS_NODUMP_FL | \
-		      FS_SYNC_FL))
-		return -EOPNOTSUPP;
-
-	mask = FSX_XFLAGS;
-	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-		mask |= FSX_NONBLOCK;
-	fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
-	return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
-{
-	struct getbmap __user	*base = *ap;
-
-	/* copy only getbmap portion (not getbmapx) */
-	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-		return XFS_ERROR(EFAULT);
-
-	*ap += sizeof(struct getbmap);
-	return 0;
-}
-
-STATIC int
-xfs_ioc_getbmap(
-	struct xfs_inode	*ip,
-	int			ioflags,
-	unsigned int		cmd,
-	void			__user *arg)
-{
-	struct getbmapx		bmx;
-	int			error;
-
-	if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
-		return -XFS_ERROR(EFAULT);
-
-	if (bmx.bmv_count < 2)
-		return -XFS_ERROR(EINVAL);
-
-	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-	if (ioflags & IO_INVIS)
-		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-	error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-			    (struct getbmap *)arg+1);
-	if (error)
-		return -error;
-
-	/* copy back header - only size of getbmap */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
-{
-	struct getbmapx __user	*base = *ap;
-
-	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-		return XFS_ERROR(EFAULT);
-
-	*ap += sizeof(struct getbmapx);
-	return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-	struct xfs_inode	*ip,
-	void			__user *arg)
-{
-	struct getbmapx		bmx;
-	int			error;
-
-	if (copy_from_user(&bmx, arg, sizeof(bmx)))
-		return -XFS_ERROR(EFAULT);
-
-	if (bmx.bmv_count < 2)
-		return -XFS_ERROR(EINVAL);
-
-	if (bmx.bmv_iflags & (~BMV_IF_VALID))
-		return -XFS_ERROR(EINVAL);
-
-	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-			    (struct getbmapx *)arg+1);
-	if (error)
-		return -error;
-
-	/* copy back header */
-	if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-		return -XFS_ERROR(EFAULT);
-
-	return 0;
-}
-
-/*
- * Note: some of the ioctl's return positive numbers as a
- * byte count indicating success, such as readlink_by_handle.
- * So we don't "sign flip" like most other routines.  This means
- * true errors need to be returned as a negative value.
- */
-long
-xfs_file_ioctl(
-	struct file		*filp,
-	unsigned int		cmd,
-	unsigned long		p)
-{
-	struct inode		*inode = filp->f_path.dentry->d_inode;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	void			__user *arg = (void __user *)p;
-	int			ioflags = 0;
-	int			error;
-
-	if (filp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	trace_xfs_file_ioctl(ip);
-
-	switch (cmd) {
-	case FITRIM:
-		return xfs_ioc_trim(mp, arg);
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP64:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP64:
-	case XFS_IOC_ZERO_RANGE: {
-		xfs_flock64_t		bf;
-
-		if (copy_from_user(&bf, arg, sizeof(bf)))
-			return -XFS_ERROR(EFAULT);
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-	}
-	case XFS_IOC_DIOINFO: {
-		struct dioattr	da;
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-			mp->m_rtdev_targp : mp->m_ddev_targp;
-
-		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-		if (copy_to_user(arg, &da, sizeof(da)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_FSBULKSTAT_SINGLE:
-	case XFS_IOC_FSBULKSTAT:
-	case XFS_IOC_FSINUMBERS:
-		return xfs_ioc_bulkstat(mp, cmd, arg);
-
-	case XFS_IOC_FSGEOMETRY_V1:
-		return xfs_ioc_fsgeometry_v1(mp, arg);
-
-	case XFS_IOC_FSGEOMETRY:
-		return xfs_ioc_fsgeometry(mp, arg);
-
-	case XFS_IOC_GETVERSION:
-		return put_user(inode->i_generation, (int __user *)arg);
-
-	case XFS_IOC_FSGETXATTR:
-		return xfs_ioc_fsgetxattr(ip, 0, arg);
-	case XFS_IOC_FSGETXATTRA:
-		return xfs_ioc_fsgetxattr(ip, 1, arg);
-	case XFS_IOC_FSSETXATTR:
-		return xfs_ioc_fssetxattr(ip, filp, arg);
-	case XFS_IOC_GETXFLAGS:
-		return xfs_ioc_getxflags(ip, arg);
-	case XFS_IOC_SETXFLAGS:
-		return xfs_ioc_setxflags(ip, filp, arg);
-
-	case XFS_IOC_FSSETDM: {
-		struct fsdmidata	dmi;
-
-		if (copy_from_user(&dmi, arg, sizeof(dmi)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-				dmi.fsd_dmstate);
-		return -error;
-	}
-
-	case XFS_IOC_GETBMAP:
-	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-	case XFS_IOC_GETBMAPX:
-		return xfs_ioc_getbmapx(ip, arg);
-
-	case XFS_IOC_FD_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_FSHANDLE: {
-		xfs_fsop_handlereq_t	hreq;
-
-		if (copy_from_user(&hreq, arg, sizeof(hreq)))
-			return -XFS_ERROR(EFAULT);
-		return xfs_find_handle(cmd, &hreq);
-	}
-	case XFS_IOC_OPEN_BY_HANDLE: {
-		xfs_fsop_handlereq_t	hreq;
-
-		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-			return -XFS_ERROR(EFAULT);
-		return xfs_open_by_handle(filp, &hreq);
-	}
-	case XFS_IOC_FSSETDM_BY_HANDLE:
-		return xfs_fssetdm_by_handle(filp, arg);
-
-	case XFS_IOC_READLINK_BY_HANDLE: {
-		xfs_fsop_handlereq_t	hreq;
-
-		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-			return -XFS_ERROR(EFAULT);
-		return xfs_readlink_by_handle(filp, &hreq);
-	}
-	case XFS_IOC_ATTRLIST_BY_HANDLE:
-		return xfs_attrlist_by_handle(filp, arg);
-
-	case XFS_IOC_ATTRMULTI_BY_HANDLE:
-		return xfs_attrmulti_by_handle(filp, arg);
-
-	case XFS_IOC_SWAPEXT: {
-		struct xfs_swapext	sxp;
-
-		if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
-			return -XFS_ERROR(EFAULT);
-		error = xfs_swapext(&sxp);
-		return -error;
-	}
-
-	case XFS_IOC_FSCOUNTS: {
-		xfs_fsop_counts_t out;
-
-		error = xfs_fs_counts(mp, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_SET_RESBLKS: {
-		xfs_fsop_resblks_t inout;
-		__uint64_t	   in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (mp->m_flags & XFS_MOUNT_RDONLY)
-			return -XFS_ERROR(EROFS);
-
-		if (copy_from_user(&inout, arg, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-
-		/* input parameter is passed in resblks field of structure */
-		in = inout.resblks;
-		error = xfs_reserve_blocks(mp, &in, &inout);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &inout, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_GET_RESBLKS: {
-		xfs_fsop_resblks_t out;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_reserve_blocks(mp, NULL, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-
-		return 0;
-	}
-
-	case XFS_IOC_FSGROWFSDATA: {
-		xfs_growfs_data_t in;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_data(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSLOG: {
-		xfs_growfs_log_t in;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_log(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSRT: {
-		xfs_growfs_rt_t in;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_rt(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_GOINGDOWN: {
-		__uint32_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (get_user(in, (__uint32_t __user *)arg))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_fs_goingdown(mp, in);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_INJECTION: {
-		xfs_error_injection_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_errortag_add(in.errtag, mp);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_CLEARALL:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_errortag_clearall(mp, 1);
-		return -error;
-
-	default:
-		return -ENOTTY;
-	}
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
deleted file mode 100644
index d56173b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL_H__
-#define __XFS_IOCTL_H__
-
-extern int
-xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
-	struct file		*filp,
-	int			ioflags,
-	unsigned int		cmd,
-	xfs_flock64_t		*bf);
-
-extern int
-xfs_find_handle(
-	unsigned int		cmd,
-	xfs_fsop_handlereq_t	*hreq);
-
-extern int
-xfs_open_by_handle(
-	struct file		*parfilp,
-	xfs_fsop_handlereq_t	*hreq);
-
-extern int
-xfs_readlink_by_handle(
-	struct file		*parfilp,
-	xfs_fsop_handlereq_t	*hreq);
-
-extern int
-xfs_attrmulti_attr_get(
-	struct inode		*inode,
-	unsigned char		*name,
-	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags);
-
-extern int
-xfs_attrmulti_attr_set(
-	struct inode		*inode,
-	unsigned char		*name,
-	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags);
-
-extern int
-xfs_attrmulti_attr_remove(
-	struct inode		*inode,
-	unsigned char		*name,
-	__uint32_t		flags);
-
-extern struct dentry *
-xfs_handle_to_dentry(
-	struct file		*parfilp,
-	void __user		*uhandle,
-	u32			hlen);
-
-extern long
-xfs_file_ioctl(
-	struct file		*filp,
-	unsigned int		cmd,
-	unsigned long		p);
-
-extern long
-xfs_file_compat_ioctl(
-	struct file		*file,
-	unsigned int		cmd,
-	unsigned long		arg);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
deleted file mode 100644
index 54e623b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
-#include <linux/mount.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_vnode.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
-#include "xfs_fsops.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_attr.h"
-#include "xfs_ioctl.h"
-#include "xfs_ioctl32.h"
-#include "xfs_trace.h"
-
-#define  _NATIVE_IOC(cmd, type) \
-	  _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#ifdef BROKEN_X86_ALIGNMENT
-STATIC int
-xfs_compat_flock64_copyin(
-	xfs_flock64_t		*bf,
-	compat_xfs_flock64_t	__user *arg32)
-{
-	if (get_user(bf->l_type,	&arg32->l_type) ||
-	    get_user(bf->l_whence,	&arg32->l_whence) ||
-	    get_user(bf->l_start,	&arg32->l_start) ||
-	    get_user(bf->l_len,		&arg32->l_len) ||
-	    get_user(bf->l_sysid,	&arg32->l_sysid) ||
-	    get_user(bf->l_pid,		&arg32->l_pid) ||
-	    copy_from_user(bf->l_pad,	&arg32->l_pad,	4*sizeof(u32)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_compat_ioc_fsgeometry_v1(
-	struct xfs_mount	  *mp,
-	compat_xfs_fsop_geom_v1_t __user *arg32)
-{
-	xfs_fsop_geom_t		  fsgeo;
-	int			  error;
-
-	error = xfs_fs_geometry(mp, &fsgeo, 3);
-	if (error)
-		return -error;
-	/* The 32-bit variant simply has some padding at the end */
-	if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_compat_growfs_data_copyin(
-	struct xfs_growfs_data	 *in,
-	compat_xfs_growfs_data_t __user *arg32)
-{
-	if (get_user(in->newblocks, &arg32->newblocks) ||
-	    get_user(in->imaxpct,   &arg32->imaxpct))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_compat_growfs_rt_copyin(
-	struct xfs_growfs_rt	 *in,
-	compat_xfs_growfs_rt_t	__user *arg32)
-{
-	if (get_user(in->newblocks, &arg32->newblocks) ||
-	    get_user(in->extsize,   &arg32->extsize))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-STATIC int
-xfs_inumbers_fmt_compat(
-	void			__user *ubuffer,
-	const xfs_inogrp_t	*buffer,
-	long			count,
-	long			*written)
-{
-	compat_xfs_inogrp_t	__user *p32 = ubuffer;
-	long			i;
-
-	for (i = 0; i < count; i++) {
-		if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-		    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-		    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-			return -XFS_ERROR(EFAULT);
-	}
-	*written = count * sizeof(*p32);
-	return 0;
-}
-
-#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#endif	/* BROKEN_X86_ALIGNMENT */
-
-STATIC int
-xfs_ioctl32_bstime_copyin(
-	xfs_bstime_t		*bstime,
-	compat_xfs_bstime_t	__user *bstime32)
-{
-	compat_time_t		sec32;	/* tv_sec differs on 64 vs. 32 */
-
-	if (get_user(sec32,		&bstime32->tv_sec)	||
-	    get_user(bstime->tv_nsec,	&bstime32->tv_nsec))
-		return -XFS_ERROR(EFAULT);
-	bstime->tv_sec = sec32;
-	return 0;
-}
-
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
-STATIC int
-xfs_ioctl32_bstat_copyin(
-	xfs_bstat_t		*bstat,
-	compat_xfs_bstat_t	__user *bstat32)
-{
-	if (get_user(bstat->bs_ino,	&bstat32->bs_ino)	||
-	    get_user(bstat->bs_mode,	&bstat32->bs_mode)	||
-	    get_user(bstat->bs_nlink,	&bstat32->bs_nlink)	||
-	    get_user(bstat->bs_uid,	&bstat32->bs_uid)	||
-	    get_user(bstat->bs_gid,	&bstat32->bs_gid)	||
-	    get_user(bstat->bs_rdev,	&bstat32->bs_rdev)	||
-	    get_user(bstat->bs_blksize,	&bstat32->bs_blksize)	||
-	    get_user(bstat->bs_size,	&bstat32->bs_size)	||
-	    xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
-	    xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
-	    xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
-	    get_user(bstat->bs_blocks,	&bstat32->bs_size)	||
-	    get_user(bstat->bs_xflags,	&bstat32->bs_size)	||
-	    get_user(bstat->bs_extsize,	&bstat32->bs_extsize)	||
-	    get_user(bstat->bs_extents,	&bstat32->bs_extents)	||
-	    get_user(bstat->bs_gen,	&bstat32->bs_gen)	||
-	    get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
-	    get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
-	    get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask)	||
-	    get_user(bstat->bs_dmstate,	&bstat32->bs_dmstate)	||
-	    get_user(bstat->bs_aextents, &bstat32->bs_aextents))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-/* XFS_IOC_FSBULKSTAT and friends */
-
-STATIC int
-xfs_bstime_store_compat(
-	compat_xfs_bstime_t	__user *p32,
-	const xfs_bstime_t	*p)
-{
-	__s32			sec32;
-
-	sec32 = p->tv_sec;
-	if (put_user(sec32, &p32->tv_sec) ||
-	    put_user(p->tv_nsec, &p32->tv_nsec))
-		return -XFS_ERROR(EFAULT);
-	return 0;
-}
-
-/* Return 0 on success or positive error (to xfs_bulkstat()) */
-STATIC int
-xfs_bulkstat_one_fmt_compat(
-	void			__user *ubuffer,
-	int			ubsize,
-	int			*ubused,
-	const xfs_bstat_t	*buffer)
-{
-	compat_xfs_bstat_t	__user *p32 = ubuffer;
-
-	if (ubsize < sizeof(*p32))
-		return XFS_ERROR(ENOMEM);
-
-	if (put_user(buffer->bs_ino,	  &p32->bs_ino)		||
-	    put_user(buffer->bs_mode,	  &p32->bs_mode)	||
-	    put_user(buffer->bs_nlink,	  &p32->bs_nlink)	||
-	    put_user(buffer->bs_uid,	  &p32->bs_uid)		||
-	    put_user(buffer->bs_gid,	  &p32->bs_gid)		||
-	    put_user(buffer->bs_rdev,	  &p32->bs_rdev)	||
-	    put_user(buffer->bs_blksize,  &p32->bs_blksize)	||
-	    put_user(buffer->bs_size,	  &p32->bs_size)	||
-	    xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
-	    xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
-	    xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
-	    put_user(buffer->bs_blocks,	  &p32->bs_blocks)	||
-	    put_user(buffer->bs_xflags,	  &p32->bs_xflags)	||
-	    put_user(buffer->bs_extsize,  &p32->bs_extsize)	||
-	    put_user(buffer->bs_extents,  &p32->bs_extents)	||
-	    put_user(buffer->bs_gen,	  &p32->bs_gen)		||
-	    put_user(buffer->bs_projid,	  &p32->bs_projid)	||
-	    put_user(buffer->bs_projid_hi,	&p32->bs_projid_hi)	||
-	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)	||
-	    put_user(buffer->bs_dmstate,  &p32->bs_dmstate)	||
-	    put_user(buffer->bs_aextents, &p32->bs_aextents))
-		return XFS_ERROR(EFAULT);
-	if (ubused)
-		*ubused = sizeof(*p32);
-	return 0;
-}
-
-STATIC int
-xfs_bulkstat_one_compat(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	void		__user *buffer,	/* buffer to place output in */
-	int		ubsize,		/* size of buffer */
-	int		*ubused,	/* bytes used by me */
-	int		*stat)		/* BULKSTAT_RV_... */
-{
-	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-				    xfs_bulkstat_one_fmt_compat,
-				    ubused, stat);
-}
-
-/* copied from xfs_ioctl.c */
-STATIC int
-xfs_compat_ioc_bulkstat(
-	xfs_mount_t		  *mp,
-	unsigned int		  cmd,
-	compat_xfs_fsop_bulkreq_t __user *p32)
-{
-	u32			addr;
-	xfs_fsop_bulkreq_t	bulkreq;
-	int			count;	/* # of records returned */
-	xfs_ino_t		inlast;	/* last inode number */
-	int			done;
-	int			error;
-
-	/* done = 1 if there are more stats to get and if bulkstat */
-	/* should be called again (unused here, but used in dmapi) */
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -XFS_ERROR(EIO);
-
-	if (get_user(addr, &p32->lastip))
-		return -XFS_ERROR(EFAULT);
-	bulkreq.lastip = compat_ptr(addr);
-	if (get_user(bulkreq.icount, &p32->icount) ||
-	    get_user(addr, &p32->ubuffer))
-		return -XFS_ERROR(EFAULT);
-	bulkreq.ubuffer = compat_ptr(addr);
-	if (get_user(addr, &p32->ocount))
-		return -XFS_ERROR(EFAULT);
-	bulkreq.ocount = compat_ptr(addr);
-
-	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-		return -XFS_ERROR(EFAULT);
-
-	if ((count = bulkreq.icount) <= 0)
-		return -XFS_ERROR(EINVAL);
-
-	if (bulkreq.ubuffer == NULL)
-		return -XFS_ERROR(EINVAL);
-
-	if (cmd == XFS_IOC_FSINUMBERS_32) {
-		error = xfs_inumbers(mp, &inlast, &count,
-				bulkreq.ubuffer, xfs_inumbers_fmt_compat);
-	} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-		int res;
-
-		error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
-				sizeof(compat_xfs_bstat_t), 0, &res);
-	} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-		error = xfs_bulkstat(mp, &inlast, &count,
-			xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
-			bulkreq.ubuffer, &done);
-	} else
-		error = XFS_ERROR(EINVAL);
-	if (error)
-		return -error;
-
-	if (bulkreq.ocount != NULL) {
-		if (copy_to_user(bulkreq.lastip, &inlast,
-						sizeof(xfs_ino_t)))
-			return -XFS_ERROR(EFAULT);
-
-		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-			return -XFS_ERROR(EFAULT);
-	}
-
-	return 0;
-}
-
-STATIC int
-xfs_compat_handlereq_copyin(
-	xfs_fsop_handlereq_t		*hreq,
-	compat_xfs_fsop_handlereq_t	__user *arg32)
-{
-	compat_xfs_fsop_handlereq_t	hreq32;
-
-	if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	hreq->fd = hreq32.fd;
-	hreq->path = compat_ptr(hreq32.path);
-	hreq->oflags = hreq32.oflags;
-	hreq->ihandle = compat_ptr(hreq32.ihandle);
-	hreq->ihandlen = hreq32.ihandlen;
-	hreq->ohandle = compat_ptr(hreq32.ohandle);
-	hreq->ohandlen = compat_ptr(hreq32.ohandlen);
-
-	return 0;
-}
-
-STATIC struct dentry *
-xfs_compat_handlereq_to_dentry(
-	struct file		*parfilp,
-	compat_xfs_fsop_handlereq_t *hreq)
-{
-	return xfs_handle_to_dentry(parfilp,
-			compat_ptr(hreq->ihandle), hreq->ihandlen);
-}
-
-STATIC int
-xfs_compat_attrlist_by_handle(
-	struct file		*parfilp,
-	void			__user *arg)
-{
-	int			error;
-	attrlist_cursor_kern_t	*cursor;
-	compat_xfs_fsop_attrlist_handlereq_t al_hreq;
-	struct dentry		*dentry;
-	char			*kbuf;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&al_hreq, arg,
-			   sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-	if (al_hreq.buflen > XATTR_LIST_MAX)
-		return -XFS_ERROR(EINVAL);
-
-	/*
-	 * Reject flags, only allow namespaces.
-	 */
-	if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-		return -XFS_ERROR(EINVAL);
-
-	dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	error = -ENOMEM;
-	kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
-	if (!kbuf)
-		goto out_dput;
-
-	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-					al_hreq.flags, cursor);
-	if (error)
-		goto out_kfree;
-
-	if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
-		error = -EFAULT;
-
- out_kfree:
-	kfree(kbuf);
- out_dput:
-	dput(dentry);
-	return error;
-}
-
-STATIC int
-xfs_compat_attrmulti_by_handle(
-	struct file				*parfilp,
-	void					__user *arg)
-{
-	int					error;
-	compat_xfs_attr_multiop_t		*ops;
-	compat_xfs_fsop_attrmulti_handlereq_t	am_hreq;
-	struct dentry				*dentry;
-	unsigned int				i, size;
-	unsigned char				*attr_name;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&am_hreq, arg,
-			   sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	/* overflow check */
-	if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
-		return -E2BIG;
-
-	dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	error = E2BIG;
-	size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
-	if (!size || size > 16 * PAGE_SIZE)
-		goto out_dput;
-
-	ops = memdup_user(compat_ptr(am_hreq.ops), size);
-	if (IS_ERR(ops)) {
-		error = PTR_ERR(ops);
-		goto out_dput;
-	}
-
-	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-	if (!attr_name)
-		goto out_kfree_ops;
-
-	error = 0;
-	for (i = 0; i < am_hreq.opcount; i++) {
-		ops[i].am_error = strncpy_from_user((char *)attr_name,
-				compat_ptr(ops[i].am_attrname),
-				MAXNAMELEN);
-		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-			error = -ERANGE;
-		if (ops[i].am_error < 0)
-			break;
-
-		switch (ops[i].am_opcode) {
-		case ATTR_OP_GET:
-			ops[i].am_error = xfs_attrmulti_attr_get(
-					dentry->d_inode, attr_name,
-					compat_ptr(ops[i].am_attrvalue),
-					&ops[i].am_length, ops[i].am_flags);
-			break;
-		case ATTR_OP_SET:
-			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-			if (ops[i].am_error)
-				break;
-			ops[i].am_error = xfs_attrmulti_attr_set(
-					dentry->d_inode, attr_name,
-					compat_ptr(ops[i].am_attrvalue),
-					ops[i].am_length, ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
-			break;
-		case ATTR_OP_REMOVE:
-			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-			if (ops[i].am_error)
-				break;
-			ops[i].am_error = xfs_attrmulti_attr_remove(
-					dentry->d_inode, attr_name,
-					ops[i].am_flags);
-			mnt_drop_write(parfilp->f_path.mnt);
-			break;
-		default:
-			ops[i].am_error = EINVAL;
-		}
-	}
-
-	if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
-		error = XFS_ERROR(EFAULT);
-
-	kfree(attr_name);
- out_kfree_ops:
-	kfree(ops);
- out_dput:
-	dput(dentry);
-	return -error;
-}
-
-STATIC int
-xfs_compat_fssetdm_by_handle(
-	struct file		*parfilp,
-	void			__user *arg)
-{
-	int			error;
-	struct fsdmidata	fsd;
-	compat_xfs_fsop_setdm_handlereq_t dmhreq;
-	struct dentry		*dentry;
-
-	if (!capable(CAP_MKNOD))
-		return -XFS_ERROR(EPERM);
-	if (copy_from_user(&dmhreq, arg,
-			   sizeof(compat_xfs_fsop_setdm_handlereq_t)))
-		return -XFS_ERROR(EFAULT);
-
-	dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-
-	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-		error = -XFS_ERROR(EPERM);
-		goto out;
-	}
-
-	if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
-		error = -XFS_ERROR(EFAULT);
-		goto out;
-	}
-
-	error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-				 fsd.fsd_dmstate);
-
-out:
-	dput(dentry);
-	return error;
-}
-
-long
-xfs_file_compat_ioctl(
-	struct file		*filp,
-	unsigned		cmd,
-	unsigned long		p)
-{
-	struct inode		*inode = filp->f_path.dentry->d_inode;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	void			__user *arg = (void __user *)p;
-	int			ioflags = 0;
-	int			error;
-
-	if (filp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	trace_xfs_file_compat_ioctl(ip);
-
-	switch (cmd) {
-	/* No size or alignment issues on any arch */
-	case XFS_IOC_DIOINFO:
-	case XFS_IOC_FSGEOMETRY:
-	case XFS_IOC_FSGETXATTR:
-	case XFS_IOC_FSSETXATTR:
-	case XFS_IOC_FSGETXATTRA:
-	case XFS_IOC_FSSETDM:
-	case XFS_IOC_GETBMAP:
-	case XFS_IOC_GETBMAPA:
-	case XFS_IOC_GETBMAPX:
-	case XFS_IOC_FSCOUNTS:
-	case XFS_IOC_SET_RESBLKS:
-	case XFS_IOC_GET_RESBLKS:
-	case XFS_IOC_FSGROWFSLOG:
-	case XFS_IOC_GOINGDOWN:
-	case XFS_IOC_ERROR_INJECTION:
-	case XFS_IOC_ERROR_CLEARALL:
-		return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
-	/* These are handled fine if no alignment issues */
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP64:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP64:
-	case XFS_IOC_FSGEOMETRY_V1:
-	case XFS_IOC_FSGROWFSDATA:
-	case XFS_IOC_FSGROWFSRT:
-	case XFS_IOC_ZERO_RANGE:
-		return xfs_file_ioctl(filp, cmd, p);
-#else
-	case XFS_IOC_ALLOCSP_32:
-	case XFS_IOC_FREESP_32:
-	case XFS_IOC_ALLOCSP64_32:
-	case XFS_IOC_FREESP64_32:
-	case XFS_IOC_RESVSP_32:
-	case XFS_IOC_UNRESVSP_32:
-	case XFS_IOC_RESVSP64_32:
-	case XFS_IOC_UNRESVSP64_32:
-	case XFS_IOC_ZERO_RANGE_32: {
-		struct xfs_flock64	bf;
-
-		if (xfs_compat_flock64_copyin(&bf, arg))
-			return -XFS_ERROR(EFAULT);
-		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-	}
-	case XFS_IOC_FSGEOMETRY_V1_32:
-		return xfs_compat_ioc_fsgeometry_v1(mp, arg);
-	case XFS_IOC_FSGROWFSDATA_32: {
-		struct xfs_growfs_data	in;
-
-		if (xfs_compat_growfs_data_copyin(&in, arg))
-			return -XFS_ERROR(EFAULT);
-		error = xfs_growfs_data(mp, &in);
-		return -error;
-	}
-	case XFS_IOC_FSGROWFSRT_32: {
-		struct xfs_growfs_rt	in;
-
-		if (xfs_compat_growfs_rt_copyin(&in, arg))
-			return -XFS_ERROR(EFAULT);
-		error = xfs_growfs_rt(mp, &in);
-		return -error;
-	}
-#endif
-	/* long changes size, but xfs only copiese out 32 bits */
-	case XFS_IOC_GETXFLAGS_32:
-	case XFS_IOC_SETXFLAGS_32:
-	case XFS_IOC_GETVERSION_32:
-		cmd = _NATIVE_IOC(cmd, long);
-		return xfs_file_ioctl(filp, cmd, p);
-	case XFS_IOC_SWAPEXT_32: {
-		struct xfs_swapext	  sxp;
-		struct compat_xfs_swapext __user *sxu = arg;
-
-		/* Bulk copy in up to the sx_stat field, then copy bstat */
-		if (copy_from_user(&sxp, sxu,
-				   offsetof(struct xfs_swapext, sx_stat)) ||
-		    xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
-			return -XFS_ERROR(EFAULT);
-		error = xfs_swapext(&sxp);
-		return -error;
-	}
-	case XFS_IOC_FSBULKSTAT_32:
-	case XFS_IOC_FSBULKSTAT_SINGLE_32:
-	case XFS_IOC_FSINUMBERS_32:
-		return xfs_compat_ioc_bulkstat(mp, cmd, arg);
-	case XFS_IOC_FD_TO_HANDLE_32:
-	case XFS_IOC_PATH_TO_HANDLE_32:
-	case XFS_IOC_PATH_TO_FSHANDLE_32: {
-		struct xfs_fsop_handlereq	hreq;
-
-		if (xfs_compat_handlereq_copyin(&hreq, arg))
-			return -XFS_ERROR(EFAULT);
-		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
-		return xfs_find_handle(cmd, &hreq);
-	}
-	case XFS_IOC_OPEN_BY_HANDLE_32: {
-		struct xfs_fsop_handlereq	hreq;
-
-		if (xfs_compat_handlereq_copyin(&hreq, arg))
-			return -XFS_ERROR(EFAULT);
-		return xfs_open_by_handle(filp, &hreq);
-	}
-	case XFS_IOC_READLINK_BY_HANDLE_32: {
-		struct xfs_fsop_handlereq	hreq;
-
-		if (xfs_compat_handlereq_copyin(&hreq, arg))
-			return -XFS_ERROR(EFAULT);
-		return xfs_readlink_by_handle(filp, &hreq);
-	}
-	case XFS_IOC_ATTRLIST_BY_HANDLE_32:
-		return xfs_compat_attrlist_by_handle(filp, arg);
-	case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
-		return xfs_compat_attrmulti_by_handle(filp, arg);
-	case XFS_IOC_FSSETDM_BY_HANDLE_32:
-		return xfs_compat_fssetdm_by_handle(filp, arg);
-	default:
-		return -XFS_ERROR(ENOIOCTLCMD);
-	}
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
deleted file mode 100644
index 80f4060..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL32_H__
-#define __XFS_IOCTL32_H__
-
-#include <linux/compat.h>
-
-/*
- * on 32-bit arches, ioctl argument structures may have different sizes
- * and/or alignment.  We define compat structures which match the
- * 32-bit sizes/alignments here, and their associated ioctl numbers.
- *
- * xfs_ioctl32.c contains routines to copy these structures in and out.
- */
-
-/* stock kernel-level ioctls we support */
-#define XFS_IOC_GETXFLAGS_32	FS_IOC32_GETFLAGS
-#define XFS_IOC_SETXFLAGS_32	FS_IOC32_SETFLAGS
-#define XFS_IOC_GETVERSION_32	FS_IOC32_GETVERSION
-
-/*
- * On intel, even if sizes match, alignment and/or padding may differ.
- */
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define __compat_packed __attribute__((packed))
-#else
-#define __compat_packed
-#endif
-
-typedef struct compat_xfs_bstime {
-	compat_time_t	tv_sec;		/* seconds		*/
-	__s32		tv_nsec;	/* and nanoseconds	*/
-} compat_xfs_bstime_t;
-
-typedef struct compat_xfs_bstat {
-	__u64		bs_ino;		/* inode number			*/
-	__u16		bs_mode;	/* type and mode		*/
-	__u16		bs_nlink;	/* number of links		*/
-	__u32		bs_uid;		/* user id			*/
-	__u32		bs_gid;		/* group id			*/
-	__u32		bs_rdev;	/* device value			*/
-	__s32		bs_blksize;	/* block size			*/
-	__s64		bs_size;	/* file size			*/
-	compat_xfs_bstime_t bs_atime;	/* access time			*/
-	compat_xfs_bstime_t bs_mtime;	/* modify time			*/
-	compat_xfs_bstime_t bs_ctime;	/* inode change time		*/
-	int64_t		bs_blocks;	/* number of blocks		*/
-	__u32		bs_xflags;	/* extended flags		*/
-	__s32		bs_extsize;	/* extent size			*/
-	__s32		bs_extents;	/* number of extents		*/
-	__u32		bs_gen;		/* generation count		*/
-	__u16		bs_projid_lo;	/* lower part of project id	*/
-#define	bs_projid	bs_projid_lo	/* (previously just bs_projid)	*/
-	__u16		bs_projid_hi;	/* high part of project id	*/
-	unsigned char	bs_pad[12];	/* pad space, unused		*/
-	__u32		bs_dmevmask;	/* DMIG event mask		*/
-	__u16		bs_dmstate;	/* DMIG state info		*/
-	__u16		bs_aextents;	/* attribute number of extents	*/
-} __compat_packed compat_xfs_bstat_t;
-
-typedef struct compat_xfs_fsop_bulkreq {
-	compat_uptr_t	lastip;		/* last inode # pointer		*/
-	__s32		icount;		/* count of entries in buffer	*/
-	compat_uptr_t	ubuffer;	/* user buffer for inode desc.	*/
-	compat_uptr_t	ocount;		/* output count pointer		*/
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
-	_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
-	_IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
-	_IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
-
-typedef struct compat_xfs_fsop_handlereq {
-	__u32		fd;		/* fd for FD_TO_HANDLE		*/
-	compat_uptr_t	path;		/* user pathname		*/
-	__u32		oflags;		/* open flags			*/
-	compat_uptr_t	ihandle;	/* user supplied handle		*/
-	__u32		ihandlen;	/* user supplied length		*/
-	compat_uptr_t	ohandle;	/* user buffer for handle	*/
-	compat_uptr_t	ohandlen;	/* user buffer length		*/
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
-	_IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
-	_IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
-	_IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
-	_IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
-	_IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
-	__int64_t		sx_version;	/* version */
-	__int64_t		sx_fdtarget;	/* fd of target file */
-	__int64_t		sx_fdtmp;	/* fd of tmp file */
-	xfs_off_t		sx_offset;	/* offset into file */
-	xfs_off_t		sx_length;	/* leng from offset */
-	char			sx_pad[16];	/* pad space, unused */
-	compat_xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
-
-#define XFS_IOC_SWAPEXT_32	_IOWR('X', 109, struct compat_xfs_swapext)
-
-typedef struct compat_xfs_fsop_attrlist_handlereq {
-	struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-	struct xfs_attrlist_cursor	pos; /* opaque cookie, list offset */
-	__u32				flags;	/* which namespace to use */
-	__u32				buflen;	/* length of buffer supplied */
-	compat_uptr_t			buffer;	/* returned names */
-} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
-
-/* Note: actually this is read/write */
-#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
-	_IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
-
-/* am_opcodes defined in xfs_fs.h */
-typedef struct compat_xfs_attr_multiop {
-	__u32		am_opcode;
-	__s32		am_error;
-	compat_uptr_t	am_attrname;
-	compat_uptr_t	am_attrvalue;
-	__u32		am_length;
-	__u32		am_flags;
-} compat_xfs_attr_multiop_t;
-
-typedef struct compat_xfs_fsop_attrmulti_handlereq {
-	struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-	__u32				opcount;/* count of following multiop */
-	/* ptr to compat_xfs_attr_multiop */
-	compat_uptr_t			ops; /* attr_multi data */
-} compat_xfs_fsop_attrmulti_handlereq_t;
-
-#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
-	_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-
-typedef struct compat_xfs_fsop_setdm_handlereq {
-	struct compat_xfs_fsop_handlereq hreq;	/* handle information   */
-	/* ptr to struct fsdmidata */
-	compat_uptr_t			data;	/* DMAPI data   */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
-	_IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
-#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
-	__s16		l_type;
-	__s16		l_whence;
-	__s64		l_start	__attribute__((packed));
-			/* len == 0 means until end of file */
-	__s64		l_len __attribute__((packed));
-	__s32		l_sysid;
-	__u32		l_pid;
-	__s32		l_pad[4];	/* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32	_IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32	_IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32	_IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32	_IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32	_IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32	_IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32	_IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32	_IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32	_IOW('X', 57, struct compat_xfs_flock64)
-
-typedef struct compat_xfs_fsop_geom_v1 {
-	__u32		blocksize;	/* filesystem (data) block size */
-	__u32		rtextsize;	/* realtime extent size		*/
-	__u32		agblocks;	/* fsblocks in an AG		*/
-	__u32		agcount;	/* number of allocation groups	*/
-	__u32		logblocks;	/* fsblocks in the log		*/
-	__u32		sectsize;	/* (data) sector size, bytes	*/
-	__u32		inodesize;	/* inode size in bytes		*/
-	__u32		imaxpct;	/* max allowed inode space(%)	*/
-	__u64		datablocks;	/* fsblocks in data subvolume	*/
-	__u64		rtblocks;	/* fsblocks in realtime subvol	*/
-	__u64		rtextents;	/* rt extents in realtime subvol*/
-	__u64		logstart;	/* starting fsblock of the log	*/
-	unsigned char	uuid[16];	/* unique id of the filesystem	*/
-	__u32		sunit;		/* stripe unit, fsblocks	*/
-	__u32		swidth;		/* stripe width, fsblocks	*/
-	__s32		version;	/* structure version		*/
-	__u32		flags;		/* superblock version flags	*/
-	__u32		logsectsize;	/* log sector size, bytes	*/
-	__u32		rtsectsize;	/* realtime sector size, bytes	*/
-	__u32		dirblocksize;	/* directory block size, bytes	*/
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32  \
-	_IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-
-typedef struct compat_xfs_inogrp {
-	__u64		xi_startino;	/* starting inode number	*/
-	__s32		xi_alloccount;	/* # bits set in allocmask	*/
-	__u64		xi_allocmask;	/* mask of allocated inodes	*/
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-/* These growfs input structures have padding on the end, so must translate */
-typedef struct compat_xfs_growfs_data {
-	__u64		newblocks;	/* new data subvol size, fsblocks */
-	__u32		imaxpct;	/* new inode space percentage limit */
-} __attribute__((packed)) compat_xfs_growfs_data_t;
-
-typedef struct compat_xfs_growfs_rt {
-	__u64		newblocks;	/* new realtime size, fsblocks */
-	__u32		extsize;	/* new realtime extent size, fsblocks */
-} __attribute__((packed)) compat_xfs_growfs_rt_t;
-
-#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
-#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
-
-#endif /* BROKEN_X86_ALIGNMENT */
-
-#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
deleted file mode 100644
index b9c172b..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ /dev/null
@@ -1,1210 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_acl.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/posix_acl.h>
-#include <linux/security.h>
-#include <linux/fiemap.h>
-#include <linux/slab.h>
-
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-	ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-	ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-	ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-	ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-	ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
- */
-void
-xfs_mark_inode_dirty_sync(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-		mark_inode_dirty_sync(inode);
-}
-
-void
-xfs_mark_inode_dirty(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-		mark_inode_dirty(inode);
-}
-
-/*
- * Hook in SELinux.  This is not quite correct yet, what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-xfs_init_security(
-	struct inode	*inode,
-	struct inode	*dir,
-	const struct qstr *qstr)
-{
-	struct xfs_inode *ip = XFS_I(inode);
-	size_t		length;
-	void		*value;
-	unsigned char	*name;
-	int		error;
-
-	error = security_inode_init_security(inode, dir, qstr, (char **)&name,
-					     &value, &length);
-	if (error) {
-		if (error == -EOPNOTSUPP)
-			return 0;
-		return -error;
-	}
-
-	error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-
-	kfree(name);
-	kfree(value);
-	return error;
-}
-
-static void
-xfs_dentry_to_name(
-	struct xfs_name	*namep,
-	struct dentry	*dentry)
-{
-	namep->name = dentry->d_name.name;
-	namep->len = dentry->d_name.len;
-}
-
-STATIC void
-xfs_cleanup_inode(
-	struct inode	*dir,
-	struct inode	*inode,
-	struct dentry	*dentry)
-{
-	struct xfs_name	teardown;
-
-	/* Oh, the horror.
-	 * If we can't add the ACL or we fail in
-	 * xfs_init_security we must back out.
-	 * ENOSPC can hit here, among other things.
-	 */
-	xfs_dentry_to_name(&teardown, dentry);
-
-	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-	iput(inode);
-}
-
-STATIC int
-xfs_vn_mknod(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	int		mode,
-	dev_t		rdev)
-{
-	struct inode	*inode;
-	struct xfs_inode *ip = NULL;
-	struct posix_acl *default_acl = NULL;
-	struct xfs_name	name;
-	int		error;
-
-	/*
-	 * Irix uses Missed'em'V split, but doesn't want to see
-	 * the upper 5 bits of (14bit) major.
-	 */
-	if (S_ISCHR(mode) || S_ISBLK(mode)) {
-		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
-			return -EINVAL;
-		rdev = sysv_encode_dev(rdev);
-	} else {
-		rdev = 0;
-	}
-
-	if (IS_POSIXACL(dir)) {
-		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
-		if (IS_ERR(default_acl))
-			return PTR_ERR(default_acl);
-
-		if (!default_acl)
-			mode &= ~current_umask();
-	}
-
-	xfs_dentry_to_name(&name, dentry);
-	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
-	if (unlikely(error))
-		goto out_free_acl;
-
-	inode = VFS_I(ip);
-
-	error = xfs_init_security(inode, dir, &dentry->d_name);
-	if (unlikely(error))
-		goto out_cleanup_inode;
-
-	if (default_acl) {
-		error = -xfs_inherit_acl(inode, default_acl);
-		default_acl = NULL;
-		if (unlikely(error))
-			goto out_cleanup_inode;
-	}
-
-
-	d_instantiate(dentry, inode);
-	return -error;
-
- out_cleanup_inode:
-	xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
-	posix_acl_release(default_acl);
-	return -error;
-}
-
-STATIC int
-xfs_vn_create(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	int		mode,
-	struct nameidata *nd)
-{
-	return xfs_vn_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-xfs_vn_mkdir(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	int		mode)
-{
-	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-xfs_vn_lookup(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	struct nameidata *nd)
-{
-	struct xfs_inode *cip;
-	struct xfs_name	name;
-	int		error;
-
-	if (dentry->d_name.len >= MAXNAMELEN)
-		return ERR_PTR(-ENAMETOOLONG);
-
-	xfs_dentry_to_name(&name, dentry);
-	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
-	if (unlikely(error)) {
-		if (unlikely(error != ENOENT))
-			return ERR_PTR(-error);
-		d_add(dentry, NULL);
-		return NULL;
-	}
-
-	return d_splice_alias(VFS_I(cip), dentry);
-}
-
-STATIC struct dentry *
-xfs_vn_ci_lookup(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	struct nameidata *nd)
-{
-	struct xfs_inode *ip;
-	struct xfs_name	xname;
-	struct xfs_name ci_name;
-	struct qstr	dname;
-	int		error;
-
-	if (dentry->d_name.len >= MAXNAMELEN)
-		return ERR_PTR(-ENAMETOOLONG);
-
-	xfs_dentry_to_name(&xname, dentry);
-	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
-	if (unlikely(error)) {
-		if (unlikely(error != ENOENT))
-			return ERR_PTR(-error);
-		/*
-		 * call d_add(dentry, NULL) here when d_drop_negative_children
-		 * is called in xfs_vn_mknod (ie. allow negative dentries
-		 * with CI filesystems).
-		 */
-		return NULL;
-	}
-
-	/* if exact match, just splice and exit */
-	if (!ci_name.name)
-		return d_splice_alias(VFS_I(ip), dentry);
-
-	/* else case-insensitive match... */
-	dname.name = ci_name.name;
-	dname.len = ci_name.len;
-	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
-	kmem_free(ci_name.name);
-	return dentry;
-}
-
-STATIC int
-xfs_vn_link(
-	struct dentry	*old_dentry,
-	struct inode	*dir,
-	struct dentry	*dentry)
-{
-	struct inode	*inode = old_dentry->d_inode;
-	struct xfs_name	name;
-	int		error;
-
-	xfs_dentry_to_name(&name, dentry);
-
-	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
-	if (unlikely(error))
-		return -error;
-
-	ihold(inode);
-	d_instantiate(dentry, inode);
-	return 0;
-}
-
-STATIC int
-xfs_vn_unlink(
-	struct inode	*dir,
-	struct dentry	*dentry)
-{
-	struct xfs_name	name;
-	int		error;
-
-	xfs_dentry_to_name(&name, dentry);
-
-	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
-	if (error)
-		return error;
-
-	/*
-	 * With unlink, the VFS makes the dentry "negative": no inode,
-	 * but still hashed. This is incompatible with case-insensitive
-	 * mode, so invalidate (unhash) the dentry in CI-mode.
-	 */
-	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-		d_invalidate(dentry);
-	return 0;
-}
-
-STATIC int
-xfs_vn_symlink(
-	struct inode	*dir,
-	struct dentry	*dentry,
-	const char	*symname)
-{
-	struct inode	*inode;
-	struct xfs_inode *cip = NULL;
-	struct xfs_name	name;
-	int		error;
-	mode_t		mode;
-
-	mode = S_IFLNK |
-		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
-	xfs_dentry_to_name(&name, dentry);
-
-	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
-	if (unlikely(error))
-		goto out;
-
-	inode = VFS_I(cip);
-
-	error = xfs_init_security(inode, dir, &dentry->d_name);
-	if (unlikely(error))
-		goto out_cleanup_inode;
-
-	d_instantiate(dentry, inode);
-	return 0;
-
- out_cleanup_inode:
-	xfs_cleanup_inode(dir, inode, dentry);
- out:
-	return -error;
-}
-
-STATIC int
-xfs_vn_rename(
-	struct inode	*odir,
-	struct dentry	*odentry,
-	struct inode	*ndir,
-	struct dentry	*ndentry)
-{
-	struct inode	*new_inode = ndentry->d_inode;
-	struct xfs_name	oname;
-	struct xfs_name	nname;
-
-	xfs_dentry_to_name(&oname, odentry);
-	xfs_dentry_to_name(&nname, ndentry);
-
-	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
-			   XFS_I(ndir), &nname, new_inode ?
-			   			XFS_I(new_inode) : NULL);
-}
-
-/*
- * careful here - this function can get called recursively, so
- * we need to be very careful about how much stack we use.
- * uio is kmalloced for this reason...
- */
-STATIC void *
-xfs_vn_follow_link(
-	struct dentry		*dentry,
-	struct nameidata	*nd)
-{
-	char			*link;
-	int			error = -ENOMEM;
-
-	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-	if (!link)
-		goto out_err;
-
-	error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-	if (unlikely(error))
-		goto out_kfree;
-
-	nd_set_link(nd, link);
-	return NULL;
-
- out_kfree:
-	kfree(link);
- out_err:
-	nd_set_link(nd, ERR_PTR(error));
-	return NULL;
-}
-
-STATIC void
-xfs_vn_put_link(
-	struct dentry	*dentry,
-	struct nameidata *nd,
-	void		*p)
-{
-	char		*s = nd_get_link(nd);
-
-	if (!IS_ERR(s))
-		kfree(s);
-}
-
-STATIC int
-xfs_vn_getattr(
-	struct vfsmount		*mnt,
-	struct dentry		*dentry,
-	struct kstat		*stat)
-{
-	struct inode		*inode = dentry->d_inode;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-
-	trace_xfs_getattr(ip);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	stat->size = XFS_ISIZE(ip);
-	stat->dev = inode->i_sb->s_dev;
-	stat->mode = ip->i_d.di_mode;
-	stat->nlink = ip->i_d.di_nlink;
-	stat->uid = ip->i_d.di_uid;
-	stat->gid = ip->i_d.di_gid;
-	stat->ino = ip->i_ino;
-	stat->atime = inode->i_atime;
-	stat->mtime = inode->i_mtime;
-	stat->ctime = inode->i_ctime;
-	stat->blocks =
-		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		stat->blksize = BLKDEV_IOSIZE;
-		stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-				   sysv_minor(ip->i_df.if_u2.if_rdev));
-		break;
-	default:
-		if (XFS_IS_REALTIME_INODE(ip)) {
-			/*
-			 * If the file blocks are being allocated from a
-			 * realtime volume, then return the inode's realtime
-			 * extent size or the realtime volume's extent size.
-			 */
-			stat->blksize =
-				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-		} else
-			stat->blksize = xfs_preferred_iosize(mp);
-		stat->rdev = 0;
-		break;
-	}
-
-	return 0;
-}
-
-int
-xfs_setattr_nonsize(
-	struct xfs_inode	*ip,
-	struct iattr		*iattr,
-	int			flags)
-{
-	xfs_mount_t		*mp = ip->i_mount;
-	struct inode		*inode = VFS_I(ip);
-	int			mask = iattr->ia_valid;
-	xfs_trans_t		*tp;
-	int			error;
-	uid_t			uid = 0, iuid = 0;
-	gid_t			gid = 0, igid = 0;
-	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
-	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;
-
-	trace_xfs_setattr(ip);
-
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return XFS_ERROR(EROFS);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = -inode_change_ok(inode, iattr);
-	if (error)
-		return XFS_ERROR(error);
-
-	ASSERT((mask & ATTR_SIZE) == 0);
-
-	/*
-	 * If disk quotas is on, we make sure that the dquots do exist on disk,
-	 * before we start any other transactions. Trying to do this later
-	 * is messy. We don't care to take a readlock to look at the ids
-	 * in inode here, because we can't hold it across the trans_reserve.
-	 * If the IDs do change before we take the ilock, we're covered
-	 * because the i_*dquot fields will get updated anyway.
-	 */
-	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-		uint	qflags = 0;
-
-		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-			uid = iattr->ia_uid;
-			qflags |= XFS_QMOPT_UQUOTA;
-		} else {
-			uid = ip->i_d.di_uid;
-		}
-		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-			gid = iattr->ia_gid;
-			qflags |= XFS_QMOPT_GQUOTA;
-		}  else {
-			gid = ip->i_d.di_gid;
-		}
-
-		/*
-		 * We take a reference when we initialize udqp and gdqp,
-		 * so it is important that we never blindly double trip on
-		 * the same variable. See xfs_create() for an example.
-		 */
-		ASSERT(udqp == NULL);
-		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-					 qflags, &udqp, &gdqp);
-		if (error)
-			return error;
-	}
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-	if (error)
-		goto out_dqrele;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Change file ownership.  Must be the owner or privileged.
-	 */
-	if (mask & (ATTR_UID|ATTR_GID)) {
-		/*
-		 * These IDs could have changed since we last looked at them.
-		 * But, we're assured that if the ownership did change
-		 * while we didn't have the inode locked, inode's dquot(s)
-		 * would have changed also.
-		 */
-		iuid = ip->i_d.di_uid;
-		igid = ip->i_d.di_gid;
-		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
-		/*
-		 * Do a quota reservation only if uid/gid is actually
-		 * going to change.
-		 */
-		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
-			ASSERT(tp);
-			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-						capable(CAP_FOWNER) ?
-						XFS_QMOPT_FORCE_RES : 0);
-			if (error)	/* out of quota */
-				goto out_trans_cancel;
-		}
-	}
-
-	xfs_trans_ijoin(tp, ip);
-
-	/*
-	 * Change file ownership.  Must be the owner or privileged.
-	 */
-	if (mask & (ATTR_UID|ATTR_GID)) {
-		/*
-		 * CAP_FSETID overrides the following restrictions:
-		 *
-		 * The set-user-ID and set-group-ID bits of a file will be
-		 * cleared upon successful return from chown()
-		 */
-		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !capable(CAP_FSETID))
-			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-		/*
-		 * Change the ownerships and register quota modifications
-		 * in the transaction.
-		 */
-		if (iuid != uid) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
-				ASSERT(mask & ATTR_UID);
-				ASSERT(udqp);
-				olddquot1 = xfs_qm_vop_chown(tp, ip,
-							&ip->i_udquot, udqp);
-			}
-			ip->i_d.di_uid = uid;
-			inode->i_uid = uid;
-		}
-		if (igid != gid) {
-			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-				ASSERT(!XFS_IS_PQUOTA_ON(mp));
-				ASSERT(mask & ATTR_GID);
-				ASSERT(gdqp);
-				olddquot2 = xfs_qm_vop_chown(tp, ip,
-							&ip->i_gdquot, gdqp);
-			}
-			ip->i_d.di_gid = gid;
-			inode->i_gid = gid;
-		}
-	}
-
-	/*
-	 * Change file access modes.
-	 */
-	if (mask & ATTR_MODE) {
-		umode_t mode = iattr->ia_mode;
-
-		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-			mode &= ~S_ISGID;
-
-		ip->i_d.di_mode &= S_IFMT;
-		ip->i_d.di_mode |= mode & ~S_IFMT;
-
-		inode->i_mode &= S_IFMT;
-		inode->i_mode |= mode & ~S_IFMT;
-	}
-
-	/*
-	 * Change file access or modified times.
-	 */
-	if (mask & ATTR_ATIME) {
-		inode->i_atime = iattr->ia_atime;
-		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_CTIME) {
-		inode->i_ctime = iattr->ia_ctime;
-		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_MTIME) {
-		inode->i_mtime = iattr->ia_mtime;
-		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	XFS_STATS_INC(xs_ig_attrchg);
-
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Release any dquot(s) the inode had kept before chown.
-	 */
-	xfs_qm_dqrele(olddquot1);
-	xfs_qm_dqrele(olddquot2);
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-
-	if (error)
-		return XFS_ERROR(error);
-
-	/*
-	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-	 * 	     update.  We could avoid this with linked transactions
-	 * 	     and passing down the transaction pointer all the way
-	 *	     to attr_set.  No previous user of the generic
-	 * 	     Posix ACL code seems to care about this issue either.
-	 */
-	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-		error = -xfs_acl_chmod(inode);
-		if (error)
-			return XFS_ERROR(error);
-	}
-
-	return 0;
-
-out_trans_cancel:
-	xfs_trans_cancel(tp, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-	return error;
-}
-
-/*
- * Truncate file.  Must have write permission and not be a directory.
- */
-int
-xfs_setattr_size(
-	struct xfs_inode	*ip,
-	struct iattr		*iattr,
-	int			flags)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct inode		*inode = VFS_I(ip);
-	int			mask = iattr->ia_valid;
-	struct xfs_trans	*tp;
-	int			error;
-	uint			lock_flags;
-	uint			commit_flags = 0;
-
-	trace_xfs_setattr(ip);
-
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return XFS_ERROR(EROFS);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = -inode_change_ok(inode, iattr);
-	if (error)
-		return XFS_ERROR(error);
-
-	ASSERT(S_ISREG(ip->i_d.di_mode));
-	ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-			ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
-			ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-
-	lock_flags = XFS_ILOCK_EXCL;
-	if (!(flags & XFS_ATTR_NOLOCK))
-		lock_flags |= XFS_IOLOCK_EXCL;
-	xfs_ilock(ip, lock_flags);
-
-	/*
-	 * Short circuit the truncate case for zero length files.
-	 */
-	if (iattr->ia_size == 0 &&
-	    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
-		if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
-			goto out_unlock;
-
-		/*
-		 * Use the regular setattr path to update the timestamps.
-		 */
-		xfs_iunlock(ip, lock_flags);
-		iattr->ia_valid &= ~ATTR_SIZE;
-		return xfs_setattr_nonsize(ip, iattr, 0);
-	}
-
-	/*
-	 * Make sure that the dquots are attached to the inode.
-	 */
-	error = xfs_qm_dqattach_locked(ip, 0);
-	if (error)
-		goto out_unlock;
-
-	/*
-	 * Now we can make the changes.  Before we join the inode to the
-	 * transaction, take care of the part of the truncation that must be
-	 * done without the inode lock.  This needs to be done before joining
-	 * the inode to the transaction, because the inode cannot be unlocked
-	 * once it is a part of the transaction.
-	 */
-	if (iattr->ia_size > ip->i_size) {
-		/*
-		 * Do the first part of growing a file: zero any data in the
-		 * last block that is beyond the old EOF.  We need to do this
-		 * before the inode is joined to the transaction to modify
-		 * i_size.
-		 */
-		error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
-		if (error)
-			goto out_unlock;
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	lock_flags &= ~XFS_ILOCK_EXCL;
-
-	/*
-	 * We are going to log the inode size change in this transaction so
-	 * any previous writes that are beyond the on disk EOF and the new
-	 * EOF that have not been written out need to be written here.  If we
-	 * do not write the data out, we expose ourselves to the null files
-	 * problem.
-	 *
-	 * Only flush from the on disk size to the smaller of the in memory
-	 * file size or the new size as that's the range we really care about
-	 * here and prevents waiting for other data not within the range we
-	 * care about here.
-	 */
-	if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
-		error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
-					XBF_ASYNC, FI_NONE);
-		if (error)
-			goto out_unlock;
-	}
-
-	/*
-	 * Wait for all I/O to complete.
-	 */
-	xfs_ioend_wait(ip);
-
-	error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
-				     xfs_get_blocks);
-	if (error)
-		goto out_unlock;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-				 XFS_TRANS_PERM_LOG_RES,
-				 XFS_ITRUNCATE_LOG_COUNT);
-	if (error)
-		goto out_trans_cancel;
-
-	truncate_setsize(inode, iattr->ia_size);
-
-	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
-	lock_flags |= XFS_ILOCK_EXCL;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, ip);
-
-	/*
-	 * Only change the c/mtime if we are changing the size or we are
-	 * explicitly asked to change it.  This handles the semantic difference
-	 * between truncate() and ftruncate() as implemented in the VFS.
-	 *
-	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
-	 * special case where we need to update the times despite not having
-	 * these flags set.  For all other operations the VFS set these flags
-	 * explicitly if it wants a timestamp update.
-	 */
-	if (iattr->ia_size != ip->i_size &&
-	    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
-		iattr->ia_ctime = iattr->ia_mtime =
-			current_fs_time(inode->i_sb);
-		mask |= ATTR_CTIME | ATTR_MTIME;
-	}
-
-	if (iattr->ia_size > ip->i_size) {
-		ip->i_d.di_size = iattr->ia_size;
-		ip->i_size = iattr->ia_size;
-	} else if (iattr->ia_size <= ip->i_size ||
-		   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
-		error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
-		if (error)
-			goto out_trans_abort;
-
-		/*
-		 * Truncated "down", so we're removing references to old data
-		 * here - if we delay flushing for a long time, we expose
-		 * ourselves unduly to the notorious NULL files problem.  So,
-		 * we mark this inode and flush it when the file is closed,
-		 * and do not wait the usual (long) time for writeout.
-		 */
-		xfs_iflags_set(ip, XFS_ITRUNCATED);
-	}
-
-	if (mask & ATTR_CTIME) {
-		inode->i_ctime = iattr->ia_ctime;
-		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-	if (mask & ATTR_MTIME) {
-		inode->i_mtime = iattr->ia_mtime;
-		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-		ip->i_update_core = 1;
-	}
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	XFS_STATS_INC(xs_ig_attrchg);
-
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-out_unlock:
-	if (lock_flags)
-		xfs_iunlock(ip, lock_flags);
-	return error;
-
-out_trans_abort:
-	commit_flags |= XFS_TRANS_ABORT;
-out_trans_cancel:
-	xfs_trans_cancel(tp, commit_flags);
-	goto out_unlock;
-}
-
-STATIC int
-xfs_vn_setattr(
-	struct dentry	*dentry,
-	struct iattr	*iattr)
-{
-	if (iattr->ia_valid & ATTR_SIZE)
-		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
-	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
-}
-
-#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-	void			**arg,
-	struct getbmapx		*bmv,
-	int			*full)
-{
-	int			error;
-	struct fiemap_extent_info *fieinfo = *arg;
-	u32			fiemap_flags = 0;
-	u64			logical, physical, length;
-
-	/* Do nothing for a hole */
-	if (bmv->bmv_block == -1LL)
-		return 0;
-
-	logical = BBTOB(bmv->bmv_offset);
-	physical = BBTOB(bmv->bmv_block);
-	length = BBTOB(bmv->bmv_length);
-
-	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-		fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
-		physical = 0;   /* no block yet */
-	}
-	if (bmv->bmv_oflags & BMV_OF_LAST)
-		fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, fiemap_flags);
-	if (error > 0) {
-		error = 0;
-		*full = 1;	/* user array now full */
-	}
-
-	return -error;
-}
-
-STATIC int
-xfs_vn_fiemap(
-	struct inode		*inode,
-	struct fiemap_extent_info *fieinfo,
-	u64			start,
-	u64			length)
-{
-	xfs_inode_t		*ip = XFS_I(inode);
-	struct getbmapx		bm;
-	int			error;
-
-	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-	if (error)
-		return error;
-
-	/* Set up bmap header for xfs internal routine */
-	bm.bmv_offset = BTOBB(start);
-	/* Special case for whole file */
-	if (length == FIEMAP_MAX_OFFSET)
-		bm.bmv_length = -1LL;
-	else
-		bm.bmv_length = BTOBB(length);
-
-	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-					fieinfo->fi_extents_max + 1;
-	bm.bmv_count = min_t(__s32, bm.bmv_count,
-			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-		bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-	if (error)
-		return -error;
-
-	return 0;
-}
-
-static const struct inode_operations xfs_inode_operations = {
-	.get_acl		= xfs_get_acl,
-	.getattr		= xfs_vn_getattr,
-	.setattr		= xfs_vn_setattr,
-	.setxattr		= generic_setxattr,
-	.getxattr		= generic_getxattr,
-	.removexattr		= generic_removexattr,
-	.listxattr		= xfs_vn_listxattr,
-	.fiemap			= xfs_vn_fiemap,
-};
-
-static const struct inode_operations xfs_dir_inode_operations = {
-	.create			= xfs_vn_create,
-	.lookup			= xfs_vn_lookup,
-	.link			= xfs_vn_link,
-	.unlink			= xfs_vn_unlink,
-	.symlink		= xfs_vn_symlink,
-	.mkdir			= xfs_vn_mkdir,
-	/*
-	 * Yes, XFS uses the same method for rmdir and unlink.
-	 *
-	 * There are some subtile differences deeper in the code,
-	 * but we use S_ISDIR to check for those.
-	 */
-	.rmdir			= xfs_vn_unlink,
-	.mknod			= xfs_vn_mknod,
-	.rename			= xfs_vn_rename,
-	.get_acl		= xfs_get_acl,
-	.getattr		= xfs_vn_getattr,
-	.setattr		= xfs_vn_setattr,
-	.setxattr		= generic_setxattr,
-	.getxattr		= generic_getxattr,
-	.removexattr		= generic_removexattr,
-	.listxattr		= xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_dir_ci_inode_operations = {
-	.create			= xfs_vn_create,
-	.lookup			= xfs_vn_ci_lookup,
-	.link			= xfs_vn_link,
-	.unlink			= xfs_vn_unlink,
-	.symlink		= xfs_vn_symlink,
-	.mkdir			= xfs_vn_mkdir,
-	/*
-	 * Yes, XFS uses the same method for rmdir and unlink.
-	 *
-	 * There are some subtile differences deeper in the code,
-	 * but we use S_ISDIR to check for those.
-	 */
-	.rmdir			= xfs_vn_unlink,
-	.mknod			= xfs_vn_mknod,
-	.rename			= xfs_vn_rename,
-	.get_acl		= xfs_get_acl,
-	.getattr		= xfs_vn_getattr,
-	.setattr		= xfs_vn_setattr,
-	.setxattr		= generic_setxattr,
-	.getxattr		= generic_getxattr,
-	.removexattr		= generic_removexattr,
-	.listxattr		= xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_symlink_inode_operations = {
-	.readlink		= generic_readlink,
-	.follow_link		= xfs_vn_follow_link,
-	.put_link		= xfs_vn_put_link,
-	.get_acl		= xfs_get_acl,
-	.getattr		= xfs_vn_getattr,
-	.setattr		= xfs_vn_setattr,
-	.setxattr		= generic_setxattr,
-	.getxattr		= generic_getxattr,
-	.removexattr		= generic_removexattr,
-	.listxattr		= xfs_vn_listxattr,
-};
-
-STATIC void
-xfs_diflags_to_iflags(
-	struct inode		*inode,
-	struct xfs_inode	*ip)
-{
-	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
-	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-		inode->i_flags |= S_SYNC;
-	else
-		inode->i_flags &= ~S_SYNC;
-	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-		inode->i_flags |= S_NOATIME;
-	else
-		inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
- *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
- */
-void
-xfs_setup_inode(
-	struct xfs_inode	*ip)
-{
-	struct inode		*inode = &ip->i_vnode;
-
-	inode->i_ino = ip->i_ino;
-	inode->i_state = I_NEW;
-
-	inode_sb_list_add(inode);
-	/* make the inode look hashed for the writeback code */
-	hlist_add_fake(&inode->i_hash);
-
-	inode->i_mode	= ip->i_d.di_mode;
-	inode->i_nlink	= ip->i_d.di_nlink;
-	inode->i_uid	= ip->i_d.di_uid;
-	inode->i_gid	= ip->i_d.di_gid;
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		inode->i_rdev =
-			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-			      sysv_minor(ip->i_df.if_u2.if_rdev));
-		break;
-	default:
-		inode->i_rdev = 0;
-		break;
-	}
-
-	inode->i_generation = ip->i_d.di_gen;
-	i_size_write(inode, ip->i_d.di_size);
-	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
-	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
-	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
-	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
-	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
-	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
-	xfs_diflags_to_iflags(inode, ip);
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFREG:
-		inode->i_op = &xfs_inode_operations;
-		inode->i_fop = &xfs_file_operations;
-		inode->i_mapping->a_ops = &xfs_address_space_operations;
-		break;
-	case S_IFDIR:
-		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-			inode->i_op = &xfs_dir_ci_inode_operations;
-		else
-			inode->i_op = &xfs_dir_inode_operations;
-		inode->i_fop = &xfs_dir_file_operations;
-		break;
-	case S_IFLNK:
-		inode->i_op = &xfs_symlink_inode_operations;
-		if (!(ip->i_df.if_flags & XFS_IFINLINE))
-			inode->i_mapping->a_ops = &xfs_address_space_operations;
-		break;
-	default:
-		inode->i_op = &xfs_inode_operations;
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-		break;
-	}
-
-	/*
-	 * If there is no attribute fork no ACL can exist on this inode,
-	 * and it can't have any file capabilities attached to it either.
-	 */
-	if (!XFS_IFORK_Q(ip)) {
-		inode_has_no_xattr(inode);
-		cache_no_acl(inode);
-	}
-
-	xfs_iflags_clear(ip, XFS_INEW);
-	barrier();
-
-	unlock_new_inode(inode);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
deleted file mode 100644
index ef41c92..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOPS_H__
-#define __XFS_IOPS_H__
-
-struct xfs_inode;
-
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
-extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-
-extern void xfs_setup_inode(struct xfs_inode *);
-
-#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
deleted file mode 100644
index 1e8a45e..0000000
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
-
-#include <linux/types.h>
-
-/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS	1
-# define XFS_BIG_INUMS	1
-#else
-# define XFS_BIG_BLKNOS	0
-# define XFS_BIG_INUMS	0
-#endif
-
-#include "xfs_types.h"
-
-#include "kmem.h"
-#include "mrlock.h"
-#include "time.h"
-#include "uuid.h"
-
-#include <linux/semaphore.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/file.h>
-#include <linux/swap.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/major.h>
-#include <linux/pagemap.h>
-#include <linux/vfs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/proc_fs.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <linux/log2.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/ctype.h>
-#include <linux/writeback.h>
-#include <linux/capability.h>
-#include <linux/list_sort.h>
-
-#include <asm/page.h>
-#include <asm/div64.h>
-#include <asm/param.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include "xfs_vnode.h"
-#include "xfs_stats.h"
-#include "xfs_sysctl.h"
-#include "xfs_iops.h"
-#include "xfs_aops.h"
-#include "xfs_super.h"
-#include "xfs_buf.h"
-#include "xfs_message.h"
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
-#endif
-
-#define irix_sgid_inherit	xfs_params.sgid_inherit.val
-#define irix_symlink_mode	xfs_params.symlink_mode.val
-#define xfs_panic_mask		xfs_params.panic_mask.val
-#define xfs_error_level		xfs_params.error_level.val
-#define xfs_syncd_centisecs	xfs_params.syncd_timer.val
-#define xfs_stats_clear		xfs_params.stats_clear.val
-#define xfs_inherit_sync	xfs_params.inherit_sync.val
-#define xfs_inherit_nodump	xfs_params.inherit_nodump.val
-#define xfs_inherit_noatime	xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs	xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
-#define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
-#define xfs_rotorstep		xfs_params.rotorstep.val
-#define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
-#define xfs_fstrm_centisecs	xfs_params.fstrm_timer.val
-
-#define current_cpu()		(raw_smp_processor_id())
-#define current_pid()		(current->pid)
-#define current_test_flags(f)	(current->flags & (f))
-#define current_set_flags_nested(sp, f)		\
-		(*(sp) = current->flags, current->flags |= (f))
-#define current_clear_flags_nested(sp, f)	\
-		(*(sp) = current->flags, current->flags &= ~(f))
-#define current_restore_flags_nested(sp, f)	\
-		(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-
-#define spinlock_destroy(lock)
-
-#define NBBY		8		/* number of bits per byte */
-
-/*
- * Size of block device i/o is parameterized here.
- * Currently the system supports page-sized i/o.
- */
-#define	BLKDEV_IOSHIFT		PAGE_CACHE_SHIFT
-#define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
-/* number of BB's per block device block */
-#define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)
-
-#define ENOATTR		ENODATA		/* Attribute not found */
-#define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-
-#define SYNCHRONIZE()	barrier()
-#define __return_address __builtin_return_address(0)
-
-#define XFS_PROJID_DEFAULT	0
-#define MAXPATHLEN	1024
-
-#define MIN(a,b)	(min(a,b))
-#define MAX(a,b)	(max(a,b))
-#define howmany(x, y)	(((x)+((y)-1))/(y))
-
-/*
- * Various platform dependent calls that don't fit anywhere else
- */
-#define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
-#define xfs_stack_trace()	dump_stack()
-
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-	__u32	mod;
-
-	switch (n) {
-		case 4:
-			mod = *(__u32 *)a % b;
-			*(__u32 *)a = *(__u32 *)a / b;
-			return mod;
-		case 8:
-			{
-			unsigned long __upper, __low, __high, __mod;
-			__u64	c = *(__u64 *)a;
-			__upper = __high = c >> 32;
-			__low = c;
-			if (__high) {
-				__upper = __high % (b);
-				__high = __high / (b);
-			}
-			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-			asm("":"=A" (c):"a" (__low),"d" (__high));
-			*(__u64 *)a = c;
-			return __mod;
-			}
-	}
-
-	/* NOTREACHED */
-	return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-	switch (n) {
-		case 4:
-			return *(__u32 *)a % b;
-		case 8:
-			{
-			unsigned long __upper, __low, __high, __mod;
-			__u64	c = *(__u64 *)a;
-			__upper = __high = c >> 32;
-			__low = c;
-			if (__high) {
-				__upper = __high % (b);
-				__high = __high / (b);
-			}
-			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-			asm("":"=A" (c):"a" (__low),"d" (__high));
-			return __mod;
-			}
-	}
-
-	/* NOTREACHED */
-	return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-	__u32	mod;
-
-	switch (n) {
-		case 4:
-			mod = *(__u32 *)a % b;
-			*(__u32 *)a = *(__u32 *)a / b;
-			return mod;
-		case 8:
-			mod = do_div(*(__u64 *)a, b);
-			return mod;
-	}
-
-	/* NOTREACHED */
-	return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-	switch (n) {
-		case 4:
-			return *(__u32 *)a % b;
-		case 8:
-			{
-			__u64	c = *(__u64 *)a;
-			return do_div(c, b);
-			}
-	}
-
-	/* NOTREACHED */
-	return 0;
-}
-#endif
-
-#undef do_div
-#define do_div(a, b)	xfs_do_div(&(a), (b), sizeof(a))
-#define do_mod(a, b)	xfs_do_mod(&(a), (b), sizeof(a))
-
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
-{
-	x += y - 1;
-	do_div(x, y);
-	return(x * y);
-}
-
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
-{
-	x += y - 1;
-	do_div(x, y);
-	return x;
-}
-
-/* ARM old ABI has some weird alignment/padding */
-#if defined(__arm__) && !defined(__ARM_EABI__)
-#define __arch_pack __attribute__((packed))
-#else
-#define __arch_pack
-#endif
-
-#define ASSERT_ALWAYS(expr)	\
-	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-#define ASSERT(expr)	((void)0)
-
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
-#else /* DEBUG */
-
-#define ASSERT(expr)	\
-	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
-#endif /* DEBUG */
-
-#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
deleted file mode 100644
index bd672de..0000000
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-/*
- * XFS logging functions
- */
-static void
-__xfs_printk(
-	const char		*level,
-	const struct xfs_mount	*mp,
-	struct va_format	*vaf)
-{
-	if (mp && mp->m_fsname) {
-		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-		return;
-	}
-	printk("%sXFS: %pV\n", level, vaf);
-}
-
-#define define_xfs_printk_level(func, kern_level)		\
-void func(const struct xfs_mount *mp, const char *fmt, ...)	\
-{								\
-	struct va_format	vaf;				\
-	va_list			args;				\
-								\
-	va_start(args, fmt);					\
-								\
-	vaf.fmt = fmt;						\
-	vaf.va = &args;						\
-								\
-	__xfs_printk(kern_level, mp, &vaf);			\
-	va_end(args);						\
-}								\
-
-define_xfs_printk_level(xfs_emerg, KERN_EMERG);
-define_xfs_printk_level(xfs_alert, KERN_ALERT);
-define_xfs_printk_level(xfs_crit, KERN_CRIT);
-define_xfs_printk_level(xfs_err, KERN_ERR);
-define_xfs_printk_level(xfs_warn, KERN_WARNING);
-define_xfs_printk_level(xfs_notice, KERN_NOTICE);
-define_xfs_printk_level(xfs_info, KERN_INFO);
-#ifdef DEBUG
-define_xfs_printk_level(xfs_debug, KERN_DEBUG);
-#endif
-
-void
-xfs_alert_tag(
-	const struct xfs_mount	*mp,
-	int			panic_tag,
-	const char		*fmt, ...)
-{
-	struct va_format	vaf;
-	va_list			args;
-	int			do_panic = 0;
-
-	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
-		xfs_alert(mp, "Transforming an alert into a BUG.");
-		do_panic = 1;
-	}
-
-	va_start(args, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &args;
-
-	__xfs_printk(KERN_ALERT, mp, &vaf);
-	va_end(args);
-
-	BUG_ON(do_panic);
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
-	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
-		expr, file, line);
-	BUG();
-}
-
-void
-xfs_hex_dump(void *p, int length)
-{
-	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
-}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
deleted file mode 100644
index 7fb7ea0..0000000
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __XFS_MESSAGE_H
-#define __XFS_MESSAGE_H 1
-
-struct xfs_mount;
-
-extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
-			 const char *fmt, ...)
-        __attribute__ ((format (printf, 3, 4)));
-extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-
-#ifdef DEBUG
-extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-#else
-static inline void
-__attribute__ ((format (printf, 2, 3)))
-xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-{
-}
-#endif
-
-extern void assfail(char *expr, char *f, int l);
-
-extern void xfs_hex_dump(void *p, int length);
-
-#endif	/* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
deleted file mode 100644
index 7e76f53..0000000
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_qm.h"
-#include <linux/quota.h>
-
-
-STATIC int
-xfs_quota_type(int type)
-{
-	switch (type) {
-	case USRQUOTA:
-		return XFS_DQ_USER;
-	case GRPQUOTA:
-		return XFS_DQ_GROUP;
-	default:
-		return XFS_DQ_PROJ;
-	}
-}
-
-STATIC int
-xfs_fs_get_xstate(
-	struct super_block	*sb,
-	struct fs_quota_stat	*fqs)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-STATIC int
-xfs_fs_set_xstate(
-	struct super_block	*sb,
-	unsigned int		uflags,
-	int			op)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-	unsigned int		flags = 0;
-
-	if (sb->s_flags & MS_RDONLY)
-		return -EROFS;
-	if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-
-	if (uflags & FS_QUOTA_UDQ_ACCT)
-		flags |= XFS_UQUOTA_ACCT;
-	if (uflags & FS_QUOTA_PDQ_ACCT)
-		flags |= XFS_PQUOTA_ACCT;
-	if (uflags & FS_QUOTA_GDQ_ACCT)
-		flags |= XFS_GQUOTA_ACCT;
-	if (uflags & FS_QUOTA_UDQ_ENFD)
-		flags |= XFS_UQUOTA_ENFD;
-	if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
-		flags |= XFS_OQUOTA_ENFD;
-
-	switch (op) {
-	case Q_XQUOTAON:
-		return -xfs_qm_scall_quotaon(mp, flags);
-	case Q_XQUOTAOFF:
-		if (!XFS_IS_QUOTA_ON(mp))
-			return -EINVAL;
-		return -xfs_qm_scall_quotaoff(mp, flags);
-	case Q_XQUOTARM:
-		if (XFS_IS_QUOTA_ON(mp))
-			return -EINVAL;
-		return -xfs_qm_scall_trunc_qfiles(mp, flags);
-	}
-
-	return -EINVAL;
-}
-
-STATIC int
-xfs_fs_get_dqblk(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
-
-	return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
-}
-
-STATIC int
-xfs_fs_set_dqblk(
-	struct super_block	*sb,
-	int			type,
-	qid_t			id,
-	struct fs_disk_quota	*fdq)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	if (sb->s_flags & MS_RDONLY)
-		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	if (!XFS_IS_QUOTA_ON(mp))
-		return -ESRCH;
-
-	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
-}
-
-const struct quotactl_ops xfs_quotactl_operations = {
-	.get_xstate		= xfs_fs_get_xstate,
-	.set_xstate		= xfs_fs_set_xstate,
-	.get_dqblk		= xfs_fs_get_dqblk,
-	.set_dqblk		= xfs_fs_set_dqblk,
-};
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
deleted file mode 100644
index 76fdc58..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
-{
-	int		c, i, j, val;
-	__uint64_t	xs_xstrat_bytes = 0;
-	__uint64_t	xs_write_bytes = 0;
-	__uint64_t	xs_read_bytes = 0;
-
-	static const struct xstats_entry {
-		char	*desc;
-		int	endpoint;
-	} xstats[] = {
-		{ "extent_alloc",	XFSSTAT_END_EXTENT_ALLOC	},
-		{ "abt",		XFSSTAT_END_ALLOC_BTREE		},
-		{ "blk_map",		XFSSTAT_END_BLOCK_MAPPING	},
-		{ "bmbt",		XFSSTAT_END_BLOCK_MAP_BTREE	},
-		{ "dir",		XFSSTAT_END_DIRECTORY_OPS	},
-		{ "trans",		XFSSTAT_END_TRANSACTIONS	},
-		{ "ig",			XFSSTAT_END_INODE_OPS		},
-		{ "log",		XFSSTAT_END_LOG_OPS		},
-		{ "push_ail",		XFSSTAT_END_TAIL_PUSHING	},
-		{ "xstrat",		XFSSTAT_END_WRITE_CONVERT	},
-		{ "rw",			XFSSTAT_END_READ_WRITE_OPS	},
-		{ "attr",		XFSSTAT_END_ATTRIBUTE_OPS	},
-		{ "icluster",		XFSSTAT_END_INODE_CLUSTER	},
-		{ "vnodes",		XFSSTAT_END_VNODE_OPS		},
-		{ "buf",		XFSSTAT_END_BUF			},
-		{ "abtb2",		XFSSTAT_END_ABTB_V2		},
-		{ "abtc2",		XFSSTAT_END_ABTC_V2		},
-		{ "bmbt2",		XFSSTAT_END_BMBT_V2		},
-		{ "ibt2",		XFSSTAT_END_IBT_V2		},
-	};
-
-	/* Loop over all stats groups */
-	for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
-		seq_printf(m, "%s", xstats[i].desc);
-		/* inner loop does each group */
-		while (j < xstats[i].endpoint) {
-			val = 0;
-			/* sum over all cpus */
-			for_each_possible_cpu(c)
-				val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-			seq_printf(m, " %u", val);
-			j++;
-		}
-		seq_putc(m, '\n');
-	}
-	/* extra precision counters */
-	for_each_possible_cpu(i) {
-		xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
-		xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
-		xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
-	}
-
-	seq_printf(m, "xpc %Lu %Lu %Lu\n",
-			xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
-	seq_printf(m, "debug %u\n",
-#if defined(DEBUG)
-		1);
-#else
-		0);
-#endif
-	return 0;
-}
-
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xfs_stat_proc_show, NULL);
-}
-
-static const struct file_operations xfs_stat_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= xfs_stat_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-int
-xfs_init_procfs(void)
-{
-	if (!proc_mkdir("fs/xfs", NULL))
-		goto out;
-
-	if (!proc_create("fs/xfs/stat", 0, NULL,
-			 &xfs_stat_proc_fops))
-		goto out_remove_entry;
-	return 0;
-
- out_remove_entry:
-	remove_proc_entry("fs/xfs", NULL);
- out:
-	return -ENOMEM;
-}
-
-void
-xfs_cleanup_procfs(void)
-{
-	remove_proc_entry("fs/xfs/stat", NULL);
-	remove_proc_entry("fs/xfs", NULL);
-}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
deleted file mode 100644
index 736854b..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_STATS_H__
-#define __XFS_STATS_H__
-
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-#include <linux/percpu.h>
-
-/*
- * XFS global statistics
- */
-struct xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC	4
-	__uint32_t		xs_allocx;
-	__uint32_t		xs_allocb;
-	__uint32_t		xs_freex;
-	__uint32_t		xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
-	__uint32_t		xs_abt_lookup;
-	__uint32_t		xs_abt_compare;
-	__uint32_t		xs_abt_insrec;
-	__uint32_t		xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
-	__uint32_t		xs_blk_mapr;
-	__uint32_t		xs_blk_mapw;
-	__uint32_t		xs_blk_unmap;
-	__uint32_t		xs_add_exlist;
-	__uint32_t		xs_del_exlist;
-	__uint32_t		xs_look_exlist;
-	__uint32_t		xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
-	__uint32_t		xs_bmbt_lookup;
-	__uint32_t		xs_bmbt_compare;
-	__uint32_t		xs_bmbt_insrec;
-	__uint32_t		xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
-	__uint32_t		xs_dir_lookup;
-	__uint32_t		xs_dir_create;
-	__uint32_t		xs_dir_remove;
-	__uint32_t		xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
-	__uint32_t		xs_trans_sync;
-	__uint32_t		xs_trans_async;
-	__uint32_t		xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
-	__uint32_t		xs_ig_attempts;
-	__uint32_t		xs_ig_found;
-	__uint32_t		xs_ig_frecycle;
-	__uint32_t		xs_ig_missed;
-	__uint32_t		xs_ig_dup;
-	__uint32_t		xs_ig_reclaims;
-	__uint32_t		xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
-	__uint32_t		xs_log_writes;
-	__uint32_t		xs_log_blocks;
-	__uint32_t		xs_log_noiclogs;
-	__uint32_t		xs_log_force;
-	__uint32_t		xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
-	__uint32_t		xs_try_logspace;
-	__uint32_t		xs_sleep_logspace;
-	__uint32_t		xs_push_ail;
-	__uint32_t		xs_push_ail_success;
-	__uint32_t		xs_push_ail_pushbuf;
-	__uint32_t		xs_push_ail_pinned;
-	__uint32_t		xs_push_ail_locked;
-	__uint32_t		xs_push_ail_flushing;
-	__uint32_t		xs_push_ail_restarts;
-	__uint32_t		xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
-	__uint32_t		xs_xstrat_quick;
-	__uint32_t		xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
-	__uint32_t		xs_write_calls;
-	__uint32_t		xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
-	__uint32_t		xs_attr_get;
-	__uint32_t		xs_attr_set;
-	__uint32_t		xs_attr_remove;
-	__uint32_t		xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
-	__uint32_t		xs_iflush_count;
-	__uint32_t		xs_icluster_flushcnt;
-	__uint32_t		xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
-	__uint32_t		vn_active;	/* # vnodes not on free lists */
-	__uint32_t		vn_alloc;	/* # times vn_alloc called */
-	__uint32_t		vn_get;		/* # times vn_get called */
-	__uint32_t		vn_hold;	/* # times vn_hold called */
-	__uint32_t		vn_rele;	/* # times vn_rele called */
-	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
-	__uint32_t		vn_remove;	/* # times vn_remove called */
-	__uint32_t		vn_free;	/* # times vn_free called */
-#define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
-	__uint32_t		xb_get;
-	__uint32_t		xb_create;
-	__uint32_t		xb_get_locked;
-	__uint32_t		xb_get_locked_waited;
-	__uint32_t		xb_busy_locked;
-	__uint32_t		xb_miss_locked;
-	__uint32_t		xb_page_retries;
-	__uint32_t		xb_page_found;
-	__uint32_t		xb_get_read;
-/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2		(XFSSTAT_END_BUF+15)
-	__uint32_t		xs_abtb_2_lookup;
-	__uint32_t		xs_abtb_2_compare;
-	__uint32_t		xs_abtb_2_insrec;
-	__uint32_t		xs_abtb_2_delrec;
-	__uint32_t		xs_abtb_2_newroot;
-	__uint32_t		xs_abtb_2_killroot;
-	__uint32_t		xs_abtb_2_increment;
-	__uint32_t		xs_abtb_2_decrement;
-	__uint32_t		xs_abtb_2_lshift;
-	__uint32_t		xs_abtb_2_rshift;
-	__uint32_t		xs_abtb_2_split;
-	__uint32_t		xs_abtb_2_join;
-	__uint32_t		xs_abtb_2_alloc;
-	__uint32_t		xs_abtb_2_free;
-	__uint32_t		xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2		(XFSSTAT_END_ABTB_V2+15)
-	__uint32_t		xs_abtc_2_lookup;
-	__uint32_t		xs_abtc_2_compare;
-	__uint32_t		xs_abtc_2_insrec;
-	__uint32_t		xs_abtc_2_delrec;
-	__uint32_t		xs_abtc_2_newroot;
-	__uint32_t		xs_abtc_2_killroot;
-	__uint32_t		xs_abtc_2_increment;
-	__uint32_t		xs_abtc_2_decrement;
-	__uint32_t		xs_abtc_2_lshift;
-	__uint32_t		xs_abtc_2_rshift;
-	__uint32_t		xs_abtc_2_split;
-	__uint32_t		xs_abtc_2_join;
-	__uint32_t		xs_abtc_2_alloc;
-	__uint32_t		xs_abtc_2_free;
-	__uint32_t		xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2		(XFSSTAT_END_ABTC_V2+15)
-	__uint32_t		xs_bmbt_2_lookup;
-	__uint32_t		xs_bmbt_2_compare;
-	__uint32_t		xs_bmbt_2_insrec;
-	__uint32_t		xs_bmbt_2_delrec;
-	__uint32_t		xs_bmbt_2_newroot;
-	__uint32_t		xs_bmbt_2_killroot;
-	__uint32_t		xs_bmbt_2_increment;
-	__uint32_t		xs_bmbt_2_decrement;
-	__uint32_t		xs_bmbt_2_lshift;
-	__uint32_t		xs_bmbt_2_rshift;
-	__uint32_t		xs_bmbt_2_split;
-	__uint32_t		xs_bmbt_2_join;
-	__uint32_t		xs_bmbt_2_alloc;
-	__uint32_t		xs_bmbt_2_free;
-	__uint32_t		xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2		(XFSSTAT_END_BMBT_V2+15)
-	__uint32_t		xs_ibt_2_lookup;
-	__uint32_t		xs_ibt_2_compare;
-	__uint32_t		xs_ibt_2_insrec;
-	__uint32_t		xs_ibt_2_delrec;
-	__uint32_t		xs_ibt_2_newroot;
-	__uint32_t		xs_ibt_2_killroot;
-	__uint32_t		xs_ibt_2_increment;
-	__uint32_t		xs_ibt_2_decrement;
-	__uint32_t		xs_ibt_2_lshift;
-	__uint32_t		xs_ibt_2_rshift;
-	__uint32_t		xs_ibt_2_split;
-	__uint32_t		xs_ibt_2_join;
-	__uint32_t		xs_ibt_2_alloc;
-	__uint32_t		xs_ibt_2_free;
-	__uint32_t		xs_ibt_2_moves;
-/* Extra precision counters */
-	__uint64_t		xs_xstrat_bytes;
-	__uint64_t		xs_write_bytes;
-	__uint64_t		xs_read_bytes;
-};
-
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
-
-/*
- * We don't disable preempt, not too worried about poking the
- * wrong CPU's stat for now (also aggregated before reporting).
- */
-#define XFS_STATS_INC(v)	(per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v)	(per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc)	(per_cpu(xfsstats, current_cpu()).v += (inc))
-
-extern int xfs_init_procfs(void);
-extern void xfs_cleanup_procfs(void);
-
-
-#else	/* !CONFIG_PROC_FS */
-
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
-static inline int xfs_init_procfs(void)
-{
-	return 0;
-}
-
-static inline void xfs_cleanup_procfs(void)
-{
-}
-
-#endif	/* !CONFIG_PROC_FS */
-
-#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
deleted file mode 100644
index 9a72dda..0000000
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_fsops.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_da_btree.h"
-#include "xfs_extfree_item.h"
-#include "xfs_mru_cache.h"
-#include "xfs_inode_item.h"
-#include "xfs_sync.h"
-#include "xfs_trace.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
-
-static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
-#define MNTOPT_LOGDEV	"logdev"	/* log device */
-#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
-#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
-#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
-#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
-#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
-#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
-#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
-#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
-#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
-					 * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
-#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
-#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
-					 * in stat(). */
-#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
-#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
-#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
-#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
-#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
-#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
-#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"	/* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"	/* Delayed logging disabled */
-#define MNTOPT_DISCARD	   "discard"	/* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard"	/* Do not discard unused blocks */
-
-/*
- * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
- */
-enum {
-	Opt_barrier, Opt_nobarrier, Opt_err
-};
-
-static const match_table_t tokens = {
-	{Opt_barrier, "barrier"},
-	{Opt_nobarrier, "nobarrier"},
-	{Opt_err, NULL}
-};
-
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
-	int	last, shift_left_factor = 0;
-	char	*value = s;
-
-	last = strlen(value) - 1;
-	if (value[last] == 'K' || value[last] == 'k') {
-		shift_left_factor = 10;
-		value[last] = '\0';
-	}
-	if (value[last] == 'M' || value[last] == 'm') {
-		shift_left_factor = 20;
-		value[last] = '\0';
-	}
-	if (value[last] == 'G' || value[last] == 'g') {
-		shift_left_factor = 30;
-		value[last] = '\0';
-	}
-
-	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure.  The caller takes care of them.
- */
-STATIC int
-xfs_parseargs(
-	struct xfs_mount	*mp,
-	char			*options)
-{
-	struct super_block	*sb = mp->m_super;
-	char			*this_char, *value, *eov;
-	int			dsunit = 0;
-	int			dswidth = 0;
-	int			iosize = 0;
-	__uint8_t		iosizelog = 0;
-
-	/*
-	 * set up the mount name first so all the errors will refer to the
-	 * correct device.
-	 */
-	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
-	if (!mp->m_fsname)
-		return ENOMEM;
-	mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
-	/*
-	 * Copy binary VFS mount flags we are interested in.
-	 */
-	if (sb->s_flags & MS_RDONLY)
-		mp->m_flags |= XFS_MOUNT_RDONLY;
-	if (sb->s_flags & MS_DIRSYNC)
-		mp->m_flags |= XFS_MOUNT_DIRSYNC;
-	if (sb->s_flags & MS_SYNCHRONOUS)
-		mp->m_flags |= XFS_MOUNT_WSYNC;
-
-	/*
-	 * Set some default flags that could be cleared by the mount option
-	 * parsing.
-	 */
-	mp->m_flags |= XFS_MOUNT_BARRIER;
-	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-	mp->m_flags |= XFS_MOUNT_DELAYLOG;
-
-	/*
-	 * These can be overridden by the mount option parsing.
-	 */
-	mp->m_logbufs = -1;
-	mp->m_logbsize = -1;
-
-	if (!options)
-		goto done;
-
-	while ((this_char = strsep(&options, ",")) != NULL) {
-		if (!*this_char)
-			continue;
-		if ((value = strchr(this_char, '=')) != NULL)
-			*value++ = 0;
-
-		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			mp->m_logbufs = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			mp->m_logbsize = suffix_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-			if (!mp->m_logname)
-				return ENOMEM;
-		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
-			xfs_warn(mp, "%s option not allowed on this system",
-				this_char);
-			return EINVAL;
-		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-			if (!mp->m_rtname)
-				return ENOMEM;
-		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			iosize = simple_strtoul(value, &eov, 10);
-			iosizelog = ffs(iosize) - 1;
-		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			iosize = suffix_strtoul(value, &eov, 10);
-			iosizelog = ffs(iosize) - 1;
-		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
-			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
-			mp->m_flags |= XFS_MOUNT_GRPID;
-		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
-			mp->m_flags &= ~XFS_MOUNT_GRPID;
-		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-			mp->m_flags |= XFS_MOUNT_WSYNC;
-		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-			mp->m_flags |= XFS_MOUNT_NORECOVERY;
-		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-			mp->m_flags |= XFS_MOUNT_NOALIGN;
-		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-			mp->m_flags |= XFS_MOUNT_SWALLOC;
-		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			dsunit = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-			if (!value || !*value) {
-				xfs_warn(mp, "%s option requires an argument",
-					this_char);
-				return EINVAL;
-			}
-			dswidth = simple_strtoul(value, &eov, 10);
-		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
-			xfs_warn(mp, "%s option not allowed on this system",
-				this_char);
-			return EINVAL;
-#endif
-		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-			mp->m_flags |= XFS_MOUNT_NOUUID;
-		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-			mp->m_flags |= XFS_MOUNT_BARRIER;
-		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-			mp->m_flags &= ~XFS_MOUNT_BARRIER;
-		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-			mp->m_flags |= XFS_MOUNT_IKEEP;
-		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-			mp->m_flags &= ~XFS_MOUNT_IKEEP;
-		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-			mp->m_flags |= XFS_MOUNT_ATTR2;
-		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-			mp->m_flags &= ~XFS_MOUNT_ATTR2;
-			mp->m_flags |= XFS_MOUNT_NOATTR2;
-		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-			mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-					  XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-					  XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-					  XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
-		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-			   !strcmp(this_char, MNTOPT_UQUOTA) ||
-			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
-			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-					 XFS_UQUOTA_ENFD);
-		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-					 XFS_OQUOTA_ENFD);
-		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-					 XFS_OQUOTA_ENFD);
-		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-			mp->m_flags |= XFS_MOUNT_DELAYLOG;
-		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
-		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
-			mp->m_flags |= XFS_MOUNT_DISCARD;
-		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
-			mp->m_flags &= ~XFS_MOUNT_DISCARD;
-		} else if (!strcmp(this_char, "ihashsize")) {
-			xfs_warn(mp,
-	"ihashsize no longer used, option is deprecated.");
-		} else if (!strcmp(this_char, "osyncisdsync")) {
-			xfs_warn(mp,
-	"osyncisdsync has no effect, option is deprecated.");
-		} else if (!strcmp(this_char, "osyncisosync")) {
-			xfs_warn(mp,
-	"osyncisosync has no effect, option is deprecated.");
-		} else if (!strcmp(this_char, "irixsgid")) {
-			xfs_warn(mp,
-	"irixsgid is now a sysctl(2) variable, option is deprecated.");
-		} else {
-			xfs_warn(mp, "unknown mount option [%s].", this_char);
-			return EINVAL;
-		}
-	}
-
-	/*
-	 * no recovery flag requires a read-only mount
-	 */
-	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-		xfs_warn(mp, "no-recovery mounts must be read-only.");
-		return EINVAL;
-	}
-
-	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
-		xfs_warn(mp,
-	"sunit and swidth options incompatible with the noalign option");
-		return EINVAL;
-	}
-
-	if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-	    !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-		xfs_warn(mp,
-	"the discard option is incompatible with the nodelaylog option");
-		return EINVAL;
-	}
-
-#ifndef CONFIG_XFS_QUOTA
-	if (XFS_IS_QUOTA_RUNNING(mp)) {
-		xfs_warn(mp, "quota support not available in this kernel.");
-		return EINVAL;
-	}
-#endif
-
-	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
-		xfs_warn(mp, "cannot mount with both project and group quota");
-		return EINVAL;
-	}
-
-	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-		xfs_warn(mp, "sunit and swidth must be specified together");
-		return EINVAL;
-	}
-
-	if (dsunit && (dswidth % dsunit != 0)) {
-		xfs_warn(mp,
-	"stripe width (%d) must be a multiple of the stripe unit (%d)",
-			dswidth, dsunit);
-		return EINVAL;
-	}
-
-done:
-	if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
-		/*
-		 * At this point the superblock has not been read
-		 * in, therefore we do not know the block size.
-		 * Before the mount call ends we will convert
-		 * these to FSBs.
-		 */
-		if (dsunit) {
-			mp->m_dalign = dsunit;
-			mp->m_flags |= XFS_MOUNT_RETERR;
-		}
-
-		if (dswidth)
-			mp->m_swidth = dswidth;
-	}
-
-	if (mp->m_logbufs != -1 &&
-	    mp->m_logbufs != 0 &&
-	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
-	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
-		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
-			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-		return XFS_ERROR(EINVAL);
-	}
-	if (mp->m_logbsize != -1 &&
-	    mp->m_logbsize !=  0 &&
-	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
-	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
-	     !is_power_of_2(mp->m_logbsize))) {
-		xfs_warn(mp,
-			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-			mp->m_logbsize);
-		return XFS_ERROR(EINVAL);
-	}
-
-	if (iosizelog) {
-		if (iosizelog > XFS_MAX_IO_LOG ||
-		    iosizelog < XFS_MIN_IO_LOG) {
-			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
-				iosizelog, XFS_MIN_IO_LOG,
-				XFS_MAX_IO_LOG);
-			return XFS_ERROR(EINVAL);
-		}
-
-		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-		mp->m_readio_log = iosizelog;
-		mp->m_writeio_log = iosizelog;
-	}
-
-	return 0;
-}
-
-struct proc_xfs_info {
-	int	flag;
-	char	*str;
-};
-
-STATIC int
-xfs_showargs(
-	struct xfs_mount	*mp,
-	struct seq_file		*m)
-{
-	static struct proc_xfs_info xfs_info_set[] = {
-		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
-		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
-		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
-		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
-		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
-		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
-		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
-		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
-		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
-		{ XFS_MOUNT_DELAYLOG,		"," MNTOPT_DELAYLOG },
-		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
-		{ 0, NULL }
-	};
-	static struct proc_xfs_info xfs_info_unset[] = {
-		/* the few simple ones we can get from the mount struct */
-		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
-		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
-		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
-		{ 0, NULL }
-	};
-	struct proc_xfs_info	*xfs_infop;
-
-	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-		if (mp->m_flags & xfs_infop->flag)
-			seq_puts(m, xfs_infop->str);
-	}
-	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
-		if (!(mp->m_flags & xfs_infop->flag))
-			seq_puts(m, xfs_infop->str);
-	}
-
-	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
-				(int)(1 << mp->m_writeio_log) >> 10);
-
-	if (mp->m_logbufs > 0)
-		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-	if (mp->m_logbsize > 0)
-		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
-	if (mp->m_logname)
-		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-	if (mp->m_rtname)
-		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
-	if (mp->m_dalign > 0)
-		seq_printf(m, "," MNTOPT_SUNIT "=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-	if (mp->m_swidth > 0)
-		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
-				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-		seq_puts(m, "," MNTOPT_USRQUOTA);
-	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-		seq_puts(m, "," MNTOPT_UQUOTANOENF);
-
-	/* Either project or group quotas can be active, not both */
-
-	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_OQUOTA_ENFD)
-			seq_puts(m, "," MNTOPT_PRJQUOTA);
-		else
-			seq_puts(m, "," MNTOPT_PQUOTANOENF);
-	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-		if (mp->m_qflags & XFS_OQUOTA_ENFD)
-			seq_puts(m, "," MNTOPT_GRPQUOTA);
-		else
-			seq_puts(m, "," MNTOPT_GQUOTANOENF);
-	}
-
-	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-		seq_puts(m, "," MNTOPT_NOQUOTA);
-
-	return 0;
-}
-__uint64_t
-xfs_max_file_offset(
-	unsigned int		blockshift)
-{
-	unsigned int		pagefactor = 1;
-	unsigned int		bitshift = BITS_PER_LONG - 1;
-
-	/* Figure out maximum filesize, on Linux this can depend on
-	 * the filesystem blocksize (on 32 bit platforms).
-	 * __block_write_begin does this in an [unsigned] long...
-	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
-	 * So, for page sized blocks (4K on 32 bit platforms),
-	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-	 * but for smaller blocksizes it is less (bbits = log2 bsize).
-	 * Note1: get_block_t takes a long (implicit cast from above)
-	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-	 * can optionally convert the [unsigned] long from above into
-	 * an [unsigned] long long.
-	 */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
-	ASSERT(sizeof(sector_t) == 8);
-	pagefactor = PAGE_CACHE_SIZE;
-	bitshift = BITS_PER_LONG;
-# else
-	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
-	return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
-
-STATIC int
-xfs_blkdev_get(
-	xfs_mount_t		*mp,
-	const char		*name,
-	struct block_device	**bdevp)
-{
-	int			error = 0;
-
-	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
-				    mp);
-	if (IS_ERR(*bdevp)) {
-		error = PTR_ERR(*bdevp);
-		xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
-	}
-
-	return -error;
-}
-
-STATIC void
-xfs_blkdev_put(
-	struct block_device	*bdev)
-{
-	if (bdev)
-		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
-	xfs_buftarg_t		*buftarg)
-{
-	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
-}
-
-STATIC void
-xfs_close_devices(
-	struct xfs_mount	*mp)
-{
-	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-		xfs_free_buftarg(mp, mp->m_logdev_targp);
-		xfs_blkdev_put(logdev);
-	}
-	if (mp->m_rtdev_targp) {
-		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-		xfs_free_buftarg(mp, mp->m_rtdev_targp);
-		xfs_blkdev_put(rtdev);
-	}
-	xfs_free_buftarg(mp, mp->m_ddev_targp);
-}
-
-/*
- * The file system configurations are:
- *	(1) device (partition) with data and internal log
- *	(2) logical volume with data and log subvolumes.
- *	(3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in sb->s_bdev.
- */
-STATIC int
-xfs_open_devices(
-	struct xfs_mount	*mp)
-{
-	struct block_device	*ddev = mp->m_super->s_bdev;
-	struct block_device	*logdev = NULL, *rtdev = NULL;
-	int			error;
-
-	/*
-	 * Open real time and log devices - order is important.
-	 */
-	if (mp->m_logname) {
-		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
-		if (error)
-			goto out;
-	}
-
-	if (mp->m_rtname) {
-		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
-		if (error)
-			goto out_close_logdev;
-
-		if (rtdev == ddev || rtdev == logdev) {
-			xfs_warn(mp,
-	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
-			error = EINVAL;
-			goto out_close_rtdev;
-		}
-	}
-
-	/*
-	 * Setup xfs_mount buffer target pointers
-	 */
-	error = ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
-	if (!mp->m_ddev_targp)
-		goto out_close_rtdev;
-
-	if (rtdev) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-							mp->m_fsname);
-		if (!mp->m_rtdev_targp)
-			goto out_free_ddev_targ;
-	}
-
-	if (logdev && logdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-							mp->m_fsname);
-		if (!mp->m_logdev_targp)
-			goto out_free_rtdev_targ;
-	} else {
-		mp->m_logdev_targp = mp->m_ddev_targp;
-	}
-
-	return 0;
-
- out_free_rtdev_targ:
-	if (mp->m_rtdev_targp)
-		xfs_free_buftarg(mp, mp->m_rtdev_targp);
- out_free_ddev_targ:
-	xfs_free_buftarg(mp, mp->m_ddev_targp);
- out_close_rtdev:
-	if (rtdev)
-		xfs_blkdev_put(rtdev);
- out_close_logdev:
-	if (logdev && logdev != ddev)
-		xfs_blkdev_put(logdev);
- out:
-	return error;
-}
-
-/*
- * Setup xfs_mount buffer target pointers based on superblock
- */
-STATIC int
-xfs_setup_devices(
-	struct xfs_mount	*mp)
-{
-	int			error;
-
-	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-				    mp->m_sb.sb_sectsize);
-	if (error)
-		return error;
-
-	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-		unsigned int	log_sector_size = BBSIZE;
-
-		if (xfs_sb_version_hassector(&mp->m_sb))
-			log_sector_size = mp->m_sb.sb_logsectsize;
-		error = xfs_setsize_buftarg(mp->m_logdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    log_sector_size);
-		if (error)
-			return error;
-	}
-	if (mp->m_rtdev_targp) {
-		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-					    mp->m_sb.sb_blocksize,
-					    mp->m_sb.sb_sectsize);
-		if (error)
-			return error;
-	}
-
-	return 0;
-}
-
-/* Catch misguided souls that try to use this interface on XFS */
-STATIC struct inode *
-xfs_fs_alloc_inode(
-	struct super_block	*sb)
-{
-	BUG();
-	return NULL;
-}
-
-/*
- * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
- */
-STATIC void
-xfs_fs_destroy_inode(
-	struct inode		*inode)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-
-	trace_xfs_destroy_inode(ip);
-
-	XFS_STATS_INC(vn_reclaim);
-
-	/* bad inode, get out here ASAP */
-	if (is_bad_inode(inode))
-		goto out_reclaim;
-
-	xfs_ioend_wait(ip);
-
-	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-	/*
-	 * We should never get here with one of the reclaim flags already set.
-	 */
-	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
-	/*
-	 * We always use background reclaim here because even if the
-	 * inode is clean, it still may be under IO and hence we have
-	 * to take the flush lock. The background reclaim path handles
-	 * this more efficiently than we can here, so simply let background
-	 * reclaim tear down all inodes.
-	 */
-out_reclaim:
-	xfs_inode_set_reclaim_tag(ip);
-}
-
-/*
- * Slab object creation initialisation for the XFS inode.
- * This covers only the idempotent fields in the XFS inode;
- * all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly initialise
- * fields in the xfs inode that left in the initialise state
- * when freeing the inode.
- */
-STATIC void
-xfs_fs_inode_init_once(
-	void			*inode)
-{
-	struct xfs_inode	*ip = inode;
-
-	memset(ip, 0, sizeof(struct xfs_inode));
-
-	/* vfs inode */
-	inode_init_once(VFS_I(ip));
-
-	/* xfs inode */
-	atomic_set(&ip->i_iocount, 0);
-	atomic_set(&ip->i_pincount, 0);
-	spin_lock_init(&ip->i_flags_lock);
-	init_waitqueue_head(&ip->i_ipin_wait);
-	/*
-	 * Because we want to use a counting completion, complete
-	 * the flush completion once to allow a single access to
-	 * the flush completion without blocking.
-	 */
-	init_completion(&ip->i_flush);
-	complete(&ip->i_flush);
-
-	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-		     "xfsino", ip->i_ino);
-}
-
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
-	struct inode	*inode,
-	int		flags)
-{
-	barrier();
-	XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_log_inode(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_trans	*tp;
-	int			error;
-
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		/* we need to return with the lock hold shared */
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Note - it's possible that we might have pushed ourselves out of the
-	 * way during trans_reserve which would flush the inode.  But there's
-	 * no guarantee that the inode buffer has actually gone out yet (it's
-	 * delwri).  Plus the buffer could be pinned anyway if it's part of
-	 * an inode in another recent transaction.  So we play it safe and
-	 * fire off the transaction anyway.
-	 */
-	xfs_trans_ijoin(tp, ip);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-	return error;
-}
-
-STATIC int
-xfs_fs_write_inode(
-	struct inode		*inode,
-	struct writeback_control *wbc)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	int			error = EAGAIN;
-
-	trace_xfs_write_inode(ip);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		/*
-		 * Make sure the inode has made it it into the log.  Instead
-		 * of forcing it all the way to stable storage using a
-		 * synchronous transaction we let the log force inside the
-		 * ->sync_fs call do that for thus, which reduces the number
-		 * of synchronous log foces dramatically.
-		 */
-		xfs_ioend_wait(ip);
-		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		if (ip->i_update_core) {
-			error = xfs_log_inode(ip);
-			if (error)
-				goto out_unlock;
-		}
-	} else {
-		/*
-		 * We make this non-blocking if the inode is contended, return
-		 * EAGAIN to indicate to the caller that they did not succeed.
-		 * This prevents the flush path from blocking on inodes inside
-		 * another operation right now, they get caught later by
-		 * xfs_sync.
-		 */
-		if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
-			goto out;
-
-		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-			goto out_unlock;
-
-		/*
-		 * Now we have the flush lock and the inode is not pinned, we
-		 * can check if the inode is really clean as we know that
-		 * there are no pending transaction completions, it is not
-		 * waiting on the delayed write queue and there is no IO in
-		 * progress.
-		 */
-		if (xfs_inode_clean(ip)) {
-			xfs_ifunlock(ip);
-			error = 0;
-			goto out_unlock;
-		}
-		error = xfs_iflush(ip, SYNC_TRYLOCK);
-	}
-
- out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
-	/*
-	 * if we failed to write out the inode then mark
-	 * it dirty again so we'll try again later.
-	 */
-	if (error)
-		xfs_mark_inode_dirty_sync(ip);
-	return -error;
-}
-
-STATIC void
-xfs_fs_evict_inode(
-	struct inode		*inode)
-{
-	xfs_inode_t		*ip = XFS_I(inode);
-
-	trace_xfs_evict_inode(ip);
-
-	truncate_inode_pages(&inode->i_data, 0);
-	end_writeback(inode);
-	XFS_STATS_INC(vn_rele);
-	XFS_STATS_INC(vn_remove);
-	XFS_STATS_DEC(vn_active);
-
-	/*
-	 * The iolock is used by the file system to coordinate reads,
-	 * writes, and block truncates.  Up to this point the lock
-	 * protected concurrent accesses by users of the inode.  But
-	 * from here forward we're doing some final processing of the
-	 * inode because we're done with it, and although we reuse the
-	 * iolock for protection it is really a distinct lock class
-	 * (in the lockdep sense) from before.  To keep lockdep happy
-	 * (and basically indicate what we are doing), we explicitly
-	 * re-init the iolock here.
-	 */
-	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-			&xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
-	xfs_inactive(ip);
-}
-
-STATIC void
-xfs_free_fsname(
-	struct xfs_mount	*mp)
-{
-	kfree(mp->m_fsname);
-	kfree(mp->m_rtname);
-	kfree(mp->m_logname);
-}
-
-STATIC void
-xfs_fs_put_super(
-	struct super_block	*sb)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	xfs_syncd_stop(mp);
-
-	/*
-	 * Blow away any referenced inode in the filestreams cache.
-	 * This can and will cause log traffic as inodes go inactive
-	 * here.
-	 */
-	xfs_filestream_unmount(mp);
-
-	XFS_bflush(mp->m_ddev_targp);
-
-	xfs_unmountfs(mp);
-	xfs_freesb(mp);
-	xfs_icsb_destroy_counters(mp);
-	xfs_close_devices(mp);
-	xfs_free_fsname(mp);
-	kfree(mp);
-}
-
-STATIC int
-xfs_fs_sync_fs(
-	struct super_block	*sb,
-	int			wait)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-	int			error;
-
-	/*
-	 * Not much we can do for the first async pass.  Writing out the
-	 * superblock would be counter-productive as we are going to redirty
-	 * when writing out other data and metadata (and writing out a single
-	 * block is quite fast anyway).
-	 *
-	 * Try to asynchronously kick off quota syncing at least.
-	 */
-	if (!wait) {
-		xfs_qm_sync(mp, SYNC_TRYLOCK);
-		return 0;
-	}
-
-	error = xfs_quiesce_data(mp);
-	if (error)
-		return -error;
-
-	if (laptop_mode) {
-		/*
-		 * The disk must be active because we're syncing.
-		 * We schedule xfssyncd now (now that the disk is
-		 * active) instead of later (when it might not be).
-		 */
-		flush_delayed_work_sync(&mp->m_sync_work);
-	}
-
-	return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
-	struct dentry		*dentry,
-	struct kstatfs		*statp)
-{
-	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
-	xfs_sb_t		*sbp = &mp->m_sb;
-	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
-	__uint64_t		fakeinos, id;
-	xfs_extlen_t		lsize;
-	__int64_t		ffree;
-
-	statp->f_type = XFS_SB_MAGIC;
-	statp->f_namelen = MAXNAMELEN - 1;
-
-	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
-	statp->f_fsid.val[0] = (u32)id;
-	statp->f_fsid.val[1] = (u32)(id >> 32);
-
-	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-
-	spin_lock(&mp->m_sb_lock);
-	statp->f_bsize = sbp->sb_blocksize;
-	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-	statp->f_blocks = sbp->sb_dblocks - lsize;
-	statp->f_bfree = statp->f_bavail =
-				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-	statp->f_files =
-	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
-	if (mp->m_maxicount)
-		statp->f_files = min_t(typeof(statp->f_files),
-					statp->f_files,
-					mp->m_maxicount);
-
-	/* make sure statp->f_ffree does not underflow */
-	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
-	statp->f_ffree = max_t(__int64_t, ffree, 0);
-
-	spin_unlock(&mp->m_sb_lock);
-
-	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-		xfs_qm_statvfs(ip, statp);
-	return 0;
-}
-
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
-{
-	__uint64_t resblks = 0;
-
-	mp->m_resblks_save = mp->m_resblks;
-	xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
-{
-	__uint64_t resblks;
-
-	if (mp->m_resblks_save) {
-		resblks = mp->m_resblks_save;
-		mp->m_resblks_save = 0;
-	} else
-		resblks = xfs_default_resblks(mp);
-
-	xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC int
-xfs_fs_remount(
-	struct super_block	*sb,
-	int			*flags,
-	char			*options)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-	substring_t		args[MAX_OPT_ARGS];
-	char			*p;
-	int			error;
-
-	while ((p = strsep(&options, ",")) != NULL) {
-		int token;
-
-		if (!*p)
-			continue;
-
-		token = match_token(p, tokens, args);
-		switch (token) {
-		case Opt_barrier:
-			mp->m_flags |= XFS_MOUNT_BARRIER;
-			break;
-		case Opt_nobarrier:
-			mp->m_flags &= ~XFS_MOUNT_BARRIER;
-			break;
-		default:
-			/*
-			 * Logically we would return an error here to prevent
-			 * users from believing they might have changed
-			 * mount options using remount which can't be changed.
-			 *
-			 * But unfortunately mount(8) adds all options from
-			 * mtab and fstab to the mount arguments in some cases
-			 * so we can't blindly reject options, but have to
-			 * check for each specified option if it actually
-			 * differs from the currently set option and only
-			 * reject it if that's the case.
-			 *
-			 * Until that is implemented we return success for
-			 * every remount request, and silently ignore all
-			 * options that we can't actually change.
-			 */
-#if 0
-			xfs_info(mp,
-		"mount option \"%s\" not supported for remount\n", p);
-			return -EINVAL;
-#else
-			break;
-#endif
-		}
-	}
-
-	/* ro -> rw */
-	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
-		mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
-		/*
-		 * If this is the first remount to writeable state we
-		 * might have some superblock changes to update.
-		 */
-		if (mp->m_update_flags) {
-			error = xfs_mount_log_sb(mp, mp->m_update_flags);
-			if (error) {
-				xfs_warn(mp, "failed to write sb changes");
-				return error;
-			}
-			mp->m_update_flags = 0;
-		}
-
-		/*
-		 * Fill out the reserve pool if it is empty. Use the stashed
-		 * value if it is non-zero, otherwise go with the default.
-		 */
-		xfs_restore_resvblks(mp);
-	}
-
-	/* rw -> ro */
-	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-		/*
-		 * After we have synced the data but before we sync the
-		 * metadata, we need to free up the reserve block pool so that
-		 * the used block count in the superblock on disk is correct at
-		 * the end of the remount. Stash the current reserve pool size
-		 * so that if we get remounted rw, we can return it to the same
-		 * size.
-		 */
-
-		xfs_quiesce_data(mp);
-		xfs_save_resvblks(mp);
-		xfs_quiesce_attr(mp);
-		mp->m_flags |= XFS_MOUNT_RDONLY;
-	}
-
-	return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of the metadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-STATIC int
-xfs_fs_freeze(
-	struct super_block	*sb)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	xfs_save_resvblks(mp);
-	xfs_quiesce_attr(mp);
-	return -xfs_fs_log_dummy(mp);
-}
-
-STATIC int
-xfs_fs_unfreeze(
-	struct super_block	*sb)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	xfs_restore_resvblks(mp);
-	return 0;
-}
-
-STATIC int
-xfs_fs_show_options(
-	struct seq_file		*m,
-	struct vfsmount		*mnt)
-{
-	return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-	struct xfs_mount	*mp)
-{
-	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-	/* Fail a mount where the logbuf is smaller than the log stripe */
-	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-		if (mp->m_logbsize <= 0 &&
-		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
-			mp->m_logbsize = mp->m_sb.sb_logsunit;
-		} else if (mp->m_logbsize > 0 &&
-			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
-			xfs_warn(mp,
-		"logbuf size must be greater than or equal to log stripe size");
-			return XFS_ERROR(EINVAL);
-		}
-	} else {
-		/* Fail a mount if the logbuf is larger than 32K */
-		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
-			xfs_warn(mp,
-		"logbuf size for version 1 logs must be 16K or 32K");
-			return XFS_ERROR(EINVAL);
-		}
-	}
-
-	/*
-	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-	 * told by noattr2 to turn it off
-	 */
-	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
-		mp->m_flags |= XFS_MOUNT_ATTR2;
-
-	/*
-	 * prohibit r/w mounts of read-only filesystems
-	 */
-	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-		xfs_warn(mp,
-			"cannot mount a read-only filesystem as read-write");
-		return XFS_ERROR(EROFS);
-	}
-
-	return 0;
-}
-
-STATIC int
-xfs_fs_fill_super(
-	struct super_block	*sb,
-	void			*data,
-	int			silent)
-{
-	struct inode		*root;
-	struct xfs_mount	*mp = NULL;
-	int			flags = 0, error = ENOMEM;
-
-	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
-	if (!mp)
-		goto out;
-
-	spin_lock_init(&mp->m_sb_lock);
-	mutex_init(&mp->m_growlock);
-	atomic_set(&mp->m_active_trans, 0);
-
-	mp->m_super = sb;
-	sb->s_fs_info = mp;
-
-	error = xfs_parseargs(mp, (char *)data);
-	if (error)
-		goto out_free_fsname;
-
-	sb_min_blocksize(sb, BBSIZE);
-	sb->s_xattr = xfs_xattr_handlers;
-	sb->s_export_op = &xfs_export_operations;
-#ifdef CONFIG_XFS_QUOTA
-	sb->s_qcop = &xfs_quotactl_operations;
-#endif
-	sb->s_op = &xfs_super_operations;
-
-	if (silent)
-		flags |= XFS_MFSI_QUIET;
-
-	error = xfs_open_devices(mp);
-	if (error)
-		goto out_free_fsname;
-
-	error = xfs_icsb_init_counters(mp);
-	if (error)
-		goto out_close_devices;
-
-	error = xfs_readsb(mp, flags);
-	if (error)
-		goto out_destroy_counters;
-
-	error = xfs_finish_flags(mp);
-	if (error)
-		goto out_free_sb;
-
-	error = xfs_setup_devices(mp);
-	if (error)
-		goto out_free_sb;
-
-	error = xfs_filestream_mount(mp);
-	if (error)
-		goto out_free_sb;
-
-	/*
-	 * we must configure the block size in the superblock before we run the
-	 * full mount process as the mount process can lookup and cache inodes.
-	 * For the same reason we must also initialise the syncd and register
-	 * the inode cache shrinker so that inodes can be reclaimed during
-	 * operations like a quotacheck that iterate all inodes in the
-	 * filesystem.
-	 */
-	sb->s_magic = XFS_SB_MAGIC;
-	sb->s_blocksize = mp->m_sb.sb_blocksize;
-	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
-	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
-	sb->s_time_gran = 1;
-	set_posix_acl_flag(sb);
-
-	error = xfs_mountfs(mp);
-	if (error)
-		goto out_filestream_unmount;
-
-	error = xfs_syncd_init(mp);
-	if (error)
-		goto out_unmount;
-
-	root = igrab(VFS_I(mp->m_rootip));
-	if (!root) {
-		error = ENOENT;
-		goto out_syncd_stop;
-	}
-	if (is_bad_inode(root)) {
-		error = EINVAL;
-		goto out_syncd_stop;
-	}
-	sb->s_root = d_alloc_root(root);
-	if (!sb->s_root) {
-		error = ENOMEM;
-		goto out_iput;
-	}
-
-	return 0;
-
- out_filestream_unmount:
-	xfs_filestream_unmount(mp);
- out_free_sb:
-	xfs_freesb(mp);
- out_destroy_counters:
-	xfs_icsb_destroy_counters(mp);
- out_close_devices:
-	xfs_close_devices(mp);
- out_free_fsname:
-	xfs_free_fsname(mp);
-	kfree(mp);
- out:
-	return -error;
-
- out_iput:
-	iput(root);
- out_syncd_stop:
-	xfs_syncd_stop(mp);
- out_unmount:
-	/*
-	 * Blow away any referenced inode in the filestreams cache.
-	 * This can and will cause log traffic as inodes go inactive
-	 * here.
-	 */
-	xfs_filestream_unmount(mp);
-
-	XFS_bflush(mp->m_ddev_targp);
-
-	xfs_unmountfs(mp);
-	goto out_free_sb;
-}
-
-STATIC struct dentry *
-xfs_fs_mount(
-	struct file_system_type	*fs_type,
-	int			flags,
-	const char		*dev_name,
-	void			*data)
-{
-	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-}
-
-static int
-xfs_fs_nr_cached_objects(
-	struct super_block	*sb)
-{
-	return xfs_reclaim_inodes_count(XFS_M(sb));
-}
-
-static void
-xfs_fs_free_cached_objects(
-	struct super_block	*sb,
-	int			nr_to_scan)
-{
-	xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
-}
-
-static const struct super_operations xfs_super_operations = {
-	.alloc_inode		= xfs_fs_alloc_inode,
-	.destroy_inode		= xfs_fs_destroy_inode,
-	.dirty_inode		= xfs_fs_dirty_inode,
-	.write_inode		= xfs_fs_write_inode,
-	.evict_inode		= xfs_fs_evict_inode,
-	.put_super		= xfs_fs_put_super,
-	.sync_fs		= xfs_fs_sync_fs,
-	.freeze_fs		= xfs_fs_freeze,
-	.unfreeze_fs		= xfs_fs_unfreeze,
-	.statfs			= xfs_fs_statfs,
-	.remount_fs		= xfs_fs_remount,
-	.show_options		= xfs_fs_show_options,
-	.nr_cached_objects	= xfs_fs_nr_cached_objects,
-	.free_cached_objects	= xfs_fs_free_cached_objects,
-};
-
-static struct file_system_type xfs_fs_type = {
-	.owner			= THIS_MODULE,
-	.name			= "xfs",
-	.mount			= xfs_fs_mount,
-	.kill_sb		= kill_block_super,
-	.fs_flags		= FS_REQUIRES_DEV,
-};
-
-STATIC int __init
-xfs_init_zones(void)
-{
-
-	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-	if (!xfs_ioend_zone)
-		goto out;
-
-	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-						  xfs_ioend_zone);
-	if (!xfs_ioend_pool)
-		goto out_destroy_ioend_zone;
-
-	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-						"xfs_log_ticket");
-	if (!xfs_log_ticket_zone)
-		goto out_destroy_ioend_pool;
-
-	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-						"xfs_bmap_free_item");
-	if (!xfs_bmap_free_item_zone)
-		goto out_destroy_log_ticket_zone;
-
-	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-						"xfs_btree_cur");
-	if (!xfs_btree_cur_zone)
-		goto out_destroy_bmap_free_item_zone;
-
-	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-						"xfs_da_state");
-	if (!xfs_da_state_zone)
-		goto out_destroy_btree_cur_zone;
-
-	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-	if (!xfs_dabuf_zone)
-		goto out_destroy_da_state_zone;
-
-	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-	if (!xfs_ifork_zone)
-		goto out_destroy_dabuf_zone;
-
-	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-	if (!xfs_trans_zone)
-		goto out_destroy_ifork_zone;
-
-	xfs_log_item_desc_zone =
-		kmem_zone_init(sizeof(struct xfs_log_item_desc),
-			       "xfs_log_item_desc");
-	if (!xfs_log_item_desc_zone)
-		goto out_destroy_trans_zone;
-
-	/*
-	 * The size of the zone allocated buf log item is the maximum
-	 * size possible under XFS.  This wastes a little bit of memory,
-	 * but it is much faster.
-	 */
-	xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-				(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
-				  NBWORD) * sizeof(int))), "xfs_buf_item");
-	if (!xfs_buf_item_zone)
-		goto out_destroy_log_item_desc_zone;
-
-	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-				 sizeof(xfs_extent_t))), "xfs_efd_item");
-	if (!xfs_efd_zone)
-		goto out_destroy_buf_item_zone;
-
-	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-				sizeof(xfs_extent_t))), "xfs_efi_item");
-	if (!xfs_efi_zone)
-		goto out_destroy_efd_zone;
-
-	xfs_inode_zone =
-		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
-			xfs_fs_inode_init_once);
-	if (!xfs_inode_zone)
-		goto out_destroy_efi_zone;
-
-	xfs_ili_zone =
-		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-					KM_ZONE_SPREAD, NULL);
-	if (!xfs_ili_zone)
-		goto out_destroy_inode_zone;
-
-	return 0;
-
- out_destroy_inode_zone:
-	kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
-	kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
-	kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
-	kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
-	kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
-	kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
-	kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
-	kmem_zone_destroy(xfs_dabuf_zone);
- out_destroy_da_state_zone:
-	kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
-	kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
-	kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
-	kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-	mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-	kmem_zone_destroy(xfs_ioend_zone);
- out:
-	return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
-	kmem_zone_destroy(xfs_ili_zone);
-	kmem_zone_destroy(xfs_inode_zone);
-	kmem_zone_destroy(xfs_efi_zone);
-	kmem_zone_destroy(xfs_efd_zone);
-	kmem_zone_destroy(xfs_buf_item_zone);
-	kmem_zone_destroy(xfs_log_item_desc_zone);
-	kmem_zone_destroy(xfs_trans_zone);
-	kmem_zone_destroy(xfs_ifork_zone);
-	kmem_zone_destroy(xfs_dabuf_zone);
-	kmem_zone_destroy(xfs_da_state_zone);
-	kmem_zone_destroy(xfs_btree_cur_zone);
-	kmem_zone_destroy(xfs_bmap_free_item_zone);
-	kmem_zone_destroy(xfs_log_ticket_zone);
-	mempool_destroy(xfs_ioend_pool);
-	kmem_zone_destroy(xfs_ioend_zone);
-
-}
-
-STATIC int __init
-xfs_init_workqueues(void)
-{
-	/*
-	 * max_active is set to 8 to give enough concurency to allow
-	 * multiple work operations on each CPU to run. This allows multiple
-	 * filesystems to be running sync work concurrently, and scales with
-	 * the number of CPUs in the system.
-	 */
-	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
-	if (!xfs_syncd_wq)
-		goto out;
-
-	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
-	if (!xfs_ail_wq)
-		goto out_destroy_syncd;
-
-	return 0;
-
-out_destroy_syncd:
-	destroy_workqueue(xfs_syncd_wq);
-out:
-	return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_workqueues(void)
-{
-	destroy_workqueue(xfs_ail_wq);
-	destroy_workqueue(xfs_syncd_wq);
-}
-
-STATIC int __init
-init_xfs_fs(void)
-{
-	int			error;
-
-	printk(KERN_INFO XFS_VERSION_STRING " with "
-			 XFS_BUILD_OPTIONS " enabled\n");
-
-	xfs_ioend_init();
-	xfs_dir_startup();
-
-	error = xfs_init_zones();
-	if (error)
-		goto out;
-
-	error = xfs_init_workqueues();
-	if (error)
-		goto out_destroy_zones;
-
-	error = xfs_mru_cache_init();
-	if (error)
-		goto out_destroy_wq;
-
-	error = xfs_filestream_init();
-	if (error)
-		goto out_mru_cache_uninit;
-
-	error = xfs_buf_init();
-	if (error)
-		goto out_filestream_uninit;
-
-	error = xfs_init_procfs();
-	if (error)
-		goto out_buf_terminate;
-
-	error = xfs_sysctl_register();
-	if (error)
-		goto out_cleanup_procfs;
-
-	vfs_initquota();
-
-	error = register_filesystem(&xfs_fs_type);
-	if (error)
-		goto out_sysctl_unregister;
-	return 0;
-
- out_sysctl_unregister:
-	xfs_sysctl_unregister();
- out_cleanup_procfs:
-	xfs_cleanup_procfs();
- out_buf_terminate:
-	xfs_buf_terminate();
- out_filestream_uninit:
-	xfs_filestream_uninit();
- out_mru_cache_uninit:
-	xfs_mru_cache_uninit();
- out_destroy_wq:
-	xfs_destroy_workqueues();
- out_destroy_zones:
-	xfs_destroy_zones();
- out:
-	return error;
-}
-
-STATIC void __exit
-exit_xfs_fs(void)
-{
-	vfs_exitquota();
-	unregister_filesystem(&xfs_fs_type);
-	xfs_sysctl_unregister();
-	xfs_cleanup_procfs();
-	xfs_buf_terminate();
-	xfs_filestream_uninit();
-	xfs_mru_cache_uninit();
-	xfs_destroy_workqueues();
-	xfs_destroy_zones();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
deleted file mode 100644
index 50a3266..0000000
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPER_H__
-#define __XFS_SUPER_H__
-
-#include <linux/exportfs.h>
-
-#ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
-extern void xfs_qm_exit(void);
-# define vfs_initquota()	xfs_qm_init()
-# define vfs_exitquota()	xfs_qm_exit()
-#else
-# define vfs_initquota()	do { } while (0)
-# define vfs_exitquota()	do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING		"ACLs, "
-# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
-#else
-# define XFS_ACL_STRING
-# define set_posix_acl_flag(sb)	do { } while (0)
-#endif
-
-#define XFS_SECURITY_STRING	"security attributes, "
-
-#ifdef CONFIG_XFS_RT
-# define XFS_REALTIME_STRING	"realtime, "
-#else
-# define XFS_REALTIME_STRING
-#endif
-
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-#  define XFS_BIGFS_STRING	"large block/inode numbers, "
-# else
-#  define XFS_BIGFS_STRING	"large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
-#ifdef DEBUG
-# define XFS_DBG_STRING		"debug"
-#else
-# define XFS_DBG_STRING		"no debug"
-#endif
-
-#define XFS_VERSION_STRING	"SGI XFS"
-#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
-				XFS_SECURITY_STRING \
-				XFS_REALTIME_STRING \
-				XFS_BIGFS_STRING \
-				XFS_DBG_STRING /* DBG must be last */
-
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_buftarg;
-struct block_device;
-
-extern __uint64_t xfs_max_file_offset(unsigned int);
-
-extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-
-extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
-extern const struct quotactl_ops xfs_quotactl_operations;
-
-#define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
-
-#endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
deleted file mode 100644
index 4604f90..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_fsops.h"
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
-
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- */
-#define XFS_LOOKUP_BATCH	32
-
-STATIC int
-xfs_inode_ag_walk_grab(
-	struct xfs_inode	*ip)
-{
-	struct inode		*inode = VFS_I(ip);
-
-	ASSERT(rcu_read_lock_held());
-
-	/*
-	 * check for stale RCU freed inode
-	 *
-	 * If the inode has been reallocated, it doesn't matter if it's not in
-	 * the AG we are walking - we are walking for writeback, so if it
-	 * passes all the "valid inode" checks and is dirty, then we'll write
-	 * it back anyway.  If it has been reallocated and still being
-	 * initialised, the XFS_INEW check below will catch it.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	if (!ip->i_ino)
-		goto out_unlock_noent;
-
-	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-		goto out_unlock_noent;
-	spin_unlock(&ip->i_flags_lock);
-
-	/* nothing to sync during shutdown */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return EFSCORRUPTED;
-
-	/* If we can't grab the inode, it must on it's way to reclaim. */
-	if (!igrab(inode))
-		return ENOENT;
-
-	if (is_bad_inode(inode)) {
-		IRELE(ip);
-		return ENOENT;
-	}
-
-	/* inode is valid */
-	return 0;
-
-out_unlock_noent:
-	spin_unlock(&ip->i_flags_lock);
-	return ENOENT;
-}
-
-STATIC int
-xfs_inode_ag_walk(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags),
-	int			flags)
-{
-	uint32_t		first_index;
-	int			last_error = 0;
-	int			skipped;
-	int			done;
-	int			nr_found;
-
-restart:
-	done = 0;
-	skipped = 0;
-	first_index = 0;
-	nr_found = 0;
-	do {
-		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-		int		error = 0;
-		int		i;
-
-		rcu_read_lock();
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH);
-		if (!nr_found) {
-			rcu_read_unlock();
-			break;
-		}
-
-		/*
-		 * Grab the inodes before we drop the lock. if we found
-		 * nothing, nr == 0 and the loop will be skipped.
-		 */
-		for (i = 0; i < nr_found; i++) {
-			struct xfs_inode *ip = batch[i];
-
-			if (done || xfs_inode_ag_walk_grab(ip))
-				batch[i] = NULL;
-
-			/*
-			 * Update the index for the next lookup. Catch
-			 * overflows into the next AG range which can occur if
-			 * we have inodes in the last block of the AG and we
-			 * are currently pointing to the last inode.
-			 *
-			 * Because we may see inodes that are from the wrong AG
-			 * due to RCU freeing and reallocation, only update the
-			 * index if it lies in this AG. It was a race that lead
-			 * us to see this inode, so another lookup from the
-			 * same index will not find it again.
-			 */
-			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
-				continue;
-			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-				done = 1;
-		}
-
-		/* unlock now we've grabbed the inodes. */
-		rcu_read_unlock();
-
-		for (i = 0; i < nr_found; i++) {
-			if (!batch[i])
-				continue;
-			error = execute(batch[i], pag, flags);
-			IRELE(batch[i]);
-			if (error == EAGAIN) {
-				skipped++;
-				continue;
-			}
-			if (error && last_error != EFSCORRUPTED)
-				last_error = error;
-		}
-
-		/* bail out if the filesystem is corrupted.  */
-		if (error == EFSCORRUPTED)
-			break;
-
-		cond_resched();
-
-	} while (nr_found && !done);
-
-	if (skipped) {
-		delay(1);
-		goto restart;
-	}
-	return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
-	struct xfs_mount	*mp,
-	int			(*execute)(struct xfs_inode *ip,
-					   struct xfs_perag *pag, int flags),
-	int			flags)
-{
-	struct xfs_perag	*pag;
-	int			error = 0;
-	int			last_error = 0;
-	xfs_agnumber_t		ag;
-
-	ag = 0;
-	while ((pag = xfs_perag_get(mp, ag))) {
-		ag = pag->pag_agno + 1;
-		error = xfs_inode_ag_walk(mp, pag, execute, flags);
-		xfs_perag_put(pag);
-		if (error) {
-			last_error = error;
-			if (error == EFSCORRUPTED)
-				break;
-		}
-	}
-	return XFS_ERROR(last_error);
-}
-
-STATIC int
-xfs_sync_inode_data(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
-	int			flags)
-{
-	struct inode		*inode = VFS_I(ip);
-	struct address_space *mapping = inode->i_mapping;
-	int			error = 0;
-
-	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		goto out_wait;
-
-	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-		if (flags & SYNC_TRYLOCK)
-			goto out_wait;
-		xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	}
-
-	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-				0 : XBF_ASYNC, FI_NONE);
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- out_wait:
-	if (flags & SYNC_WAIT)
-		xfs_ioend_wait(ip);
-	return error;
-}
-
-STATIC int
-xfs_sync_inode_attr(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
-	int			flags)
-{
-	int			error = 0;
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_inode_clean(ip))
-		goto out_unlock;
-	if (!xfs_iflock_nowait(ip)) {
-		if (!(flags & SYNC_WAIT))
-			goto out_unlock;
-		xfs_iflock(ip);
-	}
-
-	if (xfs_inode_clean(ip)) {
-		xfs_ifunlock(ip);
-		goto out_unlock;
-	}
-
-	error = xfs_iflush(ip, flags);
-
-	/*
-	 * We don't want to try again on non-blocking flushes that can't run
-	 * again immediately. If an inode really must be written, then that's
-	 * what the SYNC_WAIT flag is for.
-	 */
-	if (error == EAGAIN) {
-		ASSERT(!(flags & SYNC_WAIT));
-		error = 0;
-	}
-
- out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
-	struct xfs_mount	*mp,
-	int			flags)
-{
-	int			error;
-
-	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
-	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-	if (error)
-		return XFS_ERROR(error);
-
-	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-	return 0;
-}
-
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
-	struct xfs_mount	*mp,
-	int			flags)
-{
-	ASSERT((flags & ~SYNC_WAIT) == 0);
-
-	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
-STATIC int
-xfs_sync_fsdata(
-	struct xfs_mount	*mp)
-{
-	struct xfs_buf		*bp;
-
-	/*
-	 * If the buffer is pinned then push on the log so we won't get stuck
-	 * waiting in the write for someone, maybe ourselves, to flush the log.
-	 *
-	 * Even though we just pushed the log above, we did not have the
-	 * superblock buffer locked at that point so it can become pinned in
-	 * between there and here.
-	 */
-	bp = xfs_getsb(mp, 0);
-	if (xfs_buf_ispinned(bp))
-		xfs_log_force(mp, 0);
-
-	return xfs_bwrite(mp, bp);
-}
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem, we have
- * two phases to execute. This first phase is syncing the data before we
- * quiesce the filesystem, and the second is flushing all the inodes out after
- * we've waited for all the transactions created by the first phase to
- * complete. The second phase ensures that the inodes are written to their
- * location on disk rather than just existing in transactions in the log. This
- * means after a quiesce there is no log replay required to write the inodes to
- * disk (this is the main difference between a sync and a quiesce).
- */
-/*
- * First stage of freeze - no writers will make progress now we are here,
- * so we flush delwri and delalloc buffers here, then wait for all I/O to
- * complete.  Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
- * because it'll just get dirty again.
- */
-int
-xfs_quiesce_data(
-	struct xfs_mount	*mp)
-{
-	int			error, error2 = 0;
-
-	xfs_qm_sync(mp, SYNC_TRYLOCK);
-	xfs_qm_sync(mp, SYNC_WAIT);
-
-	/* force out the newly dirtied log buffers */
-	xfs_log_force(mp, XFS_LOG_SYNC);
-
-	/* write superblock and hoover up shutdown errors */
-	error = xfs_sync_fsdata(mp);
-
-	/* make sure all delwri buffers are written out */
-	xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-	/* mark the log as covered if needed */
-	if (xfs_log_need_covered(mp))
-		error2 = xfs_fs_log_dummy(mp);
-
-	/* flush data-only devices */
-	if (mp->m_rtdev_targp)
-		XFS_bflush(mp->m_rtdev_targp);
-
-	return error ? error : error2;
-}
-
-STATIC void
-xfs_quiesce_fs(
-	struct xfs_mount	*mp)
-{
-	int	count = 0, pincount;
-
-	xfs_reclaim_inodes(mp, 0);
-	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
-	/*
-	 * This loop must run at least twice.  The first instance of the loop
-	 * will flush most meta data but that will generate more meta data
-	 * (typically directory updates).  Which then must be flushed and
-	 * logged before we can write the unmount record. We also so sync
-	 * reclaim of inodes to catch any that the above delwri flush skipped.
-	 */
-	do {
-		xfs_reclaim_inodes(mp, SYNC_WAIT);
-		xfs_sync_attr(mp, SYNC_WAIT);
-		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-		if (!pincount) {
-			delay(50);
-			count++;
-		}
-	} while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceeding.
- */
-void
-xfs_quiesce_attr(
-	struct xfs_mount	*mp)
-{
-	int	error = 0;
-
-	/* wait for all modifications to complete */
-	while (atomic_read(&mp->m_active_trans) > 0)
-		delay(100);
-
-	/* flush inodes and push all remaining buffers out to disk */
-	xfs_quiesce_fs(mp);
-
-	/*
-	 * Just warn here till VFS can correctly support
-	 * read-only remount without racing.
-	 */
-	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
-	/* Push the superblock and write an unmount record */
-	error = xfs_log_sbcount(mp);
-	if (error)
-		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
-				"Frozen image may not be consistent.");
-	xfs_log_unmount_write(mp);
-	xfs_unmountfs_writesb(mp);
-}
-
-static void
-xfs_syncd_queue_sync(
-	struct xfs_mount        *mp)
-{
-	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
-				msecs_to_jiffies(xfs_syncd_centisecs * 10));
-}
-
-/*
- * Every sync period we need to unpin all items, reclaim inodes and sync
- * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle and not frozen.
- */
-STATIC void
-xfs_sync_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_sync_work);
-	int		error;
-
-	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-		/* dgc: errors ignored here */
-		if (mp->m_super->s_frozen == SB_UNFROZEN &&
-		    xfs_log_need_covered(mp))
-			error = xfs_fs_log_dummy(mp);
-		else
-			xfs_log_force(mp, 0);
-		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-
-		/* start pushing all the metadata that is currently dirty */
-		xfs_ail_push_all(mp->m_ail);
-	}
-
-	/* queue us up again */
-	xfs_syncd_queue_sync(mp);
-}
-
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs syncd work default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_syncd_queue_reclaim(
-	struct xfs_mount        *mp)
-{
-
-	/*
-	 * We can have inodes enter reclaim after we've shut down the syncd
-	 * workqueue during unmount, so don't allow reclaim work to be queued
-	 * during unmount.
-	 */
-	if (!(mp->m_super->s_flags & MS_ACTIVE))
-		return;
-
-	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
-			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-	}
-	rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-STATIC void
-xfs_reclaim_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_reclaim_work);
-
-	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-	xfs_syncd_queue_reclaim(mp);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
- * inode flush in progress no matter how many ENOSPC events are occurring and
- * so will prevent the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
-	struct xfs_inode	*ip)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	queue_work(xfs_syncd_wq, &mp->m_flush_work);
-	flush_work_sync(&mp->m_flush_work);
-}
-
-STATIC void
-xfs_flush_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(work,
-					struct xfs_mount, m_flush_work);
-
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
-
-int
-xfs_syncd_init(
-	struct xfs_mount	*mp)
-{
-	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
-	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-
-	xfs_syncd_queue_sync(mp);
-	xfs_syncd_queue_reclaim(mp);
-
-	return 0;
-}
-
-void
-xfs_syncd_stop(
-	struct xfs_mount	*mp)
-{
-	cancel_delayed_work_sync(&mp->m_sync_work);
-	cancel_delayed_work_sync(&mp->m_reclaim_work);
-	cancel_work_sync(&mp->m_flush_work);
-}
-
-void
-__xfs_inode_set_reclaim_tag(
-	struct xfs_perag	*pag,
-	struct xfs_inode	*ip)
-{
-	radix_tree_tag_set(&pag->pag_ici_root,
-			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-			   XFS_ICI_RECLAIM_TAG);
-
-	if (!pag->pag_ici_reclaimable) {
-		/* propagate the reclaim tag up into the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-
-		/* schedule periodic background inode reclaim */
-		xfs_syncd_queue_reclaim(ip->i_mount);
-
-		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-	pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-	xfs_inode_t	*ip)
-{
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_perag *pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	__xfs_inode_set_reclaim_tag(pag, ip);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	pag->pag_ici_reclaimable--;
-	if (!pag->pag_ici_reclaimable) {
-		/* clear the reclaim tag from the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-}
-
-void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-	__xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
- * Grab the inode for reclaim exclusively.
- * Return 0 if we grabbed it, non-zero otherwise.
- */
-STATIC int
-xfs_reclaim_inode_grab(
-	struct xfs_inode	*ip,
-	int			flags)
-{
-	ASSERT(rcu_read_lock_held());
-
-	/* quick check for stale RCU freed inode */
-	if (!ip->i_ino)
-		return 1;
-
-	/*
-	 * do some unlocked checks first to avoid unnecessary lock traffic.
-	 * The first is a flush lock check, the second is a already in reclaim
-	 * check. Only do these checks if we are not going to block on locks.
-	 */
-	if ((flags & SYNC_TRYLOCK) &&
-	    (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
-		return 1;
-	}
-
-	/*
-	 * The radix tree lock here protects a thread in xfs_iget from racing
-	 * with us starting reclaim on the inode.  Once we have the
-	 * XFS_IRECLAIM flag set it will not touch us.
-	 *
-	 * Due to RCU lookup, we may find inodes that have been freed and only
-	 * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
-	 * aren't candidates for reclaim at all, so we must check the
-	 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
-	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		/* not a reclaim candidate. */
-		spin_unlock(&ip->i_flags_lock);
-		return 1;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	return 0;
-}
-
-/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
- *
- *	inode state	     iflush ret		required action
- *      ---------------      ----------         ---------------
- *	bad			-		reclaim
- *	shutdown		EIO		unpin and reclaim
- *	clean, unpinned		0		reclaim
- *	stale, unpinned		0		reclaim
- *	clean, pinned(*)	0		requeue
- *	stale, pinned		EAGAIN		requeue
- *	dirty, delwri ok	0		requeue
- *	dirty, delwri blocked	EAGAIN		requeue
- *	dirty, sync flush	0		reclaim
- *
- * (*) dgc: I don't think the clean, pinned state is possible but it gets
- * handled anyway given the order of checks implemented.
- *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
- * Also, because we get the flush lock first, we know that any inode that has
- * been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri otherwise we would loop forever requeuing clean inodes as
- * we cannot tell apart a successful delwri flush and a clean inode from the
- * return value of xfs_iflush().
- *
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes first before
- * trying to reclaim them to minimise the amount of time spent waiting. For
- * background relaim, we just requeue the inode for the next pass.
- *
- * Hence the order of actions after gaining the locks should be:
- *	bad		=> reclaim
- *	shutdown	=> unpin and reclaim
- *	pinned, delwri	=> requeue
- *	pinned, sync	=> unpin
- *	stale		=> reclaim
- *	clean		=> reclaim
- *	dirty, delwri	=> flush and requeue
- *	dirty, sync	=> flush, wait and reclaim
- */
-STATIC int
-xfs_reclaim_inode(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
-	int			sync_mode)
-{
-	int	error;
-
-restart:
-	error = 0;
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (!xfs_iflock_nowait(ip)) {
-		if (!(sync_mode & SYNC_WAIT))
-			goto out;
-		xfs_iflock(ip);
-	}
-
-	if (is_bad_inode(VFS_I(ip)))
-		goto reclaim;
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		xfs_iunpin_wait(ip);
-		goto reclaim;
-	}
-	if (xfs_ipincount(ip)) {
-		if (!(sync_mode & SYNC_WAIT)) {
-			xfs_ifunlock(ip);
-			goto out;
-		}
-		xfs_iunpin_wait(ip);
-	}
-	if (xfs_iflags_test(ip, XFS_ISTALE))
-		goto reclaim;
-	if (xfs_inode_clean(ip))
-		goto reclaim;
-
-	/*
-	 * Now we have an inode that needs flushing.
-	 *
-	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
-	 * reclaim as we can deadlock with inode cluster removal.
-	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
-	 * ip->i_lock, and we are doing the exact opposite here. As a result,
-	 * doing a blocking xfs_itobp() to get the cluster buffer will result
-	 * in an ABBA deadlock with xfs_ifree_cluster().
-	 *
-	 * As xfs_ifree_cluser() must gather all inodes that are active in the
-	 * cache to mark them stale, if we hit this case we don't actually want
-	 * to do IO here - we want the inode marked stale so we can simply
-	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
-	 * just unlock the inode, back off and try again. Hopefully the next
-	 * pass through will see the stale flag set on the inode.
-	 */
-	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
-	if (sync_mode & SYNC_WAIT) {
-		if (error == EAGAIN) {
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-			/* backoff longer than in xfs_ifree_cluster */
-			delay(2);
-			goto restart;
-		}
-		xfs_iflock(ip);
-		goto reclaim;
-	}
-
-	/*
-	 * When we have to flush an inode but don't have SYNC_WAIT set, we
-	 * flush the inode out using a delwri buffer and wait for the next
-	 * call into reclaim to find it in a clean state instead of waiting for
-	 * it now. We also don't return errors here - if the error is transient
-	 * then the next reclaim pass will flush the inode, and if the error
-	 * is permanent then the next sync reclaim will reclaim the inode and
-	 * pass on the error.
-	 */
-	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		xfs_warn(ip->i_mount,
-			"inode 0x%llx background reclaim flush failed with %d",
-			(long long)ip->i_ino, error);
-	}
-out:
-	xfs_iflags_clear(ip, XFS_IRECLAIM);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * We could return EAGAIN here to make reclaim rescan the inode tree in
-	 * a short while. However, this just burns CPU time scanning the tree
-	 * waiting for IO to complete and xfssyncd never goes back to the idle
-	 * state. Instead, return 0 to let the next scheduled background reclaim
-	 * attempt to reclaim the inode again.
-	 */
-	return 0;
-
-reclaim:
-	xfs_ifunlock(ip);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	XFS_STATS_INC(xs_ig_reclaims);
-	/*
-	 * Remove the inode from the per-AG radix tree.
-	 *
-	 * Because radix_tree_delete won't complain even if the item was never
-	 * added to the tree assert that it's been there before to catch
-	 * problems with the inode life time early on.
-	 */
-	spin_lock(&pag->pag_ici_lock);
-	if (!radix_tree_delete(&pag->pag_ici_root,
-				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
-		ASSERT(0);
-	__xfs_inode_clear_reclaim(pag, ip);
-	spin_unlock(&pag->pag_ici_lock);
-
-	/*
-	 * Here we do an (almost) spurious inode lock in order to coordinate
-	 * with inode cache radix tree lookups.  This is because the lookup
-	 * can reference the inodes in the cache without taking references.
-	 *
-	 * We make that OK here by ensuring that we wait until the inode is
-	 * unlocked after the lookup before we go ahead and free it.  We get
-	 * both the ilock and the iolock because the code may need to drop the
-	 * ilock one but will still hold the iolock.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_qm_dqdetach(ip);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-	xfs_inode_free(ip);
-	return error;
-
-}
-
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- */
-int
-xfs_reclaim_inodes_ag(
-	struct xfs_mount	*mp,
-	int			flags,
-	int			*nr_to_scan)
-{
-	struct xfs_perag	*pag;
-	int			error = 0;
-	int			last_error = 0;
-	xfs_agnumber_t		ag;
-	int			trylock = flags & SYNC_TRYLOCK;
-	int			skipped;
-
-restart:
-	ag = 0;
-	skipped = 0;
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		unsigned long	first_index = 0;
-		int		done = 0;
-		int		nr_found = 0;
-
-		ag = pag->pag_agno + 1;
-
-		if (trylock) {
-			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-				skipped++;
-				xfs_perag_put(pag);
-				continue;
-			}
-			first_index = pag->pag_ici_reclaim_cursor;
-		} else
-			mutex_lock(&pag->pag_ici_reclaim_lock);
-
-		do {
-			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-			int	i;
-
-			rcu_read_lock();
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH,
-					XFS_ICI_RECLAIM_TAG);
-			if (!nr_found) {
-				done = 1;
-				rcu_read_unlock();
-				break;
-			}
-
-			/*
-			 * Grab the inodes before we drop the lock. if we found
-			 * nothing, nr == 0 and the loop will be skipped.
-			 */
-			for (i = 0; i < nr_found; i++) {
-				struct xfs_inode *ip = batch[i];
-
-				if (done || xfs_reclaim_inode_grab(ip, flags))
-					batch[i] = NULL;
-
-				/*
-				 * Update the index for the next lookup. Catch
-				 * overflows into the next AG range which can
-				 * occur if we have inodes in the last block of
-				 * the AG and we are currently pointing to the
-				 * last inode.
-				 *
-				 * Because we may see inodes that are from the
-				 * wrong AG due to RCU freeing and
-				 * reallocation, only update the index if it
-				 * lies in this AG. It was a race that lead us
-				 * to see this inode, so another lookup from
-				 * the same index will not find it again.
-				 */
-				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-								pag->pag_agno)
-					continue;
-				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-					done = 1;
-			}
-
-			/* unlock now we've grabbed the inodes. */
-			rcu_read_unlock();
-
-			for (i = 0; i < nr_found; i++) {
-				if (!batch[i])
-					continue;
-				error = xfs_reclaim_inode(batch[i], pag, flags);
-				if (error && last_error != EFSCORRUPTED)
-					last_error = error;
-			}
-
-			*nr_to_scan -= XFS_LOOKUP_BATCH;
-
-			cond_resched();
-
-		} while (nr_found && !done && *nr_to_scan > 0);
-
-		if (trylock && !done)
-			pag->pag_ici_reclaim_cursor = first_index;
-		else
-			pag->pag_ici_reclaim_cursor = 0;
-		mutex_unlock(&pag->pag_ici_reclaim_lock);
-		xfs_perag_put(pag);
-	}
-
-	/*
-	 * if we skipped any AG, and we still have scan count remaining, do
-	 * another pass this time using blocking reclaim semantics (i.e
-	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
-	 * ensure that when we get more reclaimers than AGs we block rather
-	 * than spin trying to execute reclaim.
-	 */
-	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
-		trylock = 0;
-		goto restart;
-	}
-	return XFS_ERROR(last_error);
-}
-
-int
-xfs_reclaim_inodes(
-	xfs_mount_t	*mp,
-	int		mode)
-{
-	int		nr_to_scan = INT_MAX;
-
-	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
-}
-
-/*
- * Scan a certain number of inodes for reclaim.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-void
-xfs_reclaim_inodes_nr(
-	struct xfs_mount	*mp,
-	int			nr_to_scan)
-{
-	/* kick background reclaimer and push the AIL */
-	xfs_syncd_queue_reclaim(mp);
-	xfs_ail_push_all(mp->m_ail);
-
-	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
-}
-
-/*
- * Return the number of reclaimable inodes in the filesystem for
- * the shrinker to determine how much to reclaim.
- */
-int
-xfs_reclaim_inodes_count(
-	struct xfs_mount	*mp)
-{
-	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag = 0;
-	int			reclaimable = 0;
-
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		ag = pag->pag_agno + 1;
-		reclaimable += pag->pag_ici_reclaimable;
-		xfs_perag_put(pag);
-	}
-	return reclaimable;
-}
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
deleted file mode 100644
index 941202e..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef XFS_SYNC_H
-#define XFS_SYNC_H 1
-
-struct xfs_mount;
-struct xfs_perag;
-
-#define SYNC_WAIT		0x0001	/* wait for i/o to complete */
-#define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
-
-extern struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
-
-int xfs_syncd_init(struct xfs_mount *mp);
-void xfs_syncd_stop(struct xfs_mount *mp);
-
-int xfs_quiesce_data(struct xfs_mount *mp);
-void xfs_quiesce_attr(struct xfs_mount *mp);
-
-void xfs_flush_inodes(struct xfs_inode *ip);
-
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
-int xfs_reclaim_inodes_count(struct xfs_mount *mp);
-void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
-
-void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
-				struct xfs_inode *ip);
-
-int xfs_sync_inode_grab(struct xfs_inode *ip);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
-	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
deleted file mode 100644
index ee2d2ad..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include "xfs_error.h"
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
-	ctl_table	*ctl,
-	int		write,
-	void		__user *buffer,
-	size_t		*lenp,
-	loff_t		*ppos)
-{
-	int		c, ret, *valp = ctl->data;
-	__uint32_t	vn_active;
-
-	ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-
-	if (!ret && write && *valp) {
-		xfs_notice(NULL, "Clearing xfsstats");
-		for_each_possible_cpu(c) {
-			preempt_disable();
-			/* save vn_active, it's a universal truth! */
-			vn_active = per_cpu(xfsstats, c).vn_active;
-			memset(&per_cpu(xfsstats, c), 0,
-			       sizeof(struct xfsstats));
-			per_cpu(xfsstats, c).vn_active = vn_active;
-			preempt_enable();
-		}
-		xfs_stats_clear = 0;
-	}
-
-	return ret;
-}
-
-STATIC int
-xfs_panic_mask_proc_handler(
-	ctl_table	*ctl,
-	int		write,
-	void		__user *buffer,
-	size_t		*lenp,
-	loff_t		*ppos)
-{
-	int		ret, *valp = ctl->data;
-
-	ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-	if (!ret && write) {
-		xfs_panic_mask = *valp;
-#ifdef DEBUG
-		xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
-	}
-	return ret;
-}
-#endif /* CONFIG_PROC_FS */
-
-static ctl_table xfs_table[] = {
-	{
-		.procname	= "irix_sgid_inherit",
-		.data		= &xfs_params.sgid_inherit.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.sgid_inherit.min,
-		.extra2		= &xfs_params.sgid_inherit.max
-	},
-	{
-		.procname	= "irix_symlink_mode",
-		.data		= &xfs_params.symlink_mode.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.symlink_mode.min,
-		.extra2		= &xfs_params.symlink_mode.max
-	},
-	{
-		.procname	= "panic_mask",
-		.data		= &xfs_params.panic_mask.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= xfs_panic_mask_proc_handler,
-		.extra1		= &xfs_params.panic_mask.min,
-		.extra2		= &xfs_params.panic_mask.max
-	},
-
-	{
-		.procname	= "error_level",
-		.data		= &xfs_params.error_level.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.error_level.min,
-		.extra2		= &xfs_params.error_level.max
-	},
-	{
-		.procname	= "xfssyncd_centisecs",
-		.data		= &xfs_params.syncd_timer.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.syncd_timer.min,
-		.extra2		= &xfs_params.syncd_timer.max
-	},
-	{
-		.procname	= "inherit_sync",
-		.data		= &xfs_params.inherit_sync.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.inherit_sync.min,
-		.extra2		= &xfs_params.inherit_sync.max
-	},
-	{
-		.procname	= "inherit_nodump",
-		.data		= &xfs_params.inherit_nodump.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.inherit_nodump.min,
-		.extra2		= &xfs_params.inherit_nodump.max
-	},
-	{
-		.procname	= "inherit_noatime",
-		.data		= &xfs_params.inherit_noatim.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.inherit_noatim.min,
-		.extra2		= &xfs_params.inherit_noatim.max
-	},
-	{
-		.procname	= "xfsbufd_centisecs",
-		.data		= &xfs_params.xfs_buf_timer.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.xfs_buf_timer.min,
-		.extra2		= &xfs_params.xfs_buf_timer.max
-	},
-	{
-		.procname	= "age_buffer_centisecs",
-		.data		= &xfs_params.xfs_buf_age.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.xfs_buf_age.min,
-		.extra2		= &xfs_params.xfs_buf_age.max
-	},
-	{
-		.procname	= "inherit_nosymlinks",
-		.data		= &xfs_params.inherit_nosym.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.inherit_nosym.min,
-		.extra2		= &xfs_params.inherit_nosym.max
-	},
-	{
-		.procname	= "rotorstep",
-		.data		= &xfs_params.rotorstep.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.rotorstep.min,
-		.extra2		= &xfs_params.rotorstep.max
-	},
-	{
-		.procname	= "inherit_nodefrag",
-		.data		= &xfs_params.inherit_nodfrg.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.inherit_nodfrg.min,
-		.extra2		= &xfs_params.inherit_nodfrg.max
-	},
-	{
-		.procname	= "filestream_centisecs",
-		.data		= &xfs_params.fstrm_timer.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &xfs_params.fstrm_timer.min,
-		.extra2		= &xfs_params.fstrm_timer.max,
-	},
-	/* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
-	{
-		.procname	= "stats_clear",
-		.data		= &xfs_params.stats_clear.val,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= xfs_stats_clear_proc_handler,
-		.extra1		= &xfs_params.stats_clear.min,
-		.extra2		= &xfs_params.stats_clear.max
-	},
-#endif /* CONFIG_PROC_FS */
-
-	{}
-};
-
-static ctl_table xfs_dir_table[] = {
-	{
-		.procname	= "xfs",
-		.mode		= 0555,
-		.child		= xfs_table
-	},
-	{}
-};
-
-static ctl_table xfs_root_table[] = {
-	{
-		.procname	= "fs",
-		.mode		= 0555,
-		.child		= xfs_dir_table
-	},
-	{}
-};
-
-int
-xfs_sysctl_register(void)
-{
-	xfs_table_header = register_sysctl_table(xfs_root_table);
-	if (!xfs_table_header)
-		return -ENOMEM;
-	return 0;
-}
-
-void
-xfs_sysctl_unregister(void)
-{
-	unregister_sysctl_table(xfs_table_header);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
deleted file mode 100644
index b9937d4..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SYSCTL_H__
-#define __XFS_SYSCTL_H__
-
-#include <linux/sysctl.h>
-
-/*
- * Tunable xfs parameters
- */
-
-typedef struct xfs_sysctl_val {
-	int min;
-	int val;
-	int max;
-} xfs_sysctl_val_t;
-
-typedef struct xfs_param {
-	xfs_sysctl_val_t sgid_inherit;	/* Inherit S_ISGID if process' GID is
-					 * not a member of parent dir GID. */
-	xfs_sysctl_val_t symlink_mode;	/* Link creat mode affected by umask */
-	xfs_sysctl_val_t panic_mask;	/* bitmask to cause panic on errors. */
-	xfs_sysctl_val_t error_level;	/* Degree of reporting for problems  */
-	xfs_sysctl_val_t syncd_timer;	/* Interval between xfssyncd wakeups */
-	xfs_sysctl_val_t stats_clear;	/* Reset all XFS statistics to zero. */
-	xfs_sysctl_val_t inherit_sync;	/* Inherit the "sync" inode flag. */
-	xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
-	xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
-	xfs_sysctl_val_t xfs_buf_timer;	/* Interval between xfsbufd wakeups. */
-	xfs_sysctl_val_t xfs_buf_age;	/* Metadata buffer age before flush. */
-	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
-	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
-	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
-	xfs_sysctl_val_t fstrm_timer;	/* Filestream dir-AG assoc'n timeout. */
-} xfs_param_t;
-
-/*
- * xfs_error_level:
- *
- * How much error reporting will be done when internal problems are
- * encountered.  These problems normally return an EFSCORRUPTED to their
- * caller, with no other information reported.
- *
- * 0	No error reports
- * 1	Report EFSCORRUPTED errors that will cause a filesystem shutdown
- * 5	Report all EFSCORRUPTED errors (all of the above errors, plus any
- *	additional errors that are known to not cause shutdowns)
- *
- * xfs_panic_mask bit 0x8 turns the error reports into panics
- */
-
-enum {
-	/* XFS_REFCACHE_SIZE = 1 */
-	/* XFS_REFCACHE_PURGE = 2 */
-	/* XFS_RESTRICT_CHOWN = 3 */
-	XFS_SGID_INHERIT = 4,
-	XFS_SYMLINK_MODE = 5,
-	XFS_PANIC_MASK = 6,
-	XFS_ERRLEVEL = 7,
-	XFS_SYNCD_TIMER = 8,
-	/* XFS_PROBE_DMAPI = 9 */
-	/* XFS_PROBE_IOOPS = 10 */
-	/* XFS_PROBE_QUOTA = 11 */
-	XFS_STATS_CLEAR = 12,
-	XFS_INHERIT_SYNC = 13,
-	XFS_INHERIT_NODUMP = 14,
-	XFS_INHERIT_NOATIME = 15,
-	XFS_BUF_TIMER = 16,
-	XFS_BUF_AGE = 17,
-	/* XFS_IO_BYPASS = 18 */
-	XFS_INHERIT_NOSYM = 19,
-	XFS_ROTORSTEP = 20,
-	XFS_INHERIT_NODFRG = 21,
-	XFS_FILESTREAM_TIMER = 22,
-};
-
-extern xfs_param_t	xfs_params;
-
-#ifdef CONFIG_SYSCTL
-extern int xfs_sysctl_register(void);
-extern void xfs_sysctl_unregister(void);
-#else
-# define xfs_sysctl_register()		(0)
-# define xfs_sysctl_unregister()	do { } while (0)
-#endif /* CONFIG_SYSCTL */
-
-#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
deleted file mode 100644
index 9010ce8..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_mount.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-
-/*
- * We include this last to have the helpers above available for the trace
- * event implementations.
- */
-#define CREATE_TRACE_POINTS
-#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
deleted file mode 100644
index 690fc7a..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ /dev/null
@@ -1,1746 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM xfs
-
-#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_XFS_H
-
-#include <linux/tracepoint.h>
-
-struct xfs_agf;
-struct xfs_alloc_arg;
-struct xfs_attr_list_context;
-struct xfs_buf_log_item;
-struct xfs_da_args;
-struct xfs_da_node_entry;
-struct xfs_dquot;
-struct xlog_ticket;
-struct log;
-struct xlog_recover;
-struct xlog_recover_item;
-struct xfs_buf_log_format;
-struct xfs_inode_log_format;
-
-DECLARE_EVENT_CLASS(xfs_attr_list_class,
-	TP_PROTO(struct xfs_attr_list_context *ctx),
-	TP_ARGS(ctx),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(u32, hashval)
-		__field(u32, blkno)
-		__field(u32, offset)
-		__field(void *, alist)
-		__field(int, bufsize)
-		__field(int, count)
-		__field(int, firstu)
-		__field(int, dupcnt)
-		__field(int, flags)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-		__entry->ino = ctx->dp->i_ino;
-		__entry->hashval = ctx->cursor->hashval;
-		__entry->blkno = ctx->cursor->blkno;
-		__entry->offset = ctx->cursor->offset;
-		__entry->alist = ctx->alist;
-		__entry->bufsize = ctx->bufsize;
-		__entry->count = ctx->count;
-		__entry->firstu = ctx->firstu;
-		__entry->flags = ctx->flags;
-	),
-	TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-		  "alist 0x%p size %u count %u firstu %u flags %d %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		   __entry->ino,
-		   __entry->hashval,
-		   __entry->blkno,
-		   __entry->offset,
-		   __entry->dupcnt,
-		   __entry->alist,
-		   __entry->bufsize,
-		   __entry->count,
-		   __entry->firstu,
-		   __entry->flags,
-		   __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
-	)
-)
-
-#define DEFINE_ATTR_LIST_EVENT(name) \
-DEFINE_EVENT(xfs_attr_list_class, name, \
-	TP_PROTO(struct xfs_attr_list_context *ctx), \
-	TP_ARGS(ctx))
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
-
-DECLARE_EVENT_CLASS(xfs_perag_class,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
-		 unsigned long caller_ip),
-	TP_ARGS(mp, agno, refcount, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(int, refcount)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->refcount = refcount;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d agno %u refcount %d caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->refcount,
-		  (char *)__entry->caller_ip)
-);
-
-#define DEFINE_PERAG_REF_EVENT(name)	\
-DEFINE_EVENT(xfs_perag_class, name,	\
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,	\
-		 unsigned long caller_ip),					\
-	TP_ARGS(mp, agno, refcount, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
-DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
-
-TRACE_EVENT(xfs_attr_list_node_descend,
-	TP_PROTO(struct xfs_attr_list_context *ctx,
-		 struct xfs_da_node_entry *btree),
-	TP_ARGS(ctx, btree),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(u32, hashval)
-		__field(u32, blkno)
-		__field(u32, offset)
-		__field(void *, alist)
-		__field(int, bufsize)
-		__field(int, count)
-		__field(int, firstu)
-		__field(int, dupcnt)
-		__field(int, flags)
-		__field(u32, bt_hashval)
-		__field(u32, bt_before)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-		__entry->ino = ctx->dp->i_ino;
-		__entry->hashval = ctx->cursor->hashval;
-		__entry->blkno = ctx->cursor->blkno;
-		__entry->offset = ctx->cursor->offset;
-		__entry->alist = ctx->alist;
-		__entry->bufsize = ctx->bufsize;
-		__entry->count = ctx->count;
-		__entry->firstu = ctx->firstu;
-		__entry->flags = ctx->flags;
-		__entry->bt_hashval = be32_to_cpu(btree->hashval);
-		__entry->bt_before = be32_to_cpu(btree->before);
-	),
-	TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-		  "alist 0x%p size %u count %u firstu %u flags %d %s "
-		  "node hashval %u, node before %u",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		   __entry->ino,
-		   __entry->hashval,
-		   __entry->blkno,
-		   __entry->offset,
-		   __entry->dupcnt,
-		   __entry->alist,
-		   __entry->bufsize,
-		   __entry->count,
-		   __entry->firstu,
-		   __entry->flags,
-		   __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
-		   __entry->bt_hashval,
-		   __entry->bt_before)
-);
-
-TRACE_EVENT(xfs_iext_insert,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-		 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-	TP_ARGS(ip, idx, r, state, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
-		__field(xfs_fileoff_t, startoff)
-		__field(xfs_fsblock_t, startblock)
-		__field(xfs_filblks_t, blockcount)
-		__field(xfs_exntst_t, state)
-		__field(int, bmap_state)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
-		__entry->startoff = r->br_startoff;
-		__entry->startblock = r->br_startblock;
-		__entry->blockcount = r->br_blockcount;
-		__entry->state = r->br_state;
-		__entry->bmap_state = state;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-		  "offset %lld block %lld count %lld flag %d caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
-		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
-		  __entry->blockcount,
-		  __entry->state,
-		  (char *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_bmap_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
-		 unsigned long caller_ip),
-	TP_ARGS(ip, idx, state, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_extnum_t, idx)
-		__field(xfs_fileoff_t, startoff)
-		__field(xfs_fsblock_t, startblock)
-		__field(xfs_filblks_t, blockcount)
-		__field(xfs_exntst_t, state)
-		__field(int, bmap_state)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		struct xfs_ifork	*ifp = (state & BMAP_ATTRFORK) ?
-						ip->i_afp : &ip->i_df;
-		struct xfs_bmbt_irec	r;
-
-		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->idx = idx;
-		__entry->startoff = r.br_startoff;
-		__entry->startblock = r.br_startblock;
-		__entry->blockcount = r.br_blockcount;
-		__entry->state = r.br_state;
-		__entry->bmap_state = state;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-		  "offset %lld block %lld count %lld flag %d caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-		  (long)__entry->idx,
-		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
-		  __entry->blockcount,
-		  __entry->state,
-		  (char *)__entry->caller_ip)
-)
-
-#define DEFINE_BMAP_EVENT(name) \
-DEFINE_EVENT(xfs_bmap_class, name, \
-	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
-		 unsigned long caller_ip), \
-	TP_ARGS(ip, idx, state, caller_ip))
-DEFINE_BMAP_EVENT(xfs_iext_remove);
-DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
-DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
-
-DECLARE_EVENT_CLASS(xfs_buf_class,
-	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
-	TP_ARGS(bp, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
-		__field(int, hold)
-		__field(int, pincount)
-		__field(unsigned, lockval)
-		__field(unsigned, flags)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
-		__entry->hold = atomic_read(&bp->b_hold);
-		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
-		__entry->flags = bp->b_flags;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-		  "lock %d flags %s caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
-		  __entry->hold,
-		  __entry->pincount,
-		  __entry->lockval,
-		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-		  (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_EVENT(name) \
-DEFINE_EVENT(xfs_buf_class, name, \
-	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
-	TP_ARGS(bp, caller_ip))
-DEFINE_BUF_EVENT(xfs_buf_init);
-DEFINE_BUF_EVENT(xfs_buf_free);
-DEFINE_BUF_EVENT(xfs_buf_hold);
-DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
-DEFINE_BUF_EVENT(xfs_buf_bdwrite);
-DEFINE_BUF_EVENT(xfs_buf_lock);
-DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_trylock);
-DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_iowait);
-DEFINE_BUF_EVENT(xfs_buf_iowait_done);
-DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
-DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-DEFINE_BUF_EVENT(xfs_buf_error_relse);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-
-/* not really buffer traces, but the buf provides useful information */
-DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
-
-/* pass flags explicitly */
-DECLARE_EVENT_CLASS(xfs_buf_flags_class,
-	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
-	TP_ARGS(bp, flags, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
-		__field(int, hold)
-		__field(int, pincount)
-		__field(unsigned, lockval)
-		__field(unsigned, flags)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
-		__entry->flags = flags;
-		__entry->hold = atomic_read(&bp->b_hold);
-		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-		  "lock %d flags %s caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
-		  __entry->hold,
-		  __entry->pincount,
-		  __entry->lockval,
-		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-		  (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_FLAGS_EVENT(name) \
-DEFINE_EVENT(xfs_buf_flags_class, name, \
-	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
-	TP_ARGS(bp, flags, caller_ip))
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
-
-TRACE_EVENT(xfs_buf_ioerror,
-	TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
-	TP_ARGS(bp, error, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_daddr_t, bno)
-		__field(size_t, buffer_length)
-		__field(unsigned, flags)
-		__field(int, hold)
-		__field(int, pincount)
-		__field(unsigned, lockval)
-		__field(int, error)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
-		__entry->buffer_length = bp->b_buffer_length;
-		__entry->hold = atomic_read(&bp->b_hold);
-		__entry->pincount = atomic_read(&bp->b_pin_count);
-		__entry->lockval = bp->b_sema.count;
-		__entry->error = error;
-		__entry->flags = bp->b_flags;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-		  "lock %d error %d flags %s caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long long)__entry->bno,
-		  __entry->buffer_length,
-		  __entry->hold,
-		  __entry->pincount,
-		  __entry->lockval,
-		  __entry->error,
-		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-		  (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_buf_item_class,
-	TP_PROTO(struct xfs_buf_log_item *bip),
-	TP_ARGS(bip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_daddr_t, buf_bno)
-		__field(size_t, buf_len)
-		__field(int, buf_hold)
-		__field(int, buf_pincount)
-		__field(int, buf_lockval)
-		__field(unsigned, buf_flags)
-		__field(unsigned, bli_recur)
-		__field(int, bli_refcount)
-		__field(unsigned, bli_flags)
-		__field(void *, li_desc)
-		__field(unsigned, li_flags)
-	),
-	TP_fast_assign(
-		__entry->dev = bip->bli_buf->b_target->bt_dev;
-		__entry->bli_flags = bip->bli_flags;
-		__entry->bli_recur = bip->bli_recur;
-		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
-		__entry->buf_bno = bip->bli_buf->b_bn;
-		__entry->buf_len = bip->bli_buf->b_buffer_length;
-		__entry->buf_flags = bip->bli_buf->b_flags;
-		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
-		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-		__entry->buf_lockval = bip->bli_buf->b_sema.count;
-		__entry->li_desc = bip->bli_item.li_desc;
-		__entry->li_flags = bip->bli_item.li_flags;
-	),
-	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-		  "lock %d flags %s recur %d refcount %d bliflags %s "
-		  "lidesc 0x%p liflags %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long long)__entry->buf_bno,
-		  __entry->buf_len,
-		  __entry->buf_hold,
-		  __entry->buf_pincount,
-		  __entry->buf_lockval,
-		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
-		  __entry->bli_recur,
-		  __entry->bli_refcount,
-		  __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
-		  __entry->li_desc,
-		  __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_BUF_ITEM_EVENT(name) \
-DEFINE_EVENT(xfs_buf_item_class, name, \
-	TP_PROTO(struct xfs_buf_log_item *bip), \
-	TP_ARGS(bip))
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-
-DECLARE_EVENT_CLASS(xfs_lock_class,
-	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
-		 unsigned long caller_ip),
-	TP_ARGS(ip,  lock_flags, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(int, lock_flags)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->lock_flags = lock_flags;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
-		  (void *)__entry->caller_ip)
-)
-
-#define DEFINE_LOCK_EVENT(name) \
-DEFINE_EVENT(xfs_lock_class, name, \
-	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
-		 unsigned long caller_ip), \
-	TP_ARGS(ip,  lock_flags, caller_ip))
-DEFINE_LOCK_EVENT(xfs_ilock);
-DEFINE_LOCK_EVENT(xfs_ilock_nowait);
-DEFINE_LOCK_EVENT(xfs_ilock_demote);
-DEFINE_LOCK_EVENT(xfs_iunlock);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
-	TP_PROTO(struct xfs_inode *ip),
-	TP_ARGS(ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-	),
-	TP_printk("dev %d:%d ino 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino)
-)
-
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
-	TP_PROTO(struct xfs_inode *ip), \
-	TP_ARGS(ip))
-DEFINE_INODE_EVENT(xfs_iget_skip);
-DEFINE_INODE_EVENT(xfs_iget_reclaim);
-DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
-DEFINE_INODE_EVENT(xfs_iget_hit);
-DEFINE_INODE_EVENT(xfs_iget_miss);
-
-DEFINE_INODE_EVENT(xfs_getattr);
-DEFINE_INODE_EVENT(xfs_setattr);
-DEFINE_INODE_EVENT(xfs_readlink);
-DEFINE_INODE_EVENT(xfs_alloc_file_space);
-DEFINE_INODE_EVENT(xfs_free_file_space);
-DEFINE_INODE_EVENT(xfs_readdir);
-#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_get_acl);
-#endif
-DEFINE_INODE_EVENT(xfs_vm_bmap);
-DEFINE_INODE_EVENT(xfs_file_ioctl);
-DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
-DEFINE_INODE_EVENT(xfs_ioctl_setattr);
-DEFINE_INODE_EVENT(xfs_file_fsync);
-DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
-
-DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
-DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-
-DECLARE_EVENT_CLASS(xfs_iref_class,
-	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
-	TP_ARGS(ip, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(int, count)
-		__field(int, pincount)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->count = atomic_read(&VFS_I(ip)->i_count);
-		__entry->pincount = atomic_read(&ip->i_pincount);
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->count,
-		  __entry->pincount,
-		  (char *)__entry->caller_ip)
-)
-
-#define DEFINE_IREF_EVENT(name) \
-DEFINE_EVENT(xfs_iref_class, name, \
-	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
-	TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
-DEFINE_IREF_EVENT(xfs_irele);
-DEFINE_IREF_EVENT(xfs_inode_pin);
-DEFINE_IREF_EVENT(xfs_inode_unpin);
-DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
-
-DECLARE_EVENT_CLASS(xfs_namespace_class,
-	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
-	TP_ARGS(dp, name),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, dp_ino)
-		__dynamic_array(char, name, name->len)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(dp)->i_sb->s_dev;
-		__entry->dp_ino = dp->i_ino;
-		memcpy(__get_str(name), name->name, name->len);
-	),
-	TP_printk("dev %d:%d dp ino 0x%llx name %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->dp_ino,
-		  __get_str(name))
-)
-
-#define DEFINE_NAMESPACE_EVENT(name) \
-DEFINE_EVENT(xfs_namespace_class, name, \
-	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
-	TP_ARGS(dp, name))
-DEFINE_NAMESPACE_EVENT(xfs_remove);
-DEFINE_NAMESPACE_EVENT(xfs_link);
-DEFINE_NAMESPACE_EVENT(xfs_lookup);
-DEFINE_NAMESPACE_EVENT(xfs_create);
-DEFINE_NAMESPACE_EVENT(xfs_symlink);
-
-TRACE_EVENT(xfs_rename,
-	TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
-		 struct xfs_name *src_name, struct xfs_name *target_name),
-	TP_ARGS(src_dp, target_dp, src_name, target_name),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, src_dp_ino)
-		__field(xfs_ino_t, target_dp_ino)
-		__dynamic_array(char, src_name, src_name->len)
-		__dynamic_array(char, target_name, target_name->len)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(src_dp)->i_sb->s_dev;
-		__entry->src_dp_ino = src_dp->i_ino;
-		__entry->target_dp_ino = target_dp->i_ino;
-		memcpy(__get_str(src_name), src_name->name, src_name->len);
-		memcpy(__get_str(target_name), target_name->name, target_name->len);
-	),
-	TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
-		  " src name %s target name %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->src_dp_ino,
-		  __entry->target_dp_ino,
-		  __get_str(src_name),
-		  __get_str(target_name))
-)
-
-DECLARE_EVENT_CLASS(xfs_dquot_class,
-	TP_PROTO(struct xfs_dquot *dqp),
-	TP_ARGS(dqp),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(u32, id)
-		__field(unsigned, flags)
-		__field(unsigned, nrefs)
-		__field(unsigned long long, res_bcount)
-		__field(unsigned long long, bcount)
-		__field(unsigned long long, icount)
-		__field(unsigned long long, blk_hardlimit)
-		__field(unsigned long long, blk_softlimit)
-		__field(unsigned long long, ino_hardlimit)
-		__field(unsigned long long, ino_softlimit)
-	), \
-	TP_fast_assign(
-		__entry->dev = dqp->q_mount->m_super->s_dev;
-		__entry->id = be32_to_cpu(dqp->q_core.d_id);
-		__entry->flags = dqp->dq_flags;
-		__entry->nrefs = dqp->q_nrefs;
-		__entry->res_bcount = dqp->q_res_bcount;
-		__entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
-		__entry->icount = be64_to_cpu(dqp->q_core.d_icount);
-		__entry->blk_hardlimit =
-			be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-		__entry->blk_softlimit =
-			be64_to_cpu(dqp->q_core.d_blk_softlimit);
-		__entry->ino_hardlimit =
-			be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-		__entry->ino_softlimit =
-			be64_to_cpu(dqp->q_core.d_ino_softlimit);
-	),
-	TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
-		  "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
-		  "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->id,
-		  __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
-		  __entry->nrefs,
-		  __entry->res_bcount,
-		  __entry->bcount,
-		  __entry->blk_hardlimit,
-		  __entry->blk_softlimit,
-		  __entry->icount,
-		  __entry->ino_hardlimit,
-		  __entry->ino_softlimit)
-)
-
-#define DEFINE_DQUOT_EVENT(name) \
-DEFINE_EVENT(xfs_dquot_class, name, \
-	TP_PROTO(struct xfs_dquot *dqp), \
-	TP_ARGS(dqp))
-DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
-DEFINE_DQUOT_EVENT(xfs_dqattach_found);
-DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
-DEFINE_DQUOT_EVENT(xfs_dqalloc);
-DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
-DEFINE_DQUOT_EVENT(xfs_dqread);
-DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
-DEFINE_DQUOT_EVENT(xfs_dqget_hit);
-DEFINE_DQUOT_EVENT(xfs_dqget_miss);
-DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
-DEFINE_DQUOT_EVENT(xfs_dqput_free);
-DEFINE_DQUOT_EVENT(xfs_dqrele);
-DEFINE_DQUOT_EVENT(xfs_dqflush);
-DEFINE_DQUOT_EVENT(xfs_dqflush_force);
-DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-
-DECLARE_EVENT_CLASS(xfs_loggrant_class,
-	TP_PROTO(struct log *log, struct xlog_ticket *tic),
-	TP_ARGS(log, tic),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(unsigned, trans_type)
-		__field(char, ocnt)
-		__field(char, cnt)
-		__field(int, curr_res)
-		__field(int, unit_res)
-		__field(unsigned int, flags)
-		__field(int, reserveq)
-		__field(int, writeq)
-		__field(int, grant_reserve_cycle)
-		__field(int, grant_reserve_bytes)
-		__field(int, grant_write_cycle)
-		__field(int, grant_write_bytes)
-		__field(int, curr_cycle)
-		__field(int, curr_block)
-		__field(xfs_lsn_t, tail_lsn)
-	),
-	TP_fast_assign(
-		__entry->dev = log->l_mp->m_super->s_dev;
-		__entry->trans_type = tic->t_trans_type;
-		__entry->ocnt = tic->t_ocnt;
-		__entry->cnt = tic->t_cnt;
-		__entry->curr_res = tic->t_curr_res;
-		__entry->unit_res = tic->t_unit_res;
-		__entry->flags = tic->t_flags;
-		__entry->reserveq = list_empty(&log->l_reserveq);
-		__entry->writeq = list_empty(&log->l_writeq);
-		xlog_crack_grant_head(&log->l_grant_reserve_head,
-				&__entry->grant_reserve_cycle,
-				&__entry->grant_reserve_bytes);
-		xlog_crack_grant_head(&log->l_grant_write_head,
-				&__entry->grant_write_cycle,
-				&__entry->grant_write_bytes);
-		__entry->curr_cycle = log->l_curr_cycle;
-		__entry->curr_block = log->l_curr_block;
-		__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
-	),
-	TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
-		  "t_unit_res %u t_flags %s reserveq %s "
-		  "writeq %s grant_reserve_cycle %d "
-		  "grant_reserve_bytes %d grant_write_cycle %d "
-		  "grant_write_bytes %d curr_cycle %d curr_block %d "
-		  "tail_cycle %d tail_block %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
-		  __entry->ocnt,
-		  __entry->cnt,
-		  __entry->curr_res,
-		  __entry->unit_res,
-		  __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
-		  __entry->reserveq ? "empty" : "active",
-		  __entry->writeq ? "empty" : "active",
-		  __entry->grant_reserve_cycle,
-		  __entry->grant_reserve_bytes,
-		  __entry->grant_write_cycle,
-		  __entry->grant_write_bytes,
-		  __entry->curr_cycle,
-		  __entry->curr_block,
-		  CYCLE_LSN(__entry->tail_lsn),
-		  BLOCK_LSN(__entry->tail_lsn)
-	)
-)
-
-#define DEFINE_LOGGRANT_EVENT(name) \
-DEFINE_EVENT(xfs_loggrant_class, name, \
-	TP_PROTO(struct log *log, struct xlog_ticket *tic), \
-	TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
-DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-
-DECLARE_EVENT_CLASS(xfs_file_class,
-	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
-	TP_ARGS(ip, count, offset, flags),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_fsize_t, size)
-		__field(xfs_fsize_t, new_size)
-		__field(loff_t, offset)
-		__field(size_t, count)
-		__field(int, flags)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->size = ip->i_d.di_size;
-		__entry->new_size = ip->i_new_size;
-		__entry->offset = offset;
-		__entry->count = count;
-		__entry->flags = flags;
-	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-		  "offset 0x%llx count 0x%zx ioflags %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->new_size,
-		  __entry->offset,
-		  __entry->count,
-		  __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
-)
-
-#define DEFINE_RW_EVENT(name)		\
-DEFINE_EVENT(xfs_file_class, name,	\
-	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),	\
-	TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
-DEFINE_RW_EVENT(xfs_file_buffered_write);
-DEFINE_RW_EVENT(xfs_file_direct_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
-
-DECLARE_EVENT_CLASS(xfs_page_class,
-	TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
-	TP_ARGS(inode, page, off),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(pgoff_t, pgoff)
-		__field(loff_t, size)
-		__field(unsigned long, offset)
-		__field(int, delalloc)
-		__field(int, unwritten)
-	),
-	TP_fast_assign(
-		int delalloc = -1, unwritten = -1;
-
-		if (page_has_buffers(page))
-			xfs_count_page_state(page, &delalloc, &unwritten);
-		__entry->dev = inode->i_sb->s_dev;
-		__entry->ino = XFS_I(inode)->i_ino;
-		__entry->pgoff = page_offset(page);
-		__entry->size = i_size_read(inode);
-		__entry->offset = off;
-		__entry->delalloc = delalloc;
-		__entry->unwritten = unwritten;
-	),
-	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
-		  "delalloc %d unwritten %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->pgoff,
-		  __entry->size,
-		  __entry->offset,
-		  __entry->delalloc,
-		  __entry->unwritten)
-)
-
-#define DEFINE_PAGE_EVENT(name)		\
-DEFINE_EVENT(xfs_page_class, name,	\
-	TP_PROTO(struct inode *inode, struct page *page, unsigned long off),	\
-	TP_ARGS(inode, page, off))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_imap_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-		 int type, struct xfs_bmbt_irec *irec),
-	TP_ARGS(ip, offset, count, type, irec),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(loff_t, size)
-		__field(loff_t, new_size)
-		__field(loff_t, offset)
-		__field(size_t, count)
-		__field(int, type)
-		__field(xfs_fileoff_t, startoff)
-		__field(xfs_fsblock_t, startblock)
-		__field(xfs_filblks_t, blockcount)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->size = ip->i_d.di_size;
-		__entry->new_size = ip->i_new_size;
-		__entry->offset = offset;
-		__entry->count = count;
-		__entry->type = type;
-		__entry->startoff = irec ? irec->br_startoff : 0;
-		__entry->startblock = irec ? irec->br_startblock : 0;
-		__entry->blockcount = irec ? irec->br_blockcount : 0;
-	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-		  "offset 0x%llx count %zd type %s "
-		  "startoff 0x%llx startblock %lld blockcount 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->new_size,
-		  __entry->offset,
-		  __entry->count,
-		  __print_symbolic(__entry->type, XFS_IO_TYPES),
-		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
-		  __entry->blockcount)
-)
-
-#define DEFINE_IOMAP_EVENT(name)	\
-DEFINE_EVENT(xfs_imap_class, name,	\
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,	\
-		 int type, struct xfs_bmbt_irec *irec),		\
-	TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-
-DECLARE_EVENT_CLASS(xfs_simple_io_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
-	TP_ARGS(ip, offset, count),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(loff_t, isize)
-		__field(loff_t, disize)
-		__field(loff_t, new_size)
-		__field(loff_t, offset)
-		__field(size_t, count)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->isize = ip->i_size;
-		__entry->disize = ip->i_d.di_size;
-		__entry->new_size = ip->i_new_size;
-		__entry->offset = offset;
-		__entry->count = count;
-	),
-	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
-		  "offset 0x%llx count %zd",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->isize,
-		  __entry->disize,
-		  __entry->new_size,
-		  __entry->offset,
-		  __entry->count)
-);
-
-#define DEFINE_SIMPLE_IO_EVENT(name)	\
-DEFINE_EVENT(xfs_simple_io_class, name,	\
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),	\
-	TP_ARGS(ip, offset, count))
-DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
-DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
-
-DECLARE_EVENT_CLASS(xfs_itrunc_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
-	TP_ARGS(ip, new_size),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_fsize_t, size)
-		__field(xfs_fsize_t, new_size)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->size = ip->i_d.di_size;
-		__entry->new_size = new_size;
-	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->new_size)
-)
-
-#define DEFINE_ITRUNC_EVENT(name) \
-DEFINE_EVENT(xfs_itrunc_class, name, \
-	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
-	TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
-
-TRACE_EVENT(xfs_pagecache_inval,
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
-	TP_ARGS(ip, start, finish),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_fsize_t, size)
-		__field(xfs_off_t, start)
-		__field(xfs_off_t, finish)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->size = ip->i_d.di_size;
-		__entry->start = start;
-		__entry->finish = finish;
-	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->start,
-		  __entry->finish)
-);
-
-TRACE_EVENT(xfs_bunmap,
-	TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
-		 int flags, unsigned long caller_ip),
-	TP_ARGS(ip, bno, len, flags, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(xfs_fsize_t, size)
-		__field(xfs_fileoff_t, bno)
-		__field(xfs_filblks_t, len)
-		__field(unsigned long, caller_ip)
-		__field(int, flags)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
-		__entry->size = ip->i_d.di_size;
-		__entry->bno = bno;
-		__entry->len = len;
-		__entry->caller_ip = caller_ip;
-		__entry->flags = flags;
-	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-		  "flags %s caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->bno,
-		  __entry->len,
-		  __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
-		  (void *)__entry->caller_ip)
-
-);
-
-DECLARE_EVENT_CLASS(xfs_busy_class,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-		 xfs_agblock_t agbno, xfs_extlen_t len),
-	TP_ARGS(mp, agno, agbno, len),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len)
-);
-#define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-		 xfs_agblock_t agbno, xfs_extlen_t len), \
-	TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-
-TRACE_EVENT(xfs_alloc_busy_trim,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-		 xfs_agblock_t agbno, xfs_extlen_t len,
-		 xfs_agblock_t tbno, xfs_extlen_t tlen),
-	TP_ARGS(mp, agno, agbno, len, tbno, tlen),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-		__field(xfs_agblock_t, tbno)
-		__field(xfs_extlen_t, tlen)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-		__entry->tbno = tbno;
-		__entry->tlen = tlen;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len,
-		  __entry->tbno,
-		  __entry->tlen)
-);
-
-TRACE_EVENT(xfs_trans_commit_lsn,
-	TP_PROTO(struct xfs_trans *trans),
-	TP_ARGS(trans),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(struct xfs_trans *, tp)
-		__field(xfs_lsn_t, lsn)
-	),
-	TP_fast_assign(
-		__entry->dev = trans->t_mountp->m_super->s_dev;
-		__entry->tp = trans;
-		__entry->lsn = trans->t_commit_lsn;
-	),
-	TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->tp,
-		  __entry->lsn)
-);
-
-TRACE_EVENT(xfs_agf,
-	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
-		 unsigned long caller_ip),
-	TP_ARGS(mp, agf, flags, caller_ip),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(int, flags)
-		__field(__u32, length)
-		__field(__u32, bno_root)
-		__field(__u32, cnt_root)
-		__field(__u32, bno_level)
-		__field(__u32, cnt_level)
-		__field(__u32, flfirst)
-		__field(__u32, fllast)
-		__field(__u32, flcount)
-		__field(__u32, freeblks)
-		__field(__u32, longest)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = be32_to_cpu(agf->agf_seqno),
-		__entry->flags = flags;
-		__entry->length = be32_to_cpu(agf->agf_length),
-		__entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
-		__entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
-		__entry->bno_level =
-				be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
-		__entry->cnt_level =
-				be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
-		__entry->flfirst = be32_to_cpu(agf->agf_flfirst),
-		__entry->fllast = be32_to_cpu(agf->agf_fllast),
-		__entry->flcount = be32_to_cpu(agf->agf_flcount),
-		__entry->freeblks = be32_to_cpu(agf->agf_freeblks),
-		__entry->longest = be32_to_cpu(agf->agf_longest);
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
-		  "levels b %u c %u flfirst %u fllast %u flcount %u "
-		  "freeblks %u longest %u caller %pf",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
-		  __entry->length,
-		  __entry->bno_root,
-		  __entry->cnt_root,
-		  __entry->bno_level,
-		  __entry->cnt_level,
-		  __entry->flfirst,
-		  __entry->fllast,
-		  __entry->flcount,
-		  __entry->freeblks,
-		  __entry->longest,
-		  (void *)__entry->caller_ip)
-);
-
-TRACE_EVENT(xfs_free_extent,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
-		 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
-	TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-		__field(int, isfl)
-		__field(int, haveleft)
-		__field(int, haveright)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-		__entry->isfl = isfl;
-		__entry->haveleft = haveleft;
-		__entry->haveright = haveright;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len,
-		  __entry->isfl,
-		  __entry->haveleft ?
-			(__entry->haveright ? "both" : "left") :
-			(__entry->haveright ? "right" : "none"))
-
-);
-
-DECLARE_EVENT_CLASS(xfs_alloc_class,
-	TP_PROTO(struct xfs_alloc_arg *args),
-	TP_ARGS(args),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, minlen)
-		__field(xfs_extlen_t, maxlen)
-		__field(xfs_extlen_t, mod)
-		__field(xfs_extlen_t, prod)
-		__field(xfs_extlen_t, minleft)
-		__field(xfs_extlen_t, total)
-		__field(xfs_extlen_t, alignment)
-		__field(xfs_extlen_t, minalignslop)
-		__field(xfs_extlen_t, len)
-		__field(short, type)
-		__field(short, otype)
-		__field(char, wasdel)
-		__field(char, wasfromfl)
-		__field(char, isfl)
-		__field(char, userdata)
-		__field(xfs_fsblock_t, firstblock)
-	),
-	TP_fast_assign(
-		__entry->dev = args->mp->m_super->s_dev;
-		__entry->agno = args->agno;
-		__entry->agbno = args->agbno;
-		__entry->minlen = args->minlen;
-		__entry->maxlen = args->maxlen;
-		__entry->mod = args->mod;
-		__entry->prod = args->prod;
-		__entry->minleft = args->minleft;
-		__entry->total = args->total;
-		__entry->alignment = args->alignment;
-		__entry->minalignslop = args->minalignslop;
-		__entry->len = args->len;
-		__entry->type = args->type;
-		__entry->otype = args->otype;
-		__entry->wasdel = args->wasdel;
-		__entry->wasfromfl = args->wasfromfl;
-		__entry->isfl = args->isfl;
-		__entry->userdata = args->userdata;
-		__entry->firstblock = args->firstblock;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
-		  "prod %u minleft %u total %u alignment %u minalignslop %u "
-		  "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
-		  "userdata %d firstblock 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->minlen,
-		  __entry->maxlen,
-		  __entry->mod,
-		  __entry->prod,
-		  __entry->minleft,
-		  __entry->total,
-		  __entry->alignment,
-		  __entry->minalignslop,
-		  __entry->len,
-		  __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
-		  __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
-		  __entry->wasdel,
-		  __entry->wasfromfl,
-		  __entry->isfl,
-		  __entry->userdata,
-		  (unsigned long long)__entry->firstblock)
-)
-
-#define DEFINE_ALLOC_EVENT(name) \
-DEFINE_EVENT(xfs_alloc_class, name, \
-	TP_PROTO(struct xfs_alloc_arg *args), \
-	TP_ARGS(args))
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-
-DECLARE_EVENT_CLASS(xfs_dir2_class,
-	TP_PROTO(struct xfs_da_args *args),
-	TP_ARGS(args),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__dynamic_array(char, name, args->namelen)
-		__field(int, namelen)
-		__field(xfs_dahash_t, hashval)
-		__field(xfs_ino_t, inumber)
-		__field(int, op_flags)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-		__entry->ino = args->dp->i_ino;
-		if (args->namelen)
-			memcpy(__get_str(name), args->name, args->namelen);
-		__entry->namelen = args->namelen;
-		__entry->hashval = args->hashval;
-		__entry->inumber = args->inumber;
-		__entry->op_flags = args->op_flags;
-	),
-	TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
-		  "inumber 0x%llx op_flags %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->namelen,
-		  __entry->namelen ? __get_str(name) : NULL,
-		  __entry->namelen,
-		  __entry->hashval,
-		  __entry->inumber,
-		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
-)
-
-#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
-	TP_PROTO(struct xfs_da_args *args), \
-	TP_ARGS(args))
-DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
-
-DECLARE_EVENT_CLASS(xfs_dir2_space_class,
-	TP_PROTO(struct xfs_da_args *args, int idx),
-	TP_ARGS(args, idx),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(int, op_flags)
-		__field(int, idx)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-		__entry->ino = args->dp->i_ino;
-		__entry->op_flags = args->op_flags;
-		__entry->idx = idx;
-	),
-	TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-		  __entry->idx)
-)
-
-#define DEFINE_DIR2_SPACE_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_space_class, name, \
-	TP_PROTO(struct xfs_da_args *args, int idx), \
-	TP_ARGS(args, idx))
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
-
-TRACE_EVENT(xfs_dir2_leafn_moveents,
-	TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
-	TP_ARGS(args, src_idx, dst_idx, count),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(int, op_flags)
-		__field(int, src_idx)
-		__field(int, dst_idx)
-		__field(int, count)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-		__entry->ino = args->dp->i_ino;
-		__entry->op_flags = args->op_flags;
-		__entry->src_idx = src_idx;
-		__entry->dst_idx = dst_idx;
-		__entry->count = count;
-	),
-	TP_printk("dev %d:%d ino 0x%llx op_flags %s "
-		  "src_idx %d dst_idx %d count %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-		  __entry->src_idx,
-		  __entry->dst_idx,
-		  __entry->count)
-);
-
-#define XFS_SWAPEXT_INODES \
-	{ 0,	"target" }, \
-	{ 1,	"temp" }
-
-#define XFS_INODE_FORMAT_STR \
-	{ 0,	"invalid" }, \
-	{ 1,	"local" }, \
-	{ 2,	"extent" }, \
-	{ 3,	"btree" }
-
-DECLARE_EVENT_CLASS(xfs_swap_extent_class,
-	TP_PROTO(struct xfs_inode *ip, int which),
-	TP_ARGS(ip, which),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(int, which)
-		__field(xfs_ino_t, ino)
-		__field(int, format)
-		__field(int, nex)
-		__field(int, max_nex)
-		__field(int, broot_size)
-		__field(int, fork_off)
-	),
-	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->which = which;
-		__entry->ino = ip->i_ino;
-		__entry->format = ip->i_d.di_format;
-		__entry->nex = ip->i_d.di_nextents;
-		__entry->max_nex = ip->i_df.if_ext_max;
-		__entry->broot_size = ip->i_df.if_broot_bytes;
-		__entry->fork_off = XFS_IFORK_BOFF(ip);
-	),
-	TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
-		  "Max in-fork extents %d, broot size %d, fork offset %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
-		  __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
-		  __entry->nex,
-		  __entry->max_nex,
-		  __entry->broot_size,
-		  __entry->fork_off)
-)
-
-#define DEFINE_SWAPEXT_EVENT(name) \
-DEFINE_EVENT(xfs_swap_extent_class, name, \
-	TP_PROTO(struct xfs_inode *ip, int which), \
-	TP_ARGS(ip, which))
-
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-	TP_PROTO(struct log *log, struct xlog_recover *trans,
-		struct xlog_recover_item *item, int pass),
-	TP_ARGS(log, trans, item, pass),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(unsigned long, item)
-		__field(xlog_tid_t, tid)
-		__field(int, type)
-		__field(int, pass)
-		__field(int, count)
-		__field(int, total)
-	),
-	TP_fast_assign(
-		__entry->dev = log->l_mp->m_super->s_dev;
-		__entry->item = (unsigned long)item;
-		__entry->tid = trans->r_log_tid;
-		__entry->type = ITEM_TYPE(item);
-		__entry->pass = pass;
-		__entry->count = item->ri_cnt;
-		__entry->total = item->ri_total;
-	),
-	TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
-		  "item region count/total %d/%d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->tid,
-		  __entry->pass,
-		  (void *)__entry->item,
-		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
-		  __entry->count,
-		  __entry->total)
-)
-
-#define DEFINE_LOG_RECOVER_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_item_class, name, \
-	TP_PROTO(struct log *log, struct xlog_recover *trans, \
-		struct xlog_recover_item *item, int pass), \
-	TP_ARGS(log, trans, item, pass))
-
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
-	TP_ARGS(log, buf_f),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(__int64_t, blkno)
-		__field(unsigned short, len)
-		__field(unsigned short, flags)
-		__field(unsigned short, size)
-		__field(unsigned int, map_size)
-	),
-	TP_fast_assign(
-		__entry->dev = log->l_mp->m_super->s_dev;
-		__entry->blkno = buf_f->blf_blkno;
-		__entry->len = buf_f->blf_len;
-		__entry->flags = buf_f->blf_flags;
-		__entry->size = buf_f->blf_size;
-		__entry->map_size = buf_f->blf_map_size;
-	),
-	TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
-			"map_size %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->blkno,
-		  __entry->len,
-		  __entry->flags,
-		  __entry->size,
-		  __entry->map_size)
-)
-
-#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
-	TP_ARGS(log, buf_f))
-
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
-	TP_ARGS(log, in_f),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_ino_t, ino)
-		__field(unsigned short, size)
-		__field(int, fields)
-		__field(unsigned short, asize)
-		__field(unsigned short, dsize)
-		__field(__int64_t, blkno)
-		__field(int, len)
-		__field(int, boffset)
-	),
-	TP_fast_assign(
-		__entry->dev = log->l_mp->m_super->s_dev;
-		__entry->ino = in_f->ilf_ino;
-		__entry->size = in_f->ilf_size;
-		__entry->fields = in_f->ilf_fields;
-		__entry->asize = in_f->ilf_asize;
-		__entry->dsize = in_f->ilf_dsize;
-		__entry->blkno = in_f->ilf_blkno;
-		__entry->len = in_f->ilf_len;
-		__entry->boffset = in_f->ilf_boffset;
-	),
-	TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
-			"dsize %d, blkno 0x%llx, len %d, boffset %d",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->ino,
-		  __entry->size,
-		  __entry->fields,
-		  __entry->asize,
-		  __entry->dsize,
-		  __entry->blkno,
-		  __entry->len,
-		  __entry->boffset)
-)
-#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
-	TP_ARGS(log, in_f))
-
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
-
-DECLARE_EVENT_CLASS(xfs_discard_class,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-		 xfs_agblock_t agbno, xfs_extlen_t len),
-	TP_ARGS(mp, agno, agbno, len),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u\n",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len)
-)
-
-#define DEFINE_DISCARD_EVENT(name) \
-DEFINE_EVENT(xfs_discard_class, name, \
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-		 xfs_agblock_t agbno, xfs_extlen_t len), \
-	TP_ARGS(mp, agno, agbno, len))
-DEFINE_DISCARD_EVENT(xfs_discard_extent);
-DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
-DEFINE_DISCARD_EVENT(xfs_discard_exclude);
-DEFINE_DISCARD_EVENT(xfs_discard_busy);
-
-#endif /* _TRACE_XFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE xfs_trace
-#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
deleted file mode 100644
index 7c220b4..0000000
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-/*
- * Return values for xfs_inactive.  A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define	VN_INACTIVE_CACHE	0
-#define	VN_INACTIVE_NOCACHE	1
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT	0x00004		/* bypass page cache */
-#define IO_INVIS	0x00020		/* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
-	{ IO_ISDIRECT,	"DIRECT" }, \
-	{ IO_INVIS,	"INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE			0	/* none */
-#define FI_REMAPF		1	/* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
-					   Prevent VM access to the pages until
-					   the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp)	mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp)	(vp->i_mapping->nrpages)
-#define VN_DIRTY(vp)	mapping_tagged(vp->i_mapping, \
-					PAGECACHE_TAG_DIRTY)
-
-
-#endif	/* __XFS_VNODE_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
deleted file mode 100644
index 87d3e03..0000000
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2008 Christoph Hellwig.
- * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-
-static int
-xfs_xattr_get(struct dentry *dentry, const char *name,
-		void *value, size_t size, int xflags)
-{
-	struct xfs_inode *ip = XFS_I(dentry->d_inode);
-	int error, asize = size;
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	/* Convert Linux syscall to XFS internal ATTR flags */
-	if (!size) {
-		xflags |= ATTR_KERNOVAL;
-		value = NULL;
-	}
-
-	error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
-	if (error)
-		return error;
-	return asize;
-}
-
-static int
-xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
-		size_t size, int flags, int xflags)
-{
-	struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
-	if (strcmp(name, "") == 0)
-		return -EINVAL;
-
-	/* Convert Linux syscall to XFS internal ATTR flags */
-	if (flags & XATTR_CREATE)
-		xflags |= ATTR_CREATE;
-	if (flags & XATTR_REPLACE)
-		xflags |= ATTR_REPLACE;
-
-	if (!value)
-		return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
-	return -xfs_attr_set(ip, (unsigned char *)name,
-				(void *)value, size, xflags);
-}
-
-static const struct xattr_handler xfs_xattr_user_handler = {
-	.prefix	= XATTR_USER_PREFIX,
-	.flags	= 0, /* no flags implies user namespace */
-	.get	= xfs_xattr_get,
-	.set	= xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_trusted_handler = {
-	.prefix	= XATTR_TRUSTED_PREFIX,
-	.flags	= ATTR_ROOT,
-	.get	= xfs_xattr_get,
-	.set	= xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.flags	= ATTR_SECURE,
-	.get	= xfs_xattr_get,
-	.set	= xfs_xattr_set,
-};
-
-const struct xattr_handler *xfs_xattr_handlers[] = {
-	&xfs_xattr_user_handler,
-	&xfs_xattr_trusted_handler,
-	&xfs_xattr_security_handler,
-#ifdef CONFIG_XFS_POSIX_ACL
-	&xfs_xattr_acl_access_handler,
-	&xfs_xattr_acl_default_handler,
-#endif
-	NULL
-};
-
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
-	if (flags & XFS_ATTR_SECURE)
-		return sizeof("security");
-	else if (flags & XFS_ATTR_ROOT)
-		return sizeof("trusted");
-	else
-		return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
-	if (flags & XFS_ATTR_SECURE)
-		return xfs_xattr_security_handler.prefix;
-	else if (flags & XFS_ATTR_ROOT)
-		return xfs_xattr_trusted_handler.prefix;
-	else
-		return xfs_xattr_user_handler.prefix;
-}
-
-static int
-xfs_xattr_put_listent(
-	struct xfs_attr_list_context *context,
-	int		flags,
-	unsigned char	*name,
-	int		namelen,
-	int		valuelen,
-	unsigned char	*value)
-{
-	unsigned int prefix_len = xfs_xattr_prefix_len(flags);
-	char *offset;
-	int arraytop;
-
-	ASSERT(context->count >= 0);
-
-	/*
-	 * Only show root namespace entries if we are actually allowed to
-	 * see them.
-	 */
-	if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
-		return 0;
-
-	arraytop = context->count + prefix_len + namelen + 1;
-	if (arraytop > context->firstu) {
-		context->count = -1;	/* insufficient space */
-		return 1;
-	}
-	offset = (char *)context->alist + context->count;
-	strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
-	offset += prefix_len;
-	strncpy(offset, (char *)name, namelen);			/* real name */
-	offset += namelen;
-	*offset = '\0';
-	context->count += prefix_len + namelen + 1;
-	return 0;
-}
-
-static int
-xfs_xattr_put_listent_sizes(
-	struct xfs_attr_list_context *context,
-	int		flags,
-	unsigned char	*name,
-	int		namelen,
-	int		valuelen,
-	unsigned char	*value)
-{
-	context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
-	return 0;
-}
-
-static int
-list_one_attr(const char *name, const size_t len, void *data,
-		size_t size, ssize_t *result)
-{
-	char *p = data + *result;
-
-	*result += len;
-	if (!size)
-		return 0;
-	if (*result > size)
-		return -ERANGE;
-
-	strcpy(p, name);
-	return 0;
-}
-
-ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
-{
-	struct xfs_attr_list_context context;
-	struct attrlist_cursor_kern cursor = { 0 };
-	struct inode		*inode = dentry->d_inode;
-	int			error;
-
-	/*
-	 * First read the regular on-disk attributes.
-	 */
-	memset(&context, 0, sizeof(context));
-	context.dp = XFS_I(inode);
-	context.cursor = &cursor;
-	context.resynch = 1;
-	context.alist = data;
-	context.bufsize = size;
-	context.firstu = context.bufsize;
-
-	if (size)
-		context.put_listent = xfs_xattr_put_listent;
-	else
-		context.put_listent = xfs_xattr_put_listent_sizes;
-
-	xfs_attr_list_int(&context);
-	if (context.count < 0)
-		return -ERANGE;
-
-	/*
-	 * Then add the two synthetic ACL attributes.
-	 */
-	if (posix_acl_access_exists(inode)) {
-		error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
-				strlen(POSIX_ACL_XATTR_ACCESS) + 1,
-				data, size, &context.count);
-		if (error)
-			return error;
-	}
-
-	if (posix_acl_default_exists(inode)) {
-		error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
-				strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
-				data, size, &context.count);
-		if (error)
-			return error;
-	}
-
-	return context.count;
-}
diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h
new file mode 100644
index 0000000..ff6a198
--- /dev/null
+++ b/fs/xfs/mrlock.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+typedef struct {
+	struct rw_semaphore	mr_lock;
+#ifdef DEBUG
+	int			mr_writer;
+#endif
+} mrlock_t;
+
+#ifdef DEBUG
+#define mrinit(mrp, name)	\
+	do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name)	\
+	do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
+#define mrlock_init(mrp, t,n,s)	mrinit(mrp, n)
+#define mrfree(mrp)		do { } while (0)
+
+static inline void mraccess_nested(mrlock_t *mrp, int subclass)
+{
+	down_read_nested(&mrp->mr_lock, subclass);
+}
+
+static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
+{
+	down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
+	mrp->mr_writer = 1;
+#endif
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+	return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+	if (!down_write_trylock(&mrp->mr_lock))
+		return 0;
+#ifdef DEBUG
+	mrp->mr_writer = 1;
+#endif
+	return 1;
+}
+
+static inline void mrunlock_excl(mrlock_t *mrp)
+{
+#ifdef DEBUG
+	mrp->mr_writer = 0;
+#endif
+	up_write(&mrp->mr_lock);
+}
+
+static inline void mrunlock_shared(mrlock_t *mrp)
+{
+	up_read(&mrp->mr_lock);
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+#ifdef DEBUG
+	mrp->mr_writer = 0;
+#endif
+	downgrade_write(&mrp->mr_lock);
+}
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
deleted file mode 100644
index db62959..0000000
--- a/fs/xfs/quota/xfs_dquot.c
+++ /dev/null
@@ -1,1454 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-
-/*
-   LOCK ORDER
-
-   inode lock		    (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-		      has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-static struct lock_class_key xfs_dquot_other_class;
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-	xfs_mount_t  *mp,
-	xfs_dqid_t   id,
-	uint	     type)
-{
-	xfs_dquot_t	*dqp;
-	boolean_t	brandnewdquot;
-
-	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-	dqp->dq_flags = type;
-	dqp->q_core.d_id = cpu_to_be32(id);
-	dqp->q_mount = mp;
-
-	/*
-	 * No need to re-initialize these if this is a reclaimed dquot.
-	 */
-	if (brandnewdquot) {
-		INIT_LIST_HEAD(&dqp->q_freelist);
-		mutex_init(&dqp->q_qlock);
-		init_waitqueue_head(&dqp->q_pinwait);
-
-		/*
-		 * Because we want to use a counting completion, complete
-		 * the flush completion once to allow a single access to
-		 * the flush completion without blocking.
-		 */
-		init_completion(&dqp->q_flush);
-		complete(&dqp->q_flush);
-
-		trace_xfs_dqinit(dqp);
-	} else {
-		/*
-		 * Only the q_core portion was zeroed in dqreclaim_one().
-		 * So, we need to reset others.
-		 */
-		dqp->q_nrefs = 0;
-		dqp->q_blkno = 0;
-		INIT_LIST_HEAD(&dqp->q_mplist);
-		INIT_LIST_HEAD(&dqp->q_hashlist);
-		dqp->q_bufoffset = 0;
-		dqp->q_fileoffset = 0;
-		dqp->q_transp = NULL;
-		dqp->q_gdquot = NULL;
-		dqp->q_res_bcount = 0;
-		dqp->q_res_icount = 0;
-		dqp->q_res_rtbcount = 0;
-		atomic_set(&dqp->q_pincount, 0);
-		dqp->q_hash = NULL;
-		ASSERT(list_empty(&dqp->q_freelist));
-
-		trace_xfs_dqreuse(dqp);
-	}
-
-	/*
-	 * In either case we need to make sure group quotas have a different
-	 * lock class than user quotas, to make sure lockdep knows we can
-	 * locks of one of each at the same time.
-	 */
-	if (!(type & XFS_DQ_USER))
-		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-	/*
-	 * log item gets initialized later
-	 */
-	return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
-	xfs_dquot_t	*dqp)
-{
-	ASSERT(list_empty(&dqp->q_freelist));
-
-	mutex_destroy(&dqp->q_qlock);
-	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
-	atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-	xfs_dqid_t	id,
-	uint		type,
-	xfs_dqblk_t	*d)
-{
-	/*
-	 * Caller has zero'd the entire dquot 'chunk' already.
-	 */
-	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-	d->dd_diskdq.d_id = cpu_to_be32(id);
-	d->dd_diskdq.d_flags = type;
-}
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
-	xfs_mount_t		*mp,
-	xfs_disk_dquot_t	*d)
-{
-	xfs_quotainfo_t		*q = mp->m_quotainfo;
-
-	ASSERT(d->d_id);
-
-	if (q->qi_bsoftlimit && !d->d_blk_softlimit)
-		d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
-	if (q->qi_bhardlimit && !d->d_blk_hardlimit)
-		d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
-	if (q->qi_isoftlimit && !d->d_ino_softlimit)
-		d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-	if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-		d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-	if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-		d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-	if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-		d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.  They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
-	xfs_mount_t		*mp,
-	xfs_disk_dquot_t	*d)
-{
-	ASSERT(d->d_id);
-
-#ifdef DEBUG
-	if (d->d_blk_hardlimit)
-		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
-		       be64_to_cpu(d->d_blk_hardlimit));
-	if (d->d_ino_hardlimit)
-		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
-		       be64_to_cpu(d->d_ino_hardlimit));
-	if (d->d_rtb_hardlimit)
-		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
-		       be64_to_cpu(d->d_rtb_hardlimit));
-#endif
-
-	if (!d->d_btimer) {
-		if ((d->d_blk_softlimit &&
-		     (be64_to_cpu(d->d_bcount) >=
-		      be64_to_cpu(d->d_blk_softlimit))) ||
-		    (d->d_blk_hardlimit &&
-		     (be64_to_cpu(d->d_bcount) >=
-		      be64_to_cpu(d->d_blk_hardlimit)))) {
-			d->d_btimer = cpu_to_be32(get_seconds() +
-					mp->m_quotainfo->qi_btimelimit);
-		} else {
-			d->d_bwarns = 0;
-		}
-	} else {
-		if ((!d->d_blk_softlimit ||
-		     (be64_to_cpu(d->d_bcount) <
-		      be64_to_cpu(d->d_blk_softlimit))) &&
-		    (!d->d_blk_hardlimit ||
-		    (be64_to_cpu(d->d_bcount) <
-		     be64_to_cpu(d->d_blk_hardlimit)))) {
-			d->d_btimer = 0;
-		}
-	}
-
-	if (!d->d_itimer) {
-		if ((d->d_ino_softlimit &&
-		     (be64_to_cpu(d->d_icount) >=
-		      be64_to_cpu(d->d_ino_softlimit))) ||
-		    (d->d_ino_hardlimit &&
-		     (be64_to_cpu(d->d_icount) >=
-		      be64_to_cpu(d->d_ino_hardlimit)))) {
-			d->d_itimer = cpu_to_be32(get_seconds() +
-					mp->m_quotainfo->qi_itimelimit);
-		} else {
-			d->d_iwarns = 0;
-		}
-	} else {
-		if ((!d->d_ino_softlimit ||
-		     (be64_to_cpu(d->d_icount) <
-		      be64_to_cpu(d->d_ino_softlimit)))  &&
-		    (!d->d_ino_hardlimit ||
-		     (be64_to_cpu(d->d_icount) <
-		      be64_to_cpu(d->d_ino_hardlimit)))) {
-			d->d_itimer = 0;
-		}
-	}
-
-	if (!d->d_rtbtimer) {
-		if ((d->d_rtb_softlimit &&
-		     (be64_to_cpu(d->d_rtbcount) >=
-		      be64_to_cpu(d->d_rtb_softlimit))) ||
-		    (d->d_rtb_hardlimit &&
-		     (be64_to_cpu(d->d_rtbcount) >=
-		      be64_to_cpu(d->d_rtb_hardlimit)))) {
-			d->d_rtbtimer = cpu_to_be32(get_seconds() +
-					mp->m_quotainfo->qi_rtbtimelimit);
-		} else {
-			d->d_rtbwarns = 0;
-		}
-	} else {
-		if ((!d->d_rtb_softlimit ||
-		     (be64_to_cpu(d->d_rtbcount) <
-		      be64_to_cpu(d->d_rtb_softlimit))) &&
-		    (!d->d_rtb_hardlimit ||
-		     (be64_to_cpu(d->d_rtbcount) <
-		      be64_to_cpu(d->d_rtb_hardlimit)))) {
-			d->d_rtbtimer = 0;
-		}
-	}
-}
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,
-	uint		type,
-	xfs_buf_t	*bp)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	xfs_dqblk_t	*d;
-	int		curid, i;
-
-	ASSERT(tp);
-	ASSERT(xfs_buf_islocked(bp));
-
-	d = bp->b_addr;
-
-	/*
-	 * ID of the first dquot in the block - id's are zero based.
-	 */
-	curid = id - (id % q->qi_dqperchunk);
-	ASSERT(curid >= 0);
-	memset(d, 0, BBTOB(q->qi_dqchunklen));
-	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-		xfs_qm_dqinit_core(curid, type, d);
-	xfs_trans_dquot_buf(tp, bp,
-			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
-			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
-			     XFS_BLF_GDQUOT_BUF)));
-	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
-}
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
-	xfs_trans_t	**tpp,
-	xfs_mount_t	*mp,
-	xfs_dquot_t	*dqp,
-	xfs_inode_t	*quotip,
-	xfs_fileoff_t	offset_fsb,
-	xfs_buf_t	**O_bpp)
-{
-	xfs_fsblock_t	firstblock;
-	xfs_bmap_free_t flist;
-	xfs_bmbt_irec_t map;
-	int		nmaps, error, committed;
-	xfs_buf_t	*bp;
-	xfs_trans_t	*tp = *tpp;
-
-	ASSERT(tp != NULL);
-
-	trace_xfs_dqalloc(dqp);
-
-	/*
-	 * Initialize the bmap freelist prior to calling bmapi code.
-	 */
-	xfs_bmap_init(&flist, &firstblock);
-	xfs_ilock(quotip, XFS_ILOCK_EXCL);
-	/*
-	 * Return if this type of quotas is turned off while we didn't
-	 * have an inode lock
-	 */
-	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-		return (ESRCH);
-	}
-
-	xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
-	nmaps = 1;
-	if ((error = xfs_bmapi(tp, quotip,
-			      offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-			      &firstblock,
-			      XFS_QM_DQALLOC_SPACE_RES(mp),
-			      &map, &nmaps, &flist))) {
-		goto error0;
-	}
-	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-	ASSERT(nmaps == 1);
-	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-	       (map.br_startblock != HOLESTARTBLOCK));
-
-	/*
-	 * Keep track of the blkno to save a lookup later
-	 */
-	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-	/* now we can just get the buffer (there's nothing to read yet) */
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-			       dqp->q_blkno,
-			       mp->m_quotainfo->qi_dqchunklen,
-			       0);
-	if (!bp || (error = xfs_buf_geterror(bp)))
-		goto error1;
-	/*
-	 * Make a chunk of dquots out of this buffer and log
-	 * the entire thing.
-	 */
-	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
-			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
-	/*
-	 * xfs_bmap_finish() may commit the current transaction and
-	 * start a second transaction if the freelist is not empty.
-	 *
-	 * Since we still want to modify this buffer, we need to
-	 * ensure that the buffer is not released on commit of
-	 * the first transaction and ensure the buffer is added to the
-	 * second transaction.
-	 *
-	 * If there is only one transaction then don't stop the buffer
-	 * from being released when it commits later on.
-	 */
-
-	xfs_trans_bhold(tp, bp);
-
-	if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
-		goto error1;
-	}
-
-	if (committed) {
-		tp = *tpp;
-		xfs_trans_bjoin(tp, bp);
-	} else {
-		xfs_trans_bhold_release(tp, bp);
-	}
-
-	*O_bpp = bp;
-	return 0;
-
-      error1:
-	xfs_bmap_cancel(&flist);
-      error0:
-	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
-	return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
-	xfs_trans_t		**tpp,
-	xfs_dquot_t		*dqp,
-	xfs_disk_dquot_t	**O_ddpp,
-	xfs_buf_t		**O_bpp,
-	uint			flags)
-{
-	xfs_bmbt_irec_t map;
-	int		nmaps = 1, error;
-	xfs_buf_t	*bp;
-	xfs_inode_t	*quotip = XFS_DQ_TO_QIP(dqp);
-	xfs_mount_t	*mp = dqp->q_mount;
-	xfs_disk_dquot_t *ddq;
-	xfs_dqid_t	id = be32_to_cpu(dqp->q_core.d_id);
-	xfs_trans_t	*tp = (tpp ? *tpp : NULL);
-
-	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-
-	xfs_ilock(quotip, XFS_ILOCK_SHARED);
-	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-		/*
-		 * Return if this type of quotas is turned off while we
-		 * didn't have the quota inode lock.
-		 */
-		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-		return ESRCH;
-	}
-
-	/*
-	 * Find the block map; no allocations yet
-	 */
-	error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-			  XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-			  NULL, 0, &map, &nmaps, NULL);
-
-	xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-	if (error)
-		return error;
-
-	ASSERT(nmaps == 1);
-	ASSERT(map.br_blockcount == 1);
-
-	/*
-	 * Offset of dquot in the (fixed sized) dquot chunk.
-	 */
-	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-		sizeof(xfs_dqblk_t);
-
-	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-	if (map.br_startblock == HOLESTARTBLOCK) {
-		/*
-		 * We don't allocate unless we're asked to
-		 */
-		if (!(flags & XFS_QMOPT_DQALLOC))
-			return ENOENT;
-
-		ASSERT(tp);
-		error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-					dqp->q_fileoffset, &bp);
-		if (error)
-			return error;
-		tp = *tpp;
-	} else {
-		trace_xfs_dqtobp_read(dqp);
-
-		/*
-		 * store the blkno etc so that we don't have to do the
-		 * mapping all the time
-		 */
-		dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-					   dqp->q_blkno,
-					   mp->m_quotainfo->qi_dqchunklen,
-					   0, &bp);
-		if (error || !bp)
-			return XFS_ERROR(error);
-	}
-
-	ASSERT(xfs_buf_islocked(bp));
-
-	/*
-	 * calculate the location of the dquot inside the buffer.
-	 */
-	ddq = bp->b_addr + dqp->q_bufoffset;
-
-	/*
-	 * A simple sanity check in case we got a corrupted dquot...
-	 */
-	error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
-			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
-			   "dqtobp");
-	if (error) {
-		if (!(flags & XFS_QMOPT_DQREPAIR)) {
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EIO);
-		}
-	}
-
-	*O_bpp = bp;
-	*O_ddpp = ddq;
-
-	return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
-	xfs_trans_t	**tpp,
-	xfs_dqid_t	id,
-	xfs_dquot_t	*dqp,	/* dquot to get filled in */
-	uint		flags)
-{
-	xfs_disk_dquot_t *ddqp;
-	xfs_buf_t	 *bp;
-	int		 error;
-	xfs_trans_t	 *tp;
-
-	ASSERT(tpp);
-
-	trace_xfs_dqread(dqp);
-
-	/*
-	 * get a pointer to the on-disk dquot and the buffer containing it
-	 * dqp already knows its own type (GROUP/USER).
-	 */
-	if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-		return (error);
-	}
-	tp = *tpp;
-
-	/* copy everything from disk dquot to the incore dquot */
-	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-	xfs_qm_dquot_logitem_init(dqp);
-
-	/*
-	 * Reservation counters are defined as reservation plus current usage
-	 * to avoid having to add every time.
-	 */
-	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
-	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
-	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
-	/* Mark the buf so that this will stay incore a little longer */
-	XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
-	/*
-	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
-	 * So we need to release with xfs_trans_brelse().
-	 * The strategy here is identical to that of inodes; we lock
-	 * the dquot in xfs_qm_dqget() before making it accessible to
-	 * others. This is because dquots, like inodes, need a good level of
-	 * concurrency, and we don't want to take locks on the entire buffers
-	 * for dquot accesses.
-	 * Note also that the dquot buffer may even be dirty at this point, if
-	 * this particular dquot was repaired. We still aren't afraid to
-	 * brelse it because we have the changes incore.
-	 */
-	ASSERT(xfs_buf_islocked(bp));
-	xfs_trans_brelse(tp, bp);
-
-	return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,	 /* gid or uid, depending on type */
-	uint		type,	 /* UDQUOT or GDQUOT */
-	uint		flags,	 /* DQALLOC, DQREPAIR */
-	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-	xfs_trans_t	*tp;
-	int		cancelflags=0;
-
-	dqp = xfs_qm_dqinit(mp, id, type);
-	tp = NULL;
-	if (flags & XFS_QMOPT_DQALLOC) {
-		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-				XFS_WRITE_LOG_RES(mp) +
-				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-				128,
-				0,
-				XFS_TRANS_PERM_LOG_RES,
-				XFS_WRITE_LOG_COUNT);
-		if (error) {
-			cancelflags = 0;
-			goto error0;
-		}
-		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-	}
-
-	/*
-	 * Read it from disk; xfs_dqread() takes care of
-	 * all the necessary initialization of dquot's fields (locks, etc)
-	 */
-	if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-		/*
-		 * This can happen if quotas got turned off (ESRCH),
-		 * or if the dquot didn't exist on disk and we ask to
-		 * allocate (ENOENT).
-		 */
-		trace_xfs_dqread_fail(dqp);
-		cancelflags |= XFS_TRANS_ABORT;
-		goto error0;
-	}
-	if (tp) {
-		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-			goto error1;
-	}
-
-	*O_dqpp = dqp;
-	return (0);
-
- error0:
-	ASSERT(error);
-	if (tp)
-		xfs_trans_cancel(tp, cancelflags);
- error1:
-	xfs_qm_dqdestroy(dqp);
-	*O_dqpp = NULL;
-	return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-	xfs_mount_t		*mp,
-	xfs_dqid_t		id,
-	xfs_dqhash_t		*qh,
-	xfs_dquot_t		**O_dqpp)
-{
-	xfs_dquot_t		*dqp;
-	uint			flist_locked;
-
-	ASSERT(mutex_is_locked(&qh->qh_lock));
-
-	flist_locked = B_FALSE;
-
-	/*
-	 * Traverse the hashchain looking for a match
-	 */
-	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-		/*
-		 * We already have the hashlock. We don't need the
-		 * dqlock to look at the id field of the dquot, since the
-		 * id can't be modified without the hashlock anyway.
-		 */
-		if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-			trace_xfs_dqlookup_found(dqp);
-
-			/*
-			 * All in core dquots must be on the dqlist of mp
-			 */
-			ASSERT(!list_empty(&dqp->q_mplist));
-
-			xfs_dqlock(dqp);
-			if (dqp->q_nrefs == 0) {
-				ASSERT(!list_empty(&dqp->q_freelist));
-				if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-					trace_xfs_dqlookup_want(dqp);
-
-					/*
-					 * We may have raced with dqreclaim_one()
-					 * (and lost). So, flag that we don't
-					 * want the dquot to be reclaimed.
-					 */
-					dqp->dq_flags |= XFS_DQ_WANT;
-					xfs_dqunlock(dqp);
-					mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-					xfs_dqlock(dqp);
-					dqp->dq_flags &= ~(XFS_DQ_WANT);
-				}
-				flist_locked = B_TRUE;
-			}
-
-			/*
-			 * id couldn't have changed; we had the hashlock all
-			 * along
-			 */
-			ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-			if (flist_locked) {
-				if (dqp->q_nrefs != 0) {
-					mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-					flist_locked = B_FALSE;
-				} else {
-					/* take it off the freelist */
-					trace_xfs_dqlookup_freelist(dqp);
-					list_del_init(&dqp->q_freelist);
-					xfs_Gqm->qm_dqfrlist_cnt--;
-				}
-			}
-
-			XFS_DQHOLD(dqp);
-
-			if (flist_locked)
-				mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-			/*
-			 * move the dquot to the front of the hashchain
-			 */
-			ASSERT(mutex_is_locked(&qh->qh_lock));
-			list_move(&dqp->q_hashlist, &qh->qh_list);
-			trace_xfs_dqlookup_done(dqp);
-			*O_dqpp = dqp;
-			return 0;
-		}
-	}
-
-	*O_dqpp = NULL;
-	ASSERT(mutex_is_locked(&qh->qh_lock));
-	return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,	  /* locked inode (optional) */
-	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
-	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
-	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
-{
-	xfs_dquot_t	*dqp;
-	xfs_dqhash_t	*h;
-	uint		version;
-	int		error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
-	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-		return (ESRCH);
-	}
-	h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
-	if (xfs_do_dqerror) {
-		if ((xfs_dqerror_target == mp->m_ddev_targp) &&
-		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-			xfs_debug(mp, "Returning error in dqget");
-			return (EIO);
-		}
-	}
-#endif
-
- again:
-
-#ifdef DEBUG
-	ASSERT(type == XFS_DQ_USER ||
-	       type == XFS_DQ_PROJ ||
-	       type == XFS_DQ_GROUP);
-	if (ip) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-		if (type == XFS_DQ_USER)
-			ASSERT(ip->i_udquot == NULL);
-		else
-			ASSERT(ip->i_gdquot == NULL);
-	}
-#endif
-	mutex_lock(&h->qh_lock);
-
-	/*
-	 * Look in the cache (hashtable).
-	 * The chain is kept locked during lookup.
-	 */
-	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
-		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-		/*
-		 * The dquot was found, moved to the front of the chain,
-		 * taken off the freelist if it was on it, and locked
-		 * at this point. Just unlock the hashchain and return.
-		 */
-		ASSERT(*O_dqpp);
-		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-		mutex_unlock(&h->qh_lock);
-		trace_xfs_dqget_hit(*O_dqpp);
-		return (0);	/* success */
-	}
-	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
-	/*
-	 * Dquot cache miss. We don't want to keep the inode lock across
-	 * a (potential) disk read. Also we don't want to deal with the lock
-	 * ordering between quotainode and this inode. OTOH, dropping the inode
-	 * lock here means dealing with a chown that can happen before
-	 * we re-acquire the lock.
-	 */
-	if (ip)
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	/*
-	 * Save the hashchain version stamp, and unlock the chain, so that
-	 * we don't keep the lock across a disk read
-	 */
-	version = h->qh_version;
-	mutex_unlock(&h->qh_lock);
-
-	/*
-	 * Allocate the dquot on the kernel heap, and read the ondisk
-	 * portion off the disk. Also, do all the necessary initialization
-	 * This can return ENOENT if dquot didn't exist on disk and we didn't
-	 * ask it to allocate; ESRCH if quotas got turned off suddenly.
-	 */
-	if ((error = xfs_qm_idtodq(mp, id, type,
-				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-					   XFS_QMOPT_DOWARN),
-				  &dqp))) {
-		if (ip)
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-		return (error);
-	}
-
-	/*
-	 * See if this is mount code calling to look at the overall quota limits
-	 * which are stored in the id == 0 user or group's dquot.
-	 * Since we may not have done a quotacheck by this point, just return
-	 * the dquot without attaching it to any hashtables, lists, etc, or even
-	 * taking a reference.
-	 * The caller must dqdestroy this once done.
-	 */
-	if (flags & XFS_QMOPT_DQSUSER) {
-		ASSERT(id == 0);
-		ASSERT(! ip);
-		goto dqret;
-	}
-
-	/*
-	 * Dquot lock comes after hashlock in the lock ordering
-	 */
-	if (ip) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-		/*
-		 * A dquot could be attached to this inode by now, since
-		 * we had dropped the ilock.
-		 */
-		if (type == XFS_DQ_USER) {
-			if (!XFS_IS_UQUOTA_ON(mp)) {
-				/* inode stays locked on return */
-				xfs_qm_dqdestroy(dqp);
-				return XFS_ERROR(ESRCH);
-			}
-			if (ip->i_udquot) {
-				xfs_qm_dqdestroy(dqp);
-				dqp = ip->i_udquot;
-				xfs_dqlock(dqp);
-				goto dqret;
-			}
-		} else {
-			if (!XFS_IS_OQUOTA_ON(mp)) {
-				/* inode stays locked on return */
-				xfs_qm_dqdestroy(dqp);
-				return XFS_ERROR(ESRCH);
-			}
-			if (ip->i_gdquot) {
-				xfs_qm_dqdestroy(dqp);
-				dqp = ip->i_gdquot;
-				xfs_dqlock(dqp);
-				goto dqret;
-			}
-		}
-	}
-
-	/*
-	 * Hashlock comes after ilock in lock order
-	 */
-	mutex_lock(&h->qh_lock);
-	if (version != h->qh_version) {
-		xfs_dquot_t *tmpdqp;
-		/*
-		 * Now, see if somebody else put the dquot in the
-		 * hashtable before us. This can happen because we didn't
-		 * keep the hashchain lock. We don't have to worry about
-		 * lock order between the two dquots here since dqp isn't
-		 * on any findable lists yet.
-		 */
-		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
-			/*
-			 * Duplicate found. Just throw away the new dquot
-			 * and start over.
-			 */
-			xfs_qm_dqput(tmpdqp);
-			mutex_unlock(&h->qh_lock);
-			xfs_qm_dqdestroy(dqp);
-			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-			goto again;
-		}
-	}
-
-	/*
-	 * Put the dquot at the beginning of the hash-chain and mp's list
-	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-	 */
-	ASSERT(mutex_is_locked(&h->qh_lock));
-	dqp->q_hash = h;
-	list_add(&dqp->q_hashlist, &h->qh_list);
-	h->qh_version++;
-
-	/*
-	 * Attach this dquot to this filesystem's list of all dquots,
-	 * kept inside the mount structure in m_quotainfo field
-	 */
-	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
-	/*
-	 * We return a locked dquot to the caller, with a reference taken
-	 */
-	xfs_dqlock(dqp);
-	dqp->q_nrefs = 1;
-
-	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
-	mp->m_quotainfo->qi_dquots++;
-	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-	mutex_unlock(&h->qh_lock);
- dqret:
-	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	trace_xfs_dqget_miss(dqp);
-	*O_dqpp = dqp;
-	return (0);
-}
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
-	xfs_dquot_t	*dqp)
-{
-	xfs_dquot_t	*gdqp;
-
-	ASSERT(dqp->q_nrefs > 0);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	trace_xfs_dqput(dqp);
-
-	if (dqp->q_nrefs != 1) {
-		dqp->q_nrefs--;
-		xfs_dqunlock(dqp);
-		return;
-	}
-
-	/*
-	 * drop the dqlock and acquire the freelist and dqlock
-	 * in the right order; but try to get it out-of-order first
-	 */
-	if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-		trace_xfs_dqput_wait(dqp);
-		xfs_dqunlock(dqp);
-		mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-		xfs_dqlock(dqp);
-	}
-
-	while (1) {
-		gdqp = NULL;
-
-		/* We can't depend on nrefs being == 1 here */
-		if (--dqp->q_nrefs == 0) {
-			trace_xfs_dqput_free(dqp);
-
-			list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-			xfs_Gqm->qm_dqfrlist_cnt++;
-
-			/*
-			 * If we just added a udquot to the freelist, then
-			 * we want to release the gdquot reference that
-			 * it (probably) has. Otherwise it'll keep the
-			 * gdquot from getting reclaimed.
-			 */
-			if ((gdqp = dqp->q_gdquot)) {
-				/*
-				 * Avoid a recursive dqput call
-				 */
-				xfs_dqlock(gdqp);
-				dqp->q_gdquot = NULL;
-			}
-		}
-		xfs_dqunlock(dqp);
-
-		/*
-		 * If we had a group quota inside the user quota as a hint,
-		 * release it now.
-		 */
-		if (! gdqp)
-			break;
-		dqp = gdqp;
-	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
-	xfs_dquot_t	*dqp)
-{
-	if (!dqp)
-		return;
-
-	trace_xfs_dqrele(dqp);
-
-	xfs_dqlock(dqp);
-	/*
-	 * We don't care to flush it if the dquot is dirty here.
-	 * That will create stutters that we want to avoid.
-	 * Instead we do a delayed write when we try to reclaim
-	 * a dirty dquot. Also xfs_sync will take part of the burden...
-	 */
-	xfs_qm_dqput(dqp);
-}
-
-/*
- * This is the dquot flushing I/O completion routine.  It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk.  It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-STATIC void
-xfs_qm_dqflush_done(
-	struct xfs_buf		*bp,
-	struct xfs_log_item	*lip)
-{
-	xfs_dq_logitem_t	*qip = (struct xfs_dq_logitem *)lip;
-	xfs_dquot_t		*dqp = qip->qli_dquot;
-	struct xfs_ail		*ailp = lip->li_ailp;
-
-	/*
-	 * We only want to pull the item from the AIL if its
-	 * location in the log has not changed since we started the flush.
-	 * Thus, we only bother if the dquot's lsn has
-	 * not changed. First we check the lsn outside the lock
-	 * since it's cheaper, and then we recheck while
-	 * holding the lock before removing the dquot from the AIL.
-	 */
-	if ((lip->li_flags & XFS_LI_IN_AIL) &&
-	    lip->li_lsn == qip->qli_flush_lsn) {
-
-		/* xfs_trans_ail_delete() drops the AIL lock. */
-		spin_lock(&ailp->xa_lock);
-		if (lip->li_lsn == qip->qli_flush_lsn)
-			xfs_trans_ail_delete(ailp, lip);
-		else
-			spin_unlock(&ailp->xa_lock);
-	}
-
-	/*
-	 * Release the dq's flush lock since we're done with it.
-	 */
-	xfs_dqfunlock(dqp);
-}
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
-	xfs_dquot_t		*dqp,
-	uint			flags)
-{
-	struct xfs_mount	*mp = dqp->q_mount;
-	struct xfs_buf		*bp;
-	struct xfs_disk_dquot	*ddqp;
-	int			error;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(!completion_done(&dqp->q_flush));
-
-	trace_xfs_dqflush(dqp);
-
-	/*
-	 * If not dirty, or it's pinned and we are not supposed to block, nada.
-	 */
-	if (!XFS_DQ_IS_DIRTY(dqp) ||
-	    (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
-		xfs_dqfunlock(dqp);
-		return 0;
-	}
-	xfs_qm_dqunpin_wait(dqp);
-
-	/*
-	 * This may have been unpinned because the filesystem is shutting
-	 * down forcibly. If that's the case we must not write this dquot
-	 * to disk, because the log record didn't make it to disk!
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		dqp->dq_flags &= ~XFS_DQ_DIRTY;
-		xfs_dqfunlock(dqp);
-		return XFS_ERROR(EIO);
-	}
-
-	/*
-	 * Get the buffer containing the on-disk dquot
-	 */
-	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-				   mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-	if (error) {
-		ASSERT(error != ENOENT);
-		xfs_dqfunlock(dqp);
-		return error;
-	}
-
-	/*
-	 * Calculate the location of the dquot inside the buffer.
-	 */
-	ddqp = bp->b_addr + dqp->q_bufoffset;
-
-	/*
-	 * A simple sanity check in case we got a corrupted dquot..
-	 */
-	error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
-			   XFS_QMOPT_DOWARN, "dqflush (incore copy)");
-	if (error) {
-		xfs_buf_relse(bp);
-		xfs_dqfunlock(dqp);
-		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-		return XFS_ERROR(EIO);
-	}
-
-	/* This is the only portion of data that needs to persist */
-	memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
-
-	/*
-	 * Clear the dirty field and remember the flush lsn for later use.
-	 */
-	dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
-	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
-					&dqp->q_logitem.qli_item.li_lsn);
-
-	/*
-	 * Attach an iodone routine so that we can remove this dquot from the
-	 * AIL and release the flush lock once the dquot is synced to disk.
-	 */
-	xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
-				  &dqp->q_logitem.qli_item);
-
-	/*
-	 * If the buffer is pinned then push on the log so we won't
-	 * get stuck waiting in the write for too long.
-	 */
-	if (xfs_buf_ispinned(bp)) {
-		trace_xfs_dqflush_force(dqp);
-		xfs_log_force(mp, 0);
-	}
-
-	if (flags & SYNC_WAIT)
-		error = xfs_bwrite(mp, bp);
-	else
-		xfs_bdwrite(mp, bp);
-
-	trace_xfs_dqflush_done(dqp);
-
-	/*
-	 * dqp is still locked, but caller is free to unlock it now.
-	 */
-	return error;
-
-}
-
-int
-xfs_qm_dqlock_nowait(
-	xfs_dquot_t *dqp)
-{
-	return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-	xfs_dquot_t *dqp)
-{
-	mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
-	xfs_dquot_t *dqp)
-{
-	mutex_unlock(&(dqp->q_qlock));
-	if (dqp->q_logitem.qli_dquot == dqp) {
-		/* Once was dqp->q_mount, but might just have been cleared */
-		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-					(xfs_log_item_t*)&(dqp->q_logitem));
-	}
-}
-
-
-void
-xfs_dqunlock_nonotify(
-	xfs_dquot_t *dqp)
-{
-	mutex_unlock(&(dqp->q_qlock));
-}
-
-/*
- * Lock two xfs_dquot structures.
- *
- * To avoid deadlocks we always lock the quota structure with
- * the lowerd id first.
- */
-void
-xfs_dqlock2(
-	xfs_dquot_t	*d1,
-	xfs_dquot_t	*d2)
-{
-	if (d1 && d2) {
-		ASSERT(d1 != d2);
-		if (be32_to_cpu(d1->q_core.d_id) >
-		    be32_to_cpu(d2->q_core.d_id)) {
-			mutex_lock(&d2->q_qlock);
-			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
-		} else {
-			mutex_lock(&d1->q_qlock);
-			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
-		}
-	} else if (d1) {
-		mutex_lock(&d1->q_qlock);
-	} else if (d2) {
-		mutex_lock(&d2->q_qlock);
-	}
-}
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
-	xfs_dquot_t	*dqp)
-{
-	xfs_dqhash_t	*qh = dqp->q_hash;
-	xfs_mount_t	*mp = dqp->q_mount;
-
-	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
-	xfs_dqlock(dqp);
-	/*
-	 * We really can't afford to purge a dquot that is
-	 * referenced, because these are hard refs.
-	 * It shouldn't happen in general because we went thru _all_ inodes in
-	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
-	 * However it is possible that we have dquots with temporary
-	 * references that are not attached to an inode. e.g. see xfs_setattr().
-	 */
-	if (dqp->q_nrefs != 0) {
-		xfs_dqunlock(dqp);
-		mutex_unlock(&dqp->q_hash->qh_lock);
-		return (1);
-	}
-
-	ASSERT(!list_empty(&dqp->q_freelist));
-
-	/*
-	 * If we're turning off quotas, we have to make sure that, for
-	 * example, we don't delete quota disk blocks while dquots are
-	 * in the process of getting written to those disk blocks.
-	 * This dquot might well be on AIL, and we can't leave it there
-	 * if we're turning off quotas. Basically, we need this flush
-	 * lock, and are willing to block on it.
-	 */
-	if (!xfs_dqflock_nowait(dqp)) {
-		/*
-		 * Block on the flush lock after nudging dquot buffer,
-		 * if it is incore.
-		 */
-		xfs_qm_dqflock_pushbuf_wait(dqp);
-	}
-
-	/*
-	 * XXXIf we're turning this type of quotas off, we don't care
-	 * about the dirty metadata sitting in this dquot. OTOH, if
-	 * we're unmounting, we do care, so we flush it and wait.
-	 */
-	if (XFS_DQ_IS_DIRTY(dqp)) {
-		int	error;
-
-		/* dqflush unlocks dqflock */
-		/*
-		 * Given that dqpurge is a very rare occurrence, it is OK
-		 * that we're holding the hashlist and mplist locks
-		 * across the disk write. But, ... XXXsup
-		 *
-		 * We don't care about getting disk errors here. We need
-		 * to purge this dquot anyway, so we go ahead regardless.
-		 */
-		error = xfs_qm_dqflush(dqp, SYNC_WAIT);
-		if (error)
-			xfs_warn(mp, "%s: dquot %p flush failed",
-				__func__, dqp);
-		xfs_dqflock(dqp);
-	}
-	ASSERT(atomic_read(&dqp->q_pincount) == 0);
-	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-	list_del_init(&dqp->q_hashlist);
-	qh->qh_version++;
-	list_del_init(&dqp->q_mplist);
-	mp->m_quotainfo->qi_dqreclaims++;
-	mp->m_quotainfo->qi_dquots--;
-	/*
-	 * XXX Move this to the front of the freelist, if we can get the
-	 * freelist lock.
-	 */
-	ASSERT(!list_empty(&dqp->q_freelist));
-
-	dqp->q_mount = NULL;
-	dqp->q_hash = NULL;
-	dqp->dq_flags = XFS_DQ_INACTIVE;
-	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-	xfs_dqfunlock(dqp);
-	xfs_dqunlock(dqp);
-	mutex_unlock(&qh->qh_lock);
-	return (0);
-}
-
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
-	xfs_dquot_t	*dqp)
-{
-	xfs_mount_t	*mp = dqp->q_mount;
-	xfs_buf_t	*bp;
-
-	/*
-	 * Check to see if the dquot has been flushed delayed
-	 * write.  If so, grab its buffer and send it
-	 * out immediately.  We'll be able to acquire
-	 * the flush lock when the I/O completes.
-	 */
-	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
-			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-	if (!bp)
-		goto out_lock;
-
-	if (XFS_BUF_ISDELAYWRITE(bp)) {
-		if (xfs_buf_ispinned(bp))
-			xfs_log_force(mp, 0);
-		xfs_buf_delwri_promote(bp);
-		wake_up_process(bp->b_target->bt_task);
-	}
-	xfs_buf_relse(bp);
-out_lock:
-	xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
deleted file mode 100644
index 34b7e94..0000000
--- a/fs/xfs/quota/xfs_dquot.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both those in its algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-	struct list_head  qh_list;
-	struct mutex	  qh_lock;
-	uint		  qh_version;	/* ever increasing version */
-	uint		  qh_nelems;	/* number of dquots on the list */
-} xfs_dqhash_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
-	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
-	struct list_head q_freelist;	/* global free list of dquots */
-	struct list_head q_mplist;	/* mount's list of dquots */
-	struct list_head q_hashlist;	/* gloabl hash list of dquots */
-	xfs_dqhash_t	*q_hash;	/* the hashchain header */
-	struct xfs_mount*q_mount;	/* filesystem this relates to */
-	struct xfs_trans*q_transp;	/* trans this belongs to currently */
-	uint		 q_nrefs;	/* # active refs from inodes */
-	xfs_daddr_t	 q_blkno;	/* blkno of dquot buffer */
-	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
-	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
-
-	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */
-	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
-	xfs_dq_logitem_t q_logitem;	/* dquot log item */
-	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
-	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
-	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
-	struct mutex	 q_qlock;	/* quota lock */
-	struct completion q_flush;	/* flush completion queue */
-	atomic_t          q_pincount;	/* dquot pin count */
-	wait_queue_head_t q_pinwait;	/* dquot pinning wait queue */
-} xfs_dquot_t;
-
-/*
- * Lock hierarchy for q_qlock:
- *	XFS_QLOCK_NORMAL is the implicit default,
- * 	XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
- */
-enum {
-	XFS_QLOCK_NORMAL = 0,
-	XFS_QLOCK_NESTED,
-};
-
-#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
-
-/*
- * Manage the q_flush completion queue embedded in the dquot.  This completion
- * queue synchronizes processes attempting to flush the in-core dquot back to
- * disk.
- */
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
-{
-	wait_for_completion(&dqp->q_flush);
-}
-
-static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
-{
-	return try_wait_for_completion(&dqp->q_flush);
-}
-
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
-{
-	complete(&dqp->q_flush);
-}
-
-#define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
-				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
-				 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-				     (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int		xfs_qm_dqpurge(xfs_dquot_t *);
-extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
-					xfs_disk_dquot_t *);
-extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
-					xfs_disk_dquot_t *);
-extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
-					xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void		xfs_qm_dqput(xfs_dquot_t *);
-extern void		xfs_dqlock(xfs_dquot_t *);
-extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void		xfs_dqunlock(xfs_dquot_t *);
-extern void		xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
deleted file mode 100644
index 9e0e2fa..0000000
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
-{
-	return container_of(lip, struct xfs_dq_logitem, qli_item);
-}
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-STATIC uint
-xfs_qm_dquot_logitem_size(
-	struct xfs_log_item	*lip)
-{
-	/*
-	 * we need only two iovecs, one for the format, one for the real thing
-	 */
-	return 2;
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
-	struct xfs_log_item	*lip,
-	struct xfs_log_iovec	*logvec)
-{
-	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
-
-	logvec->i_addr = &qlip->qli_format;
-	logvec->i_len  = sizeof(xfs_dq_logformat_t);
-	logvec->i_type = XLOG_REG_TYPE_QFORMAT;
-	logvec++;
-	logvec->i_addr = &qlip->qli_dquot->q_core;
-	logvec->i_len  = sizeof(xfs_disk_dquot_t);
-	logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
-	ASSERT(2 == lip->li_desc->lid_size);
-	qlip->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	atomic_inc(&dqp->q_pincount);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0.	 The
- * dquot must have been previously pinned with a call to
- * xfs_qm_dquot_logitem_pin().
- */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
-	struct xfs_log_item	*lip,
-	int			remove)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-	ASSERT(atomic_read(&dqp->q_pincount) > 0);
-	if (atomic_dec_and_test(&dqp->q_pincount))
-		wake_up(&dqp->q_pinwait);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-	int			error;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(!completion_done(&dqp->q_flush));
-
-	/*
-	 * Since we were able to lock the dquot's flush lock and
-	 * we found it on the AIL, the dquot must be dirty.  This
-	 * is because the dquot is removed from the AIL while still
-	 * holding the flush lock in xfs_dqflush_done().  Thus, if
-	 * we found it in the AIL and were able to obtain the flush
-	 * lock without sleeping, then there must not have been
-	 * anyone in the process of flushing the dquot.
-	 */
-	error = xfs_qm_dqflush(dqp, 0);
-	if (error)
-		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
-			__func__, error, dqp);
-	xfs_dqunlock(dqp);
-}
-
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn)
-{
-	/*
-	 * We always re-log the entire dquot when it becomes dirty,
-	 * so, the latest copy _is_ the only one that matters.
-	 */
-	return lsn;
-}
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
-	struct xfs_dquot	*dqp)
-{
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	if (atomic_read(&dqp->q_pincount) == 0)
-		return;
-
-	/*
-	 * Give the log a push so we don't wait here too long.
-	 */
-	xfs_log_force(dqp->q_mount, 0);
-	wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
-}
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL lock is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
-	struct xfs_dquot	*dqp = qlip->qli_dquot;
-	struct xfs_buf		*bp;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	/*
-	 * If flushlock isn't locked anymore, chances are that the
-	 * inode flush completed and the inode was taken off the AIL.
-	 * So, just get out.
-	 */
-	if (completion_done(&dqp->q_flush) ||
-	    !(lip->li_flags & XFS_LI_IN_AIL)) {
-		xfs_dqunlock(dqp);
-		return;
-	}
-
-	bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
-			dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-	xfs_dqunlock(dqp);
-	if (!bp)
-		return;
-	if (XFS_BUF_ISDELAYWRITE(bp))
-		xfs_buf_delwri_promote(bp);
-	xfs_buf_relse(bp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item.  Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping.  If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-	if (atomic_read(&dqp->q_pincount) > 0)
-		return XFS_ITEM_PINNED;
-
-	if (!xfs_qm_dqlock_nowait(dqp))
-		return XFS_ITEM_LOCKED;
-
-	if (!xfs_dqflock_nowait(dqp)) {
-		/*
-		 * dquot has already been flushed to the backing buffer,
-		 * leave it locked, pushbuf routine will unlock it.
-		 */
-		return XFS_ITEM_PUSHBUF;
-	}
-
-	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-	return XFS_ITEM_SUCCESS;
-}
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
-	struct xfs_log_item	*lip)
-{
-	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	/*
-	 * Clear the transaction pointer in the dquot
-	 */
-	dqp->q_transp = NULL;
-
-	/*
-	 * dquots are never 'held' from getting unlocked at the end of
-	 * a transaction.  Their locking and unlocking is hidden inside the
-	 * transaction layer, within trans_commit. Hence, no LI_HOLD flag
-	 * for the logitem.
-	 */
-	xfs_dqunlock(dqp);
-}
-
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
-STATIC void
-xfs_qm_dquot_logitem_committing(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn)
-{
-}
-
-/*
- * This is the ops vector for dquots
- */
-static struct xfs_item_ops xfs_dquot_item_ops = {
-	.iop_size	= xfs_qm_dquot_logitem_size,
-	.iop_format	= xfs_qm_dquot_logitem_format,
-	.iop_pin	= xfs_qm_dquot_logitem_pin,
-	.iop_unpin	= xfs_qm_dquot_logitem_unpin,
-	.iop_trylock	= xfs_qm_dquot_logitem_trylock,
-	.iop_unlock	= xfs_qm_dquot_logitem_unlock,
-	.iop_committed	= xfs_qm_dquot_logitem_committed,
-	.iop_push	= xfs_qm_dquot_logitem_push,
-	.iop_pushbuf	= xfs_qm_dquot_logitem_pushbuf,
-	.iop_committing = xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
-	struct xfs_dquot	*dqp)
-{
-	struct xfs_dq_logitem	*lp = &dqp->q_logitem;
-
-	xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
-					&xfs_dquot_item_ops);
-	lp->qli_dquot = dqp;
-	lp->qli_format.qlf_type = XFS_LI_DQUOT;
-	lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
-	lp->qli_format.qlf_blkno = dqp->q_blkno;
-	lp->qli_format.qlf_len = 1;
-	/*
-	 * This is just the offset of this dquot within its buffer
-	 * (which is currently 1 FSB and probably won't change).
-	 * Hence 32 bits for this offset should be just fine.
-	 * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
-	 * here, and recompute it at recovery time.
-	 */
-	lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
-	return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-STATIC uint
-xfs_qm_qoff_logitem_size(
-	struct xfs_log_item	*lip)
-{
-	return 1;
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(
-	struct xfs_log_item	*lip,
-	struct xfs_log_iovec	*log_vector)
-{
-	struct xfs_qoff_logitem	*qflip = QOFF_ITEM(lip);
-
-	ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
-	log_vector->i_addr = &qflip->qql_format;
-	log_vector->i_len = sizeof(xfs_qoff_logitem_t);
-	log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
-	qflip->qql_format.qf_size = 1;
-}
-
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-	struct xfs_log_item	*lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-	struct xfs_log_item	*lip,
-	int			remove)
-{
-}
-
-/*
- * Quotaoff items have no locking, so just return success.
- */
-STATIC uint
-xfs_qm_qoff_logitem_trylock(
-	struct xfs_log_item	*lip)
-{
-	return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-	struct xfs_log_item	*lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn)
-{
-	return lsn;
-}
-
-/*
- * There isn't much you can do to push on an quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
-	struct xfs_log_item	*lip)
-{
-}
-
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn)
-{
-	struct xfs_qoff_logitem	*qfe = QOFF_ITEM(lip);
-	struct xfs_qoff_logitem	*qfs = qfe->qql_start_lip;
-	struct xfs_ail		*ailp = qfs->qql_item.li_ailp;
-
-	/*
-	 * Delete the qoff-start logitem from the AIL.
-	 * xfs_trans_ail_delete() drops the AIL lock.
-	 */
-	spin_lock(&ailp->xa_lock);
-	xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
-
-	kmem_free(qfs);
-	kmem_free(qfe);
-	return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-	struct xfs_log_item	*lip,
-	xfs_lsn_t		commit_lsn)
-{
-}
-
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-	.iop_size	= xfs_qm_qoff_logitem_size,
-	.iop_format	= xfs_qm_qoff_logitem_format,
-	.iop_pin	= xfs_qm_qoff_logitem_pin,
-	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
-	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
-	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
-	.iop_committed	= xfs_qm_qoffend_logitem_committed,
-	.iop_push	= xfs_qm_qoff_logitem_push,
-	.iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-	.iop_size	= xfs_qm_qoff_logitem_size,
-	.iop_format	= xfs_qm_qoff_logitem_format,
-	.iop_pin	= xfs_qm_qoff_logitem_pin,
-	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
-	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
-	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
-	.iop_committed	= xfs_qm_qoff_logitem_committed,
-	.iop_push	= xfs_qm_qoff_logitem_push,
-	.iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
-	struct xfs_mount	*mp,
-	struct xfs_qoff_logitem	*start,
-	uint			flags)
-{
-	struct xfs_qoff_logitem	*qf;
-
-	qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
-
-	xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
-			&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
-	qf->qql_item.li_mountp = mp;
-	qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
-	qf->qql_format.qf_flags = flags;
-	qf->qql_start_lip = start;
-	return qf;
-}
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
deleted file mode 100644
index 5acae2a..0000000
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-struct xfs_qoff_logitem;
-
-typedef struct xfs_dq_logitem {
-	xfs_log_item_t		 qli_item;	   /* common portion */
-	struct xfs_dquot	*qli_dquot;	   /* dquot ptr */
-	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */
-	xfs_dq_logformat_t	 qli_format;	   /* logged structure */
-} xfs_dq_logitem_t;
-
-typedef struct xfs_qoff_logitem {
-	xfs_log_item_t		 qql_item;	/* common portion */
-	struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
-	xfs_qoff_logformat_t	 qql_format;	/* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void		   xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
-					struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
-					struct xfs_qoff_logitem *, uint);
-extern void		   xfs_trans_log_quotaoff_item(struct xfs_trans *,
-					struct xfs_qoff_logitem *);
-
-#endif	/* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
deleted file mode 100644
index 9a0aa76..0000000
--- a/fs/xfs/quota/xfs_qm.c
+++ /dev/null
@@ -1,2416 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex	xfs_Gqm_lock;
-struct xfs_qm	*xfs_Gqm;
-uint		ndquot;
-
-kmem_zone_t	*qm_dqzone;
-kmem_zone_t	*qm_dqtrxzone;
-
-STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
-
-static struct shrinker xfs_qm_shaker = {
-	.shrink = xfs_qm_shake,
-	.seeks = DEFAULT_SEEKS,
-};
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
-	xfs_dqhash_t	*udqhash, *gdqhash;
-	xfs_qm_t	*xqm;
-	size_t		hsize;
-	uint		i;
-
-	/*
-	 * Initialize the dquot hash tables.
-	 */
-	udqhash = kmem_zalloc_greedy(&hsize,
-				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-	if (!udqhash)
-		goto out;
-
-	gdqhash = kmem_zalloc_large(hsize);
-	if (!gdqhash)
-		goto out_free_udqhash;
-
-	hsize /= sizeof(xfs_dqhash_t);
-	ndquot = hsize << 8;
-
-	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-	xqm->qm_dqhashmask = hsize - 1;
-	xqm->qm_usr_dqhtable = udqhash;
-	xqm->qm_grp_dqhtable = gdqhash;
-	ASSERT(xqm->qm_usr_dqhtable != NULL);
-	ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-	}
-
-	/*
-	 * Freelist of all dquots of all file systems
-	 */
-	INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-	xqm->qm_dqfrlist_cnt = 0;
-	mutex_init(&xqm->qm_dqfrlist_lock);
-
-	/*
-	 * dquot zone. we register our own low-memory callback.
-	 */
-	if (!qm_dqzone) {
-		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-						"xfs_dquots");
-		qm_dqzone = xqm->qm_dqzone;
-	} else
-		xqm->qm_dqzone = qm_dqzone;
-
-	register_shrinker(&xfs_qm_shaker);
-
-	/*
-	 * The t_dqinfo portion of transactions.
-	 */
-	if (!qm_dqtrxzone) {
-		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-						   "xfs_dqtrx");
-		qm_dqtrxzone = xqm->qm_dqtrxzone;
-	} else
-		xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-	atomic_set(&xqm->qm_totaldquots, 0);
-	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
-	xqm->qm_nrefs = 0;
-	return xqm;
-
- out_free_udqhash:
-	kmem_free_large(udqhash);
- out:
-	return NULL;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
-	struct xfs_qm	*xqm)
-{
-	struct xfs_dquot *dqp, *n;
-	int		hsize, i;
-
-	ASSERT(xqm != NULL);
-	ASSERT(xqm->qm_nrefs == 0);
-	unregister_shrinker(&xfs_qm_shaker);
-	hsize = xqm->qm_dqhashmask + 1;
-	for (i = 0; i < hsize; i++) {
-		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-	}
-	kmem_free_large(xqm->qm_usr_dqhtable);
-	kmem_free_large(xqm->qm_grp_dqhtable);
-	xqm->qm_usr_dqhtable = NULL;
-	xqm->qm_grp_dqhtable = NULL;
-	xqm->qm_dqhashmask = 0;
-
-	/* frlist cleanup */
-	mutex_lock(&xqm->qm_dqfrlist_lock);
-	list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-		xfs_dqlock(dqp);
-		list_del_init(&dqp->q_freelist);
-		xfs_Gqm->qm_dqfrlist_cnt--;
-		xfs_dqunlock(dqp);
-		xfs_qm_dqdestroy(dqp);
-	}
-	mutex_unlock(&xqm->qm_dqfrlist_lock);
-	mutex_destroy(&xqm->qm_dqfrlist_lock);
-	kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	/*
-	 * Need to lock the xfs_Gqm structure for things like this. For example,
-	 * the structure could disappear between the entry to this routine and
-	 * a HOLD operation if not locked.
-	 */
-	mutex_lock(&xfs_Gqm_lock);
-
-	if (!xfs_Gqm) {
-		xfs_Gqm = xfs_Gqm_init();
-		if (!xfs_Gqm) {
-			mutex_unlock(&xfs_Gqm_lock);
-			return ENOMEM;
-		}
-	}
-
-	/*
-	 * We can keep a list of all filesystems with quotas mounted for
-	 * debugging and statistical purposes, but ...
-	 * Just take a reference and get out.
-	 */
-	xfs_Gqm->qm_nrefs++;
-	mutex_unlock(&xfs_Gqm_lock);
-
-	return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-	struct xfs_mount *mp)
-{
-	xfs_dquot_t	*dqp, *n;
-
-	ASSERT(xfs_Gqm);
-	ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-	/*
-	 * Go thru the freelist and destroy all inactive dquots.
-	 */
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-	list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-		xfs_dqlock(dqp);
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(dqp->q_mount == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(list_empty(&dqp->q_hashlist));
-			ASSERT(list_empty(&dqp->q_mplist));
-			list_del_init(&dqp->q_freelist);
-			xfs_Gqm->qm_dqfrlist_cnt--;
-			xfs_dqunlock(dqp);
-			xfs_qm_dqdestroy(dqp);
-		} else {
-			xfs_dqunlock(dqp);
-		}
-	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-	/*
-	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-	 * be restarted.
-	 */
-	mutex_lock(&xfs_Gqm_lock);
-	if (--xfs_Gqm->qm_nrefs == 0) {
-		xfs_qm_destroy(xfs_Gqm);
-		xfs_Gqm = NULL;
-	}
-	mutex_unlock(&xfs_Gqm_lock);
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount(
-	struct xfs_mount	*mp)
-{
-	if (mp->m_quotainfo) {
-		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-		xfs_qm_destroy_quotainfo(mp);
-	}
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo.  This is also responsible for
- * running a quotacheck as necessary.  We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to inidicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
-	xfs_mount_t	*mp)
-{
-	int		error = 0;
-	uint		sbf;
-
-	/*
-	 * If quotas on realtime volumes is not supported, we disable
-	 * quotas immediately.
-	 */
-	if (mp->m_sb.sb_rextents) {
-		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * Allocate the quotainfo structure inside the mount struct, and
-	 * create quotainode(s), and change/rev superblock if necessary.
-	 */
-	error = xfs_qm_init_quotainfo(mp);
-	if (error) {
-		/*
-		 * We must turn off quotas.
-		 */
-		ASSERT(mp->m_quotainfo == NULL);
-		mp->m_qflags = 0;
-		goto write_changes;
-	}
-	/*
-	 * If any of the quotas are not consistent, do a quotacheck.
-	 */
-	if (XFS_QM_NEED_QUOTACHECK(mp)) {
-		error = xfs_qm_quotacheck(mp);
-		if (error) {
-			/* Quotacheck failed and disabled quotas. */
-			return;
-		}
-	}
-	/* 
-	 * If one type of quotas is off, then it will lose its
-	 * quotachecked status, since we won't be doing accounting for
-	 * that type anymore.
-	 */
-	if (!XFS_IS_UQUOTA_ON(mp))
-		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
-	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
-		mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-
- write_changes:
-	/*
-	 * We actually don't have to acquire the m_sb_lock at all.
-	 * This can only be called from mount, and that's single threaded. XXX
-	 */
-	spin_lock(&mp->m_sb_lock);
-	sbf = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-	spin_unlock(&mp->m_sb_lock);
-
-	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
-		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
-			/*
-			 * We could only have been turning quotas off.
-			 * We aren't in very good shape actually because
-			 * the incore structures are convinced that quotas are
-			 * off, but the on disk superblock doesn't know that !
-			 */
-			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
-			xfs_alert(mp, "%s: Superblock update failed!",
-				__func__);
-		}
-	}
-
-	if (error) {
-		xfs_warn(mp, "Failed to initialize disk quotas.");
-		return;
-	}
-}
-
-/*
- * Called from the vfsops layer.
- */
-void
-xfs_qm_unmount_quotas(
-	xfs_mount_t	*mp)
-{
-	/*
-	 * Release the dquots that root inode, et al might be holding,
-	 * before we flush quotas and blow away the quotainfo structure.
-	 */
-	ASSERT(mp->m_rootip);
-	xfs_qm_dqdetach(mp->m_rootip);
-	if (mp->m_rbmip)
-		xfs_qm_dqdetach(mp->m_rbmip);
-	if (mp->m_rsumip)
-		xfs_qm_dqdetach(mp->m_rsumip);
-
-	/*
-	 * Release the quota inodes.
-	 */
-	if (mp->m_quotainfo) {
-		if (mp->m_quotainfo->qi_uquotaip) {
-			IRELE(mp->m_quotainfo->qi_uquotaip);
-			mp->m_quotainfo->qi_uquotaip = NULL;
-		}
-		if (mp->m_quotainfo->qi_gquotaip) {
-			IRELE(mp->m_quotainfo->qi_gquotaip);
-			mp->m_quotainfo->qi_gquotaip = NULL;
-		}
-	}
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
-	struct xfs_mount	*mp,
-	int			sync_mode)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	int			recl;
-	struct xfs_dquot	*dqp;
-	int			error;
-
-	if (!q)
-		return 0;
-again:
-	mutex_lock(&q->qi_dqlist_lock);
-	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-		xfs_dqlock(dqp);
-		if (! XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		/* XXX a sentinel would be better */
-		recl = q->qi_dqreclaims;
-		if (!xfs_dqflock_nowait(dqp)) {
-			/*
-			 * If we can't grab the flush lock then check
-			 * to see if the dquot has been flushed delayed
-			 * write.  If so, grab its buffer and send it
-			 * out immediately.  We'll be able to acquire
-			 * the flush lock when the I/O completes.
-			 */
-			xfs_qm_dqflock_pushbuf_wait(dqp);
-		}
-		/*
-		 * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write.
-		 */
-		mutex_unlock(&q->qi_dqlist_lock);
-		error = xfs_qm_dqflush(dqp, sync_mode);
-		xfs_dqunlock(dqp);
-		if (error)
-			return error;
-
-		mutex_lock(&q->qi_dqlist_lock);
-		if (recl != q->qi_dqreclaims) {
-			mutex_unlock(&q->qi_dqlist_lock);
-			/* XXX restart limit */
-			goto again;
-		}
-	}
-
-	mutex_unlock(&q->qi_dqlist_lock);
-	/* return ! busy */
-	return 0;
-}
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-	struct xfs_mount	*mp)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	struct xfs_dquot	*dqp, *gdqp;
-	int			nrecl;
-
- again:
-	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-		xfs_dqlock(dqp);
-		if ((gdqp = dqp->q_gdquot)) {
-			xfs_dqlock(gdqp);
-			dqp->q_gdquot = NULL;
-		}
-		xfs_dqunlock(dqp);
-
-		if (gdqp) {
-			/*
-			 * Can't hold the mplist lock across a dqput.
-			 * XXXmust convert to marker based iterations here.
-			 */
-			nrecl = q->qi_dqreclaims;
-			mutex_unlock(&q->qi_dqlist_lock);
-			xfs_qm_dqput(gdqp);
-
-			mutex_lock(&q->qi_dqlist_lock);
-			if (nrecl != q->qi_dqreclaims)
-				goto again;
-		}
-	}
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
-	struct xfs_mount	*mp,
-	uint			flags)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	struct xfs_dquot	*dqp, *n;
-	uint			dqtype;
-	int			nrecl;
-	int			nmisses;
-
-	if (!q)
-		return 0;
-
-	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-	mutex_lock(&q->qi_dqlist_lock);
-
-	/*
-	 * In the first pass through all incore dquots of this filesystem,
-	 * we release the group dquot pointers the user dquots may be
-	 * carrying around as a hint. We need to do this irrespective of
-	 * what's being turned off.
-	 */
-	xfs_qm_detach_gdquots(mp);
-
-      again:
-	nmisses = 0;
-	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-	/*
-	 * Try to get rid of all of the unwanted dquots. The idea is to
-	 * get them off mplist and hashlist, but leave them on freelist.
-	 */
-	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-		/*
-		 * It's OK to look at the type without taking dqlock here.
-		 * We're holding the mplist lock here, and that's needed for
-		 * a dqreclaim.
-		 */
-		if ((dqp->dq_flags & dqtype) == 0)
-			continue;
-
-		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-			nrecl = q->qi_dqreclaims;
-			mutex_unlock(&q->qi_dqlist_lock);
-			mutex_lock(&dqp->q_hash->qh_lock);
-			mutex_lock(&q->qi_dqlist_lock);
-
-			/*
-			 * XXXTheoretically, we can get into a very long
-			 * ping pong game here.
-			 * No one can be adding dquots to the mplist at
-			 * this point, but somebody might be taking things off.
-			 */
-			if (nrecl != q->qi_dqreclaims) {
-				mutex_unlock(&dqp->q_hash->qh_lock);
-				goto again;
-			}
-		}
-
-		/*
-		 * Take the dquot off the mplist and hashlist. It may remain on
-		 * freelist in INACTIVE state.
-		 */
-		nmisses += xfs_qm_dqpurge(dqp);
-	}
-	mutex_unlock(&q->qi_dqlist_lock);
-	return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		ndquots;
-
-	/*
-	 * Purge the dquot cache.
-	 * None of the dquots should really be busy at this point.
-	 */
-	if (mp->m_quotainfo) {
-		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-			delay(ndquots * 10);
-		}
-	}
-	return 0;
-}
-
-STATIC int
-xfs_qm_dqattach_one(
-	xfs_inode_t	*ip,
-	xfs_dqid_t	id,
-	uint		type,
-	uint		doalloc,
-	xfs_dquot_t	*udqhint, /* hint */
-	xfs_dquot_t	**IO_idqpp)
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	error = 0;
-
-	/*
-	 * See if we already have it in the inode itself. IO_idqpp is
-	 * &i_udquot or &i_gdquot. This made the code look weird, but
-	 * made the logic a lot simpler.
-	 */
-	dqp = *IO_idqpp;
-	if (dqp) {
-		trace_xfs_dqattach_found(dqp);
-		return 0;
-	}
-
-	/*
-	 * udqhint is the i_udquot field in inode, and is non-NULL only
-	 * when the type arg is group/project. Its purpose is to save a
-	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-	 * the user dquot.
-	 */
-	if (udqhint) {
-		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-		xfs_dqlock(udqhint);
-
-		/*
-		 * No need to take dqlock to look at the id.
-		 *
-		 * The ID can't change until it gets reclaimed, and it won't
-		 * be reclaimed as long as we have a ref from inode and we
-		 * hold the ilock.
-		 */
-		dqp = udqhint->q_gdquot;
-		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-			xfs_dqlock(dqp);
-			XFS_DQHOLD(dqp);
-			ASSERT(*IO_idqpp == NULL);
-			*IO_idqpp = dqp;
-
-			xfs_dqunlock(dqp);
-			xfs_dqunlock(udqhint);
-			return 0;
-		}
-
-		/*
-		 * We can't hold a dquot lock when we call the dqget code.
-		 * We'll deadlock in no time, because of (not conforming to)
-		 * lock ordering - the inodelock comes before any dquot lock,
-		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
-		 */
-		xfs_dqunlock(udqhint);
-	}
-
-	/*
-	 * Find the dquot from somewhere. This bumps the
-	 * reference count of dquot and returns it locked.
-	 * This can return ENOENT if dquot didn't exist on
-	 * disk and we didn't ask it to allocate;
-	 * ESRCH if quotas got turned off suddenly.
-	 */
-	error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
-	if (error)
-		return error;
-
-	trace_xfs_dqattach_get(dqp);
-
-	/*
-	 * dqget may have dropped and re-acquired the ilock, but it guarantees
-	 * that the dquot returned is the one that should go in the inode.
-	 */
-	*IO_idqpp = dqp;
-	xfs_dqunlock(dqp);
-	return 0;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
-	xfs_dquot_t	*udq,
-	xfs_dquot_t	*gdq)
-{
-	xfs_dquot_t	*tmp;
-
-	xfs_dqlock(udq);
-
-	if ((tmp = udq->q_gdquot)) {
-		if (tmp == gdq) {
-			xfs_dqunlock(udq);
-			return;
-		}
-
-		udq->q_gdquot = NULL;
-		/*
-		 * We can't keep any dqlocks when calling dqrele,
-		 * because the freelist lock comes before dqlocks.
-		 */
-		xfs_dqunlock(udq);
-		/*
-		 * we took a hard reference once upon a time in dqget,
-		 * so give it back when the udquot no longer points at it
-		 * dqput() does the unlocking of the dquot.
-		 */
-		xfs_qm_dqrele(tmp);
-
-		xfs_dqlock(udq);
-		xfs_dqlock(gdq);
-
-	} else {
-		ASSERT(XFS_DQ_IS_LOCKED(udq));
-		xfs_dqlock(gdq);
-	}
-
-	ASSERT(XFS_DQ_IS_LOCKED(udq));
-	ASSERT(XFS_DQ_IS_LOCKED(gdq));
-	/*
-	 * Somebody could have attached a gdquot here,
-	 * when we dropped the uqlock. If so, just do nothing.
-	 */
-	if (udq->q_gdquot == NULL) {
-		XFS_DQHOLD(gdq);
-		udq->q_gdquot = gdq;
-	}
-
-	xfs_dqunlock(gdq);
-	xfs_dqunlock(udq);
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach_locked(
-	xfs_inode_t	*ip,
-	uint		flags)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	uint		nquotas = 0;
-	int		error = 0;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) ||
-	    !XFS_IS_QUOTA_ON(mp) ||
-	    !XFS_NOT_DQATTACHED(mp, ip) ||
-	    ip->i_ino == mp->m_sb.sb_uquotino ||
-	    ip->i_ino == mp->m_sb.sb_gquotino)
-		return 0;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-						flags & XFS_QMOPT_DQALLOC,
-						NULL, &ip->i_udquot);
-		if (error)
-			goto done;
-		nquotas++;
-	}
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	if (XFS_IS_OQUOTA_ON(mp)) {
-		error = XFS_IS_GQUOTA_ON(mp) ?
-			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-						flags & XFS_QMOPT_DQALLOC,
-						ip->i_udquot, &ip->i_gdquot) :
-			xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
-						flags & XFS_QMOPT_DQALLOC,
-						ip->i_udquot, &ip->i_gdquot);
-		/*
-		 * Don't worry about the udquot that we may have
-		 * attached above. It'll get detached, if not already.
-		 */
-		if (error)
-			goto done;
-		nquotas++;
-	}
-
-	/*
-	 * Attach this group quota to the user quota as a hint.
-	 * This WON'T, in general, result in a thrash.
-	 */
-	if (nquotas == 2) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-		ASSERT(ip->i_udquot);
-		ASSERT(ip->i_gdquot);
-
-		/*
-		 * We may or may not have the i_udquot locked at this point,
-		 * but this check is OK since we don't depend on the i_gdquot to
-		 * be accurate 100% all the time. It is just a hint, and this
-		 * will succeed in general.
-		 */
-		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-			goto done;
-		/*
-		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
-		 */
-		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
-	}
-
- done:
-#ifdef DEBUG
-	if (!error) {
-		if (XFS_IS_UQUOTA_ON(mp))
-			ASSERT(ip->i_udquot);
-		if (XFS_IS_OQUOTA_ON(mp))
-			ASSERT(ip->i_gdquot);
-	}
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
-	return error;
-}
-
-int
-xfs_qm_dqattach(
-	struct xfs_inode	*ip,
-	uint			flags)
-{
-	int			error;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_qm_dqattach_locked(ip, flags);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	return error;
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this's called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
-	xfs_inode_t	*ip)
-{
-	if (!(ip->i_udquot || ip->i_gdquot))
-		return;
-
-	trace_xfs_dquot_dqdetach(ip);
-
-	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
-	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-	if (ip->i_udquot) {
-		xfs_qm_dqrele(ip->i_udquot);
-		ip->i_udquot = NULL;
-	}
-	if (ip->i_gdquot) {
-		xfs_qm_dqrele(ip->i_gdquot);
-		ip->i_gdquot = NULL;
-	}
-}
-
-int
-xfs_qm_sync(
-	struct xfs_mount	*mp,
-	int			flags)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	int			recl, restarts;
-	struct xfs_dquot	*dqp;
-	int			error;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return 0;
-
-	restarts = 0;
-
-  again:
-	mutex_lock(&q->qi_dqlist_lock);
-	/*
-	 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
-	 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-	 * when we have the mplist lock, we know that dquots will be consistent
-	 * as long as we have it locked.
-	 */
-	if (!XFS_IS_QUOTA_ON(mp)) {
-		mutex_unlock(&q->qi_dqlist_lock);
-		return 0;
-	}
-	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-		/*
-		 * If this is vfs_sync calling, then skip the dquots that
-		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
-		 * This is very similar to what xfs_sync does with inodes.
-		 */
-		if (flags & SYNC_TRYLOCK) {
-			if (!XFS_DQ_IS_DIRTY(dqp))
-				continue;
-			if (!xfs_qm_dqlock_nowait(dqp))
-				continue;
-		} else {
-			xfs_dqlock(dqp);
-		}
-
-		/*
-		 * Now, find out for sure if this dquot is dirty or not.
-		 */
-		if (! XFS_DQ_IS_DIRTY(dqp)) {
-			xfs_dqunlock(dqp);
-			continue;
-		}
-
-		/* XXX a sentinel would be better */
-		recl = q->qi_dqreclaims;
-		if (!xfs_dqflock_nowait(dqp)) {
-			if (flags & SYNC_TRYLOCK) {
-				xfs_dqunlock(dqp);
-				continue;
-			}
-			/*
-			 * If we can't grab the flush lock then if the caller
-			 * really wanted us to give this our best shot, so
-			 * see if we can give a push to the buffer before we wait
-			 * on the flush lock. At this point, we know that
-			 * even though the dquot is being flushed,
-			 * it has (new) dirty data.
-			 */
-			xfs_qm_dqflock_pushbuf_wait(dqp);
-		}
-		/*
-		 * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write
-		 */
-		mutex_unlock(&q->qi_dqlist_lock);
-		error = xfs_qm_dqflush(dqp, flags);
-		xfs_dqunlock(dqp);
-		if (error && XFS_FORCED_SHUTDOWN(mp))
-			return 0;	/* Need to prevent umount failure */
-		else if (error)
-			return error;
-
-		mutex_lock(&q->qi_dqlist_lock);
-		if (recl != q->qi_dqreclaims) {
-			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-				break;
-
-			mutex_unlock(&q->qi_dqlist_lock);
-			goto again;
-		}
-	}
-
-	mutex_unlock(&q->qi_dqlist_lock);
-	return 0;
-}
-
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
-	xfs_mount_t	*mp)
-{
-	xfs_quotainfo_t *qinf;
-	int		error;
-	xfs_dquot_t	*dqp;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * Tell XQM that we exist as soon as possible.
-	 */
-	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-		return error;
-	}
-
-	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
-	/*
-	 * See if quotainodes are setup, and if not, allocate them,
-	 * and change the superblock accordingly.
-	 */
-	if ((error = xfs_qm_init_quotainos(mp))) {
-		kmem_free(qinf);
-		mp->m_quotainfo = NULL;
-		return error;
-	}
-
-	INIT_LIST_HEAD(&qinf->qi_dqlist);
-	mutex_init(&qinf->qi_dqlist_lock);
-	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
-
-	qinf->qi_dqreclaims = 0;
-
-	/* mutex used to serialize quotaoffs */
-	mutex_init(&qinf->qi_quotaofflock);
-
-	/* Precalc some constants */
-	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	ASSERT(qinf->qi_dqchunklen);
-	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
-	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
-	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
-	/*
-	 * We try to get the limits from the superuser's limits fields.
-	 * This is quite hacky, but it is standard quota practice.
-	 * We look at the USR dquot with id == 0 first, but if user quotas
-	 * are not enabled we goto the GRP dquot with id == 0.
-	 * We don't really care to keep separate default limits for user
-	 * and group quotas, at least not at this point.
-	 */
-	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
-			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-				XFS_DQ_PROJ),
-			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
-			     &dqp);
-	if (! error) {
-		xfs_disk_dquot_t	*ddqp = &dqp->q_core;
-
-		/*
-		 * The warnings and timers set the grace period given to
-		 * a user or group before he or she can not perform any
-		 * more writing. If it is zero, a default is used.
-		 */
-		qinf->qi_btimelimit = ddqp->d_btimer ?
-			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = ddqp->d_itimer ?
-			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
-			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
-			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
-			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
-		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
-			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
- 
-		/*
-		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-		 * we don't want this dquot cached. We haven't done a
-		 * quotacheck yet, and quotacheck doesn't like incore dquots.
-		 */
-		xfs_qm_dqdestroy(dqp);
-	} else {
-		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
-	}
-
-	return 0;
-}
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
-	xfs_mount_t	*mp)
-{
-	xfs_quotainfo_t *qi;
-
-	qi = mp->m_quotainfo;
-	ASSERT(qi != NULL);
-	ASSERT(xfs_Gqm != NULL);
-
-	/*
-	 * Release the reference that XQM kept, so that we know
-	 * when the XQM structure should be freed. We cannot assume
-	 * that xfs_Gqm is non-null after this point.
-	 */
-	xfs_qm_rele_quotafs_ref(mp);
-
-	ASSERT(list_empty(&qi->qi_dqlist));
-	mutex_destroy(&qi->qi_dqlist_lock);
-
-	if (qi->qi_uquotaip) {
-		IRELE(qi->qi_uquotaip);
-		qi->qi_uquotaip = NULL; /* paranoia */
-	}
-	if (qi->qi_gquotaip) {
-		IRELE(qi->qi_gquotaip);
-		qi->qi_gquotaip = NULL;
-	}
-	mutex_destroy(&qi->qi_quotaofflock);
-	kmem_free(qi);
-	mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-	xfs_dqlist_t	*list,
-	char		*str,
-	int		n)
-{
-	mutex_init(&list->qh_lock);
-	INIT_LIST_HEAD(&list->qh_list);
-	list->qh_version = 0;
-	list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-	xfs_dqlist_t	*list)
-{
-	mutex_destroy(&(list->qh_lock));
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	**ip,
-	__int64_t	sbfields,
-	uint		flags)
-{
-	xfs_trans_t	*tp;
-	int		error;
-	int		committed;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-	if ((error = xfs_trans_reserve(tp,
-				      XFS_QM_QINOCREATE_SPACE_RES(mp),
-				      XFS_CREATE_LOG_RES(mp), 0,
-				      XFS_TRANS_PERM_LOG_RES,
-				      XFS_CREATE_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-	if (error) {
-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-				 XFS_TRANS_ABORT);
-		return error;
-	}
-
-	/*
-	 * Make the changes in the superblock, and log those too.
-	 * sbfields arg may contain fields other than *QUOTINO;
-	 * VERSIONNUM for example.
-	 */
-	spin_lock(&mp->m_sb_lock);
-	if (flags & XFS_QMOPT_SBVERSION) {
-		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
-		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
-		xfs_sb_version_addquota(&mp->m_sb);
-		mp->m_sb.sb_uquotino = NULLFSINO;
-		mp->m_sb.sb_gquotino = NULLFSINO;
-
-		/* qflags will get updated _after_ quotacheck */
-		mp->m_sb.sb_qflags = 0;
-	}
-	if (flags & XFS_QMOPT_UQUOTA)
-		mp->m_sb.sb_uquotino = (*ip)->i_ino;
-	else
-		mp->m_sb.sb_gquotino = (*ip)->i_ino;
-	spin_unlock(&mp->m_sb_lock);
-	xfs_mod_sb(tp, sbfields);
-
-	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
-		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-		return error;
-	}
-	return 0;
-}
-
-
-STATIC void
-xfs_qm_reset_dqcounts(
-	xfs_mount_t	*mp,
-	xfs_buf_t	*bp,
-	xfs_dqid_t	id,
-	uint		type)
-{
-	xfs_disk_dquot_t	*ddq;
-	int			j;
-
-	trace_xfs_reset_dqcounts(bp, _RET_IP_);
-
-	/*
-	 * Reset all counters and timers. They'll be
-	 * started afresh by xfs_qm_quotacheck.
-	 */
-#ifdef DEBUG
-	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-	do_div(j, sizeof(xfs_dqblk_t));
-	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
-#endif
-	ddq = bp->b_addr;
-	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
-		/*
-		 * Do a sanity check, and if needed, repair the dqblk. Don't
-		 * output any warnings because it's perfectly possible to
-		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
-		 */
-		(void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-				      "xfs_quotacheck");
-		ddq->d_bcount = 0;
-		ddq->d_icount = 0;
-		ddq->d_rtbcount = 0;
-		ddq->d_btimer = 0;
-		ddq->d_itimer = 0;
-		ddq->d_rtbtimer = 0;
-		ddq->d_bwarns = 0;
-		ddq->d_iwarns = 0;
-		ddq->d_rtbwarns = 0;
-		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
-	}
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	firstid,
-	xfs_fsblock_t	bno,
-	xfs_filblks_t	blkcnt,
-	uint		flags)
-{
-	xfs_buf_t	*bp;
-	int		error;
-	int		type;
-
-	ASSERT(blkcnt > 0);
-	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
-		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
-	error = 0;
-
-	/*
-	 * Blkcnt arg can be a very big number, and might even be
-	 * larger than the log itself. So, we have to break it up into
-	 * manageable-sized transactions.
-	 * Note that we don't start a permanent transaction here; we might
-	 * not be able to get a log reservation for the whole thing up front,
-	 * and we don't really care to either, because we just discard
-	 * everything if we were to crash in the middle of this loop.
-	 */
-	while (blkcnt--) {
-		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-			      XFS_FSB_TO_DADDR(mp, bno),
-			      mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-		if (error)
-			break;
-
-		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
-		xfs_bdwrite(mp, bp);
-		/*
-		 * goto the next block.
-		 */
-		bno++;
-		firstid += mp->m_quotainfo->qi_dqperchunk;
-	}
-	return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*qip,
-	uint		flags)
-{
-	xfs_bmbt_irec_t		*map;
-	int			i, nmaps;	/* number of map entries */
-	int			error;		/* return value */
-	xfs_fileoff_t		lblkno;
-	xfs_filblks_t		maxlblkcnt;
-	xfs_dqid_t		firstid;
-	xfs_fsblock_t		rablkno;
-	xfs_filblks_t		rablkcnt;
-
-	error = 0;
-	/*
-	 * This looks racy, but we can't keep an inode lock across a
-	 * trans_reserve. But, this gets called during quotacheck, and that
-	 * happens only at mount time which is single threaded.
-	 */
-	if (qip->i_d.di_nblocks == 0)
-		return 0;
-
-	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
-	lblkno = 0;
-	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-	do {
-		nmaps = XFS_DQITER_MAP_SIZE;
-		/*
-		 * We aren't changing the inode itself. Just changing
-		 * some of its data. No new blocks are added here, and
-		 * the inode is never added to the transaction.
-		 */
-		xfs_ilock(qip, XFS_ILOCK_SHARED);
-		error = xfs_bmapi(NULL, qip, lblkno,
-				  maxlblkcnt - lblkno,
-				  XFS_BMAPI_METADATA,
-				  NULL,
-				  0, map, &nmaps, NULL);
-		xfs_iunlock(qip, XFS_ILOCK_SHARED);
-		if (error)
-			break;
-
-		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
-		for (i = 0; i < nmaps; i++) {
-			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-			ASSERT(map[i].br_blockcount);
-
-
-			lblkno += map[i].br_blockcount;
-
-			if (map[i].br_startblock == HOLESTARTBLOCK)
-				continue;
-
-			firstid = (xfs_dqid_t) map[i].br_startoff *
-				mp->m_quotainfo->qi_dqperchunk;
-			/*
-			 * Do a read-ahead on the next extent.
-			 */
-			if ((i+1 < nmaps) &&
-			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
-				rablkcnt =  map[i+1].br_blockcount;
-				rablkno = map[i+1].br_startblock;
-				while (rablkcnt--) {
-					xfs_buf_readahead(mp->m_ddev_targp,
-					       XFS_FSB_TO_DADDR(mp, rablkno),
-					       mp->m_quotainfo->qi_dqchunklen);
-					rablkno++;
-				}
-			}
-			/*
-			 * Iterate thru all the blks in the extent and
-			 * reset the counters of all the dquots inside them.
-			 */
-			if ((error = xfs_qm_dqiter_bufs(mp,
-						       firstid,
-						       map[i].br_startblock,
-						       map[i].br_blockcount,
-						       flags))) {
-				break;
-			}
-		}
-
-		if (error)
-			break;
-	} while (nmaps > 0);
-
-	kmem_free(map);
-
-	return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- *
- * Given the inode, and a dquot id this updates both the incore dqout as well
- * as the buffer copy. This is so that once the quotacheck is done, we can
- * just log all the buffers, as opposed to logging numerous updates to
- * individual dquots.
- */
-STATIC int
-xfs_qm_quotacheck_dqadjust(
-	struct xfs_inode	*ip,
-	xfs_dqid_t		id,
-	uint			type,
-	xfs_qcnt_t		nblks,
-	xfs_qcnt_t		rtblks)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_dquot	*dqp;
-	int			error;
-
-	error = xfs_qm_dqget(mp, ip, id, type,
-			     XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
-	if (error) {
-		/*
-		 * Shouldn't be able to turn off quotas here.
-		 */
-		ASSERT(error != ESRCH);
-		ASSERT(error != ENOENT);
-		return error;
-	}
-
-	trace_xfs_dqadjust(dqp);
-
-	/*
-	 * Adjust the inode count and the block count to reflect this inode's
-	 * resource usage.
-	 */
-	be64_add_cpu(&dqp->q_core.d_icount, 1);
-	dqp->q_res_icount++;
-	if (nblks) {
-		be64_add_cpu(&dqp->q_core.d_bcount, nblks);
-		dqp->q_res_bcount += nblks;
-	}
-	if (rtblks) {
-		be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
-		dqp->q_res_rtbcount += rtblks;
-	}
-
-	/*
-	 * Set default limits, adjust timers (since we changed usages)
-	 *
-	 * There are no timers for the default values set in the root dquot.
-	 */
-	if (dqp->q_core.d_id) {
-		xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
-		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
-	}
-
-	dqp->dq_flags |= XFS_DQ_DIRTY;
-	xfs_qm_dqput(dqp);
-	return 0;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
-	xfs_inode_t	*ip,
-	xfs_qcnt_t	*O_rtblks)
-{
-	xfs_filblks_t	rtblks;			/* total rt blks */
-	xfs_extnum_t	idx;			/* extent record index */
-	xfs_ifork_t	*ifp;			/* inode fork pointer */
-	xfs_extnum_t	nextents;		/* number of extent entries */
-	int		error;
-
-	ASSERT(XFS_IS_REALTIME_INODE(ip));
-	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-			return error;
-	}
-	rtblks = 0;
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	for (idx = 0; idx < nextents; idx++)
-		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
-	*O_rtblks = (xfs_qcnt_t)rtblks;
-	return 0;
-}
-
-/*
- * callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
-	xfs_mount_t	*mp,		/* mount point for filesystem */
-	xfs_ino_t	ino,		/* inode number to get data for */
-	void		__user *buffer,	/* not used */
-	int		ubsize,		/* not used */
-	int		*ubused,	/* not used */
-	int		*res)		/* result code value */
-{
-	xfs_inode_t	*ip;
-	xfs_qcnt_t	nblks, rtblks = 0;
-	int		error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * rootino must have its resources accounted for, not so with the quota
-	 * inodes.
-	 */
-	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-		*res = BULKSTAT_RV_NOTHING;
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-	 * interface expects the inode to be exclusively locked because that's
-	 * the case in all other instances. It's OK that we do this because
-	 * quotacheck is done only at mount time.
-	 */
-	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
-	if (error) {
-		*res = BULKSTAT_RV_NOTHING;
-		return error;
-	}
-
-	ASSERT(ip->i_delayed_blks == 0);
-
-	if (XFS_IS_REALTIME_INODE(ip)) {
-		/*
-		 * Walk thru the extent list and count the realtime blocks.
-		 */
-		error = xfs_qm_get_rtblks(ip, &rtblks);
-		if (error)
-			goto error0;
-	}
-
-	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-
-	/*
-	 * Add the (disk blocks and inode) resources occupied by this
-	 * inode to its dquots. We do this adjustment in the incore dquot,
-	 * and also copy the changes to its buffer.
-	 * We don't care about putting these changes in a transaction
-	 * envelope because if we crash in the middle of a 'quotacheck'
-	 * we have to start from the beginning anyway.
-	 * Once we're done, we'll log all the dquot bufs.
-	 *
-	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
-	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
-	 */
-	if (XFS_IS_UQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-						   XFS_DQ_USER, nblks, rtblks);
-		if (error)
-			goto error0;
-	}
-
-	if (XFS_IS_GQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-						   XFS_DQ_GROUP, nblks, rtblks);
-		if (error)
-			goto error0;
-	}
-
-	if (XFS_IS_PQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-						   XFS_DQ_PROJ, nblks, rtblks);
-		if (error)
-			goto error0;
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	IRELE(ip);
-	*res = BULKSTAT_RV_DIDONE;
-	return 0;
-
-error0:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	IRELE(ip);
-	*res = BULKSTAT_RV_GIVEUP;
-	return error;
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
-	xfs_mount_t	*mp)
-{
-	int		done, count, error;
-	xfs_ino_t	lastino;
-	size_t		structsz;
-	xfs_inode_t	*uip, *gip;
-	uint		flags;
-
-	count = INT_MAX;
-	structsz = 1;
-	lastino = 0;
-	flags = 0;
-
-	ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	/*
-	 * There should be no cached dquots. The (simplistic) quotacheck
-	 * algorithm doesn't like that.
-	 */
-	ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
-	xfs_notice(mp, "Quotacheck needed: Please wait.");
-
-	/*
-	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
-	 * their counters to zero. We need a clean slate.
-	 * We don't log our changes till later.
-	 */
-	uip = mp->m_quotainfo->qi_uquotaip;
-	if (uip) {
-		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
-		if (error)
-			goto error_return;
-		flags |= XFS_UQUOTA_CHKD;
-	}
-
-	gip = mp->m_quotainfo->qi_gquotaip;
-	if (gip) {
-		error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
-					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-		if (error)
-			goto error_return;
-		flags |= XFS_OQUOTA_CHKD;
-	}
-
-	do {
-		/*
-		 * Iterate thru all the inodes in the file system,
-		 * adjusting the corresponding dquot counters in core.
-		 */
-		error = xfs_bulkstat(mp, &lastino, &count,
-				     xfs_qm_dqusage_adjust,
-				     structsz, NULL, &done);
-		if (error)
-			break;
-
-	} while (!done);
-
-	/*
-	 * We've made all the changes that we need to make incore.
-	 * Flush them down to disk buffers if everything was updated
-	 * successfully.
-	 */
-	if (!error)
-		error = xfs_qm_dqflush_all(mp, 0);
-
-	/*
-	 * We can get this error if we couldn't do a dquot allocation inside
-	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-	 * dirty dquots that might be cached, we just want to get rid of them
-	 * and turn quotaoff. The dquots won't be attached to any of the inodes
-	 * at this point (because we intentionally didn't in dqget_noattach).
-	 */
-	if (error) {
-		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-		goto error_return;
-	}
-
-	/*
-	 * We didn't log anything, because if we crashed, we'll have to
-	 * start the quotacheck from scratch anyway. However, we must make
-	 * sure that our dquot changes are secure before we put the
-	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
-	 * flush.
-	 */
-	XFS_bflush(mp->m_ddev_targp);
-
-	/*
-	 * If one type of quotas is off, then it will lose its
-	 * quotachecked status, since we won't be doing accounting for
-	 * that type anymore.
-	 */
-	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
-	mp->m_qflags |= flags;
-
- error_return:
-	if (error) {
-		xfs_warn(mp,
-	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
-			error);
-		/*
-		 * We must turn off quotas.
-		 */
-		ASSERT(mp->m_quotainfo != NULL);
-		ASSERT(xfs_Gqm != NULL);
-		xfs_qm_destroy_quotainfo(mp);
-		if (xfs_mount_reset_sbqflags(mp)) {
-			xfs_warn(mp,
-				"Quotacheck: Failed to reset quota flags.");
-		}
-	} else
-		xfs_notice(mp, "Quotacheck: Done.");
-	return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*uip, *gip;
-	int		error;
-	__int64_t	sbflags;
-	uint		flags;
-
-	ASSERT(mp->m_quotainfo);
-	uip = gip = NULL;
-	sbflags = 0;
-	flags = 0;
-
-	/*
-	 * Get the uquota and gquota inodes
-	 */
-	if (xfs_sb_version_hasquota(&mp->m_sb)) {
-		if (XFS_IS_UQUOTA_ON(mp) &&
-		    mp->m_sb.sb_uquotino != NULLFSINO) {
-			ASSERT(mp->m_sb.sb_uquotino > 0);
-			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-					     0, 0, &uip)))
-				return XFS_ERROR(error);
-		}
-		if (XFS_IS_OQUOTA_ON(mp) &&
-		    mp->m_sb.sb_gquotino != NULLFSINO) {
-			ASSERT(mp->m_sb.sb_gquotino > 0);
-			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-					     0, 0, &gip))) {
-				if (uip)
-					IRELE(uip);
-				return XFS_ERROR(error);
-			}
-		}
-	} else {
-		flags |= XFS_QMOPT_SBVERSION;
-		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
-	}
-
-	/*
-	 * Create the two inodes, if they don't exist already. The changes
-	 * made above will get added to a transaction and logged in one of
-	 * the qino_alloc calls below.  If the device is readonly,
-	 * temporarily switch to read-write to do this.
-	 */
-	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-		if ((error = xfs_qm_qino_alloc(mp, &uip,
-					      sbflags | XFS_SB_UQUOTINO,
-					      flags | XFS_QMOPT_UQUOTA)))
-			return XFS_ERROR(error);
-
-		flags &= ~XFS_QMOPT_SBVERSION;
-	}
-	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
-		flags |= (XFS_IS_GQUOTA_ON(mp) ?
-				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-		error = xfs_qm_qino_alloc(mp, &gip,
-					  sbflags | XFS_SB_GQUOTINO, flags);
-		if (error) {
-			if (uip)
-				IRELE(uip);
-
-			return XFS_ERROR(error);
-		}
-	}
-
-	mp->m_quotainfo->qi_uquotaip = uip;
-	mp->m_quotainfo->qi_gquotaip = gip;
-
-	return 0;
-}
-
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-	xfs_dquot_t	*dqpout;
-	xfs_dquot_t	*dqp;
-	int		restarts;
-	int		startagain;
-
-	restarts = 0;
-	dqpout = NULL;
-
-	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
-	startagain = 0;
-	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-		struct xfs_mount *mp = dqp->q_mount;
-		xfs_dqlock(dqp);
-
-		/*
-		 * We are racing with dqlookup here. Naturally we don't
-		 * want to reclaim a dquot that lookup wants. We release the
-		 * freelist lock and start over, so that lookup will grab
-		 * both the dquot and the freelistlock.
-		 */
-		if (dqp->dq_flags & XFS_DQ_WANT) {
-			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-			trace_xfs_dqreclaim_want(dqp);
-			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-			restarts++;
-			startagain = 1;
-			goto dqunlock;
-		}
-
-		/*
-		 * If the dquot is inactive, we are assured that it is
-		 * not on the mplist or the hashlist, and that makes our
-		 * life easier.
-		 */
-		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-			ASSERT(mp == NULL);
-			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-			ASSERT(list_empty(&dqp->q_hashlist));
-			ASSERT(list_empty(&dqp->q_mplist));
-			list_del_init(&dqp->q_freelist);
-			xfs_Gqm->qm_dqfrlist_cnt--;
-			dqpout = dqp;
-			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-			goto dqunlock;
-		}
-
-		ASSERT(dqp->q_hash);
-		ASSERT(!list_empty(&dqp->q_mplist));
-
-		/*
-		 * Try to grab the flush lock. If this dquot is in the process
-		 * of getting flushed to disk, we don't want to reclaim it.
-		 */
-		if (!xfs_dqflock_nowait(dqp))
-			goto dqunlock;
-
-		/*
-		 * We have the flush lock so we know that this is not in the
-		 * process of being flushed. So, if this is dirty, flush it
-		 * DELWRI so that we don't get a freelist infested with
-		 * dirty dquots.
-		 */
-		if (XFS_DQ_IS_DIRTY(dqp)) {
-			int	error;
-
-			trace_xfs_dqreclaim_dirty(dqp);
-
-			/*
-			 * We flush it delayed write, so don't bother
-			 * releasing the freelist lock.
-			 */
-			error = xfs_qm_dqflush(dqp, 0);
-			if (error) {
-				xfs_warn(mp, "%s: dquot %p flush failed",
-					__func__, dqp);
-			}
-			goto dqunlock;
-		}
-
-		/*
-		 * We're trying to get the hashlock out of order. This races
-		 * with dqlookup; so, we giveup and goto the next dquot if
-		 * we couldn't get the hashlock. This way, we won't starve
-		 * a dqlookup process that holds the hashlock that is
-		 * waiting for the freelist lock.
-		 */
-		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-			restarts++;
-			goto dqfunlock;
-		}
-
-		/*
-		 * This races with dquot allocation code as well as dqflush_all
-		 * and reclaim code. So, if we failed to grab the mplist lock,
-		 * giveup everything and start over.
-		 */
-		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-			restarts++;
-			startagain = 1;
-			goto qhunlock;
-		}
-
-		ASSERT(dqp->q_nrefs == 0);
-		list_del_init(&dqp->q_mplist);
-		mp->m_quotainfo->qi_dquots--;
-		mp->m_quotainfo->qi_dqreclaims++;
-		list_del_init(&dqp->q_hashlist);
-		dqp->q_hash->qh_version++;
-		list_del_init(&dqp->q_freelist);
-		xfs_Gqm->qm_dqfrlist_cnt--;
-		dqpout = dqp;
-		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
-		mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-		xfs_dqfunlock(dqp);
-dqunlock:
-		xfs_dqunlock(dqp);
-		if (dqpout)
-			break;
-		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-			break;
-		if (startagain) {
-			mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-			goto again;
-		}
-	}
-	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-	return dqpout;
-}
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-	int	howmany)
-{
-	int		nreclaimed = 0;
-	xfs_dquot_t	*dqp;
-
-	if (howmany <= 0)
-		return 0;
-
-	while (nreclaimed < howmany) {
-		dqp = xfs_qm_dqreclaim_one();
-		if (!dqp)
-			return nreclaimed;
-		xfs_qm_dqdestroy(dqp);
-		nreclaimed++;
-	}
-	return nreclaimed;
-}
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(
-	struct shrinker	*shrink,
-	struct shrink_control *sc)
-{
-	int	ndqused, nfree, n;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (!kmem_shake_allow(gfp_mask))
-		return 0;
-	if (!xfs_Gqm)
-		return 0;
-
-	nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-	/* incore dquots in all f/s's */
-	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-	ASSERT(ndqused >= 0);
-
-	if (nfree <= ndqused && nfree < ndquot)
-		return 0;
-
-	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
-	n = nfree - ndqused - ndquot;		/* # over target */
-
-	return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-	xfs_dquot_t **O_dqpp)
-{
-	xfs_dquot_t	*dqp;
-
-	/*
-	 * Check against high water mark to see if we want to pop
-	 * a nincompoop dquot off the freelist.
-	 */
-	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-		/*
-		 * Try to recycle a dquot from the freelist.
-		 */
-		if ((dqp = xfs_qm_dqreclaim_one())) {
-			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-			/*
-			 * Just zero the core here. The rest will get
-			 * reinitialized by caller. XXX we shouldn't even
-			 * do this zero ...
-			 */
-			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-			*O_dqpp = dqp;
-			return B_FALSE;
-		}
-		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
-	}
-
-	/*
-	 * Allocate a brand new dquot on the kernel heap and return it
-	 * to the caller to initialize.
-	 */
-	ASSERT(xfs_Gqm->qm_dqzone != NULL);
-	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-	atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-	return B_TRUE;
-}
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
-	xfs_mount_t	*mp,
-	__int64_t	flags)
-{
-	xfs_trans_t	*tp;
-	int		error;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	if ((error = xfs_trans_reserve(tp, 0,
-				      mp->m_sb.sb_sectsize + 128, 0,
-				      0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	xfs_mod_sb(tp, flags);
-	error = xfs_trans_commit(tp, 0);
-
-	return error;
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid, gid and prid make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in	: inode (unlocked)
- * out	: udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
-	struct xfs_inode	*ip,
-	uid_t			uid,
-	gid_t			gid,
-	prid_t			prid,
-	uint			flags,
-	struct xfs_dquot	**O_udqpp,
-	struct xfs_dquot	**O_gdqpp)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_dquot	*uq, *gq;
-	int			error;
-	uint			lockflags;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return 0;
-
-	lockflags = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lockflags);
-
-	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
-		gid = ip->i_d.di_gid;
-
-	/*
-	 * Attach the dquot(s) to this inode, doing a dquot allocation
-	 * if necessary. The dquot(s) will not be locked.
-	 */
-	if (XFS_NOT_DQATTACHED(mp, ip)) {
-		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
-		if (error) {
-			xfs_iunlock(ip, lockflags);
-			return error;
-		}
-	}
-
-	uq = gq = NULL;
-	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
-		if (ip->i_d.di_uid != uid) {
-			/*
-			 * What we need is the dquot that has this uid, and
-			 * if we send the inode to dqget, the uid of the inode
-			 * takes priority over what's sent in the uid argument.
-			 * We must unlock inode here before calling dqget if
-			 * we're not sending the inode, because otherwise
-			 * we'll deadlock by doing trans_reserve while
-			 * holding ilock.
-			 */
-			xfs_iunlock(ip, lockflags);
-			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
-						 XFS_DQ_USER,
-						 XFS_QMOPT_DQALLOC |
-						 XFS_QMOPT_DOWARN,
-						 &uq))) {
-				ASSERT(error != ENOENT);
-				return error;
-			}
-			/*
-			 * Get the ilock in the right order.
-			 */
-			xfs_dqunlock(uq);
-			lockflags = XFS_ILOCK_SHARED;
-			xfs_ilock(ip, lockflags);
-		} else {
-			/*
-			 * Take an extra reference, because we'll return
-			 * this to caller
-			 */
-			ASSERT(ip->i_udquot);
-			uq = ip->i_udquot;
-			xfs_dqlock(uq);
-			XFS_DQHOLD(uq);
-			xfs_dqunlock(uq);
-		}
-	}
-	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
-		if (ip->i_d.di_gid != gid) {
-			xfs_iunlock(ip, lockflags);
-			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
-						 XFS_DQ_GROUP,
-						 XFS_QMOPT_DQALLOC |
-						 XFS_QMOPT_DOWARN,
-						 &gq))) {
-				if (uq)
-					xfs_qm_dqrele(uq);
-				ASSERT(error != ENOENT);
-				return error;
-			}
-			xfs_dqunlock(gq);
-			lockflags = XFS_ILOCK_SHARED;
-			xfs_ilock(ip, lockflags);
-		} else {
-			ASSERT(ip->i_gdquot);
-			gq = ip->i_gdquot;
-			xfs_dqlock(gq);
-			XFS_DQHOLD(gq);
-			xfs_dqunlock(gq);
-		}
-	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-		if (xfs_get_projid(ip) != prid) {
-			xfs_iunlock(ip, lockflags);
-			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
-						 XFS_DQ_PROJ,
-						 XFS_QMOPT_DQALLOC |
-						 XFS_QMOPT_DOWARN,
-						 &gq))) {
-				if (uq)
-					xfs_qm_dqrele(uq);
-				ASSERT(error != ENOENT);
-				return (error);
-			}
-			xfs_dqunlock(gq);
-			lockflags = XFS_ILOCK_SHARED;
-			xfs_ilock(ip, lockflags);
-		} else {
-			ASSERT(ip->i_gdquot);
-			gq = ip->i_gdquot;
-			xfs_dqlock(gq);
-			XFS_DQHOLD(gq);
-			xfs_dqunlock(gq);
-		}
-	}
-	if (uq)
-		trace_xfs_dquot_dqalloc(ip);
-
-	xfs_iunlock(ip, lockflags);
-	if (O_udqpp)
-		*O_udqpp = uq;
-	else if (uq)
-		xfs_qm_dqrele(uq);
-	if (O_gdqpp)
-		*O_gdqpp = gq;
-	else if (gq)
-		xfs_qm_dqrele(gq);
-	return 0;
-}
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	**IO_olddq,
-	xfs_dquot_t	*newdq)
-{
-	xfs_dquot_t	*prevdq;
-	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
-				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
-	/* old dquot */
-	prevdq = *IO_olddq;
-	ASSERT(prevdq);
-	ASSERT(prevdq != newdq);
-
-	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
-	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
-	/* the sparkling new dquot */
-	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
-	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
-	/*
-	 * Take an extra reference, because the inode
-	 * is going to keep this dquot pointer even
-	 * after the trans_commit.
-	 */
-	xfs_dqlock(newdq);
-	XFS_DQHOLD(newdq);
-	xfs_dqunlock(newdq);
-	*IO_olddq = newdq;
-
-	return prevdq;
-}
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp,
-	uint		flags)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	uint		delblks, blkflags, prjflags = 0;
-	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
-	int		error;
-
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	delblks = ip->i_delayed_blks;
-	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
-	blkflags = XFS_IS_REALTIME_INODE(ip) ?
-			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
-	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
-		delblksudq = udqp;
-		/*
-		 * If there are delayed allocation blocks, then we have to
-		 * unreserve those from the old dquot, and add them to the
-		 * new dquot.
-		 */
-		if (delblks) {
-			ASSERT(ip->i_udquot);
-			unresudq = ip->i_udquot;
-		}
-	}
-	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
-		if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-		     xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
-			prjflags = XFS_QMOPT_ENOSPC;
-
-		if (prjflags ||
-		    (XFS_IS_GQUOTA_ON(ip->i_mount) &&
-		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
-			delblksgdq = gdqp;
-			if (delblks) {
-				ASSERT(ip->i_gdquot);
-				unresgdq = ip->i_gdquot;
-			}
-		}
-	}
-
-	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
-				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-				flags | blkflags | prjflags)))
-		return (error);
-
-	/*
-	 * Do the delayed blks reservations/unreservations now. Since, these
-	 * are done without the help of a transaction, if a reservation fails
-	 * its previous reservations won't be automatically undone by trans
-	 * code. So, we have to do it manually here.
-	 */
-	if (delblks) {
-		/*
-		 * Do the reservations first. Unreservation can't fail.
-		 */
-		ASSERT(delblksudq || delblksgdq);
-		ASSERT(unresudq || unresgdq);
-		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-				flags | blkflags | prjflags)))
-			return (error);
-		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-				blkflags);
-	}
-
-	return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
-	struct xfs_inode	**i_tab)
-{
-	struct xfs_mount	*mp = i_tab[0]->i_mount;
-	int			i;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return 0;
-
-	for (i = 0; (i < 4 && i_tab[i]); i++) {
-		struct xfs_inode	*ip = i_tab[i];
-		int			error;
-
-		/*
-		 * Watch out for duplicate entries in the table.
-		 */
-		if (i == 0 || ip != i_tab[i-1]) {
-			if (XFS_NOT_DQATTACHED(mp, ip)) {
-				error = xfs_qm_dqattach(ip, 0);
-				if (error)
-					return error;
-			}
-		}
-	}
-	return 0;
-}
-
-void
-xfs_qm_vop_create_dqattach(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*ip,
-	struct xfs_dquot	*udqp,
-	struct xfs_dquot	*gdqp)
-{
-	struct xfs_mount	*mp = tp->t_mountp;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-	if (udqp) {
-		xfs_dqlock(udqp);
-		XFS_DQHOLD(udqp);
-		xfs_dqunlock(udqp);
-		ASSERT(ip->i_udquot == NULL);
-		ip->i_udquot = udqp;
-		ASSERT(XFS_IS_UQUOTA_ON(mp));
-		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
-		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
-	}
-	if (gdqp) {
-		xfs_dqlock(gdqp);
-		XFS_DQHOLD(gdqp);
-		xfs_dqunlock(gdqp);
-		ASSERT(ip->i_gdquot == NULL);
-		ip->i_gdquot = gdqp;
-		ASSERT(XFS_IS_OQUOTA_ON(mp));
-		ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-			ip->i_d.di_gid : xfs_get_projid(ip)) ==
-				be32_to_cpu(gdqp->q_core.d_id));
-		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
-	}
-}
-
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
deleted file mode 100644
index 43b9abe..0000000
--- a/fs/xfs/quota/xfs_qm.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint		ndquot;
-extern struct mutex	xfs_Gqm_lock;
-extern struct xfs_qm	*xfs_Gqm;
-extern kmem_zone_t	*qm_dqzone;
-extern kmem_zone_t	*qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS	7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS	4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO		2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH		((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
-
-typedef xfs_dqhash_t	xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
-	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
-	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
-	struct list_head qm_dqfrlist;	 /* freelist of dquots */
-	struct mutex	 qm_dqfrlist_lock;
-	int		 qm_dqfrlist_cnt;
-	atomic_t	 qm_totaldquots; /* total incore dquots */
-	uint		 qm_nrefs;	 /* file systems with quota on */
-	int		 qm_dqfree_ratio;/* ratio of free to inuse dquots */
-	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
-	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
-	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
-	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
-	struct list_head qi_dqlist;	 /* all dquots in filesys */
-	struct mutex	 qi_dqlist_lock;
-	int		 qi_dquots;
-	int		 qi_dqreclaims;	 /* a change here indicates
-					    a removal in the dqlist */
-	time_t		 qi_btimelimit;	 /* limit for blks timer */
-	time_t		 qi_itimelimit;	 /* limit for inodes timer */
-	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
-	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for blks warnings */
-	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for inodes warnings */
-	xfs_qwarncnt_t	 qi_rtbwarnlimit;/* limit for rt blks warnings */
-	struct mutex	 qi_quotaofflock;/* to serialize quotaoff */
-	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
-	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
-	xfs_qcnt_t	 qi_bhardlimit;	 /* default data blk hard limit */
-	xfs_qcnt_t	 qi_bsoftlimit;	 /* default data blk soft limit */
-	xfs_qcnt_t	 qi_ihardlimit;	 /* default inode count hard limit */
-	xfs_qcnt_t	 qi_isoftlimit;	 /* default inode count soft limit */
-	xfs_qcnt_t	 qi_rtbhardlimit;/* default realtime blk hard limit */
-	xfs_qcnt_t	 qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
-			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void	xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void	xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS		2
-typedef struct xfs_dquot_acct {
-	xfs_dqtrx_t	dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
-	xfs_dqtrx_t	dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT	(7 * 24*60*60)          /* 1 week */
-#define XFS_QM_RTBTIMELIMIT	(7 * 24*60*60)          /* 1 week */
-#define XFS_QM_ITIMELIMIT	(7 * 24*60*60)          /* 1 week */
-
-#define XFS_QM_BWARNLIMIT	5
-#define XFS_QM_IWARNLIMIT	5
-#define XFS_QM_RTBWARNLIMIT	5
-
-extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int		xfs_qm_quotacheck(xfs_mount_t *);
-extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* dquot stuff */
-extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* quota ops */
-extern int		xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-extern int		xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-extern int		xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-					fs_disk_quota_t *);
-extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
deleted file mode 100644
index a0a829a..0000000
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-
-STATIC void
-xfs_fill_statvfs_from_dquot(
-	struct kstatfs		*statp,
-	xfs_disk_dquot_t	*dp)
-{
-	__uint64_t		limit;
-
-	limit = dp->d_blk_softlimit ?
-		be64_to_cpu(dp->d_blk_softlimit) :
-		be64_to_cpu(dp->d_blk_hardlimit);
-	if (limit && statp->f_blocks > limit) {
-		statp->f_blocks = limit;
-		statp->f_bfree = statp->f_bavail =
-			(statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
-			 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
-	}
-
-	limit = dp->d_ino_softlimit ?
-		be64_to_cpu(dp->d_ino_softlimit) :
-		be64_to_cpu(dp->d_ino_hardlimit);
-	if (limit && statp->f_files > limit) {
-		statp->f_files = limit;
-		statp->f_ffree =
-			(statp->f_files > be64_to_cpu(dp->d_icount)) ?
-			 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
-	}
-}
-
-
-/*
- * Directory tree accounting is implemented using project quotas, where
- * the project identifier is inherited from parent directories.
- * A statvfs (df, etc.) of a directory that is using project quota should
- * return a statvfs of the project, not the entire filesystem.
- * This makes such trees appear as if they are filesystems in themselves.
- */
-void
-xfs_qm_statvfs(
-	xfs_inode_t		*ip,
-	struct kstatfs		*statp)
-{
-	xfs_mount_t		*mp = ip->i_mount;
-	xfs_dquot_t		*dqp;
-
-	if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
-		xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
-		xfs_qm_dqput(dqp);
-	}
-}
-
-int
-xfs_qm_newmount(
-	xfs_mount_t	*mp,
-	uint		*needquotamount,
-	uint		*quotaflags)
-{
-	uint		quotaondisk;
-	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
-	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
-				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
-	if (quotaondisk) {
-		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
-		pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
-		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
-	}
-
-	/*
-	 * If the device itself is read-only, we can't allow
-	 * the user to change the state of quota on the mount -
-	 * this would generate a transaction on the ro device,
-	 * which would lead to an I/O error and shutdown
-	 */
-
-	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
-	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
-	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
-	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
-	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-	    (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
-	    xfs_dev_is_read_only(mp, "changing quota state")) {
-		xfs_warn(mp, "please mount with%s%s%s%s.",
-			(!quotaondisk ? "out quota" : ""),
-			(uquotaondisk ? " usrquota" : ""),
-			(pquotaondisk ? " prjquota" : ""),
-			(gquotaondisk ? " grpquota" : ""));
-		return XFS_ERROR(EPERM);
-	}
-
-	if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
-		/*
-		 * Call mount_quotas at this point only if we won't have to do
-		 * a quotacheck.
-		 */
-		if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
-			/*
-			 * If an error occurred, qm_mount_quotas code
-			 * has already disabled quotas. So, just finish
-			 * mounting, and get on with the boring life
-			 * without disk quotas.
-			 */
-			xfs_qm_mount_quotas(mp);
-		} else {
-			/*
-			 * Clear the quota flags, but remember them. This
-			 * is so that the quota code doesn't get invoked
-			 * before we're ready. This can happen when an
-			 * inode goes inactive and wants to free blocks,
-			 * or via xfs_log_mount_finish.
-			 */
-			*needquotamount = B_TRUE;
-			*quotaflags = mp->m_qflags;
-			mp->m_qflags = 0;
-		}
-	}
-
-	return 0;
-}
-
-void __init
-xfs_qm_init(void)
-{
-	printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-	mutex_init(&xfs_Gqm_lock);
-	xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-	xfs_qm_cleanup_procfs();
-	if (qm_dqzone)
-		kmem_zone_destroy(qm_dqzone);
-	if (qm_dqtrxzone)
-		kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
deleted file mode 100644
index 8671a0b..0000000
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
-	/* maximum; incore; ratio free to inuse; freelist */
-	seq_printf(m, "%d\t%d\t%d\t%u\n",
-			ndquot,
-			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-			xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
-	return 0;
-}
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= xqm_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
-	/* quota performance statistics */
-	seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-			xqmstats.xs_qm_dqreclaims,
-			xqmstats.xs_qm_dqreclaim_misses,
-			xqmstats.xs_qm_dquot_dups,
-			xqmstats.xs_qm_dqcachemisses,
-			xqmstats.xs_qm_dqcachehits,
-			xqmstats.xs_qm_dqwants,
-			xqmstats.xs_qm_dqshake_reclaims,
-			xqmstats.xs_qm_dqinact_reclaims);
-	return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= xqmstat_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
-	proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-	proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
-	remove_proc_entry("fs/xfs/xqm", NULL);
-	remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
deleted file mode 100644
index 5b964fc..0000000
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
-	__uint32_t		xs_qm_dqreclaims;
-	__uint32_t		xs_qm_dqreclaim_misses;
-	__uint32_t		xs_qm_dquot_dups;
-	__uint32_t		xs_qm_dqcachemisses;
-	__uint32_t		xs_qm_dqcachehits;
-	__uint32_t		xs_qm_dqwants;
-	__uint32_t		xs_qm_dqshake_reclaims;
-	__uint32_t		xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count)	( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count)	do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif	/* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
deleted file mode 100644
index 609246f..0000000
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
-					uint);
-STATIC uint	xfs_qm_export_flags(uint);
-STATIC uint	xfs_qm_export_qtype_flags(uint);
-STATIC void	xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-					fs_disk_quota_t *);
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-int
-xfs_qm_scall_quotaoff(
-	xfs_mount_t		*mp,
-	uint			flags)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	uint			dqtype;
-	int			error;
-	uint			inactivate_flags;
-	xfs_qoff_logitem_t	*qoffstart;
-	int			nculprits;
-
-	/*
-	 * No file system can have quotas enabled on disk but not in core.
-	 * Note that quota utilities (like quotaoff) _expect_
-	 * errno == EEXIST here.
-	 */
-	if ((mp->m_qflags & flags) == 0)
-		return XFS_ERROR(EEXIST);
-	error = 0;
-
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-	/*
-	 * We don't want to deal with two quotaoffs messing up each other,
-	 * so we're going to serialize it. quotaoff isn't exactly a performance
-	 * critical thing.
-	 * If quotaoff, then we must be dealing with the root filesystem.
-	 */
-	ASSERT(q);
-	mutex_lock(&q->qi_quotaofflock);
-
-	/*
-	 * If we're just turning off quota enforcement, change mp and go.
-	 */
-	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-		mp->m_qflags &= ~(flags);
-
-		spin_lock(&mp->m_sb_lock);
-		mp->m_sb.sb_qflags = mp->m_qflags;
-		spin_unlock(&mp->m_sb_lock);
-		mutex_unlock(&q->qi_quotaofflock);
-
-		/* XXX what to do if error ? Revert back to old vals incore ? */
-		error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
-		return (error);
-	}
-
-	dqtype = 0;
-	inactivate_flags = 0;
-	/*
-	 * If accounting is off, we must turn enforcement off, clear the
-	 * quota 'CHKD' certificate to make it known that we have to
-	 * do a quotacheck the next time this quota is turned on.
-	 */
-	if (flags & XFS_UQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_UQUOTA;
-		flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-		inactivate_flags |= XFS_UQUOTA_ACTIVE;
-	}
-	if (flags & XFS_GQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_GQUOTA;
-		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-		inactivate_flags |= XFS_GQUOTA_ACTIVE;
-	} else if (flags & XFS_PQUOTA_ACCT) {
-		dqtype |= XFS_QMOPT_PQUOTA;
-		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-		inactivate_flags |= XFS_PQUOTA_ACTIVE;
-	}
-
-	/*
-	 * Nothing to do?  Don't complain. This happens when we're just
-	 * turning off quota enforcement.
-	 */
-	if ((mp->m_qflags & flags) == 0)
-		goto out_unlock;
-
-	/*
-	 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-	 * and synchronously. If we fail to write, we should abort the
-	 * operation as it cannot be recovered safely if we crash.
-	 */
-	error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-	if (error)
-		goto out_unlock;
-
-	/*
-	 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-	 * to take care of the race between dqget and quotaoff. We don't take
-	 * any special locks to reset these bits. All processes need to check
-	 * these bits *after* taking inode lock(s) to see if the particular
-	 * quota type is in the process of being turned off. If *ACTIVE, it is
-	 * guaranteed that all dquot structures and all quotainode ptrs will all
-	 * stay valid as long as that inode is kept locked.
-	 *
-	 * There is no turning back after this.
-	 */
-	mp->m_qflags &= ~inactivate_flags;
-
-	/*
-	 * Give back all the dquot reference(s) held by inodes.
-	 * Here we go thru every single incore inode in this file system, and
-	 * do a dqrele on the i_udquot/i_gdquot that it may have.
-	 * Essentially, as long as somebody has an inode locked, this guarantees
-	 * that quotas will not be turned off. This is handy because in a
-	 * transaction once we lock the inode(s) and check for quotaon, we can
-	 * depend on the quota inodes (and other things) being valid as long as
-	 * we keep the lock(s).
-	 */
-	xfs_qm_dqrele_all_inodes(mp, flags);
-
-	/*
-	 * Next we make the changes in the quota flag in the mount struct.
-	 * This isn't protected by a particular lock directly, because we
-	 * don't want to take a mrlock every time we depend on quotas being on.
-	 */
-	mp->m_qflags &= ~(flags);
-
-	/*
-	 * Go through all the dquots of this file system and purge them,
-	 * according to what was turned off. We may not be able to get rid
-	 * of all dquots, because dquots can have temporary references that
-	 * are not attached to inodes. eg. xfs_setattr, xfs_create.
-	 * So, if we couldn't purge all the dquots from the filesystem,
-	 * we can't get rid of the incore data structures.
-	 */
-	while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
-		delay(10 * nculprits);
-
-	/*
-	 * Transactions that had started before ACTIVE state bit was cleared
-	 * could have logged many dquots, so they'd have higher LSNs than
-	 * the first QUOTAOFF log record does. If we happen to crash when
-	 * the tail of the log has gone past the QUOTAOFF record, but
-	 * before the last dquot modification, those dquots __will__
-	 * recover, and that's not good.
-	 *
-	 * So, we have QUOTAOFF start and end logitems; the start
-	 * logitem won't get overwritten until the end logitem appears...
-	 */
-	error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-	if (error) {
-		/* We're screwed now. Shutdown is the only option. */
-		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-		goto out_unlock;
-	}
-
-	/*
-	 * If quotas is completely disabled, close shop.
-	 */
-	if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
-	    ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
-		mutex_unlock(&q->qi_quotaofflock);
-		xfs_qm_destroy_quotainfo(mp);
-		return (0);
-	}
-
-	/*
-	 * Release our quotainode references if we don't need them anymore.
-	 */
-	if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-		IRELE(q->qi_uquotaip);
-		q->qi_uquotaip = NULL;
-	}
-	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
-		IRELE(q->qi_gquotaip);
-		q->qi_gquotaip = NULL;
-	}
-
-out_unlock:
-	mutex_unlock(&q->qi_quotaofflock);
-	return error;
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfile(
-	struct xfs_mount	*mp,
-	xfs_ino_t		ino)
-{
-	struct xfs_inode	*ip;
-	struct xfs_trans	*tp;
-	int			error;
-
-	if (ino == NULLFSINO)
-		return 0;
-
-	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
-	if (error)
-		return error;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-				  XFS_TRANS_PERM_LOG_RES,
-				  XFS_ITRUNCATE_LOG_COUNT);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-		goto out_put;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip);
-
-	error = xfs_itruncate_data(&tp, ip, 0);
-	if (error) {
-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-				     XFS_TRANS_ABORT);
-		goto out_unlock;
-	}
-
-	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
-out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-out_put:
-	IRELE(ip);
-	return error;
-}
-
-int
-xfs_qm_scall_trunc_qfiles(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		error = 0, error2 = 0;
-
-	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
-		xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
-			__func__, flags, mp->m_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-
-	if (flags & XFS_DQ_USER)
-		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
-	if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
-		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
-
-	return error ? error : error2;
-}
-
-/*
- * Switch on (a given) quota enforcement for a filesystem.  This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-int
-xfs_qm_scall_quotaon(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	int		error;
-	uint		qf;
-	__int64_t	sbflags;
-
-	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-	/*
-	 * Switching on quota accounting must be done at mount time.
-	 */
-	flags &= ~(XFS_ALL_QUOTA_ACCT);
-
-	sbflags = 0;
-
-	if (flags == 0) {
-		xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
-			__func__, mp->m_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-
-	/* No fs can turn on quotas with a delayed effect */
-	ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
-	/*
-	 * Can't enforce without accounting. We check the superblock
-	 * qflags here instead of m_qflags because rootfs can have
-	 * quota acct on ondisk without m_qflags' knowing.
-	 */
-	if (((flags & XFS_UQUOTA_ACCT) == 0 &&
-	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
-	    (flags & XFS_UQUOTA_ENFD))
-	    ||
-	    ((flags & XFS_PQUOTA_ACCT) == 0 &&
-	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
-	    (flags & XFS_GQUOTA_ACCT) == 0 &&
-	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-	    (flags & XFS_OQUOTA_ENFD))) {
-		xfs_debug(mp,
-			"%s: Can't enforce without acct, flags=%x sbflags=%x\n",
-			__func__, flags, mp->m_sb.sb_qflags);
-		return XFS_ERROR(EINVAL);
-	}
-	/*
-	 * If everything's up to-date incore, then don't waste time.
-	 */
-	if ((mp->m_qflags & flags) == flags)
-		return XFS_ERROR(EEXIST);
-
-	/*
-	 * Change sb_qflags on disk but not incore mp->qflags
-	 * if this is the root filesystem.
-	 */
-	spin_lock(&mp->m_sb_lock);
-	qf = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = qf | flags;
-	spin_unlock(&mp->m_sb_lock);
-
-	/*
-	 * There's nothing to change if it's the same.
-	 */
-	if ((qf & flags) == flags && sbflags == 0)
-		return XFS_ERROR(EEXIST);
-	sbflags |= XFS_SB_QFLAGS;
-
-	if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
-		return (error);
-	/*
-	 * If we aren't trying to switch on quota enforcement, we are done.
-	 */
-	if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
-	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
-	     ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
-	     (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
-	     ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
-	     (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
-	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
-		return (0);
-
-	if (! XFS_IS_QUOTA_RUNNING(mp))
-		return XFS_ERROR(ESRCH);
-
-	/*
-	 * Switch on quota enforcement in core.
-	 */
-	mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
-	mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-	mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
-
-	return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-int
-xfs_qm_scall_getqstat(
-	struct xfs_mount	*mp,
-	struct fs_quota_stat	*out)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	struct xfs_inode	*uip, *gip;
-	boolean_t		tempuqip, tempgqip;
-
-	uip = gip = NULL;
-	tempuqip = tempgqip = B_FALSE;
-	memset(out, 0, sizeof(fs_quota_stat_t));
-
-	out->qs_version = FS_QSTAT_VERSION;
-	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
-		out->qs_uquota.qfs_ino = NULLFSINO;
-		out->qs_gquota.qfs_ino = NULLFSINO;
-		return (0);
-	}
-	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
-							(XFS_ALL_QUOTA_ACCT|
-							 XFS_ALL_QUOTA_ENFD));
-	out->qs_pad = 0;
-	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
-	if (q) {
-		uip = q->qi_uquotaip;
-		gip = q->qi_gquotaip;
-	}
-	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
-		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-					0, 0, &uip) == 0)
-			tempuqip = B_TRUE;
-	}
-	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
-		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-					0, 0, &gip) == 0)
-			tempgqip = B_TRUE;
-	}
-	if (uip) {
-		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
-		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
-		if (tempuqip)
-			IRELE(uip);
-	}
-	if (gip) {
-		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
-		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
-		if (tempgqip)
-			IRELE(gip);
-	}
-	if (q) {
-		out->qs_incoredqs = q->qi_dquots;
-		out->qs_btimelimit = q->qi_btimelimit;
-		out->qs_itimelimit = q->qi_itimelimit;
-		out->qs_rtbtimelimit = q->qi_rtbtimelimit;
-		out->qs_bwarnlimit = q->qi_bwarnlimit;
-		out->qs_iwarnlimit = q->qi_iwarnlimit;
-	}
-	return 0;
-}
-
-#define XFS_DQ_MASK \
-	(FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-int
-xfs_qm_scall_setqlim(
-	xfs_mount_t		*mp,
-	xfs_dqid_t		id,
-	uint			type,
-	fs_disk_quota_t		*newlim)
-{
-	struct xfs_quotainfo	*q = mp->m_quotainfo;
-	xfs_disk_dquot_t	*ddq;
-	xfs_dquot_t		*dqp;
-	xfs_trans_t		*tp;
-	int			error;
-	xfs_qcnt_t		hard, soft;
-
-	if (newlim->d_fieldmask & ~XFS_DQ_MASK)
-		return EINVAL;
-	if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
-		return 0;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
-				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-
-	/*
-	 * We don't want to race with a quotaoff so take the quotaoff lock.
-	 * (We don't hold an inode lock, so there's nothing else to stop
-	 * a quotaoff from happening). (XXXThis doesn't currently happen
-	 * because we take the vfslock before calling xfs_qm_sysent).
-	 */
-	mutex_lock(&q->qi_quotaofflock);
-
-	/*
-	 * Get the dquot (locked), and join it to the transaction.
-	 * Allocate the dquot if this doesn't exist.
-	 */
-	if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
-		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-		ASSERT(error != ENOENT);
-		goto out_unlock;
-	}
-	xfs_trans_dqjoin(tp, dqp);
-	ddq = &dqp->q_core;
-
-	/*
-	 * Make sure that hardlimits are >= soft limits before changing.
-	 */
-	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-			be64_to_cpu(ddq->d_blk_hardlimit);
-	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-			be64_to_cpu(ddq->d_blk_softlimit);
-	if (hard == 0 || hard >= soft) {
-		ddq->d_blk_hardlimit = cpu_to_be64(hard);
-		ddq->d_blk_softlimit = cpu_to_be64(soft);
-		if (id == 0) {
-			q->qi_bhardlimit = hard;
-			q->qi_bsoftlimit = soft;
-		}
-	} else {
-		xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
-	}
-	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-			be64_to_cpu(ddq->d_rtb_hardlimit);
-	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
-		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-			be64_to_cpu(ddq->d_rtb_softlimit);
-	if (hard == 0 || hard >= soft) {
-		ddq->d_rtb_hardlimit = cpu_to_be64(hard);
-		ddq->d_rtb_softlimit = cpu_to_be64(soft);
-		if (id == 0) {
-			q->qi_rtbhardlimit = hard;
-			q->qi_rtbsoftlimit = soft;
-		}
-	} else {
-		xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
-	}
-
-	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
-		(xfs_qcnt_t) newlim->d_ino_hardlimit :
-			be64_to_cpu(ddq->d_ino_hardlimit);
-	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
-		(xfs_qcnt_t) newlim->d_ino_softlimit :
-			be64_to_cpu(ddq->d_ino_softlimit);
-	if (hard == 0 || hard >= soft) {
-		ddq->d_ino_hardlimit = cpu_to_be64(hard);
-		ddq->d_ino_softlimit = cpu_to_be64(soft);
-		if (id == 0) {
-			q->qi_ihardlimit = hard;
-			q->qi_isoftlimit = soft;
-		}
-	} else {
-		xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
-	}
-
-	/*
-	 * Update warnings counter(s) if requested
-	 */
-	if (newlim->d_fieldmask & FS_DQ_BWARNS)
-		ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
-	if (newlim->d_fieldmask & FS_DQ_IWARNS)
-		ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
-	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-		ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
-	if (id == 0) {
-		/*
-		 * Timelimits for the super user set the relative time
-		 * the other users can be over quota for this file system.
-		 * If it is zero a default is used.  Ditto for the default
-		 * soft and hard limit values (already done, above), and
-		 * for warnings.
-		 */
-		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-			q->qi_btimelimit = newlim->d_btimer;
-			ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
-		}
-		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-			q->qi_itimelimit = newlim->d_itimer;
-			ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
-		}
-		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-			q->qi_rtbtimelimit = newlim->d_rtbtimer;
-			ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
-		}
-		if (newlim->d_fieldmask & FS_DQ_BWARNS)
-			q->qi_bwarnlimit = newlim->d_bwarns;
-		if (newlim->d_fieldmask & FS_DQ_IWARNS)
-			q->qi_iwarnlimit = newlim->d_iwarns;
-		if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-			q->qi_rtbwarnlimit = newlim->d_rtbwarns;
-	} else {
-		/*
-		 * If the user is now over quota, start the timelimit.
-		 * The user will not be 'warned'.
-		 * Note that we keep the timers ticking, whether enforcement
-		 * is on or off. We don't really want to bother with iterating
-		 * over all ondisk dquots and turning the timers on/off.
-		 */
-		xfs_qm_adjust_dqtimers(mp, ddq);
-	}
-	dqp->dq_flags |= XFS_DQ_DIRTY;
-	xfs_trans_log_dquot(tp, dqp);
-
-	error = xfs_trans_commit(tp, 0);
-	xfs_qm_dqrele(dqp);
-
- out_unlock:
-	mutex_unlock(&q->qi_quotaofflock);
-	return error;
-}
-
-int
-xfs_qm_scall_getquota(
-	xfs_mount_t	*mp,
-	xfs_dqid_t	id,
-	uint		type,
-	fs_disk_quota_t *out)
-{
-	xfs_dquot_t	*dqp;
-	int		error;
-
-	/*
-	 * Try to get the dquot. We don't want it allocated on disk, so
-	 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-	 * exist, we'll get ENOENT back.
-	 */
-	if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-		return (error);
-	}
-
-	/*
-	 * If everything's NULL, this dquot doesn't quite exist as far as
-	 * our utility programs are concerned.
-	 */
-	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-		xfs_qm_dqput(dqp);
-		return XFS_ERROR(ENOENT);
-	}
-	/*
-	 * Convert the disk dquot to the exportable format
-	 */
-	xfs_qm_export_dquot(mp, &dqp->q_core, out);
-	xfs_qm_dqput(dqp);
-	return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-	xfs_mount_t		*mp,
-	xfs_qoff_logitem_t	*startqoff,
-	uint			flags)
-{
-	xfs_trans_t		*tp;
-	int			error;
-	xfs_qoff_logitem_t	*qoffi;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
-				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
-		xfs_trans_cancel(tp, 0);
-		return (error);
-	}
-
-	qoffi = xfs_trans_get_qoff_item(tp, startqoff,
-					flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-	return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
-	xfs_mount_t	       *mp,
-	xfs_qoff_logitem_t     **qoffstartp,
-	uint		       flags)
-{
-	xfs_trans_t	       *tp;
-	int			error;
-	xfs_qoff_logitem_t     *qoffi=NULL;
-	uint			oldsbqflag=0;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-	if ((error = xfs_trans_reserve(tp, 0,
-				      sizeof(xfs_qoff_logitem_t) * 2 +
-				      mp->m_sb.sb_sectsize + 128,
-				      0,
-				      0,
-				      XFS_DEFAULT_LOG_COUNT))) {
-		goto error0;
-	}
-
-	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-	xfs_trans_log_quotaoff_item(tp, qoffi);
-
-	spin_lock(&mp->m_sb_lock);
-	oldsbqflag = mp->m_sb.sb_qflags;
-	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-	spin_unlock(&mp->m_sb_lock);
-
-	xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
-	/*
-	 * We have to make sure that the transaction is secure on disk before we
-	 * return and actually stop quota accounting. So, make it synchronous.
-	 * We don't care about quotoff's performance.
-	 */
-	xfs_trans_set_sync(tp);
-	error = xfs_trans_commit(tp, 0);
-
-error0:
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		/*
-		 * No one else is modifying sb_qflags, so this is OK.
-		 * We still hold the quotaofflock.
-		 */
-		spin_lock(&mp->m_sb_lock);
-		mp->m_sb.sb_qflags = oldsbqflag;
-		spin_unlock(&mp->m_sb_lock);
-	}
-	*qoffstartp = qoffi;
-	return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-	xfs_mount_t		*mp,
-	xfs_disk_dquot_t	*src,
-	struct fs_disk_quota	*dst)
-{
-	memset(dst, 0, sizeof(*dst));
-	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-	dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
-	dst->d_id = be32_to_cpu(src->d_id);
-	dst->d_blk_hardlimit =
-		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
-	dst->d_blk_softlimit =
-		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
-	dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
-	dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
-	dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
-	dst->d_icount = be64_to_cpu(src->d_icount);
-	dst->d_btimer = be32_to_cpu(src->d_btimer);
-	dst->d_itimer = be32_to_cpu(src->d_itimer);
-	dst->d_iwarns = be16_to_cpu(src->d_iwarns);
-	dst->d_bwarns = be16_to_cpu(src->d_bwarns);
-	dst->d_rtb_hardlimit =
-		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
-	dst->d_rtb_softlimit =
-		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
-	dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
-	dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
-	dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
-	/*
-	 * Internally, we don't reset all the timers when quota enforcement
-	 * gets turned off. No need to confuse the user level code,
-	 * so return zeroes in that case.
-	 */
-	if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
-	    (!XFS_IS_OQUOTA_ENFORCED(mp) &&
-			(src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
-		dst->d_btimer = 0;
-		dst->d_itimer = 0;
-		dst->d_rtbtimer = 0;
-	}
-
-#ifdef DEBUG
-	if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
-	     (XFS_IS_OQUOTA_ENFORCED(mp) &&
-			(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
-	    dst->d_id != 0) {
-		if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
-		    (dst->d_blk_softlimit > 0)) {
-			ASSERT(dst->d_btimer != 0);
-		}
-		if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
-		    (dst->d_ino_softlimit > 0)) {
-			ASSERT(dst->d_itimer != 0);
-		}
-	}
-#endif
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
-	uint flags)
-{
-	/*
-	 * Can't be more than one, or none.
-	 */
-	ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
-		(FS_PROJ_QUOTA | FS_USER_QUOTA));
-	ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
-		(FS_PROJ_QUOTA | FS_GROUP_QUOTA));
-	ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
-		(FS_USER_QUOTA | FS_GROUP_QUOTA));
-	ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
-
-	return (flags & XFS_DQ_USER) ?
-		FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
-			FS_PROJ_QUOTA : FS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_export_flags(
-	uint flags)
-{
-	uint uflags;
-
-	uflags = 0;
-	if (flags & XFS_UQUOTA_ACCT)
-		uflags |= FS_QUOTA_UDQ_ACCT;
-	if (flags & XFS_PQUOTA_ACCT)
-		uflags |= FS_QUOTA_PDQ_ACCT;
-	if (flags & XFS_GQUOTA_ACCT)
-		uflags |= FS_QUOTA_GDQ_ACCT;
-	if (flags & XFS_UQUOTA_ENFD)
-		uflags |= FS_QUOTA_UDQ_ENFD;
-	if (flags & (XFS_OQUOTA_ENFD)) {
-		uflags |= (flags & XFS_GQUOTA_ACCT) ?
-			FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
-	}
-	return (uflags);
-}
-
-
-STATIC int
-xfs_dqrele_inode(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag,
-	int			flags)
-{
-	/* skip quota inodes */
-	if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
-	    ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
-		ASSERT(ip->i_udquot == NULL);
-		ASSERT(ip->i_gdquot == NULL);
-		return 0;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-		xfs_qm_dqrele(ip->i_udquot);
-		ip->i_udquot = NULL;
-	}
-	if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
-		xfs_qm_dqrele(ip->i_gdquot);
-		ip->i_gdquot = NULL;
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return 0;
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- *
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff.
- */
-void
-xfs_qm_dqrele_all_inodes(
-	struct xfs_mount *mp,
-	uint		 flags)
-{
-	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
deleted file mode 100644
index 94a3d92..0000000
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE	10
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-				 (__psunsigned_t)(id)) & \
-				(xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-				     (xfs_Gqm->qm_usr_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)) : \
-				     (xfs_Gqm->qm_grp_dqhtable + \
-				      XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
-	!dqp->q_core.d_blk_hardlimit && \
-	!dqp->q_core.d_blk_softlimit && \
-	!dqp->q_core.d_rtb_hardlimit && \
-	!dqp->q_core.d_rtb_softlimit && \
-	!dqp->q_core.d_ino_hardlimit && \
-	!dqp->q_core.d_ino_softlimit && \
-	!dqp->q_core.d_bcount && \
-	!dqp->q_core.d_rtbcount && \
-	!dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
-				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif	/* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
deleted file mode 100644
index 4d00ee6..0000000
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *);
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	ASSERT(dqp->q_transp != tp);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-	ASSERT(dqp->q_logitem.qli_dquot == dqp);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-	/*
-	 * Initialize d_transp so we can later determine if this dquot is
-	 * associated with this transaction.
-	 */
-	dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed.  The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	ASSERT(dqp->q_transp == tp);
-	ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-	tp->t_flags |= XFS_TRANS_DIRTY;
-	dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanky new transaction
- */
-void
-xfs_trans_dup_dqinfo(
-	xfs_trans_t	*otp,
-	xfs_trans_t	*ntp)
-{
-	xfs_dqtrx_t	*oq, *nq;
-	int		i,j;
-	xfs_dqtrx_t	*oqa, *nqa;
-
-	if (!otp->t_dqinfo)
-		return;
-
-	xfs_trans_alloc_dqinfo(ntp);
-	oqa = otp->t_dqinfo->dqa_usrdquots;
-	nqa = ntp->t_dqinfo->dqa_usrdquots;
-
-	/*
-	 * Because the quota blk reservation is carried forward,
-	 * it is also necessary to carry forward the DQ_DIRTY flag.
-	 */
-	if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
-		ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
-	for (j = 0; j < 2; j++) {
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			if (oqa[i].qt_dquot == NULL)
-				break;
-			oq = &oqa[i];
-			nq = &nqa[i];
-
-			nq->qt_dquot = oq->qt_dquot;
-			nq->qt_bcount_delta = nq->qt_icount_delta = 0;
-			nq->qt_rtbcount_delta = 0;
-
-			/*
-			 * Transfer whatever is left of the reservations.
-			 */
-			nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-			oq->qt_blk_res = oq->qt_blk_res_used;
-
-			nq->qt_rtblk_res = oq->qt_rtblk_res -
-				oq->qt_rtblk_res_used;
-			oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
-			nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
-			oq->qt_ino_res = oq->qt_ino_res_used;
-
-		}
-		oqa = otp->t_dqinfo->dqa_grpdquots;
-		nqa = ntp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	uint		field,
-	long		delta)
-{
-	xfs_mount_t	*mp = tp->t_mountp;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) ||
-	    !XFS_IS_QUOTA_ON(mp) ||
-	    ip->i_ino == mp->m_sb.sb_uquotino ||
-	    ip->i_ino == mp->m_sb.sb_gquotino)
-		return;
-
-	if (tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
-	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
-		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-	if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
-		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp)
-{
-	int		i;
-	xfs_dqtrx_t	*qa;
-
-	qa = XFS_QM_ISUDQ(dqp) ?
-		tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
-
-	for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-		if (qa[i].qt_dquot == NULL ||
-		    qa[i].qt_dquot == dqp)
-			return &qa[i];
-	}
-
-	return NULL;
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
-	xfs_trans_t	*tp,
-	xfs_dquot_t	*dqp,
-	uint		field,
-	long		delta)
-{
-	xfs_dqtrx_t	*qtrx;
-
-	ASSERT(tp);
-	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-	qtrx = NULL;
-
-	if (tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-	/*
-	 * Find either the first free slot or the slot that belongs
-	 * to this dquot.
-	 */
-	qtrx = xfs_trans_get_dqtrx(tp, dqp);
-	ASSERT(qtrx);
-	if (qtrx->qt_dquot == NULL)
-		qtrx->qt_dquot = dqp;
-
-	switch (field) {
-
-		/*
-		 * regular disk blk reservation
-		 */
-	      case XFS_TRANS_DQ_RES_BLKS:
-		qtrx->qt_blk_res += (ulong)delta;
-		break;
-
-		/*
-		 * inode reservation
-		 */
-	      case XFS_TRANS_DQ_RES_INOS:
-		qtrx->qt_ino_res += (ulong)delta;
-		break;
-
-		/*
-		 * disk blocks used.
-		 */
-	      case XFS_TRANS_DQ_BCOUNT:
-		if (qtrx->qt_blk_res && delta > 0) {
-			qtrx->qt_blk_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-		}
-		qtrx->qt_bcount_delta += delta;
-		break;
-
-	      case XFS_TRANS_DQ_DELBCOUNT:
-		qtrx->qt_delbcnt_delta += delta;
-		break;
-
-		/*
-		 * Inode Count
-		 */
-	      case XFS_TRANS_DQ_ICOUNT:
-		if (qtrx->qt_ino_res && delta > 0) {
-			qtrx->qt_ino_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
-		}
-		qtrx->qt_icount_delta += delta;
-		break;
-
-		/*
-		 * rtblk reservation
-		 */
-	      case XFS_TRANS_DQ_RES_RTBLKS:
-		qtrx->qt_rtblk_res += (ulong)delta;
-		break;
-
-		/*
-		 * rtblk count
-		 */
-	      case XFS_TRANS_DQ_RTBCOUNT:
-		if (qtrx->qt_rtblk_res && delta > 0) {
-			qtrx->qt_rtblk_res_used += (ulong)delta;
-			ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
-		}
-		qtrx->qt_rtbcount_delta += delta;
-		break;
-
-	      case XFS_TRANS_DQ_DELRTBCOUNT:
-		qtrx->qt_delrtb_delta += delta;
-		break;
-
-	      default:
-		ASSERT(0);
-	}
-	tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots (of one type - usr OR grp),
- * involved in a transaction is 2 and that both usr and grp combined - 3.
- * So, we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
-	xfs_trans_t	*tp,
-	xfs_dqtrx_t	*q)
-{
-	ASSERT(q[0].qt_dquot != NULL);
-	if (q[1].qt_dquot == NULL) {
-		xfs_dqlock(q[0].qt_dquot);
-		xfs_trans_dqjoin(tp, q[0].qt_dquot);
-	} else {
-		ASSERT(XFS_QM_TRANS_MAXDQS == 2);
-		xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
-		xfs_trans_dqjoin(tp, q[0].qt_dquot);
-		xfs_trans_dqjoin(tp, q[1].qt_dquot);
-	}
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go thru all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction.
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
-	xfs_trans_t		*tp)
-{
-	int			i, j;
-	xfs_dquot_t		*dqp;
-	xfs_dqtrx_t		*qtrx, *qa;
-	xfs_disk_dquot_t	*d;
-	long			totalbdelta;
-	long			totalrtbdelta;
-
-	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-		return;
-
-	ASSERT(tp->t_dqinfo);
-	qa = tp->t_dqinfo->dqa_usrdquots;
-	for (j = 0; j < 2; j++) {
-		if (qa[0].qt_dquot == NULL) {
-			qa = tp->t_dqinfo->dqa_grpdquots;
-			continue;
-		}
-
-		/*
-		 * Lock all of the dquots and join them to the transaction.
-		 */
-		xfs_trans_dqlockedjoin(tp, qa);
-
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			qtrx = &qa[i];
-			/*
-			 * The array of dquots is filled
-			 * sequentially, not sparsely.
-			 */
-			if ((dqp = qtrx->qt_dquot) == NULL)
-				break;
-
-			ASSERT(XFS_DQ_IS_LOCKED(dqp));
-			ASSERT(dqp->q_transp == tp);
-
-			/*
-			 * adjust the actual number of blocks used
-			 */
-			d = &dqp->q_core;
-
-			/*
-			 * The issue here is - sometimes we don't make a blkquota
-			 * reservation intentionally to be fair to users
-			 * (when the amount is small). On the other hand,
-			 * delayed allocs do make reservations, but that's
-			 * outside of a transaction, so we have no
-			 * idea how much was really reserved.
-			 * So, here we've accumulated delayed allocation blks and
-			 * non-delay blks. The assumption is that the
-			 * delayed ones are always reserved (outside of a
-			 * transaction), and the others may or may not have
-			 * quota reservations.
-			 */
-			totalbdelta = qtrx->qt_bcount_delta +
-				qtrx->qt_delbcnt_delta;
-			totalrtbdelta = qtrx->qt_rtbcount_delta +
-				qtrx->qt_delrtb_delta;
-#ifdef DEBUG
-			if (totalbdelta < 0)
-				ASSERT(be64_to_cpu(d->d_bcount) >=
-				       -totalbdelta);
-
-			if (totalrtbdelta < 0)
-				ASSERT(be64_to_cpu(d->d_rtbcount) >=
-				       -totalrtbdelta);
-
-			if (qtrx->qt_icount_delta < 0)
-				ASSERT(be64_to_cpu(d->d_icount) >=
-				       -qtrx->qt_icount_delta);
-#endif
-			if (totalbdelta)
-				be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
-
-			if (qtrx->qt_icount_delta)
-				be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
-			if (totalrtbdelta)
-				be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
-
-			/*
-			 * Get any default limits in use.
-			 * Start/reset the timer(s) if needed.
-			 */
-			if (d->d_id) {
-				xfs_qm_adjust_dqlimits(tp->t_mountp, d);
-				xfs_qm_adjust_dqtimers(tp->t_mountp, d);
-			}
-
-			dqp->dq_flags |= XFS_DQ_DIRTY;
-			/*
-			 * add this to the list of items to get logged
-			 */
-			xfs_trans_log_dquot(tp, dqp);
-			/*
-			 * Take off what's left of the original reservation.
-			 * In case of delayed allocations, there's no
-			 * reservation that a transaction structure knows of.
-			 */
-			if (qtrx->qt_blk_res != 0) {
-				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-					if (qtrx->qt_blk_res >
-					    qtrx->qt_blk_res_used)
-						dqp->q_res_bcount -= (xfs_qcnt_t)
-							(qtrx->qt_blk_res -
-							 qtrx->qt_blk_res_used);
-					else
-						dqp->q_res_bcount -= (xfs_qcnt_t)
-							(qtrx->qt_blk_res_used -
-							 qtrx->qt_blk_res);
-				}
-			} else {
-				/*
-				 * These blks were never reserved, either inside
-				 * a transaction or outside one (in a delayed
-				 * allocation). Also, this isn't always a
-				 * negative number since we sometimes
-				 * deliberately skip quota reservations.
-				 */
-				if (qtrx->qt_bcount_delta) {
-					dqp->q_res_bcount +=
-					      (xfs_qcnt_t)qtrx->qt_bcount_delta;
-				}
-			}
-			/*
-			 * Adjust the RT reservation.
-			 */
-			if (qtrx->qt_rtblk_res != 0) {
-				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
-					if (qtrx->qt_rtblk_res >
-					    qtrx->qt_rtblk_res_used)
-					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
-						       (qtrx->qt_rtblk_res -
-							qtrx->qt_rtblk_res_used);
-					else
-					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
-						       (qtrx->qt_rtblk_res_used -
-							qtrx->qt_rtblk_res);
-				}
-			} else {
-				if (qtrx->qt_rtbcount_delta)
-					dqp->q_res_rtbcount +=
-					    (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
-			}
-
-			/*
-			 * Adjust the inode reservation.
-			 */
-			if (qtrx->qt_ino_res != 0) {
-				ASSERT(qtrx->qt_ino_res >=
-				       qtrx->qt_ino_res_used);
-				if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
-					dqp->q_res_icount -= (xfs_qcnt_t)
-						(qtrx->qt_ino_res -
-						 qtrx->qt_ino_res_used);
-			} else {
-				if (qtrx->qt_icount_delta)
-					dqp->q_res_icount +=
-					    (xfs_qcnt_t)qtrx->qt_icount_delta;
-			}
-
-			ASSERT(dqp->q_res_bcount >=
-				be64_to_cpu(dqp->q_core.d_bcount));
-			ASSERT(dqp->q_res_icount >=
-				be64_to_cpu(dqp->q_core.d_icount));
-			ASSERT(dqp->q_res_rtbcount >=
-				be64_to_cpu(dqp->q_core.d_rtbcount));
-		}
-		/*
-		 * Do the group quotas next
-		 */
-		qa = tp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (ie. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
-	xfs_trans_t		*tp)
-{
-	int			i, j;
-	xfs_dquot_t		*dqp;
-	xfs_dqtrx_t		*qtrx, *qa;
-	boolean_t		locked;
-
-	if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-		return;
-
-	qa = tp->t_dqinfo->dqa_usrdquots;
-
-	for (j = 0; j < 2; j++) {
-		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-			qtrx = &qa[i];
-			/*
-			 * We assume that the array of dquots is filled
-			 * sequentially, not sparsely.
-			 */
-			if ((dqp = qtrx->qt_dquot) == NULL)
-				break;
-			/*
-			 * Unreserve the original reservation. We don't care
-			 * about the number of blocks used field, or deltas.
-			 * Also we don't bother to zero the fields.
-			 */
-			locked = B_FALSE;
-			if (qtrx->qt_blk_res) {
-				xfs_dqlock(dqp);
-				locked = B_TRUE;
-				dqp->q_res_bcount -=
-					(xfs_qcnt_t)qtrx->qt_blk_res;
-			}
-			if (qtrx->qt_ino_res) {
-				if (!locked) {
-					xfs_dqlock(dqp);
-					locked = B_TRUE;
-				}
-				dqp->q_res_icount -=
-					(xfs_qcnt_t)qtrx->qt_ino_res;
-			}
-
-			if (qtrx->qt_rtblk_res) {
-				if (!locked) {
-					xfs_dqlock(dqp);
-					locked = B_TRUE;
-				}
-				dqp->q_res_rtbcount -=
-					(xfs_qcnt_t)qtrx->qt_rtblk_res;
-			}
-			if (locked)
-				xfs_dqunlock(dqp);
-
-		}
-		qa = tp->t_dqinfo->dqa_grpdquots;
-	}
-}
-
-STATIC void
-xfs_quota_warn(
-	struct xfs_mount	*mp,
-	struct xfs_dquot	*dqp,
-	int			type)
-{
-	/* no warnings for project quotas - we just return ENOSPC later */
-	if (dqp->dq_flags & XFS_DQ_PROJ)
-		return;
-	quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
-			   be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
-			   type);
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate if the dquot is to be locked here and also
- * if the blk reservation is for RT or regular blocks.
- * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
- */
-STATIC int
-xfs_trans_dqresv(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_dquot_t	*dqp,
-	long		nblks,
-	long		ninos,
-	uint		flags)
-{
-	xfs_qcnt_t	hardlimit;
-	xfs_qcnt_t	softlimit;
-	time_t		timer;
-	xfs_qwarncnt_t	warns;
-	xfs_qwarncnt_t	warnlimit;
-	xfs_qcnt_t	count;
-	xfs_qcnt_t	*resbcountp;
-	xfs_quotainfo_t	*q = mp->m_quotainfo;
-
-
-	xfs_dqlock(dqp);
-
-	if (flags & XFS_TRANS_DQ_RES_BLKS) {
-		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-		if (!hardlimit)
-			hardlimit = q->qi_bhardlimit;
-		softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
-		if (!softlimit)
-			softlimit = q->qi_bsoftlimit;
-		timer = be32_to_cpu(dqp->q_core.d_btimer);
-		warns = be16_to_cpu(dqp->q_core.d_bwarns);
-		warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
-		resbcountp = &dqp->q_res_bcount;
-	} else {
-		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-		hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
-		if (!hardlimit)
-			hardlimit = q->qi_rtbhardlimit;
-		softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
-		if (!softlimit)
-			softlimit = q->qi_rtbsoftlimit;
-		timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
-		warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-		warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
-		resbcountp = &dqp->q_res_rtbcount;
-	}
-
-	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
-	    dqp->q_core.d_id &&
-	    ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
-	     (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
-	      (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-		if (nblks > 0) {
-			/*
-			 * dquot is locked already. See if we'd go over the
-			 * hardlimit or exceed the timelimit if we allocate
-			 * nblks.
-			 */
-			if (hardlimit > 0ULL &&
-			    hardlimit <= nblks + *resbcountp) {
-				xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
-				goto error_return;
-			}
-			if (softlimit > 0ULL &&
-			    softlimit <= nblks + *resbcountp) {
-				if ((timer != 0 && get_seconds() > timer) ||
-				    (warns != 0 && warns >= warnlimit)) {
-					xfs_quota_warn(mp, dqp,
-						       QUOTA_NL_BSOFTLONGWARN);
-					goto error_return;
-				}
-
-				xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
-			}
-		}
-		if (ninos > 0) {
-			count = be64_to_cpu(dqp->q_core.d_icount);
-			timer = be32_to_cpu(dqp->q_core.d_itimer);
-			warns = be16_to_cpu(dqp->q_core.d_iwarns);
-			warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
-			hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-			if (!hardlimit)
-				hardlimit = q->qi_ihardlimit;
-			softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
-			if (!softlimit)
-				softlimit = q->qi_isoftlimit;
-
-			if (hardlimit > 0ULL && count >= hardlimit) {
-				xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
-				goto error_return;
-			}
-			if (softlimit > 0ULL && count >= softlimit) {
-				if  ((timer != 0 && get_seconds() > timer) ||
-				     (warns != 0 && warns >= warnlimit)) {
-					xfs_quota_warn(mp, dqp,
-						       QUOTA_NL_ISOFTLONGWARN);
-					goto error_return;
-				}
-				xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
-			}
-		}
-	}
-
-	/*
-	 * Change the reservation, but not the actual usage.
-	 * Note that q_res_bcount = q_core.d_bcount + resv
-	 */
-	(*resbcountp) += (xfs_qcnt_t)nblks;
-	if (ninos != 0)
-		dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
-	/*
-	 * note the reservation amt in the trans struct too,
-	 * so that the transaction knows how much was reserved by
-	 * it against this particular dquot.
-	 * We don't do this when we are reserving for a delayed allocation,
-	 * because we don't have the luxury of a transaction envelope then.
-	 */
-	if (tp) {
-		ASSERT(tp->t_dqinfo);
-		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-		if (nblks != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    flags & XFS_QMOPT_RESBLK_MASK,
-					    nblks);
-		if (ninos != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    XFS_TRANS_DQ_RES_INOS,
-					    ninos);
-	}
-	ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
-	ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
-	ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
-
-	xfs_dqunlock(dqp);
-	return 0;
-
-error_return:
-	xfs_dqunlock(dqp);
-	if (flags & XFS_QMOPT_ENOSPC)
-		return ENOSPC;
-	return EDQUOT;
-}
-
-
-/*
- * Given dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp/prj quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *	   XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
- *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
-	xfs_trans_t	*tp,
-	xfs_mount_t	*mp,
-	xfs_dquot_t	*udqp,
-	xfs_dquot_t	*gdqp,
-	long		nblks,
-	long		ninos,
-	uint		flags)
-{
-	int		resvd = 0, error;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return 0;
-
-	if (tp && tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
-	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-
-	if (udqp) {
-		error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
-					(flags & ~XFS_QMOPT_ENOSPC));
-		if (error)
-			return error;
-		resvd = 1;
-	}
-
-	if (gdqp) {
-		error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
-		if (error) {
-			/*
-			 * can't do it, so backout previous reservation
-			 */
-			if (resvd) {
-				flags |= XFS_QMOPT_FORCE_RES;
-				xfs_trans_dqresv(tp, mp, udqp,
-						 -nblks, -ninos, flags);
-			}
-			return error;
-		}
-	}
-
-	/*
-	 * Didn't change anything critical, so, no need to log
-	 */
-	return 0;
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- */
-int
-xfs_trans_reserve_quota_nblks(
-	struct xfs_trans	*tp,
-	struct xfs_inode	*ip,
-	long			nblks,
-	long			ninos,
-	uint			flags)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return 0;
-	if (XFS_IS_PQUOTA_ON(mp))
-		flags |= XFS_QMOPT_ENOSPC;
-
-	ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
-	ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-				XFS_TRANS_DQ_RES_RTBLKS ||
-	       (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-				XFS_TRANS_DQ_RES_BLKS);
-
-	/*
-	 * Reserve nblks against these dquots, with trans as the mediator.
-	 */
-	return xfs_trans_reserve_quota_bydquots(tp, mp,
-						ip->i_udquot, ip->i_gdquot,
-						nblks, ninos, flags);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
-	xfs_trans_t		*tp,
-	xfs_qoff_logitem_t	*startqoff,
-	uint			flags)
-{
-	xfs_qoff_logitem_t	*q;
-
-	ASSERT(tp != NULL);
-
-	q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-	ASSERT(q != NULL);
-
-	/*
-	 * Get a log_item_desc to point at the new item.
-	 */
-	xfs_trans_add_item(tp, &q->qql_item);
-	return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-	xfs_trans_t		*tp,
-	xfs_qoff_logitem_t	*qlp)
-{
-	tp->t_flags |= XFS_TRANS_DIRTY;
-	qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-STATIC void
-xfs_trans_alloc_dqinfo(
-	xfs_trans_t	*tp)
-{
-	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
-	xfs_trans_t	*tp)
-{
-	if (!tp->t_dqinfo)
-		return;
-	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
-	tp->t_dqinfo = NULL;
-}
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
deleted file mode 100644
index b83f76b..0000000
--- a/fs/xfs/support/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-	__be32	uu_timelow;
-	__be16	uu_timemid;
-	__be16	uu_timehi;
-	__be16	uu_clockseq;
-	__be16	uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-	xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-		   be16_to_cpu(uup->uu_timemid);
-	fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-	int	i;
-	char	*cp = (char *)uuid;
-
-	if (uuid == NULL)
-		return 0;
-	/* implied check of version number here... */
-	for (i = 0; i < sizeof *uuid; i++)
-		if (*cp++) return 0;	/* not nil */
-	return 1;	/* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
deleted file mode 100644
index 4732d71..0000000
--- a/fs/xfs/support/uuid.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-	unsigned char	__u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-#endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/time.h b/fs/xfs/time.h
new file mode 100644
index 0000000..387e695
--- /dev/null
+++ b/fs/xfs/time.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+	schedule_timeout_uninterruptible(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+	*tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
new file mode 100644
index 0000000..b83f76b
--- /dev/null
+++ b/fs/xfs/uuid.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <xfs.h>
+
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+	__be32	uu_timelow;
+	__be16	uu_timemid;
+	__be16	uu_timehi;
+	__be16	uu_clockseq;
+	__be16	uu_node[3];
+} xfs_uu_t;
+
+/*
+ * uuid_getnodeuniq - obtain the node unique fields of a UUID.
+ *
+ * This is not in any way a standard or condoned UUID function;
+ * it just something that's needed for user-level file handles.
+ */
+void
+uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
+{
+	xfs_uu_t *uup = (xfs_uu_t *)uuid;
+
+	fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+		   be16_to_cpu(uup->uu_timemid);
+	fsid[1] = be32_to_cpu(uup->uu_timelow);
+}
+
+int
+uuid_is_nil(uuid_t *uuid)
+{
+	int	i;
+	char	*cp = (char *)uuid;
+
+	if (uuid == NULL)
+		return 0;
+	/* implied check of version number here... */
+	for (i = 0; i < sizeof *uuid; i++)
+		if (*cp++) return 0;	/* not nil */
+	return 1;	/* is nil */
+}
+
+int
+uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
+{
+	return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
+}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
new file mode 100644
index 0000000..4732d71
--- /dev/null
+++ b/fs/xfs/uuid.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_UUID_H__
+#define __XFS_SUPPORT_UUID_H__
+
+typedef struct {
+	unsigned char	__u_bits[16];
+} uuid_t;
+
+extern int uuid_is_nil(uuid_t *uuid);
+extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
+extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
+
+#endif	/* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
new file mode 100644
index 0000000..b6c4b37
--- /dev/null
+++ b/fs/xfs/xfs_acl.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+	struct posix_acl_entry *acl_e;
+	struct posix_acl *acl;
+	struct xfs_acl_entry *ace;
+	int count, i;
+
+	count = be32_to_cpu(aclp->acl_cnt);
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		acl_e = &acl->a_entries[i];
+		ace = &aclp->acl_entry[i];
+
+		/*
+		 * The tag is 32 bits on disk and 16 bits in core.
+		 *
+		 * Because every access to it goes through the core
+		 * format first this is not a problem.
+		 */
+		acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+		acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+		switch (acl_e->e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			acl_e->e_id = be32_to_cpu(ace->ae_id);
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl_e->e_id = ACL_UNDEFINED_ID;
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return acl;
+
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+	const struct posix_acl_entry *acl_e;
+	struct xfs_acl_entry *ace;
+	int i;
+
+	aclp->acl_cnt = cpu_to_be32(acl->a_count);
+	for (i = 0; i < acl->a_count; i++) {
+		ace = &aclp->acl_entry[i];
+		acl_e = &acl->a_entries[i];
+
+		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+		ace->ae_id = cpu_to_be32(acl_e->e_id);
+		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+	}
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	struct posix_acl *acl;
+	struct xfs_acl *xfs_acl;
+	int len = sizeof(struct xfs_acl);
+	unsigned char *ea_name;
+	int error;
+
+	acl = get_cached_acl(inode, type);
+	if (acl != ACL_NOT_CACHED)
+		return acl;
+
+	trace_xfs_get_acl(ip);
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		break;
+	case ACL_TYPE_DEFAULT:
+		ea_name = SGI_ACL_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+
+	/*
+	 * If we have a cached ACLs value just return it, not need to
+	 * go out to the disk.
+	 */
+
+	xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+	if (!xfs_acl)
+		return ERR_PTR(-ENOMEM);
+
+	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+							&len, ATTR_ROOT);
+	if (error) {
+		/*
+		 * If the attribute doesn't exist make sure we have a negative
+		 * cache entry, for any other error assume it is transient and
+		 * leave the cache entry as ACL_NOT_CACHED.
+		 */
+		if (error == -ENOATTR) {
+			acl = NULL;
+			goto out_update_cache;
+		}
+		goto out;
+	}
+
+	acl = xfs_acl_from_disk(xfs_acl);
+	if (IS_ERR(acl))
+		goto out;
+
+ out_update_cache:
+	set_cached_acl(inode, type, acl);
+ out:
+	kfree(xfs_acl);
+	return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	unsigned char *ea_name;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		ea_name = SGI_ACL_FILE;
+		break;
+	case ACL_TYPE_DEFAULT:
+		if (!S_ISDIR(inode->i_mode))
+			return acl ? -EACCES : 0;
+		ea_name = SGI_ACL_DEFAULT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (acl) {
+		struct xfs_acl *xfs_acl;
+		int len;
+
+		xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+		if (!xfs_acl)
+			return -ENOMEM;
+
+		xfs_acl_to_disk(xfs_acl, acl);
+		len = sizeof(struct xfs_acl) -
+			(sizeof(struct xfs_acl_entry) *
+			 (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+		error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+				len, ATTR_ROOT);
+
+		kfree(xfs_acl);
+	} else {
+		/*
+		 * A NULL ACL argument means we want to remove the ACL.
+		 */
+		error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+		/*
+		 * If the attribute didn't exist to start with that's fine.
+		 */
+		if (error == -ENOATTR)
+			error = 0;
+	}
+
+	if (!error)
+		set_cached_acl(inode, type, acl);
+	return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, umode_t mode)
+{
+	int error = 0;
+
+	if (mode != inode->i_mode) {
+		struct iattr iattr;
+
+		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
+		iattr.ia_mode = mode;
+		iattr.ia_ctime = current_fs_time(inode->i_sb);
+
+		error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+	}
+
+	return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, unsigned char *name)
+{
+	int len = sizeof(struct xfs_acl);
+
+	return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+			    ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+	return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+	if (!S_ISDIR(inode->i_mode))
+		return 0;
+	return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
+{
+	umode_t mode = inode->i_mode;
+	int error = 0, inherit = 0;
+
+	if (S_ISDIR(inode->i_mode)) {
+		error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+		if (error)
+			goto out;
+	}
+
+	error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+	if (error < 0)
+		return error;
+
+	/*
+	 * If posix_acl_create returns a positive value we need to
+	 * inherit a permission that can't be represented using the Unix
+	 * mode bits and we actually need to set an ACL.
+	 */
+	if (error > 0)
+		inherit = 1;
+
+	error = xfs_set_mode(inode, mode);
+	if (error)
+		goto out;
+
+	if (inherit)
+		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+
+out:
+	posix_acl_release(acl);
+	return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+	struct posix_acl *acl;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+
+	error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (error)
+		return error;
+
+	error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+	posix_acl_release(acl);
+	return error;
+}
+
+static int
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
+{
+	struct posix_acl *acl;
+	int error;
+
+	acl = xfs_get_acl(dentry->d_inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+
+	error = posix_acl_to_xattr(acl, value, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+static int
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	struct inode *inode = dentry->d_inode;
+	struct posix_acl *acl = NULL;
+	int error = 0;
+
+	if (flags & XATTR_CREATE)
+		return -EINVAL;
+	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+		return value ? -EACCES : 0;
+	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+		return -EPERM;
+
+	if (!value)
+		goto set_acl;
+
+	acl = posix_acl_from_xattr(value, size);
+	if (!acl) {
+		/*
+		 * acl_set_file(3) may request that we set default ACLs with
+		 * zero length -- defend (gracefully) against that here.
+		 */
+		goto out;
+	}
+	if (IS_ERR(acl)) {
+		error = PTR_ERR(acl);
+		goto out;
+	}
+
+	error = posix_acl_valid(acl);
+	if (error)
+		goto out_release;
+
+	error = -EINVAL;
+	if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+		goto out_release;
+
+	if (type == ACL_TYPE_ACCESS) {
+		umode_t mode = inode->i_mode;
+		error = posix_acl_equiv_mode(acl, &mode);
+
+		if (error <= 0) {
+			posix_acl_release(acl);
+			acl = NULL;
+
+			if (error < 0)
+				return error;
+		}
+
+		error = xfs_set_mode(inode, mode);
+		if (error)
+			goto out_release;
+	}
+
+ set_acl:
+	error = xfs_set_acl(inode, type, acl);
+ out_release:
+	posix_acl_release(acl);
+ out:
+	return error;
+}
+
+const struct xattr_handler xfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
+
+const struct xattr_handler xfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
new file mode 100644
index 0000000..63e971e
--- /dev/null
+++ b/fs/xfs/xfs_aops.c
@@ -0,0 +1,1499 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC		37
+#define to_ioend_wq(v)	(&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+	int i;
+
+	for (i = 0; i < NVSYNC; i++)
+		init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+	xfs_inode_t	*ip)
+{
+	wait_queue_head_t *wq = to_ioend_wq(ip);
+
+	wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+	xfs_inode_t	*ip)
+{
+	if (atomic_dec_and_test(&ip->i_iocount))
+		wake_up(to_ioend_wq(ip));
+}
+
+void
+xfs_count_page_state(
+	struct page		*page,
+	int			*delalloc,
+	int			*unwritten)
+{
+	struct buffer_head	*bh, *head;
+
+	*delalloc = *unwritten = 0;
+
+	bh = head = page_buffers(page);
+	do {
+		if (buffer_unwritten(bh))
+			(*unwritten) = 1;
+		else if (buffer_delay(bh))
+			(*delalloc) = 1;
+	} while ((bh = bh->b_this_page) != head);
+}
+
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+	struct inode		*inode)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (XFS_IS_REALTIME_INODE(ip))
+		return mp->m_rtdev_targp->bt_bdev;
+	else
+		return mp->m_ddev_targp->bt_bdev;
+}
+
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
+STATIC void
+xfs_destroy_ioend(
+	xfs_ioend_t		*ioend)
+{
+	struct buffer_head	*bh, *next;
+	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
+
+	for (bh = ioend->io_buffer_head; bh; bh = next) {
+		next = bh->b_private;
+		bh->b_end_io(bh, !ioend->io_error);
+	}
+
+	/*
+	 * Volume managers supporting multiple paths can send back ENODEV
+	 * when the final path disappears.  In this case continuing to fill
+	 * the page cache with dirty data which cannot be written out is
+	 * evil, so prevent that.
+	 */
+	if (unlikely(ioend->io_error == -ENODEV)) {
+		xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+				      __FILE__, __LINE__);
+	}
+
+	xfs_ioend_wake(ip);
+	mempool_free(ioend, xfs_ioend_pool);
+}
+
+/*
+ * If the end of the current ioend is beyond the current EOF,
+ * return the new EOF value, otherwise zero.
+ */
+STATIC xfs_fsize_t
+xfs_ioend_new_eof(
+	xfs_ioend_t		*ioend)
+{
+	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	xfs_fsize_t		isize;
+	xfs_fsize_t		bsize;
+
+	bsize = ioend->io_offset + ioend->io_size;
+	isize = MAX(ip->i_size, ip->i_new_size);
+	isize = MIN(isize, bsize);
+	return isize > ip->i_d.di_size ? isize : 0;
+}
+
+/*
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
+ */
+STATIC int
+xfs_setfilesize(
+	xfs_ioend_t		*ioend)
+{
+	xfs_inode_t		*ip = XFS_I(ioend->io_inode);
+	xfs_fsize_t		isize;
+
+	if (unlikely(ioend->io_error))
+		return 0;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return EAGAIN;
+
+	isize = xfs_ioend_new_eof(ioend);
+	if (isize) {
+		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+		ip->i_d.di_size = isize;
+		xfs_mark_inode_dirty(ip);
+	}
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return 0;
+}
+
+/*
+ * Schedule IO completion handling on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+	struct xfs_ioend	*ioend)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining)) {
+		if (ioend->io_type == IO_UNWRITTEN)
+			queue_work(xfsconvertd_workqueue, &ioend->io_work);
+		else
+			queue_work(xfsdatad_workqueue, &ioend->io_work);
+	}
+}
+
+/*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+	struct work_struct *work)
+{
+	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
+	struct xfs_inode *ip = XFS_I(ioend->io_inode);
+	int		error = 0;
+
+	/*
+	 * For unwritten extents we need to issue transactions to convert a
+	 * range to normal written extens after the data I/O has finished.
+	 */
+	if (ioend->io_type == IO_UNWRITTEN &&
+	    likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+						 ioend->io_size);
+		if (error)
+			ioend->io_error = error;
+	}
+
+	/*
+	 * We might have to update the on-disk file size after extending
+	 * writes.
+	 */
+	error = xfs_setfilesize(ioend);
+	ASSERT(!error || error == EAGAIN);
+
+	/*
+	 * If we didn't complete processing of the ioend, requeue it to the
+	 * tail of the workqueue for another attempt later. Otherwise destroy
+	 * it.
+	 */
+	if (error == EAGAIN) {
+		atomic_inc(&ioend->io_remaining);
+		xfs_finish_ioend(ioend);
+		/* ensure we don't spin on blocked ioends */
+		delay(1);
+	} else {
+		if (ioend->io_iocb)
+			aio_complete(ioend->io_iocb, ioend->io_result, 0);
+		xfs_destroy_ioend(ioend);
+	}
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+	struct xfs_ioend	*ioend)
+{
+	if (atomic_dec_and_test(&ioend->io_remaining))
+		xfs_end_io(&ioend->io_work);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+	struct inode		*inode,
+	unsigned int		type)
+{
+	xfs_ioend_t		*ioend;
+
+	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+	/*
+	 * Set the count to 1 initially, which will prevent an I/O
+	 * completion callback from happening before we have started
+	 * all the I/O from calling the completion routine too early.
+	 */
+	atomic_set(&ioend->io_remaining, 1);
+	ioend->io_error = 0;
+	ioend->io_list = NULL;
+	ioend->io_type = type;
+	ioend->io_inode = inode;
+	ioend->io_buffer_head = NULL;
+	ioend->io_buffer_tail = NULL;
+	atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
+	ioend->io_offset = 0;
+	ioend->io_size = 0;
+	ioend->io_iocb = NULL;
+	ioend->io_result = 0;
+
+	INIT_WORK(&ioend->io_work, xfs_end_io);
+	return ioend;
+}
+
+STATIC int
+xfs_map_blocks(
+	struct inode		*inode,
+	loff_t			offset,
+	struct xfs_bmbt_irec	*imap,
+	int			type,
+	int			nonblocking)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			count = 1 << inode->i_blkbits;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			bmapi_flags = XFS_BMAPI_ENTIRE;
+	int			nimaps = 1;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (type == IO_UNWRITTEN)
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (nonblocking)
+			return -XFS_ERROR(EAGAIN);
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+	}
+
+	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+	       (ip->i_df.if_flags & XFS_IFEXTENTS));
+	ASSERT(offset <= mp->m_maxioffset);
+
+	if (offset + count > mp->m_maxioffset)
+		count = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (error)
+		return -XFS_ERROR(error);
+
+	if (type == IO_DELALLOC &&
+	    (!nimaps || isnullstartblock(imap->br_startblock))) {
+		error = xfs_iomap_write_allocate(ip, offset, count, imap);
+		if (!error)
+			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+		return -XFS_ERROR(error);
+	}
+
+#ifdef DEBUG
+	if (type == IO_UNWRITTEN) {
+		ASSERT(nimaps);
+		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+	}
+#endif
+	if (nimaps)
+		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+	return 0;
+}
+
+STATIC int
+xfs_imap_valid(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	offset >>= inode->i_blkbits;
+
+	return offset >= imap->br_startoff &&
+		offset < imap->br_startoff + imap->br_blockcount;
+}
+
+/*
+ * BIO completion handler for buffered IO.
+ */
+STATIC void
+xfs_end_bio(
+	struct bio		*bio,
+	int			error)
+{
+	xfs_ioend_t		*ioend = bio->bi_private;
+
+	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+
+	/* Toss bio and pass work off to an xfsdatad thread */
+	bio->bi_private = NULL;
+	bio->bi_end_io = NULL;
+	bio_put(bio);
+
+	xfs_finish_ioend(ioend);
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+	struct writeback_control *wbc,
+	xfs_ioend_t		*ioend,
+	struct bio		*bio)
+{
+	atomic_inc(&ioend->io_remaining);
+	bio->bi_private = ioend;
+	bio->bi_end_io = xfs_end_bio;
+
+	/*
+	 * If the I/O is beyond EOF we mark the inode dirty immediately
+	 * but don't update the inode size until I/O completion.
+	 */
+	if (xfs_ioend_new_eof(ioend))
+		xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+
+	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+	struct buffer_head	*bh)
+{
+	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
+	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
+
+	ASSERT(bio->bi_private == NULL);
+	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+	return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+	struct buffer_head	*bh)
+{
+	ASSERT(buffer_mapped(bh));
+	ASSERT(buffer_locked(bh));
+	ASSERT(!buffer_delay(bh));
+	ASSERT(!buffer_unwritten(bh));
+
+	mark_buffer_async_write(bh);
+	set_buffer_uptodate(bh);
+	clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
+	struct page		*page,
+	int			clear_dirty,
+	int			buffers)
+{
+	ASSERT(PageLocked(page));
+	ASSERT(!PageWriteback(page));
+	if (clear_dirty)
+		clear_page_dirty_for_io(page);
+	set_page_writeback(page);
+	unlock_page(page);
+	/* If no buffers on the page are to be written, finish it here */
+	if (!buffers)
+		end_page_writeback(page);
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we got, then we can end up with a page that only has buffers
+ * marked async write and I/O complete on can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
+ */
+STATIC void
+xfs_submit_ioend(
+	struct writeback_control *wbc,
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*head = ioend;
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh;
+	struct bio		*bio;
+	sector_t		lastblock = 0;
+
+	/* Pass 1 - start writeback */
+	do {
+		next = ioend->io_list;
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
+			xfs_start_buffer_writeback(bh);
+	} while ((ioend = next) != NULL);
+
+	/* Pass 2 - submit I/O */
+	ioend = head;
+	do {
+		next = ioend->io_list;
+		bio = NULL;
+
+		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+			if (!bio) {
+ retry:
+				bio = xfs_alloc_ioend_bio(bh);
+			} else if (bh->b_blocknr != lastblock + 1) {
+				xfs_submit_ioend_bio(wbc, ioend, bio);
+				goto retry;
+			}
+
+			if (bio_add_buffer(bio, bh) != bh->b_size) {
+				xfs_submit_ioend_bio(wbc, ioend, bio);
+				goto retry;
+			}
+
+			lastblock = bh->b_blocknr;
+		}
+		if (bio)
+			xfs_submit_ioend_bio(wbc, ioend, bio);
+		xfs_finish_ioend(ioend);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+	xfs_ioend_t		*ioend)
+{
+	xfs_ioend_t		*next;
+	struct buffer_head	*bh, *next_bh;
+
+	do {
+		next = ioend->io_list;
+		bh = ioend->io_buffer_head;
+		do {
+			next_bh = bh->b_private;
+			clear_buffer_async_write(bh);
+			unlock_buffer(bh);
+		} while ((bh = next_bh) != NULL);
+
+		xfs_ioend_wake(XFS_I(ioend->io_inode));
+		mempool_free(ioend, xfs_ioend_pool);
+	} while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	xfs_off_t		offset,
+	unsigned int		type,
+	xfs_ioend_t		**result,
+	int			need_ioend)
+{
+	xfs_ioend_t		*ioend = *result;
+
+	if (!ioend || need_ioend || type != ioend->io_type) {
+		xfs_ioend_t	*previous = *result;
+
+		ioend = xfs_alloc_ioend(inode, type);
+		ioend->io_offset = offset;
+		ioend->io_buffer_head = bh;
+		ioend->io_buffer_tail = bh;
+		if (previous)
+			previous->io_list = ioend;
+		*result = ioend;
+	} else {
+		ioend->io_buffer_tail->b_private = bh;
+		ioend->io_buffer_tail = bh;
+	}
+
+	bh->b_private = NULL;
+	ioend->io_size += bh->b_size;
+}
+
+STATIC void
+xfs_map_buffer(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	sector_t		bn;
+	struct xfs_mount	*m = XFS_I(inode)->i_mount;
+	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
+
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+	      ((offset - iomap_offset) >> inode->i_blkbits);
+
+	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
+
+	bh->b_blocknr = bn;
+	set_buffer_mapped(bh);
+}
+
+STATIC void
+xfs_map_at_offset(
+	struct inode		*inode,
+	struct buffer_head	*bh,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+	xfs_map_buffer(inode, bh, imap, offset);
+	set_buffer_mapped(bh);
+	clear_buffer_delay(bh);
+	clear_buffer_unwritten(bh);
+}
+
+/*
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
+ */
+STATIC int
+xfs_is_delayed_page(
+	struct page		*page,
+	unsigned int		type)
+{
+	if (PageWriteback(page))
+		return 0;
+
+	if (page->mapping && page_has_buffers(page)) {
+		struct buffer_head	*bh, *head;
+		int			acceptable = 0;
+
+		bh = head = page_buffers(page);
+		do {
+			if (buffer_unwritten(bh))
+				acceptable = (type == IO_UNWRITTEN);
+			else if (buffer_delay(bh))
+				acceptable = (type == IO_DELALLOC);
+			else if (buffer_dirty(bh) && buffer_mapped(bh))
+				acceptable = (type == IO_OVERWRITE);
+			else
+				break;
+		} while ((bh = bh->b_this_page) != head);
+
+		if (acceptable)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only, for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC int
+xfs_convert_page(
+	struct inode		*inode,
+	struct page		*page,
+	loff_t			tindex,
+	struct xfs_bmbt_irec	*imap,
+	xfs_ioend_t		**ioendp,
+	struct writeback_control *wbc)
+{
+	struct buffer_head	*bh, *head;
+	xfs_off_t		end_offset;
+	unsigned long		p_offset;
+	unsigned int		type;
+	int			len, page_dirty;
+	int			count = 0, done = 0, uptodate = 1;
+ 	xfs_off_t		offset = page_offset(page);
+
+	if (page->index != tindex)
+		goto fail;
+	if (!trylock_page(page))
+		goto fail;
+	if (PageWriteback(page))
+		goto fail_unlock_page;
+	if (page->mapping != inode->i_mapping)
+		goto fail_unlock_page;
+	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+		goto fail_unlock_page;
+
+	/*
+	 * page_dirty is initially a count of buffers on the page before
+	 * EOF and is decremented as we move each into a cleanable state.
+	 *
+	 * Derivation:
+	 *
+	 * End offset is the highest offset that this page should represent.
+	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+	 * hence give us the correct page_dirty count. On any other page,
+	 * it will be zero and in that case we need page_dirty to be the
+	 * count of buffers on the page.
+	 */
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+			i_size_read(inode));
+
+	len = 1 << inode->i_blkbits;
+	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+					PAGE_CACHE_SIZE);
+	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+	page_dirty = p_offset / len;
+
+	bh = head = page_buffers(page);
+	do {
+		if (offset >= end_offset)
+			break;
+		if (!buffer_uptodate(bh))
+			uptodate = 0;
+		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+			done = 1;
+			continue;
+		}
+
+		if (buffer_unwritten(bh) || buffer_delay(bh) ||
+		    buffer_mapped(bh)) {
+			if (buffer_unwritten(bh))
+				type = IO_UNWRITTEN;
+			else if (buffer_delay(bh))
+				type = IO_DELALLOC;
+			else
+				type = IO_OVERWRITE;
+
+			if (!xfs_imap_valid(inode, imap, offset)) {
+				done = 1;
+				continue;
+			}
+
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, imap, offset);
+			xfs_add_to_ioend(inode, bh, offset, type,
+					 ioendp, done);
+
+			page_dirty--;
+			count++;
+		} else {
+			done = 1;
+		}
+	} while (offset += len, (bh = bh->b_this_page) != head);
+
+	if (uptodate && bh == head)
+		SetPageUptodate(page);
+
+	if (count) {
+		if (--wbc->nr_to_write <= 0 &&
+		    wbc->sync_mode == WB_SYNC_NONE)
+			done = 1;
+	}
+	xfs_start_page_writeback(page, !page_dirty, count);
+
+	return done;
+ fail_unlock_page:
+	unlock_page(page);
+ fail:
+	return 1;
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+	struct inode		*inode,
+	pgoff_t			tindex,
+	struct xfs_bmbt_irec	*imap,
+	xfs_ioend_t		**ioendp,
+	struct writeback_control *wbc,
+	pgoff_t			tlast)
+{
+	struct pagevec		pvec;
+	int			done = 0, i;
+
+	pagevec_init(&pvec, 0);
+	while (!done && tindex <= tlast) {
+		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+			break;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+					imap, ioendp, wbc);
+			if (done)
+				break;
+		}
+
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+}
+
+STATIC void
+xfs_vm_invalidatepage(
+	struct page		*page,
+	unsigned long		offset)
+{
+	trace_xfs_invalidatepage(page->mapping->host, page, offset);
+	block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+	struct page		*page)
+{
+	struct inode		*inode = page->mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct buffer_head	*bh, *head;
+	loff_t			offset = page_offset(page);
+
+	if (!xfs_is_delayed_page(page, IO_DELALLOC))
+		goto out_invalidate;
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_invalidate;
+
+	xfs_alert(ip->i_mount,
+		"page discard on page %p, inode 0x%llx, offset %llu.",
+			page, ip->i_ino, offset);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	bh = head = page_buffers(page);
+	do {
+		int		error;
+		xfs_fileoff_t	start_fsb;
+
+		if (!buffer_delay(bh))
+			goto next_buffer;
+
+		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_alert(ip->i_mount,
+			"page discard unable to remove delalloc mapping.");
+			}
+			break;
+		}
+next_buffer:
+		offset += 1 << inode->i_blkbits;
+
+	} while ((bh = bh->b_this_page) != head);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+	xfs_vm_invalidatepage(page, 0);
+	return;
+}
+
+/*
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
+ */
+STATIC int
+xfs_vm_writepage(
+	struct page		*page,
+	struct writeback_control *wbc)
+{
+	struct inode		*inode = page->mapping->host;
+	struct buffer_head	*bh, *head;
+	struct xfs_bmbt_irec	imap;
+	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
+	loff_t			offset;
+	unsigned int		type;
+	__uint64_t              end_offset;
+	pgoff_t                 end_index, last_index;
+	ssize_t			len;
+	int			err, imap_valid = 0, uptodate = 1;
+	int			count = 0;
+	int			nonblocking = 0;
+
+	trace_xfs_writepage(inode, page, 0);
+
+	ASSERT(page_has_buffers(page));
+
+	/*
+	 * Refuse to write the page out if we are called from reclaim context.
+	 *
+	 * This avoids stack overflows when called from deeply used stacks in
+	 * random callers for direct reclaim or memcg reclaim.  We explicitly
+	 * allow reclaim from kswapd as the stack usage there is relatively low.
+	 *
+	 * This should really be done by the core VM, but until that happens
+	 * filesystems like XFS, btrfs and ext4 have to take care of this
+	 * by themselves.
+	 */
+	if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+		goto redirty;
+
+	/*
+	 * Given that we do not allow direct reclaim to call us, we should
+	 * never be called while in a filesystem transaction.
+	 */
+	if (WARN_ON(current->flags & PF_FSTRANS))
+		goto redirty;
+
+	/* Is this page beyond the end of the file? */
+	offset = i_size_read(inode);
+	end_index = offset >> PAGE_CACHE_SHIFT;
+	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+	if (page->index >= end_index) {
+		if ((page->index >= end_index + 1) ||
+		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+			unlock_page(page);
+			return 0;
+		}
+	}
+
+	end_offset = min_t(unsigned long long,
+			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+			offset);
+	len = 1 << inode->i_blkbits;
+
+	bh = head = page_buffers(page);
+	offset = page_offset(page);
+	type = IO_OVERWRITE;
+
+	if (wbc->sync_mode == WB_SYNC_NONE)
+		nonblocking = 1;
+
+	do {
+		int new_ioend = 0;
+
+		if (offset >= end_offset)
+			break;
+		if (!buffer_uptodate(bh))
+			uptodate = 0;
+
+		/*
+		 * set_page_dirty dirties all buffers in a page, independent
+		 * of their state.  The dirty state however is entirely
+		 * meaningless for holes (!mapped && uptodate), so skip
+		 * buffers covering holes here.
+		 */
+		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+			imap_valid = 0;
+			continue;
+		}
+
+		if (buffer_unwritten(bh)) {
+			if (type != IO_UNWRITTEN) {
+				type = IO_UNWRITTEN;
+				imap_valid = 0;
+			}
+		} else if (buffer_delay(bh)) {
+			if (type != IO_DELALLOC) {
+				type = IO_DELALLOC;
+				imap_valid = 0;
+			}
+		} else if (buffer_uptodate(bh)) {
+			if (type != IO_OVERWRITE) {
+				type = IO_OVERWRITE;
+				imap_valid = 0;
+			}
+		} else {
+			if (PageUptodate(page)) {
+				ASSERT(buffer_mapped(bh));
+				imap_valid = 0;
+			}
+			continue;
+		}
+
+		if (imap_valid)
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		if (!imap_valid) {
+			/*
+			 * If we didn't have a valid mapping then we need to
+			 * put the new mapping into a separate ioend structure.
+			 * This ensures non-contiguous extents always have
+			 * separate ioends, which is particularly important
+			 * for unwritten extent conversion at I/O completion
+			 * time.
+			 */
+			new_ioend = 1;
+			err = xfs_map_blocks(inode, offset, &imap, type,
+					     nonblocking);
+			if (err)
+				goto error;
+			imap_valid = xfs_imap_valid(inode, &imap, offset);
+		}
+		if (imap_valid) {
+			lock_buffer(bh);
+			if (type != IO_OVERWRITE)
+				xfs_map_at_offset(inode, bh, &imap, offset);
+			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+					 new_ioend);
+			count++;
+		}
+
+		if (!iohead)
+			iohead = ioend;
+
+	} while (offset += len, ((bh = bh->b_this_page) != head));
+
+	if (uptodate && bh == head)
+		SetPageUptodate(page);
+
+	xfs_start_page_writeback(page, 1, count);
+
+	if (ioend && imap_valid) {
+		xfs_off_t		end_index;
+
+		end_index = imap.br_startoff + imap.br_blockcount;
+
+		/* to bytes */
+		end_index <<= inode->i_blkbits;
+
+		/* to pages */
+		end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+		/* check against file size */
+		if (end_index > last_index)
+			end_index = last_index;
+
+		xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+				  wbc, end_index);
+	}
+
+	if (iohead)
+		xfs_submit_ioend(wbc, iohead);
+
+	return 0;
+
+error:
+	if (iohead)
+		xfs_cancel_ioend(iohead);
+
+	if (err == -EAGAIN)
+		goto redirty;
+
+	xfs_aops_discard_page(page);
+	ClearPageUptodate(page);
+	unlock_page(page);
+	return err;
+
+redirty:
+	redirty_page_for_writepage(wbc, page);
+	unlock_page(page);
+	return 0;
+}
+
+STATIC int
+xfs_vm_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. The page should already be clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 1 if the page is ok to release, 0 otherwise.
+ */
+STATIC int
+xfs_vm_releasepage(
+	struct page		*page,
+	gfp_t			gfp_mask)
+{
+	int			delalloc, unwritten;
+
+	trace_xfs_releasepage(page->mapping->host, page, 0);
+
+	xfs_count_page_state(page, &delalloc, &unwritten);
+
+	if (WARN_ON(delalloc))
+		return 0;
+	if (WARN_ON(unwritten))
+		return 0;
+
+	return try_to_free_buffers(page);
+}
+
+STATIC int
+__xfs_get_blocks(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create,
+	int			direct)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			error = 0;
+	int			lockmode = 0;
+	struct xfs_bmbt_irec	imap;
+	int			nimaps = 1;
+	xfs_off_t		offset;
+	ssize_t			size;
+	int			new = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	offset = (xfs_off_t)iblock << inode->i_blkbits;
+	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+	size = bh_result->b_size;
+
+	if (!create && direct && offset >= i_size_read(inode))
+		return 0;
+
+	if (create) {
+		lockmode = XFS_ILOCK_EXCL;
+		xfs_ilock(ip, lockmode);
+	} else {
+		lockmode = xfs_ilock_map_shared(ip);
+	}
+
+	ASSERT(offset <= mp->m_maxioffset);
+	if (offset + size > mp->m_maxioffset)
+		size = mp->m_maxioffset - offset;
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+	error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+			  XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
+	if (error)
+		goto out_unlock;
+
+	if (create &&
+	    (!nimaps ||
+	     (imap.br_startblock == HOLESTARTBLOCK ||
+	      imap.br_startblock == DELAYSTARTBLOCK))) {
+		if (direct) {
+			error = xfs_iomap_write_direct(ip, offset, size,
+						       &imap, nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, size, &imap);
+		}
+		if (error)
+			goto out_unlock;
+
+		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+	} else if (nimaps) {
+		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+	} else {
+		trace_xfs_get_blocks_notfound(ip, offset, size);
+		goto out_unlock;
+	}
+	xfs_iunlock(ip, lockmode);
+
+	if (imap.br_startblock != HOLESTARTBLOCK &&
+	    imap.br_startblock != DELAYSTARTBLOCK) {
+		/*
+		 * For unwritten extents do not report a disk address on
+		 * the read case (treat as if we're reading into a hole).
+		 */
+		if (create || !ISUNWRITTEN(&imap))
+			xfs_map_buffer(inode, bh_result, &imap, offset);
+		if (create && ISUNWRITTEN(&imap)) {
+			if (direct)
+				bh_result->b_private = inode;
+			set_buffer_unwritten(bh_result);
+		}
+	}
+
+	/*
+	 * If this is a realtime file, data may be on a different device.
+	 * to that pointed to from the buffer_head b_bdev currently.
+	 */
+	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
+
+	/*
+	 * If we previously allocated a block out beyond eof and we are now
+	 * coming back to use it then we will need to flag it as new even if it
+	 * has a disk address.
+	 *
+	 * With sub-block writes into unwritten extents we also need to mark
+	 * the buffer as new so that the unwritten parts of the buffer gets
+	 * correctly zeroed.
+	 */
+	if (create &&
+	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+	     (offset >= i_size_read(inode)) ||
+	     (new || ISUNWRITTEN(&imap))))
+		set_buffer_new(bh_result);
+
+	if (imap.br_startblock == DELAYSTARTBLOCK) {
+		BUG_ON(direct);
+		if (create) {
+			set_buffer_uptodate(bh_result);
+			set_buffer_mapped(bh_result);
+			set_buffer_delay(bh_result);
+		}
+	}
+
+	/*
+	 * If this is O_DIRECT or the mpage code calling tell them how large
+	 * the mapping is, so that we can avoid repeated get_blocks calls.
+	 */
+	if (direct || size > (1 << inode->i_blkbits)) {
+		xfs_off_t		mapping_size;
+
+		mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+		mapping_size <<= inode->i_blkbits;
+
+		ASSERT(mapping_size > 0);
+		if (mapping_size > size)
+			mapping_size = size;
+		if (mapping_size > LONG_MAX)
+			mapping_size = LONG_MAX;
+
+		bh_result->b_size = mapping_size;
+	}
+
+	return 0;
+
+out_unlock:
+	xfs_iunlock(ip, lockmode);
+	return -error;
+}
+
+int
+xfs_get_blocks(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+}
+
+STATIC int
+xfs_get_blocks_direct(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+}
+
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents.  In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done.  But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions.  In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
+STATIC void
+xfs_end_io_direct_write(
+	struct kiocb		*iocb,
+	loff_t			offset,
+	ssize_t			size,
+	void			*private,
+	int			ret,
+	bool			is_async)
+{
+	struct xfs_ioend	*ioend = iocb->private;
+
+	/*
+	 * blockdev_direct_IO can return an error even after the I/O
+	 * completion handler was called.  Thus we need to protect
+	 * against double-freeing.
+	 */
+	iocb->private = NULL;
+
+	ioend->io_offset = offset;
+	ioend->io_size = size;
+	if (private && size > 0)
+		ioend->io_type = IO_UNWRITTEN;
+
+	if (is_async) {
+		/*
+		 * If we are converting an unwritten extent we need to delay
+		 * the AIO completion until after the unwrittent extent
+		 * conversion has completed, otherwise do it ASAP.
+		 */
+		if (ioend->io_type == IO_UNWRITTEN) {
+			ioend->io_iocb = iocb;
+			ioend->io_result = ret;
+		} else {
+			aio_complete(iocb, ret, 0);
+		}
+		xfs_finish_ioend(ioend);
+	} else {
+		xfs_finish_ioend_sync(ioend);
+	}
+
+	/* XXX: probably should move into the real I/O completion handler */
+	inode_dio_done(ioend->io_inode);
+}
+
+STATIC ssize_t
+xfs_vm_direct_IO(
+	int			rw,
+	struct kiocb		*iocb,
+	const struct iovec	*iov,
+	loff_t			offset,
+	unsigned long		nr_segs)
+{
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
+	ssize_t			ret;
+
+	if (rw & WRITE) {
+		iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct_write, NULL, 0);
+		if (ret != -EIOCBQUEUED && iocb->private)
+			xfs_destroy_ioend(iocb->private);
+	} else {
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    NULL, NULL, 0);
+	}
+
+	return ret;
+}
+
+STATIC void
+xfs_vm_write_failed(
+	struct address_space	*mapping,
+	loff_t			to)
+{
+	struct inode		*inode = mapping->host;
+
+	if (to > inode->i_size) {
+		/*
+		 * punch out the delalloc blocks we have already allocated. We
+		 * don't call xfs_setattr() to do this as we may be in the
+		 * middle of a multi-iovec write and so the vfs inode->i_size
+		 * will not match the xfs ip->i_size and so it will zero too
+		 * much. Hence we jus truncate the page cache to zero what is
+		 * necessary and punch the delalloc blocks directly.
+		 */
+		struct xfs_inode	*ip = XFS_I(inode);
+		xfs_fileoff_t		start_fsb;
+		xfs_fileoff_t		end_fsb;
+		int			error;
+
+		truncate_pagecache(inode, to, inode->i_size);
+
+		/*
+		 * Check if there are any blocks that are outside of i_size
+		 * that need to be trimmed back.
+		 */
+		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+		if (end_fsb <= start_fsb)
+			return;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+							end_fsb - start_fsb);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_alert(ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+						ip->i_ino);
+			}
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	}
+}
+
+STATIC int
+xfs_vm_write_begin(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		flags,
+	struct page		**pagep,
+	void			**fsdata)
+{
+	int			ret;
+
+	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+				pagep, xfs_get_blocks);
+	if (unlikely(ret))
+		xfs_vm_write_failed(mapping, pos + len);
+	return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		copied,
+	struct page		*page,
+	void			*fsdata)
+{
+	int			ret;
+
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (unlikely(ret < len))
+		xfs_vm_write_failed(mapping, pos + len);
+	return ret;
+}
+
+STATIC sector_t
+xfs_vm_bmap(
+	struct address_space	*mapping,
+	sector_t		block)
+{
+	struct inode		*inode = (struct inode *)mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	trace_xfs_vm_bmap(XFS_I(inode));
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+	return generic_block_bmap(mapping, block, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpage(
+	struct file		*unused,
+	struct page		*page)
+{
+	return mpage_readpage(page, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpages(
+	struct file		*unused,
+	struct address_space	*mapping,
+	struct list_head	*pages,
+	unsigned		nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+}
+
+const struct address_space_operations xfs_address_space_operations = {
+	.readpage		= xfs_vm_readpage,
+	.readpages		= xfs_vm_readpages,
+	.writepage		= xfs_vm_writepage,
+	.writepages		= xfs_vm_writepages,
+	.releasepage		= xfs_vm_releasepage,
+	.invalidatepage		= xfs_vm_invalidatepage,
+	.write_begin		= xfs_vm_write_begin,
+	.write_end		= xfs_vm_write_end,
+	.bmap			= xfs_vm_bmap,
+	.direct_IO		= xfs_vm_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
+	.error_remove_page	= generic_error_remove_page,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
new file mode 100644
index 0000000..71f721e
--- /dev/null
+++ b/fs/xfs/xfs_aops.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+	IO_DIRECT = 0,	/* special case for direct I/O ioends */
+	IO_DELALLOC,	/* mapping covers delalloc region */
+	IO_UNWRITTEN,	/* mapping covers allocated but uninitialized data */
+	IO_OVERWRITE,	/* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+	{ 0,			"" }, \
+	{ IO_DELALLOC,		"delalloc" }, \
+	{ IO_UNWRITTEN,		"unwritten" }, \
+	{ IO_OVERWRITE,		"overwrite" }
+
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
+typedef struct xfs_ioend {
+	struct xfs_ioend	*io_list;	/* next ioend in chain */
+	unsigned int		io_type;	/* delalloc / unwritten */
+	int			io_error;	/* I/O error code */
+	atomic_t		io_remaining;	/* hold count */
+	struct inode		*io_inode;	/* file being written to */
+	struct buffer_head	*io_buffer_head;/* buffer linked list head */
+	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */
+	size_t			io_size;	/* size of the extent */
+	xfs_off_t		io_offset;	/* offset in the file */
+	struct work_struct	io_work;	/* xfsdatad work queue */
+	struct kiocb		*io_iocb;
+	int			io_result;
+} xfs_ioend_t;
+
+extern const struct address_space_operations xfs_address_space_operations;
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
+
+extern void xfs_count_page_state(struct page *, int *, int *);
+
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
new file mode 100644
index 0000000..c57836d
--- /dev/null
+++ b/fs/xfs/xfs_buf.c
@@ -0,0 +1,1876 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
+#include <linux/kthread.h>
+#include <linux/migrate.h>
+#include <linux/backing-dev.h>
+#include <linux/freezer.h>
+
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+static kmem_zone_t *xfs_buf_zone;
+STATIC int xfsbufd(void *);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
+
+static struct workqueue_struct *xfslogd_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
+
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp)	((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp)	((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp)	((bp)->b_last_holder)
+#else
+# define XB_SET_OWNER(bp)	do { } while (0)
+# define XB_CLEAR_OWNER(bp)	do { } while (0)
+# define XB_GET_OWNER(bp)	do { } while (0)
+#endif
+
+#define xb_to_gfp(flags) \
+	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+	  ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+
+#define xb_to_km(flags) \
+	 (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+#define xfs_buf_allocate(flags) \
+	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+	kmem_zone_free(xfs_buf_zone, (bp));
+
+static inline int
+xfs_buf_is_vmapped(
+	struct xfs_buf	*bp)
+{
+	/*
+	 * Return true if the buffer is vmapped.
+	 *
+	 * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+	 * code is clever enough to know it doesn't have to map a single page,
+	 * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+	 */
+	return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+	struct xfs_buf	*bp)
+{
+	return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
+/*
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
+ */
+STATIC void
+xfs_buf_lru_add(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (list_empty(&bp->b_lru)) {
+		atomic_inc(&bp->b_hold);
+		list_add_tail(&bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+
+	if (list_empty(&bp->b_lru))
+		return;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (!list_empty(&bp->b_lru)) {
+		list_del_init(&bp->b_lru);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+	struct xfs_buf	*bp)
+{
+	bp->b_flags |= XBF_STALE;
+	atomic_set(&(bp)->b_lru_ref, 0);
+	if (!list_empty(&bp->b_lru)) {
+		struct xfs_buftarg *btp = bp->b_target;
+
+		spin_lock(&btp->bt_lru_lock);
+		if (!list_empty(&bp->b_lru)) {
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			atomic_dec(&bp->b_hold);
+		}
+		spin_unlock(&btp->bt_lru_lock);
+	}
+	ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
+
+STATIC void
+_xfs_buf_initialize(
+	xfs_buf_t		*bp,
+	xfs_buftarg_t		*target,
+	xfs_off_t		range_base,
+	size_t			range_length,
+	xfs_buf_flags_t		flags)
+{
+	/*
+	 * We don't want certain flags to appear in b_flags.
+	 */
+	flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+
+	memset(bp, 0, sizeof(xfs_buf_t));
+	atomic_set(&bp->b_hold, 1);
+	atomic_set(&bp->b_lru_ref, 1);
+	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
+	INIT_LIST_HEAD(&bp->b_list);
+	RB_CLEAR_NODE(&bp->b_rbnode);
+	sema_init(&bp->b_sema, 0); /* held, no waiters */
+	XB_SET_OWNER(bp);
+	bp->b_target = target;
+	bp->b_file_offset = range_base;
+	/*
+	 * Set buffer_length and count_desired to the same value initially.
+	 * I/O routines should use count_desired, which will be the same in
+	 * most cases but may be reset (e.g. XFS recovery).
+	 */
+	bp->b_buffer_length = bp->b_count_desired = range_length;
+	bp->b_flags = flags;
+	bp->b_bn = XFS_BUF_DADDR_NULL;
+	atomic_set(&bp->b_pin_count, 0);
+	init_waitqueue_head(&bp->b_waiters);
+
+	XFS_STATS_INC(xb_create);
+
+	trace_xfs_buf_init(bp, _RET_IP_);
+}
+
+/*
+ *	Allocate a page array capable of holding a specified number
+ *	of pages, and point the page buf at it.
+ */
+STATIC int
+_xfs_buf_get_pages(
+	xfs_buf_t		*bp,
+	int			page_count,
+	xfs_buf_flags_t		flags)
+{
+	/* Make sure that we have a page list */
+	if (bp->b_pages == NULL) {
+		bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+		bp->b_page_count = page_count;
+		if (page_count <= XB_PAGES) {
+			bp->b_pages = bp->b_page_array;
+		} else {
+			bp->b_pages = kmem_alloc(sizeof(struct page *) *
+					page_count, xb_to_km(flags));
+			if (bp->b_pages == NULL)
+				return -ENOMEM;
+		}
+		memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+	}
+	return 0;
+}
+
+/*
+ *	Frees b_pages if it was allocated.
+ */
+STATIC void
+_xfs_buf_free_pages(
+	xfs_buf_t	*bp)
+{
+	if (bp->b_pages != bp->b_page_array) {
+		kmem_free(bp->b_pages);
+		bp->b_pages = NULL;
+	}
+}
+
+/*
+ *	Releases the specified buffer.
+ *
+ * 	The modification state of any associated pages is left unchanged.
+ * 	The buffer most not be on any hash - use xfs_buf_rele instead for
+ * 	hashed and refcounted buffers
+ */
+void
+xfs_buf_free(
+	xfs_buf_t		*bp)
+{
+	trace_xfs_buf_free(bp, _RET_IP_);
+
+	ASSERT(list_empty(&bp->b_lru));
+
+	if (bp->b_flags & _XBF_PAGES) {
+		uint		i;
+
+		if (xfs_buf_is_vmapped(bp))
+			vm_unmap_ram(bp->b_addr - bp->b_offset,
+					bp->b_page_count);
+
+		for (i = 0; i < bp->b_page_count; i++) {
+			struct page	*page = bp->b_pages[i];
+
+			__free_page(page);
+		}
+	} else if (bp->b_flags & _XBF_KMEM)
+		kmem_free(bp->b_addr);
+	_xfs_buf_free_pages(bp);
+	xfs_buf_deallocate(bp);
+}
+
+/*
+ * Allocates all the pages for buffer in question and builds it's page list.
+ */
+STATIC int
+xfs_buf_allocate_memory(
+	xfs_buf_t		*bp,
+	uint			flags)
+{
+	size_t			size = bp->b_count_desired;
+	size_t			nbytes, offset;
+	gfp_t			gfp_mask = xb_to_gfp(flags);
+	unsigned short		page_count, i;
+	xfs_off_t		end;
+	int			error;
+
+	/*
+	 * for buffers that are contained within a single page, just allocate
+	 * the memory from the heap - there's no need for the complexity of
+	 * page arrays to keep allocation down to order 0.
+	 */
+	if (bp->b_buffer_length < PAGE_SIZE) {
+		bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+		if (!bp->b_addr) {
+			/* low memory - use alloc_page loop instead */
+			goto use_alloc_page;
+		}
+
+		if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+								PAGE_MASK) !=
+		    ((unsigned long)bp->b_addr & PAGE_MASK)) {
+			/* b_addr spans two pages - use alloc_page instead */
+			kmem_free(bp->b_addr);
+			bp->b_addr = NULL;
+			goto use_alloc_page;
+		}
+		bp->b_offset = offset_in_page(bp->b_addr);
+		bp->b_pages = bp->b_page_array;
+		bp->b_pages[0] = virt_to_page(bp->b_addr);
+		bp->b_page_count = 1;
+		bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+		return 0;
+	}
+
+use_alloc_page:
+	end = bp->b_file_offset + bp->b_buffer_length;
+	page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
+	error = _xfs_buf_get_pages(bp, page_count, flags);
+	if (unlikely(error))
+		return error;
+
+	offset = bp->b_offset;
+	bp->b_flags |= _XBF_PAGES;
+
+	for (i = 0; i < bp->b_page_count; i++) {
+		struct page	*page;
+		uint		retries = 0;
+retry:
+		page = alloc_page(gfp_mask);
+		if (unlikely(page == NULL)) {
+			if (flags & XBF_READ_AHEAD) {
+				bp->b_page_count = i;
+				error = ENOMEM;
+				goto out_free_pages;
+			}
+
+			/*
+			 * This could deadlock.
+			 *
+			 * But until all the XFS lowlevel code is revamped to
+			 * handle buffer allocation failures we can't do much.
+			 */
+			if (!(++retries % 100))
+				xfs_err(NULL,
+		"possible memory allocation deadlock in %s (mode:0x%x)",
+					__func__, gfp_mask);
+
+			XFS_STATS_INC(xb_page_retries);
+			congestion_wait(BLK_RW_ASYNC, HZ/50);
+			goto retry;
+		}
+
+		XFS_STATS_INC(xb_page_found);
+
+		nbytes = min_t(size_t, size, PAGE_SIZE - offset);
+		size -= nbytes;
+		bp->b_pages[i] = page;
+		offset = 0;
+	}
+	return 0;
+
+out_free_pages:
+	for (i = 0; i < bp->b_page_count; i++)
+		__free_page(bp->b_pages[i]);
+	return error;
+}
+
+/*
+ *	Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_xfs_buf_map_pages(
+	xfs_buf_t		*bp,
+	uint			flags)
+{
+	ASSERT(bp->b_flags & _XBF_PAGES);
+	if (bp->b_page_count == 1) {
+		/* A single page buffer is always mappable */
+		bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+		bp->b_flags |= XBF_MAPPED;
+	} else if (flags & XBF_MAPPED) {
+		int retried = 0;
+
+		do {
+			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+						-1, PAGE_KERNEL);
+			if (bp->b_addr)
+				break;
+			vm_unmap_aliases();
+		} while (retried++ <= 1);
+
+		if (!bp->b_addr)
+			return -ENOMEM;
+		bp->b_addr += bp->b_offset;
+		bp->b_flags |= XBF_MAPPED;
+	}
+
+	return 0;
+}
+
+/*
+ *	Finding and Reading Buffers
+ */
+
+/*
+ *	Look up, and creates if absent, a lockable buffer for
+ *	a given range of an inode.  The buffer is returned
+ *	locked.	 If other overlapping buffers exist, they are
+ *	released before the new buffer is created and locked,
+ *	which may imply that this call will block until those buffers
+ *	are unlocked.  No I/O is implied by this call.
+ */
+xfs_buf_t *
+_xfs_buf_find(
+	xfs_buftarg_t		*btp,	/* block device target		*/
+	xfs_off_t		ioff,	/* starting offset of range	*/
+	size_t			isize,	/* length of range		*/
+	xfs_buf_flags_t		flags,
+	xfs_buf_t		*new_bp)
+{
+	xfs_off_t		range_base;
+	size_t			range_length;
+	struct xfs_perag	*pag;
+	struct rb_node		**rbp;
+	struct rb_node		*parent;
+	xfs_buf_t		*bp;
+
+	range_base = (ioff << BBSHIFT);
+	range_length = (isize << BBSHIFT);
+
+	/* Check for IOs smaller than the sector size / not sector aligned */
+	ASSERT(!(range_length < (1 << btp->bt_sshift)));
+	ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
+
+	/* get tree root */
+	pag = xfs_perag_get(btp->bt_mount,
+				xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+	/* walk tree */
+	spin_lock(&pag->pag_buf_lock);
+	rbp = &pag->pag_buf_tree.rb_node;
+	parent = NULL;
+	bp = NULL;
+	while (*rbp) {
+		parent = *rbp;
+		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+		if (range_base < bp->b_file_offset)
+			rbp = &(*rbp)->rb_left;
+		else if (range_base > bp->b_file_offset)
+			rbp = &(*rbp)->rb_right;
+		else {
+			/*
+			 * found a block offset match. If the range doesn't
+			 * match, the only way this is allowed is if the buffer
+			 * in the cache is stale and the transaction that made
+			 * it stale has not yet committed. i.e. we are
+			 * reallocating a busy extent. Skip this buffer and
+			 * continue searching to the right for an exact match.
+			 */
+			if (bp->b_buffer_length != range_length) {
+				ASSERT(bp->b_flags & XBF_STALE);
+				rbp = &(*rbp)->rb_right;
+				continue;
+			}
+			atomic_inc(&bp->b_hold);
+			goto found;
+		}
+	}
+
+	/* No match found */
+	if (new_bp) {
+		_xfs_buf_initialize(new_bp, btp, range_base,
+				range_length, flags);
+		rb_link_node(&new_bp->b_rbnode, parent, rbp);
+		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+		/* the buffer keeps the perag reference until it is freed */
+		new_bp->b_pag = pag;
+		spin_unlock(&pag->pag_buf_lock);
+	} else {
+		XFS_STATS_INC(xb_miss_locked);
+		spin_unlock(&pag->pag_buf_lock);
+		xfs_perag_put(pag);
+	}
+	return new_bp;
+
+found:
+	spin_unlock(&pag->pag_buf_lock);
+	xfs_perag_put(pag);
+
+	if (!xfs_buf_trylock(bp)) {
+		if (flags & XBF_TRYLOCK) {
+			xfs_buf_rele(bp);
+			XFS_STATS_INC(xb_busy_locked);
+			return NULL;
+		}
+		xfs_buf_lock(bp);
+		XFS_STATS_INC(xb_get_locked_waited);
+	}
+
+	/*
+	 * if the buffer is stale, clear all the external state associated with
+	 * it. We need to keep flags such as how we allocated the buffer memory
+	 * intact here.
+	 */
+	if (bp->b_flags & XBF_STALE) {
+		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+		bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
+	}
+
+	trace_xfs_buf_find(bp, flags, _RET_IP_);
+	XFS_STATS_INC(xb_get_locked);
+	return bp;
+}
+
+/*
+ *	Assembles a buffer covering the specified range.
+ *	Storage in memory for all portions of the buffer will be allocated,
+ *	although backing storage may not be.
+ */
+xfs_buf_t *
+xfs_buf_get(
+	xfs_buftarg_t		*target,/* target for buffer		*/
+	xfs_off_t		ioff,	/* starting offset of range	*/
+	size_t			isize,	/* length of range		*/
+	xfs_buf_flags_t		flags)
+{
+	xfs_buf_t		*bp, *new_bp;
+	int			error = 0;
+
+	new_bp = xfs_buf_allocate(flags);
+	if (unlikely(!new_bp))
+		return NULL;
+
+	bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+	if (bp == new_bp) {
+		error = xfs_buf_allocate_memory(bp, flags);
+		if (error)
+			goto no_buffer;
+	} else {
+		xfs_buf_deallocate(new_bp);
+		if (unlikely(bp == NULL))
+			return NULL;
+	}
+
+	if (!(bp->b_flags & XBF_MAPPED)) {
+		error = _xfs_buf_map_pages(bp, flags);
+		if (unlikely(error)) {
+			xfs_warn(target->bt_mount,
+				"%s: failed to map pages\n", __func__);
+			goto no_buffer;
+		}
+	}
+
+	XFS_STATS_INC(xb_get);
+
+	/*
+	 * Always fill in the block number now, the mapped cases can do
+	 * their own overlay of this later.
+	 */
+	bp->b_bn = ioff;
+	bp->b_count_desired = bp->b_buffer_length;
+
+	trace_xfs_buf_get(bp, flags, _RET_IP_);
+	return bp;
+
+ no_buffer:
+	if (flags & (XBF_LOCK | XBF_TRYLOCK))
+		xfs_buf_unlock(bp);
+	xfs_buf_rele(bp);
+	return NULL;
+}
+
+STATIC int
+_xfs_buf_read(
+	xfs_buf_t		*bp,
+	xfs_buf_flags_t		flags)
+{
+	int			status;
+
+	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+
+	status = xfs_buf_iorequest(bp);
+	if (status || bp->b_error || (flags & XBF_ASYNC))
+		return status;
+	return xfs_buf_iowait(bp);
+}
+
+xfs_buf_t *
+xfs_buf_read(
+	xfs_buftarg_t		*target,
+	xfs_off_t		ioff,
+	size_t			isize,
+	xfs_buf_flags_t		flags)
+{
+	xfs_buf_t		*bp;
+
+	flags |= XBF_READ;
+
+	bp = xfs_buf_get(target, ioff, isize, flags);
+	if (bp) {
+		trace_xfs_buf_read(bp, flags, _RET_IP_);
+
+		if (!XFS_BUF_ISDONE(bp)) {
+			XFS_STATS_INC(xb_get_read);
+			_xfs_buf_read(bp, flags);
+		} else if (flags & XBF_ASYNC) {
+			/*
+			 * Read ahead call which is already satisfied,
+			 * drop the buffer
+			 */
+			goto no_buffer;
+		} else {
+			/* We do not want read in the flags */
+			bp->b_flags &= ~XBF_READ;
+		}
+	}
+
+	return bp;
+
+ no_buffer:
+	if (flags & (XBF_LOCK | XBF_TRYLOCK))
+		xfs_buf_unlock(bp);
+	xfs_buf_rele(bp);
+	return NULL;
+}
+
+/*
+ *	If we are not low on memory then do the readahead in a deadlock
+ *	safe manner.
+ */
+void
+xfs_buf_readahead(
+	xfs_buftarg_t		*target,
+	xfs_off_t		ioff,
+	size_t			isize)
+{
+	if (bdi_read_congested(target->bt_bdi))
+		return;
+
+	xfs_buf_read(target, ioff, isize,
+		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+	struct xfs_mount	*mp,
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		daddr,
+	size_t			length,
+	int			flags)
+{
+	xfs_buf_t		*bp;
+	int			error;
+
+	bp = xfs_buf_get_uncached(target, length, flags);
+	if (!bp)
+		return NULL;
+
+	/* set up the buffer for a read IO */
+	XFS_BUF_SET_ADDR(bp, daddr);
+	XFS_BUF_READ(bp);
+
+	xfsbdstrat(mp, bp);
+	error = xfs_buf_iowait(bp);
+	if (error || bp->b_error) {
+		xfs_buf_relse(bp);
+		return NULL;
+	}
+	return bp;
+}
+
+xfs_buf_t *
+xfs_buf_get_empty(
+	size_t			len,
+	xfs_buftarg_t		*target)
+{
+	xfs_buf_t		*bp;
+
+	bp = xfs_buf_allocate(0);
+	if (bp)
+		_xfs_buf_initialize(bp, target, 0, len, 0);
+	return bp;
+}
+
+/*
+ * Return a buffer allocated as an empty buffer and associated to external
+ * memory via xfs_buf_associate_memory() back to it's empty state.
+ */
+void
+xfs_buf_set_empty(
+	struct xfs_buf		*bp,
+	size_t			len)
+{
+	if (bp->b_pages)
+		_xfs_buf_free_pages(bp);
+
+	bp->b_pages = NULL;
+	bp->b_page_count = 0;
+	bp->b_addr = NULL;
+	bp->b_file_offset = 0;
+	bp->b_buffer_length = bp->b_count_desired = len;
+	bp->b_bn = XFS_BUF_DADDR_NULL;
+	bp->b_flags &= ~XBF_MAPPED;
+}
+
+static inline struct page *
+mem_to_page(
+	void			*addr)
+{
+	if ((!is_vmalloc_addr(addr))) {
+		return virt_to_page(addr);
+	} else {
+		return vmalloc_to_page(addr);
+	}
+}
+
+int
+xfs_buf_associate_memory(
+	xfs_buf_t		*bp,
+	void			*mem,
+	size_t			len)
+{
+	int			rval;
+	int			i = 0;
+	unsigned long		pageaddr;
+	unsigned long		offset;
+	size_t			buflen;
+	int			page_count;
+
+	pageaddr = (unsigned long)mem & PAGE_MASK;
+	offset = (unsigned long)mem - pageaddr;
+	buflen = PAGE_ALIGN(len + offset);
+	page_count = buflen >> PAGE_SHIFT;
+
+	/* Free any previous set of page pointers */
+	if (bp->b_pages)
+		_xfs_buf_free_pages(bp);
+
+	bp->b_pages = NULL;
+	bp->b_addr = mem;
+
+	rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
+	if (rval)
+		return rval;
+
+	bp->b_offset = offset;
+
+	for (i = 0; i < bp->b_page_count; i++) {
+		bp->b_pages[i] = mem_to_page((void *)pageaddr);
+		pageaddr += PAGE_SIZE;
+	}
+
+	bp->b_count_desired = len;
+	bp->b_buffer_length = buflen;
+	bp->b_flags |= XBF_MAPPED;
+
+	return 0;
+}
+
+xfs_buf_t *
+xfs_buf_get_uncached(
+	struct xfs_buftarg	*target,
+	size_t			len,
+	int			flags)
+{
+	unsigned long		page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+	int			error, i;
+	xfs_buf_t		*bp;
+
+	bp = xfs_buf_allocate(0);
+	if (unlikely(bp == NULL))
+		goto fail;
+	_xfs_buf_initialize(bp, target, 0, len, 0);
+
+	error = _xfs_buf_get_pages(bp, page_count, 0);
+	if (error)
+		goto fail_free_buf;
+
+	for (i = 0; i < page_count; i++) {
+		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
+		if (!bp->b_pages[i])
+			goto fail_free_mem;
+	}
+	bp->b_flags |= _XBF_PAGES;
+
+	error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+	if (unlikely(error)) {
+		xfs_warn(target->bt_mount,
+			"%s: failed to map pages\n", __func__);
+		goto fail_free_mem;
+	}
+
+	trace_xfs_buf_get_uncached(bp, _RET_IP_);
+	return bp;
+
+ fail_free_mem:
+	while (--i >= 0)
+		__free_page(bp->b_pages[i]);
+	_xfs_buf_free_pages(bp);
+ fail_free_buf:
+	xfs_buf_deallocate(bp);
+ fail:
+	return NULL;
+}
+
+/*
+ *	Increment reference count on buffer, to hold the buffer concurrently
+ *	with another thread which may release (free) the buffer asynchronously.
+ *	Must hold the buffer already to call this function.
+ */
+void
+xfs_buf_hold(
+	xfs_buf_t		*bp)
+{
+	trace_xfs_buf_hold(bp, _RET_IP_);
+	atomic_inc(&bp->b_hold);
+}
+
+/*
+ *	Releases a hold on the specified buffer.  If the
+ *	the hold count is 1, calls xfs_buf_free.
+ */
+void
+xfs_buf_rele(
+	xfs_buf_t		*bp)
+{
+	struct xfs_perag	*pag = bp->b_pag;
+
+	trace_xfs_buf_rele(bp, _RET_IP_);
+
+	if (!pag) {
+		ASSERT(list_empty(&bp->b_lru));
+		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
+		if (atomic_dec_and_test(&bp->b_hold))
+			xfs_buf_free(bp);
+		return;
+	}
+
+	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
+	ASSERT(atomic_read(&bp->b_hold) > 0);
+	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
+		if (!(bp->b_flags & XBF_STALE) &&
+			   atomic_read(&bp->b_lru_ref)) {
+			xfs_buf_lru_add(bp);
+			spin_unlock(&pag->pag_buf_lock);
+		} else {
+			xfs_buf_lru_del(bp);
+			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+			spin_unlock(&pag->pag_buf_lock);
+			xfs_perag_put(pag);
+			xfs_buf_free(bp);
+		}
+	}
+}
+
+
+/*
+ *	Lock a buffer object, if it is not already locked.
+ *
+ *	If we come across a stale, pinned, locked buffer, we know that we are
+ *	being asked to lock a buffer that has been reallocated. Because it is
+ *	pinned, we know that the log has not been pushed to disk and hence it
+ *	will still be locked.  Rather than continuing to have trylock attempts
+ *	fail until someone else pushes the log, push it ourselves before
+ *	returning.  This means that the xfsaild will not get stuck trying
+ *	to push on stale inode buffers.
+ */
+int
+xfs_buf_trylock(
+	struct xfs_buf		*bp)
+{
+	int			locked;
+
+	locked = down_trylock(&bp->b_sema) == 0;
+	if (locked)
+		XB_SET_OWNER(bp);
+	else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+		xfs_log_force(bp->b_target->bt_mount, 0);
+
+	trace_xfs_buf_trylock(bp, _RET_IP_);
+	return locked;
+}
+
+/*
+ *	Lock a buffer object.
+ *
+ *	If we come across a stale, pinned, locked buffer, we know that we
+ *	are being asked to lock a buffer that has been reallocated. Because
+ *	it is pinned, we know that the log has not been pushed to disk and
+ *	hence it will still be locked. Rather than sleeping until someone
+ *	else pushes the log, push it ourselves before trying to get the lock.
+ */
+void
+xfs_buf_lock(
+	struct xfs_buf		*bp)
+{
+	trace_xfs_buf_lock(bp, _RET_IP_);
+
+	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+		xfs_log_force(bp->b_target->bt_mount, 0);
+	down(&bp->b_sema);
+	XB_SET_OWNER(bp);
+
+	trace_xfs_buf_lock_done(bp, _RET_IP_);
+}
+
+/*
+ *	Releases the lock on the buffer object.
+ *	If the buffer is marked delwri but is not queued, do so before we
+ *	unlock the buffer as we need to set flags correctly.  We also need to
+ *	take a reference for the delwri queue because the unlocker is going to
+ *	drop their's and they don't know we just queued it.
+ */
+void
+xfs_buf_unlock(
+	struct xfs_buf		*bp)
+{
+	if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
+		atomic_inc(&bp->b_hold);
+		bp->b_flags |= XBF_ASYNC;
+		xfs_buf_delwri_queue(bp, 0);
+	}
+
+	XB_CLEAR_OWNER(bp);
+	up(&bp->b_sema);
+
+	trace_xfs_buf_unlock(bp, _RET_IP_);
+}
+
+STATIC void
+xfs_buf_wait_unpin(
+	xfs_buf_t		*bp)
+{
+	DECLARE_WAITQUEUE	(wait, current);
+
+	if (atomic_read(&bp->b_pin_count) == 0)
+		return;
+
+	add_wait_queue(&bp->b_waiters, &wait);
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&bp->b_pin_count) == 0)
+			break;
+		io_schedule();
+	}
+	remove_wait_queue(&bp->b_waiters, &wait);
+	set_current_state(TASK_RUNNING);
+}
+
+/*
+ *	Buffer Utility Routines
+ */
+
+STATIC void
+xfs_buf_iodone_work(
+	struct work_struct	*work)
+{
+	xfs_buf_t		*bp =
+		container_of(work, xfs_buf_t, b_iodone_work);
+
+	if (bp->b_iodone)
+		(*(bp->b_iodone))(bp);
+	else if (bp->b_flags & XBF_ASYNC)
+		xfs_buf_relse(bp);
+}
+
+void
+xfs_buf_ioend(
+	xfs_buf_t		*bp,
+	int			schedule)
+{
+	trace_xfs_buf_iodone(bp, _RET_IP_);
+
+	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+	if (bp->b_error == 0)
+		bp->b_flags |= XBF_DONE;
+
+	if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
+		if (schedule) {
+			INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
+			queue_work(xfslogd_workqueue, &bp->b_iodone_work);
+		} else {
+			xfs_buf_iodone_work(&bp->b_iodone_work);
+		}
+	} else {
+		complete(&bp->b_iowait);
+	}
+}
+
+void
+xfs_buf_ioerror(
+	xfs_buf_t		*bp,
+	int			error)
+{
+	ASSERT(error >= 0 && error <= 0xffff);
+	bp->b_error = (unsigned short)error;
+	trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+}
+
+int
+xfs_bwrite(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	int			error;
+
+	bp->b_flags |= XBF_WRITE;
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+
+	xfs_buf_delwri_dequeue(bp);
+	xfs_bdstrat_cb(bp);
+
+	error = xfs_buf_iowait(bp);
+	if (error)
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+	xfs_buf_relse(bp);
+	return error;
+}
+
+void
+xfs_bdwrite(
+	void			*mp,
+	struct xfs_buf		*bp)
+{
+	trace_xfs_buf_bdwrite(bp, _RET_IP_);
+
+	bp->b_flags &= ~XBF_READ;
+	bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
+
+	xfs_buf_delwri_queue(bp, 1);
+}
+
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+	xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+	/*
+	 * No need to wait until the buffer is unpinned, we aren't flushing it.
+	 */
+	xfs_buf_ioerror(bp, EIO);
+
+	/*
+	 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_UNDONE(bp);
+	XFS_BUF_STALE(bp);
+
+	xfs_buf_ioend(bp, 0);
+
+	return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the xfs_buf_ioend call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+	struct xfs_buf	*bp)
+{
+	int64_t		fl = bp->b_flags;
+	/*
+	 * No need to wait until the buffer is unpinned.
+	 * We aren't flushing it.
+	 *
+	 * chunkhold expects B_DONE to be set, whether
+	 * we actually finish the I/O or not. We don't want to
+	 * change that interface.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_DONE(bp);
+	XFS_BUF_STALE(bp);
+	bp->b_iodone = NULL;
+	if (!(fl & XBF_ASYNC)) {
+		/*
+		 * Mark b_error and B_ERROR _both_.
+		 * Lot's of chunkcache code assumes that.
+		 * There's no reason to mark error for
+		 * ASYNC buffers.
+		 */
+		xfs_buf_ioerror(bp, EIO);
+		XFS_BUF_FINISH_IOWAIT(bp);
+	} else {
+		xfs_buf_relse(bp);
+	}
+
+	return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+	struct xfs_buf	*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		/*
+		 * Metadata write that didn't get logged but
+		 * written delayed anyway. These aren't associated
+		 * with a transaction, and can be ignored.
+		 */
+		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+			return xfs_bioerror_relse(bp);
+		else
+			return xfs_bioerror(bp);
+	}
+
+	xfs_buf_iorequest(bp);
+	return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		xfs_bioerror_relse(bp);
+		return;
+	}
+
+	xfs_buf_iorequest(bp);
+}
+
+STATIC void
+_xfs_buf_ioend(
+	xfs_buf_t		*bp,
+	int			schedule)
+{
+	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+		xfs_buf_ioend(bp, schedule);
+}
+
+STATIC void
+xfs_buf_bio_end_io(
+	struct bio		*bio,
+	int			error)
+{
+	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private;
+
+	xfs_buf_ioerror(bp, -error);
+
+	if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
+	_xfs_buf_ioend(bp, 1);
+	bio_put(bio);
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	xfs_buf_t		*bp)
+{
+	int			rw, map_i, total_nr_pages, nr_pages;
+	struct bio		*bio;
+	int			offset = bp->b_offset;
+	int			size = bp->b_count_desired;
+	sector_t		sector = bp->b_bn;
+
+	total_nr_pages = bp->b_page_count;
+	map_i = 0;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+next_chunk:
+	atomic_inc(&bp->b_io_remaining);
+	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+	if (nr_pages > total_nr_pages)
+		nr_pages = total_nr_pages;
+
+	bio = bio_alloc(GFP_NOIO, nr_pages);
+	bio->bi_bdev = bp->b_target->bt_bdev;
+	bio->bi_sector = sector;
+	bio->bi_end_io = xfs_buf_bio_end_io;
+	bio->bi_private = bp;
+
+
+	for (; size && nr_pages; nr_pages--, map_i++) {
+		int	rbytes, nbytes = PAGE_SIZE - offset;
+
+		if (nbytes > size)
+			nbytes = size;
+
+		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+		if (rbytes < nbytes)
+			break;
+
+		offset = 0;
+		sector += nbytes >> BBSHIFT;
+		size -= nbytes;
+		total_nr_pages--;
+	}
+
+	if (likely(bio->bi_size)) {
+		if (xfs_buf_is_vmapped(bp)) {
+			flush_kernel_vmap_range(bp->b_addr,
+						xfs_buf_vmap_len(bp));
+		}
+		submit_bio(rw, bio);
+		if (size)
+			goto next_chunk;
+	} else {
+		xfs_buf_ioerror(bp, EIO);
+		bio_put(bio);
+	}
+}
+
+int
+xfs_buf_iorequest(
+	xfs_buf_t		*bp)
+{
+	trace_xfs_buf_iorequest(bp, _RET_IP_);
+
+	if (bp->b_flags & XBF_DELWRI) {
+		xfs_buf_delwri_queue(bp, 1);
+		return 0;
+	}
+
+	if (bp->b_flags & XBF_WRITE) {
+		xfs_buf_wait_unpin(bp);
+	}
+
+	xfs_buf_hold(bp);
+
+	/* Set the count to 1 initially, this will stop an I/O
+	 * completion callout which happens before we have started
+	 * all the I/O from calling xfs_buf_ioend too early.
+	 */
+	atomic_set(&bp->b_io_remaining, 1);
+	_xfs_buf_ioapply(bp);
+	_xfs_buf_ioend(bp, 0);
+
+	xfs_buf_rele(bp);
+	return 0;
+}
+
+/*
+ *	Waits for I/O to complete on the buffer supplied.
+ *	It returns immediately if no I/O is pending.
+ *	It returns the I/O error code, if any, or 0 if there was no error.
+ */
+int
+xfs_buf_iowait(
+	xfs_buf_t		*bp)
+{
+	trace_xfs_buf_iowait(bp, _RET_IP_);
+
+	wait_for_completion(&bp->b_iowait);
+
+	trace_xfs_buf_iowait_done(bp, _RET_IP_);
+	return bp->b_error;
+}
+
+xfs_caddr_t
+xfs_buf_offset(
+	xfs_buf_t		*bp,
+	size_t			offset)
+{
+	struct page		*page;
+
+	if (bp->b_flags & XBF_MAPPED)
+		return bp->b_addr + offset;
+
+	offset += bp->b_offset;
+	page = bp->b_pages[offset >> PAGE_SHIFT];
+	return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+}
+
+/*
+ *	Move data into or out of a buffer.
+ */
+void
+xfs_buf_iomove(
+	xfs_buf_t		*bp,	/* buffer to process		*/
+	size_t			boff,	/* starting buffer offset	*/
+	size_t			bsize,	/* length to copy		*/
+	void			*data,	/* data address			*/
+	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
+{
+	size_t			bend, cpoff, csize;
+	struct page		*page;
+
+	bend = boff + bsize;
+	while (boff < bend) {
+		page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
+		cpoff = xfs_buf_poff(boff + bp->b_offset);
+		csize = min_t(size_t,
+			      PAGE_SIZE-cpoff, bp->b_count_desired-boff);
+
+		ASSERT(((csize + cpoff) <= PAGE_SIZE));
+
+		switch (mode) {
+		case XBRW_ZERO:
+			memset(page_address(page) + cpoff, 0, csize);
+			break;
+		case XBRW_READ:
+			memcpy(data, page_address(page) + cpoff, csize);
+			break;
+		case XBRW_WRITE:
+			memcpy(page_address(page) + cpoff, data, csize);
+		}
+
+		boff += csize;
+		data += csize;
+	}
+}
+
+/*
+ *	Handling of buffer targets (buftargs).
+ */
+
+/*
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
+ */
+void
+xfs_wait_buftarg(
+	struct xfs_buftarg	*btp)
+{
+	struct xfs_buf		*bp;
+
+restart:
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+		if (atomic_read(&bp->b_hold) > 1) {
+			spin_unlock(&btp->bt_lru_lock);
+			delay(100);
+			goto restart;
+		}
+		/*
+		 * clear the LRU reference count so the bufer doesn't get
+		 * ignored in xfs_buf_rele().
+		 */
+		atomic_set(&bp->b_lru_ref, 0);
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
+	}
+	spin_unlock(&btp->bt_lru_lock);
+}
+
+int
+xfs_buftarg_shrink(
+	struct shrinker		*shrink,
+	struct shrink_control	*sc)
+{
+	struct xfs_buftarg	*btp = container_of(shrink,
+					struct xfs_buftarg, bt_shrinker);
+	struct xfs_buf		*bp;
+	int nr_to_scan = sc->nr_to_scan;
+	LIST_HEAD(dispose);
+
+	if (!nr_to_scan)
+		return btp->bt_lru_nr;
+
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		if (nr_to_scan-- <= 0)
+			break;
+
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+		/*
+		 * Decrement the b_lru_ref count unless the value is already
+		 * zero. If the value is already zero, we need to reclaim the
+		 * buffer, otherwise it gets another trip through the LRU.
+		 */
+		if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+			list_move_tail(&bp->b_lru, &btp->bt_lru);
+			continue;
+		}
+
+		/*
+		 * remove the buffer from the LRU now to avoid needing another
+		 * lock round trip inside xfs_buf_rele().
+		 */
+		list_move(&bp->b_lru, &dispose);
+		btp->bt_lru_nr--;
+	}
+	spin_unlock(&btp->bt_lru_lock);
+
+	while (!list_empty(&dispose)) {
+		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+		list_del_init(&bp->b_lru);
+		xfs_buf_rele(bp);
+	}
+
+	return btp->bt_lru_nr;
+}
+
+void
+xfs_free_buftarg(
+	struct xfs_mount	*mp,
+	struct xfs_buftarg	*btp)
+{
+	unregister_shrinker(&btp->bt_shrinker);
+
+	xfs_flush_buftarg(btp, 1);
+	if (mp->m_flags & XFS_MOUNT_BARRIER)
+		xfs_blkdev_issue_flush(btp);
+
+	kthread_stop(btp->bt_task);
+	kmem_free(btp);
+}
+
+STATIC int
+xfs_setsize_buftarg_flags(
+	xfs_buftarg_t		*btp,
+	unsigned int		blocksize,
+	unsigned int		sectorsize,
+	int			verbose)
+{
+	btp->bt_bsize = blocksize;
+	btp->bt_sshift = ffs(sectorsize) - 1;
+	btp->bt_smask = sectorsize - 1;
+
+	if (set_blocksize(btp->bt_bdev, sectorsize)) {
+		xfs_warn(btp->bt_mount,
+			"Cannot set_blocksize to %u on device %s\n",
+			sectorsize, xfs_buf_target_name(btp));
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ *	When allocating the initial buffer target we have not yet
+ *	read in the superblock, so don't know what sized sectors
+ *	are being used is at this early stage.  Play safe.
+ */
+STATIC int
+xfs_setsize_buftarg_early(
+	xfs_buftarg_t		*btp,
+	struct block_device	*bdev)
+{
+	return xfs_setsize_buftarg_flags(btp,
+			PAGE_SIZE, bdev_logical_block_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+	xfs_buftarg_t		*btp,
+	unsigned int		blocksize,
+	unsigned int		sectorsize)
+{
+	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
+STATIC int
+xfs_alloc_delwrite_queue(
+	xfs_buftarg_t		*btp,
+	const char		*fsname)
+{
+	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+	spin_lock_init(&btp->bt_delwrite_lock);
+	btp->bt_flags = 0;
+	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
+	if (IS_ERR(btp->bt_task))
+		return PTR_ERR(btp->bt_task);
+	return 0;
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+	struct xfs_mount	*mp,
+	struct block_device	*bdev,
+	int			external,
+	const char		*fsname)
+{
+	xfs_buftarg_t		*btp;
+
+	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+	btp->bt_mount = mp;
+	btp->bt_dev =  bdev->bd_dev;
+	btp->bt_bdev = bdev;
+	btp->bt_bdi = blk_get_backing_dev_info(bdev);
+	if (!btp->bt_bdi)
+		goto error;
+
+	INIT_LIST_HEAD(&btp->bt_lru);
+	spin_lock_init(&btp->bt_lru_lock);
+	if (xfs_setsize_buftarg_early(btp, bdev))
+		goto error;
+	if (xfs_alloc_delwrite_queue(btp, fsname))
+		goto error;
+	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&btp->bt_shrinker);
+	return btp;
+
+error:
+	kmem_free(btp);
+	return NULL;
+}
+
+
+/*
+ *	Delayed write buffer handling
+ */
+STATIC void
+xfs_buf_delwri_queue(
+	xfs_buf_t		*bp,
+	int			unlock)
+{
+	struct list_head	*dwq = &bp->b_target->bt_delwrite_queue;
+	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;
+
+	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
+
+	ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
+
+	spin_lock(dwlk);
+	/* If already in the queue, dequeue and place at tail */
+	if (!list_empty(&bp->b_list)) {
+		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+		if (unlock)
+			atomic_dec(&bp->b_hold);
+		list_del(&bp->b_list);
+	}
+
+	if (list_empty(dwq)) {
+		/* start xfsbufd as it is about to have something to do */
+		wake_up_process(bp->b_target->bt_task);
+	}
+
+	bp->b_flags |= _XBF_DELWRI_Q;
+	list_add_tail(&bp->b_list, dwq);
+	bp->b_queuetime = jiffies;
+	spin_unlock(dwlk);
+
+	if (unlock)
+		xfs_buf_unlock(bp);
+}
+
+void
+xfs_buf_delwri_dequeue(
+	xfs_buf_t		*bp)
+{
+	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;
+	int			dequeued = 0;
+
+	spin_lock(dwlk);
+	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
+		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+		list_del_init(&bp->b_list);
+		dequeued = 1;
+	}
+	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+	spin_unlock(dwlk);
+
+	if (dequeued)
+		xfs_buf_rele(bp);
+
+	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
+}
+
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+	ASSERT(bp->b_flags & XBF_DELWRI);
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	/*
+	 * Check the buffer age before locking the delayed write queue as we
+	 * don't need to promote buffers that are already past the flush age.
+	 */
+	if (bp->b_queuetime < jiffies - age)
+		return;
+	bp->b_queuetime = jiffies - age;
+	spin_lock(&btp->bt_delwrite_lock);
+	list_move(&bp->b_list, &btp->bt_delwrite_queue);
+	spin_unlock(&btp->bt_delwrite_lock);
+}
+
+STATIC void
+xfs_buf_runall_queues(
+	struct workqueue_struct	*queue)
+{
+	flush_workqueue(queue);
+}
+
+/*
+ * Move as many buffers as specified to the supplied list
+ * idicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+	xfs_buftarg_t	*target,
+	struct list_head *list,
+	unsigned long	age)
+{
+	xfs_buf_t	*bp, *n;
+	struct list_head *dwq = &target->bt_delwrite_queue;
+	spinlock_t	*dwlk = &target->bt_delwrite_lock;
+	int		skipped = 0;
+	int		force;
+
+	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+	INIT_LIST_HEAD(list);
+	spin_lock(dwlk);
+	list_for_each_entry_safe(bp, n, dwq, b_list) {
+		ASSERT(bp->b_flags & XBF_DELWRI);
+
+		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
+			if (!force &&
+			    time_before(jiffies, bp->b_queuetime + age)) {
+				xfs_buf_unlock(bp);
+				break;
+			}
+
+			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
+			bp->b_flags |= XBF_WRITE;
+			list_move_tail(&bp->b_list, list);
+			trace_xfs_buf_delwri_split(bp, _RET_IP_);
+		} else
+			skipped++;
+	}
+	spin_unlock(dwlk);
+
+	return skipped;
+
+}
+
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+	void		*priv,
+	struct list_head *a,
+	struct list_head *b)
+{
+	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
+	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
+	xfs_daddr_t		diff;
+
+	diff = ap->b_bn - bp->b_bn;
+	if (diff < 0)
+		return -1;
+	if (diff > 0)
+		return 1;
+	return 0;
+}
+
+STATIC int
+xfsbufd(
+	void		*data)
+{
+	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
+
+	current->flags |= PF_MEMALLOC;
+
+	set_freezable();
+
+	do {
+		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+		struct list_head tmp;
+		struct blk_plug plug;
+
+		if (unlikely(freezing(current))) {
+			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+			refrigerator();
+		} else {
+			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+		}
+
+		/* sleep for a long time if there is nothing to do. */
+		if (list_empty(&target->bt_delwrite_queue))
+			tout = MAX_SCHEDULE_TIMEOUT;
+		schedule_timeout_interruptible(tout);
+
+		xfs_buf_delwri_split(target, &tmp, age);
+		list_sort(NULL, &tmp, xfs_buf_cmp);
+
+		blk_start_plug(&plug);
+		while (!list_empty(&tmp)) {
+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+			list_del_init(&bp->b_list);
+			xfs_bdstrat_cb(bp);
+		}
+		blk_finish_plug(&plug);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/*
+ *	Go through all incore buffers, and release buffers if they belong to
+ *	the given device. This is used in filesystem error handling to
+ *	preserve the consistency of its metadata.
+ */
+int
+xfs_flush_buftarg(
+	xfs_buftarg_t	*target,
+	int		wait)
+{
+	xfs_buf_t	*bp;
+	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);
+	struct blk_plug plug;
+
+	xfs_buf_runall_queues(xfsconvertd_workqueue);
+	xfs_buf_runall_queues(xfsdatad_workqueue);
+	xfs_buf_runall_queues(xfslogd_workqueue);
+
+	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+
+	/*
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
+	 */
+	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+	blk_start_plug(&plug);
+	while (!list_empty(&tmp_list)) {
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
+		ASSERT(target == bp->b_target);
+		list_del_init(&bp->b_list);
+		if (wait) {
+			bp->b_flags &= ~XBF_ASYNC;
+			list_add(&bp->b_list, &wait_list);
+		}
+		xfs_bdstrat_cb(bp);
+	}
+	blk_finish_plug(&plug);
+
+	if (wait) {
+		/* Wait for IO to complete. */
+		while (!list_empty(&wait_list)) {
+			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
+
+			list_del_init(&bp->b_list);
+			xfs_buf_iowait(bp);
+			xfs_buf_relse(bp);
+		}
+	}
+
+	return pincount;
+}
+
+int __init
+xfs_buf_init(void)
+{
+	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+						KM_ZONE_HWALIGN, NULL);
+	if (!xfs_buf_zone)
+		goto out;
+
+	xfslogd_workqueue = alloc_workqueue("xfslogd",
+					WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+	if (!xfslogd_workqueue)
+		goto out_free_buf_zone;
+
+	xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
+	if (!xfsdatad_workqueue)
+		goto out_destroy_xfslogd_workqueue;
+
+	xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+						WQ_MEM_RECLAIM, 1);
+	if (!xfsconvertd_workqueue)
+		goto out_destroy_xfsdatad_workqueue;
+
+	return 0;
+
+ out_destroy_xfsdatad_workqueue:
+	destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+	destroy_workqueue(xfslogd_workqueue);
+ out_free_buf_zone:
+	kmem_zone_destroy(xfs_buf_zone);
+ out:
+	return -ENOMEM;
+}
+
+void
+xfs_buf_terminate(void)
+{
+	destroy_workqueue(xfsconvertd_workqueue);
+	destroy_workqueue(xfsdatad_workqueue);
+	destroy_workqueue(xfslogd_workqueue);
+	kmem_zone_destroy(xfs_buf_zone);
+}
+
+#ifdef CONFIG_KDB_MODULES
+struct list_head *
+xfs_get_buftarg_list(void)
+{
+	return &xfs_buftarg_list;
+}
+#endif
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
new file mode 100644
index 0000000..620972b
--- /dev/null
+++ b/fs/xfs/xfs_buf.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ *	Base types
+ */
+
+#define XFS_BUF_DADDR_NULL	((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp)	((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd)	(((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)	((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa)	((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+	XBRW_READ = 1,			/* transfer into target memory */
+	XBRW_WRITE = 2,			/* transfer from target memory */
+	XBRW_ZERO = 3,			/* Zero target memory */
+} xfs_buf_rw_t;
+
+#define XBF_READ	(1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD	(1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED	(1 << 3) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI	(1 << 6) /* buffer has dirty pages */
+#define XBF_STALE	(1 << 7) /* buffer has been staled, do not find it */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA		(1 << 11)/* force cache write through mode */
+#define XBF_FLUSH	(1 << 12)/* flush the disk cache before a write */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK	(1 << 15)/* lock requested */
+#define XBF_TRYLOCK	(1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK	(1 << 17)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q	(1 << 22)/* buffer on delwri queue */
+
+typedef unsigned int xfs_buf_flags_t;
+
+#define XFS_BUF_FLAGS \
+	{ XBF_READ,		"READ" }, \
+	{ XBF_WRITE,		"WRITE" }, \
+	{ XBF_READ_AHEAD,	"READ_AHEAD" }, \
+	{ XBF_MAPPED,		"MAPPED" }, \
+	{ XBF_ASYNC,		"ASYNC" }, \
+	{ XBF_DONE,		"DONE" }, \
+	{ XBF_DELWRI,		"DELWRI" }, \
+	{ XBF_STALE,		"STALE" }, \
+	{ XBF_SYNCIO,		"SYNCIO" }, \
+	{ XBF_FUA,		"FUA" }, \
+	{ XBF_FLUSH,		"FLUSH" }, \
+	{ XBF_LOCK,		"LOCK" },  	/* should never be set */\
+	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* ditto */\
+	{ XBF_DONT_BLOCK,	"DONT_BLOCK" },	/* ditto */\
+	{ _XBF_PAGES,		"PAGES" }, \
+	{ _XBF_KMEM,		"KMEM" }, \
+	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
+
+typedef enum {
+	XBT_FORCE_SLEEP = 0,
+	XBT_FORCE_FLUSH = 1,
+} xfs_buftarg_flags_t;
+
+typedef struct xfs_buftarg {
+	dev_t			bt_dev;
+	struct block_device	*bt_bdev;
+	struct backing_dev_info	*bt_bdi;
+	struct xfs_mount	*bt_mount;
+	unsigned int		bt_bsize;
+	unsigned int		bt_sshift;
+	size_t			bt_smask;
+
+	/* per device delwri queue */
+	struct task_struct	*bt_task;
+	struct list_head	bt_delwrite_queue;
+	spinlock_t		bt_delwrite_lock;
+	unsigned long		bt_flags;
+
+	/* LRU control structures */
+	struct shrinker		bt_shrinker;
+	struct list_head	bt_lru;
+	spinlock_t		bt_lru_lock;
+	unsigned int		bt_lru_nr;
+} xfs_buftarg_t;
+
+struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+
+#define XB_PAGES	2
+
+typedef struct xfs_buf {
+	/*
+	 * first cacheline holds all the fields needed for an uncontended cache
+	 * hit to be fully processed. The semaphore straddles the cacheline
+	 * boundary, but the counter and lock sits on the first cacheline,
+	 * which is the only bit that is touched if we hit the semaphore
+	 * fast-path on locking.
+	 */
+	struct rb_node		b_rbnode;	/* rbtree node */
+	xfs_off_t		b_file_offset;	/* offset in file */
+	size_t			b_buffer_length;/* size of buffer in bytes */
+	atomic_t		b_hold;		/* reference count */
+	atomic_t		b_lru_ref;	/* lru reclaim ref count */
+	xfs_buf_flags_t		b_flags;	/* status flags */
+	struct semaphore	b_sema;		/* semaphore for lockables */
+
+	struct list_head	b_lru;		/* lru list */
+	wait_queue_head_t	b_waiters;	/* unpin waiters */
+	struct list_head	b_list;
+	struct xfs_perag	*b_pag;		/* contains rbtree root */
+	xfs_buftarg_t		*b_target;	/* buffer target (device) */
+	xfs_daddr_t		b_bn;		/* block number for I/O */
+	size_t			b_count_desired;/* desired transfer size */
+	void			*b_addr;	/* virtual address of buffer */
+	struct work_struct	b_iodone_work;
+	xfs_buf_iodone_t	b_iodone;	/* I/O completion function */
+	struct completion	b_iowait;	/* queue for I/O waiters */
+	void			*b_fspriv;
+	struct xfs_trans	*b_transp;
+	struct page		**b_pages;	/* array of page pointers */
+	struct page		*b_page_array[XB_PAGES]; /* inline pages */
+	unsigned long		b_queuetime;	/* time buffer was queued */
+	atomic_t		b_pin_count;	/* pin count */
+	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
+	unsigned int		b_page_count;	/* size of page array */
+	unsigned int		b_offset;	/* page offset in first page */
+	unsigned short		b_error;	/* error code on I/O */
+#ifdef XFS_BUF_LOCK_TRACKING
+	int			b_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t, xfs_buf_t *);
+#define xfs_incore(buftarg,blkno,len,lockit) \
+	_xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+
+extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t);
+extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
+				xfs_buf_flags_t);
+
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+				struct xfs_buftarg *target,
+				xfs_daddr_t daddr, size_t length, int flags);
+
+/* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
+
+/* Locking and Unlocking Buffers */
+extern int xfs_buf_trylock(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+	((bp)->b_sema.count <= 0)
+
+/* Buffer Read and Write Routines */
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
+
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern void xfs_buf_ioend(xfs_buf_t *,	int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
+				xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
+{
+	return bp ? bp->b_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+
+/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_promote(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
+
+static inline const char *
+xfs_buf_target_name(struct xfs_buftarg *target)
+{
+	static char __b[BDEVNAME_SIZE];
+
+	return bdevname(target->bt_bdev, __b);
+}
+
+
+#define XFS_BUF_ZEROFLAGS(bp) \
+	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp)	xfs_buf_stale(bp);
+#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
+#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)
+#define XFS_BUF_SUPER_STALE(bp)	do {				\
+					XFS_BUF_STALE(bp);	\
+					xfs_buf_delwri_dequeue(bp);	\
+					XFS_BUF_DONE(bp);	\
+				} while (0)
+
+#define XFS_BUF_DELAYWRITE(bp)		((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp)	xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_ASYNC(bp)	((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp)	((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp)	((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp)	((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp)	((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp)	((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)	((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ADDR(bp)		((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)	((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)		((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)	((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)		((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)	((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)		((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)	((bp)->b_buffer_length = (cnt))
+
+static inline void
+xfs_buf_set_ref(
+	struct xfs_buf	*bp,
+	int		lru_ref)
+{
+	atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)	xfs_buf_set_ref(bp, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)		do { } while (0)
+
+static inline int xfs_buf_ispinned(struct xfs_buf *bp)
+{
+	return atomic_read(&bp->b_pin_count);
+}
+
+#define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait);
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+	xfs_buf_unlock(bp);
+	xfs_buf_rele(bp);
+}
+
+/*
+ *	Handling of buftargs.
+ */
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+			struct block_device *, int, const char *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
+extern void xfs_wait_buftarg(xfs_buftarg_t *);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#ifdef CONFIG_KDB_MODULES
+extern struct list_head *xfs_get_buftarg_list(void);
+#endif
+
+#define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg)		xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg)		xfs_flush_buftarg(buftarg, 1)
+
+#endif	/* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
new file mode 100644
index 0000000..244e797
--- /dev/null
+++ b/fs/xfs/xfs_discard.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_fsblock_t		start,
+	xfs_fsblock_t		len,
+	xfs_fsblock_t		minlen,
+	__uint64_t		*blocks_trimmed)
+{
+	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
+	struct xfs_btree_cur	*cur;
+	struct xfs_buf		*agbp;
+	struct xfs_perag	*pag;
+	int			error;
+	int			i;
+
+	pag = xfs_perag_get(mp, agno);
+
+	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	if (error || !agbp)
+		goto out_put_perag;
+
+	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+	/*
+	 * Force out the log.  This means any transactions that might have freed
+	 * space before we took the AGF buffer lock are now on disk, and the
+	 * volatile disk cache is flushed.
+	 */
+	xfs_log_force(mp, XFS_LOG_SYNC);
+
+	/*
+	 * Look up the longest btree in the AGF and start with it.
+	 */
+	error = xfs_alloc_lookup_le(cur, 0,
+				    XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+	if (error)
+		goto out_del_cursor;
+
+	/*
+	 * Loop until we are done with all extents that are large
+	 * enough to be worth discarding.
+	 */
+	while (i) {
+		xfs_agblock_t fbno;
+		xfs_extlen_t flen;
+
+		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+		if (error)
+			goto out_del_cursor;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+		ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+		/*
+		 * Too small?  Give up.
+		 */
+		if (flen < minlen) {
+			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+			goto out_del_cursor;
+		}
+
+		/*
+		 * If the extent is entirely outside of the range we are
+		 * supposed to discard skip it.  Do not bother to trim
+		 * down partially overlapping ranges for now.
+		 */
+		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+		    XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
+			trace_xfs_discard_exclude(mp, agno, fbno, flen);
+			goto next_extent;
+		}
+
+		/*
+		 * If any blocks in the range are still busy, skip the
+		 * discard and try again the next time.
+		 */
+		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+			trace_xfs_discard_busy(mp, agno, fbno, flen);
+			goto next_extent;
+		}
+
+		trace_xfs_discard_extent(mp, agno, fbno, flen);
+		error = -blkdev_issue_discard(bdev,
+				XFS_AGB_TO_DADDR(mp, agno, fbno),
+				XFS_FSB_TO_BB(mp, flen),
+				GFP_NOFS, 0);
+		if (error)
+			goto out_del_cursor;
+		*blocks_trimmed += flen;
+
+next_extent:
+		error = xfs_btree_decrement(cur, 0, &i);
+		if (error)
+			goto out_del_cursor;
+	}
+
+out_del_cursor:
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	xfs_buf_relse(agbp);
+out_put_perag:
+	xfs_perag_put(pag);
+	return error;
+}
+
+int
+xfs_ioc_trim(
+	struct xfs_mount		*mp,
+	struct fstrim_range __user	*urange)
+{
+	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+	unsigned int		granularity = q->limits.discard_granularity;
+	struct fstrim_range	range;
+	xfs_fsblock_t		start, len, minlen;
+	xfs_agnumber_t		start_agno, end_agno, agno;
+	__uint64_t		blocks_trimmed = 0;
+	int			error, last_error = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (!blk_queue_discard(q))
+		return -XFS_ERROR(EOPNOTSUPP);
+	if (copy_from_user(&range, urange, sizeof(range)))
+		return -XFS_ERROR(EFAULT);
+
+	/*
+	 * Truncating down the len isn't actually quite correct, but using
+	 * XFS_B_TO_FSB would mean we trivially get overflows for values
+	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
+	 * used by the fstrim application.  In the end it really doesn't
+	 * matter as trimming blocks is an advisory interface.
+	 */
+	start = XFS_B_TO_FSBT(mp, range.start);
+	len = XFS_B_TO_FSBT(mp, range.len);
+	minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+	start_agno = XFS_FSB_TO_AGNO(mp, start);
+	if (start_agno >= mp->m_sb.sb_agcount)
+		return -XFS_ERROR(EINVAL);
+
+	end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+	if (end_agno >= mp->m_sb.sb_agcount)
+		end_agno = mp->m_sb.sb_agcount - 1;
+
+	for (agno = start_agno; agno <= end_agno; agno++) {
+		error = -xfs_trim_extents(mp, agno, start, len, minlen,
+					  &blocks_trimmed);
+		if (error)
+			last_error = error;
+	}
+
+	if (last_error)
+		return last_error;
+
+	range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+	if (copy_to_user(urange, &range, sizeof(range)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+int
+xfs_discard_extents(
+	struct xfs_mount	*mp,
+	struct list_head	*list)
+{
+	struct xfs_busy_extent	*busyp;
+	int			error = 0;
+
+	list_for_each_entry(busyp, list, list) {
+		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+					 busyp->length);
+
+		error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+				XFS_FSB_TO_BB(mp, busyp->length),
+				GFP_NOFS, 0);
+		if (error && error != EOPNOTSUPP) {
+			xfs_info(mp,
+	 "discard failed for extent [0x%llu,%u], error %d",
+				 (unsigned long long)busyp->bno,
+				 busyp->length,
+				 error);
+			return error;
+		}
+	}
+
+	return 0;
+}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h
new file mode 100644
index 0000000..344879a
--- /dev/null
+++ b/fs/xfs/xfs_discard.h
@@ -0,0 +1,10 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+struct list_head;
+
+extern int	xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+extern int	xfs_discard_extents(struct xfs_mount *, struct list_head *);
+
+#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
new file mode 100644
index 0000000..db62959
--- /dev/null
+++ b/fs/xfs/xfs_dquot.c
@@ -0,0 +1,1454 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+
+/*
+   LOCK ORDER
+
+   inode lock		    (ilock)
+   dquot hash-chain lock    (hashlock)
+   xqm dquot freelist lock  (freelistlock
+   mount's dquot list lock  (mplistlock)
+   user dquot lock - lock ordering among dquots is based on the uid or gid
+   group dquot lock - similar to udquots. Between the two dquots, the udquot
+		      has to be locked first.
+   pin lock - the dquot lock must be held to take this lock.
+   flush lock - ditto.
+*/
+
+#ifdef DEBUG
+xfs_buftarg_t *xfs_dqerror_target;
+int xfs_do_dqerror;
+int xfs_dqreq_num;
+int xfs_dqerror_mod = 33;
+#endif
+
+static struct lock_class_key xfs_dquot_other_class;
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots are above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqinit(
+	xfs_mount_t  *mp,
+	xfs_dqid_t   id,
+	uint	     type)
+{
+	xfs_dquot_t	*dqp;
+	boolean_t	brandnewdquot;
+
+	brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+	dqp->dq_flags = type;
+	dqp->q_core.d_id = cpu_to_be32(id);
+	dqp->q_mount = mp;
+
+	/*
+	 * No need to re-initialize these if this is a reclaimed dquot.
+	 */
+	if (brandnewdquot) {
+		INIT_LIST_HEAD(&dqp->q_freelist);
+		mutex_init(&dqp->q_qlock);
+		init_waitqueue_head(&dqp->q_pinwait);
+
+		/*
+		 * Because we want to use a counting completion, complete
+		 * the flush completion once to allow a single access to
+		 * the flush completion without blocking.
+		 */
+		init_completion(&dqp->q_flush);
+		complete(&dqp->q_flush);
+
+		trace_xfs_dqinit(dqp);
+	} else {
+		/*
+		 * Only the q_core portion was zeroed in dqreclaim_one().
+		 * So, we need to reset others.
+		 */
+		dqp->q_nrefs = 0;
+		dqp->q_blkno = 0;
+		INIT_LIST_HEAD(&dqp->q_mplist);
+		INIT_LIST_HEAD(&dqp->q_hashlist);
+		dqp->q_bufoffset = 0;
+		dqp->q_fileoffset = 0;
+		dqp->q_transp = NULL;
+		dqp->q_gdquot = NULL;
+		dqp->q_res_bcount = 0;
+		dqp->q_res_icount = 0;
+		dqp->q_res_rtbcount = 0;
+		atomic_set(&dqp->q_pincount, 0);
+		dqp->q_hash = NULL;
+		ASSERT(list_empty(&dqp->q_freelist));
+
+		trace_xfs_dqreuse(dqp);
+	}
+
+	/*
+	 * In either case we need to make sure group quotas have a different
+	 * lock class than user quotas, to make sure lockdep knows we can
+	 * locks of one of each at the same time.
+	 */
+	if (!(type & XFS_DQ_USER))
+		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+	/*
+	 * log item gets initialized later
+	 */
+	return (dqp);
+}
+
+/*
+ * This is called to free all the memory associated with a dquot
+ */
+void
+xfs_qm_dqdestroy(
+	xfs_dquot_t	*dqp)
+{
+	ASSERT(list_empty(&dqp->q_freelist));
+
+	mutex_destroy(&dqp->q_qlock);
+	kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+
+	atomic_dec(&xfs_Gqm->qm_totaldquots);
+}
+
+/*
+ * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+	xfs_dqid_t	id,
+	uint		type,
+	xfs_dqblk_t	*d)
+{
+	/*
+	 * Caller has zero'd the entire dquot 'chunk' already.
+	 */
+	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+	d->dd_diskdq.d_id = cpu_to_be32(id);
+	d->dd_diskdq.d_flags = type;
+}
+
+/*
+ * If default limits are in force, push them into the dquot now.
+ * We overwrite the dquot limits only if they are zero and this
+ * is not the root dquot.
+ */
+void
+xfs_qm_adjust_dqlimits(
+	xfs_mount_t		*mp,
+	xfs_disk_dquot_t	*d)
+{
+	xfs_quotainfo_t		*q = mp->m_quotainfo;
+
+	ASSERT(d->d_id);
+
+	if (q->qi_bsoftlimit && !d->d_blk_softlimit)
+		d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+	if (q->qi_bhardlimit && !d->d_blk_hardlimit)
+		d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+	if (q->qi_isoftlimit && !d->d_ino_softlimit)
+		d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
+	if (q->qi_ihardlimit && !d->d_ino_hardlimit)
+		d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
+	if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
+		d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
+	if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
+		d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+}
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case, when enforcement is off).
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
+ */
+void
+xfs_qm_adjust_dqtimers(
+	xfs_mount_t		*mp,
+	xfs_disk_dquot_t	*d)
+{
+	ASSERT(d->d_id);
+
+#ifdef DEBUG
+	if (d->d_blk_hardlimit)
+		ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
+		       be64_to_cpu(d->d_blk_hardlimit));
+	if (d->d_ino_hardlimit)
+		ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
+		       be64_to_cpu(d->d_ino_hardlimit));
+	if (d->d_rtb_hardlimit)
+		ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
+		       be64_to_cpu(d->d_rtb_hardlimit));
+#endif
+
+	if (!d->d_btimer) {
+		if ((d->d_blk_softlimit &&
+		     (be64_to_cpu(d->d_bcount) >=
+		      be64_to_cpu(d->d_blk_softlimit))) ||
+		    (d->d_blk_hardlimit &&
+		     (be64_to_cpu(d->d_bcount) >=
+		      be64_to_cpu(d->d_blk_hardlimit)))) {
+			d->d_btimer = cpu_to_be32(get_seconds() +
+					mp->m_quotainfo->qi_btimelimit);
+		} else {
+			d->d_bwarns = 0;
+		}
+	} else {
+		if ((!d->d_blk_softlimit ||
+		     (be64_to_cpu(d->d_bcount) <
+		      be64_to_cpu(d->d_blk_softlimit))) &&
+		    (!d->d_blk_hardlimit ||
+		    (be64_to_cpu(d->d_bcount) <
+		     be64_to_cpu(d->d_blk_hardlimit)))) {
+			d->d_btimer = 0;
+		}
+	}
+
+	if (!d->d_itimer) {
+		if ((d->d_ino_softlimit &&
+		     (be64_to_cpu(d->d_icount) >=
+		      be64_to_cpu(d->d_ino_softlimit))) ||
+		    (d->d_ino_hardlimit &&
+		     (be64_to_cpu(d->d_icount) >=
+		      be64_to_cpu(d->d_ino_hardlimit)))) {
+			d->d_itimer = cpu_to_be32(get_seconds() +
+					mp->m_quotainfo->qi_itimelimit);
+		} else {
+			d->d_iwarns = 0;
+		}
+	} else {
+		if ((!d->d_ino_softlimit ||
+		     (be64_to_cpu(d->d_icount) <
+		      be64_to_cpu(d->d_ino_softlimit)))  &&
+		    (!d->d_ino_hardlimit ||
+		     (be64_to_cpu(d->d_icount) <
+		      be64_to_cpu(d->d_ino_hardlimit)))) {
+			d->d_itimer = 0;
+		}
+	}
+
+	if (!d->d_rtbtimer) {
+		if ((d->d_rtb_softlimit &&
+		     (be64_to_cpu(d->d_rtbcount) >=
+		      be64_to_cpu(d->d_rtb_softlimit))) ||
+		    (d->d_rtb_hardlimit &&
+		     (be64_to_cpu(d->d_rtbcount) >=
+		      be64_to_cpu(d->d_rtb_hardlimit)))) {
+			d->d_rtbtimer = cpu_to_be32(get_seconds() +
+					mp->m_quotainfo->qi_rtbtimelimit);
+		} else {
+			d->d_rtbwarns = 0;
+		}
+	} else {
+		if ((!d->d_rtb_softlimit ||
+		     (be64_to_cpu(d->d_rtbcount) <
+		      be64_to_cpu(d->d_rtb_softlimit))) &&
+		    (!d->d_rtb_hardlimit ||
+		     (be64_to_cpu(d->d_rtbcount) <
+		      be64_to_cpu(d->d_rtb_hardlimit)))) {
+			d->d_rtbtimer = 0;
+		}
+	}
+}
+
+/*
+ * initialize a buffer full of dquots and log the whole thing
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	xfs_buf_t	*bp)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	xfs_dqblk_t	*d;
+	int		curid, i;
+
+	ASSERT(tp);
+	ASSERT(xfs_buf_islocked(bp));
+
+	d = bp->b_addr;
+
+	/*
+	 * ID of the first dquot in the block - id's are zero based.
+	 */
+	curid = id - (id % q->qi_dqperchunk);
+	ASSERT(curid >= 0);
+	memset(d, 0, BBTOB(q->qi_dqchunklen));
+	for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
+		xfs_qm_dqinit_core(curid, type, d);
+	xfs_trans_dquot_buf(tp, bp,
+			    (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
+			    ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
+			     XFS_BLF_GDQUOT_BUF)));
+	xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+}
+
+
+
+/*
+ * Allocate a block and fill it with dquots.
+ * This is called when the bmapi finds a hole.
+ */
+STATIC int
+xfs_qm_dqalloc(
+	xfs_trans_t	**tpp,
+	xfs_mount_t	*mp,
+	xfs_dquot_t	*dqp,
+	xfs_inode_t	*quotip,
+	xfs_fileoff_t	offset_fsb,
+	xfs_buf_t	**O_bpp)
+{
+	xfs_fsblock_t	firstblock;
+	xfs_bmap_free_t flist;
+	xfs_bmbt_irec_t map;
+	int		nmaps, error, committed;
+	xfs_buf_t	*bp;
+	xfs_trans_t	*tp = *tpp;
+
+	ASSERT(tp != NULL);
+
+	trace_xfs_dqalloc(dqp);
+
+	/*
+	 * Initialize the bmap freelist prior to calling bmapi code.
+	 */
+	xfs_bmap_init(&flist, &firstblock);
+	xfs_ilock(quotip, XFS_ILOCK_EXCL);
+	/*
+	 * Return if this type of quotas is turned off while we didn't
+	 * have an inode lock
+	 */
+	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+		return (ESRCH);
+	}
+
+	xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
+	nmaps = 1;
+	if ((error = xfs_bmapi(tp, quotip,
+			      offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+			      &firstblock,
+			      XFS_QM_DQALLOC_SPACE_RES(mp),
+			      &map, &nmaps, &flist))) {
+		goto error0;
+	}
+	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+	ASSERT(nmaps == 1);
+	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+	       (map.br_startblock != HOLESTARTBLOCK));
+
+	/*
+	 * Keep track of the blkno to save a lookup later
+	 */
+	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+	/* now we can just get the buffer (there's nothing to read yet) */
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+			       dqp->q_blkno,
+			       mp->m_quotainfo->qi_dqchunklen,
+			       0);
+	if (!bp || (error = xfs_buf_geterror(bp)))
+		goto error1;
+	/*
+	 * Make a chunk of dquots out of this buffer and log
+	 * the entire thing.
+	 */
+	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
+			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
+
+	/*
+	 * xfs_bmap_finish() may commit the current transaction and
+	 * start a second transaction if the freelist is not empty.
+	 *
+	 * Since we still want to modify this buffer, we need to
+	 * ensure that the buffer is not released on commit of
+	 * the first transaction and ensure the buffer is added to the
+	 * second transaction.
+	 *
+	 * If there is only one transaction then don't stop the buffer
+	 * from being released when it commits later on.
+	 */
+
+	xfs_trans_bhold(tp, bp);
+
+	if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+		goto error1;
+	}
+
+	if (committed) {
+		tp = *tpp;
+		xfs_trans_bjoin(tp, bp);
+	} else {
+		xfs_trans_bhold_release(tp, bp);
+	}
+
+	*O_bpp = bp;
+	return 0;
+
+      error1:
+	xfs_bmap_cancel(&flist);
+      error0:
+	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+	return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot
+ * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ */
+STATIC int
+xfs_qm_dqtobp(
+	xfs_trans_t		**tpp,
+	xfs_dquot_t		*dqp,
+	xfs_disk_dquot_t	**O_ddpp,
+	xfs_buf_t		**O_bpp,
+	uint			flags)
+{
+	xfs_bmbt_irec_t map;
+	int		nmaps = 1, error;
+	xfs_buf_t	*bp;
+	xfs_inode_t	*quotip = XFS_DQ_TO_QIP(dqp);
+	xfs_mount_t	*mp = dqp->q_mount;
+	xfs_disk_dquot_t *ddq;
+	xfs_dqid_t	id = be32_to_cpu(dqp->q_core.d_id);
+	xfs_trans_t	*tp = (tpp ? *tpp : NULL);
+
+	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+
+	xfs_ilock(quotip, XFS_ILOCK_SHARED);
+	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+		/*
+		 * Return if this type of quotas is turned off while we
+		 * didn't have the quota inode lock.
+		 */
+		xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+		return ESRCH;
+	}
+
+	/*
+	 * Find the block map; no allocations yet
+	 */
+	error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+			  XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+			  NULL, 0, &map, &nmaps, NULL);
+
+	xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+	if (error)
+		return error;
+
+	ASSERT(nmaps == 1);
+	ASSERT(map.br_blockcount == 1);
+
+	/*
+	 * Offset of dquot in the (fixed sized) dquot chunk.
+	 */
+	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+		sizeof(xfs_dqblk_t);
+
+	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+	if (map.br_startblock == HOLESTARTBLOCK) {
+		/*
+		 * We don't allocate unless we're asked to
+		 */
+		if (!(flags & XFS_QMOPT_DQALLOC))
+			return ENOENT;
+
+		ASSERT(tp);
+		error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+					dqp->q_fileoffset, &bp);
+		if (error)
+			return error;
+		tp = *tpp;
+	} else {
+		trace_xfs_dqtobp_read(dqp);
+
+		/*
+		 * store the blkno etc so that we don't have to do the
+		 * mapping all the time
+		 */
+		dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+					   dqp->q_blkno,
+					   mp->m_quotainfo->qi_dqchunklen,
+					   0, &bp);
+		if (error || !bp)
+			return XFS_ERROR(error);
+	}
+
+	ASSERT(xfs_buf_islocked(bp));
+
+	/*
+	 * calculate the location of the dquot inside the buffer.
+	 */
+	ddq = bp->b_addr + dqp->q_bufoffset;
+
+	/*
+	 * A simple sanity check in case we got a corrupted dquot...
+	 */
+	error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
+			   flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+			   "dqtobp");
+	if (error) {
+		if (!(flags & XFS_QMOPT_DQREPAIR)) {
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EIO);
+		}
+	}
+
+	*O_bpp = bp;
+	*O_ddpp = ddq;
+
+	return (0);
+}
+
+
+/*
+ * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
+ * and release the buffer immediately.
+ *
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+	xfs_trans_t	**tpp,
+	xfs_dqid_t	id,
+	xfs_dquot_t	*dqp,	/* dquot to get filled in */
+	uint		flags)
+{
+	xfs_disk_dquot_t *ddqp;
+	xfs_buf_t	 *bp;
+	int		 error;
+	xfs_trans_t	 *tp;
+
+	ASSERT(tpp);
+
+	trace_xfs_dqread(dqp);
+
+	/*
+	 * get a pointer to the on-disk dquot and the buffer containing it
+	 * dqp already knows its own type (GROUP/USER).
+	 */
+	if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
+		return (error);
+	}
+	tp = *tpp;
+
+	/* copy everything from disk dquot to the incore dquot */
+	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+	xfs_qm_dquot_logitem_init(dqp);
+
+	/*
+	 * Reservation counters are defined as reservation plus current usage
+	 * to avoid having to add every time.
+	 */
+	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
+	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
+	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
+
+	/* Mark the buf so that this will stay incore a little longer */
+	XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+	/*
+	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+	 * So we need to release with xfs_trans_brelse().
+	 * The strategy here is identical to that of inodes; we lock
+	 * the dquot in xfs_qm_dqget() before making it accessible to
+	 * others. This is because dquots, like inodes, need a good level of
+	 * concurrency, and we don't want to take locks on the entire buffers
+	 * for dquot accesses.
+	 * Note also that the dquot buffer may even be dirty at this point, if
+	 * this particular dquot was repaired. We still aren't afraid to
+	 * brelse it because we have the changes incore.
+	 */
+	ASSERT(xfs_buf_islocked(bp));
+	xfs_trans_brelse(tp, bp);
+
+	return (error);
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,	 /* gid or uid, depending on type */
+	uint		type,	 /* UDQUOT or GDQUOT */
+	uint		flags,	 /* DQALLOC, DQREPAIR */
+	xfs_dquot_t	**O_dqpp)/* OUT : incore dquot, not locked */
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+	xfs_trans_t	*tp;
+	int		cancelflags=0;
+
+	dqp = xfs_qm_dqinit(mp, id, type);
+	tp = NULL;
+	if (flags & XFS_QMOPT_DQALLOC) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+				XFS_WRITE_LOG_RES(mp) +
+				BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
+				128,
+				0,
+				XFS_TRANS_PERM_LOG_RES,
+				XFS_WRITE_LOG_COUNT);
+		if (error) {
+			cancelflags = 0;
+			goto error0;
+		}
+		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+	}
+
+	/*
+	 * Read it from disk; xfs_dqread() takes care of
+	 * all the necessary initialization of dquot's fields (locks, etc)
+	 */
+	if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
+		/*
+		 * This can happen if quotas got turned off (ESRCH),
+		 * or if the dquot didn't exist on disk and we ask to
+		 * allocate (ENOENT).
+		 */
+		trace_xfs_dqread_fail(dqp);
+		cancelflags |= XFS_TRANS_ABORT;
+		goto error0;
+	}
+	if (tp) {
+		if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
+			goto error1;
+	}
+
+	*O_dqpp = dqp;
+	return (0);
+
+ error0:
+	ASSERT(error);
+	if (tp)
+		xfs_trans_cancel(tp, cancelflags);
+ error1:
+	xfs_qm_dqdestroy(dqp);
+	*O_dqpp = NULL;
+	return (error);
+}
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; and, these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returning dquot is locked.
+ */
+STATIC int
+xfs_qm_dqlookup(
+	xfs_mount_t		*mp,
+	xfs_dqid_t		id,
+	xfs_dqhash_t		*qh,
+	xfs_dquot_t		**O_dqpp)
+{
+	xfs_dquot_t		*dqp;
+	uint			flist_locked;
+
+	ASSERT(mutex_is_locked(&qh->qh_lock));
+
+	flist_locked = B_FALSE;
+
+	/*
+	 * Traverse the hashchain looking for a match
+	 */
+	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
+		/*
+		 * We already have the hashlock. We don't need the
+		 * dqlock to look at the id field of the dquot, since the
+		 * id can't be modified without the hashlock anyway.
+		 */
+		if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
+			trace_xfs_dqlookup_found(dqp);
+
+			/*
+			 * All in core dquots must be on the dqlist of mp
+			 */
+			ASSERT(!list_empty(&dqp->q_mplist));
+
+			xfs_dqlock(dqp);
+			if (dqp->q_nrefs == 0) {
+				ASSERT(!list_empty(&dqp->q_freelist));
+				if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+					trace_xfs_dqlookup_want(dqp);
+
+					/*
+					 * We may have raced with dqreclaim_one()
+					 * (and lost). So, flag that we don't
+					 * want the dquot to be reclaimed.
+					 */
+					dqp->dq_flags |= XFS_DQ_WANT;
+					xfs_dqunlock(dqp);
+					mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+					xfs_dqlock(dqp);
+					dqp->dq_flags &= ~(XFS_DQ_WANT);
+				}
+				flist_locked = B_TRUE;
+			}
+
+			/*
+			 * id couldn't have changed; we had the hashlock all
+			 * along
+			 */
+			ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+
+			if (flist_locked) {
+				if (dqp->q_nrefs != 0) {
+					mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+					flist_locked = B_FALSE;
+				} else {
+					/* take it off the freelist */
+					trace_xfs_dqlookup_freelist(dqp);
+					list_del_init(&dqp->q_freelist);
+					xfs_Gqm->qm_dqfrlist_cnt--;
+				}
+			}
+
+			XFS_DQHOLD(dqp);
+
+			if (flist_locked)
+				mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+			/*
+			 * move the dquot to the front of the hashchain
+			 */
+			ASSERT(mutex_is_locked(&qh->qh_lock));
+			list_move(&dqp->q_hashlist, &qh->qh_list);
+			trace_xfs_dqlookup_done(dqp);
+			*O_dqpp = dqp;
+			return 0;
+		}
+	}
+
+	*O_dqpp = NULL;
+	ASSERT(mutex_is_locked(&qh->qh_lock));
+	return (1);
+}
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
+ * a locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,	  /* locked inode (optional) */
+	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
+	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
+	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
+{
+	xfs_dquot_t	*dqp;
+	xfs_dqhash_t	*h;
+	uint		version;
+	int		error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
+	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+		return (ESRCH);
+	}
+	h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+	if (xfs_do_dqerror) {
+		if ((xfs_dqerror_target == mp->m_ddev_targp) &&
+		    (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+			xfs_debug(mp, "Returning error in dqget");
+			return (EIO);
+		}
+	}
+#endif
+
+ again:
+
+#ifdef DEBUG
+	ASSERT(type == XFS_DQ_USER ||
+	       type == XFS_DQ_PROJ ||
+	       type == XFS_DQ_GROUP);
+	if (ip) {
+		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+		if (type == XFS_DQ_USER)
+			ASSERT(ip->i_udquot == NULL);
+		else
+			ASSERT(ip->i_gdquot == NULL);
+	}
+#endif
+	mutex_lock(&h->qh_lock);
+
+	/*
+	 * Look in the cache (hashtable).
+	 * The chain is kept locked during lookup.
+	 */
+	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+		/*
+		 * The dquot was found, moved to the front of the chain,
+		 * taken off the freelist if it was on it, and locked
+		 * at this point. Just unlock the hashchain and return.
+		 */
+		ASSERT(*O_dqpp);
+		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+		mutex_unlock(&h->qh_lock);
+		trace_xfs_dqget_hit(*O_dqpp);
+		return (0);	/* success */
+	}
+	XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+	/*
+	 * Dquot cache miss. We don't want to keep the inode lock across
+	 * a (potential) disk read. Also we don't want to deal with the lock
+	 * ordering between quotainode and this inode. OTOH, dropping the inode
+	 * lock here means dealing with a chown that can happen before
+	 * we re-acquire the lock.
+	 */
+	if (ip)
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	/*
+	 * Save the hashchain version stamp, and unlock the chain, so that
+	 * we don't keep the lock across a disk read
+	 */
+	version = h->qh_version;
+	mutex_unlock(&h->qh_lock);
+
+	/*
+	 * Allocate the dquot on the kernel heap, and read the ondisk
+	 * portion off the disk. Also, do all the necessary initialization
+	 * This can return ENOENT if dquot didn't exist on disk and we didn't
+	 * ask it to allocate; ESRCH if quotas got turned off suddenly.
+	 */
+	if ((error = xfs_qm_idtodq(mp, id, type,
+				  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+					   XFS_QMOPT_DOWARN),
+				  &dqp))) {
+		if (ip)
+			xfs_ilock(ip, XFS_ILOCK_EXCL);
+		return (error);
+	}
+
+	/*
+	 * See if this is mount code calling to look at the overall quota limits
+	 * which are stored in the id == 0 user or group's dquot.
+	 * Since we may not have done a quotacheck by this point, just return
+	 * the dquot without attaching it to any hashtables, lists, etc, or even
+	 * taking a reference.
+	 * The caller must dqdestroy this once done.
+	 */
+	if (flags & XFS_QMOPT_DQSUSER) {
+		ASSERT(id == 0);
+		ASSERT(! ip);
+		goto dqret;
+	}
+
+	/*
+	 * Dquot lock comes after hashlock in the lock ordering
+	 */
+	if (ip) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		/*
+		 * A dquot could be attached to this inode by now, since
+		 * we had dropped the ilock.
+		 */
+		if (type == XFS_DQ_USER) {
+			if (!XFS_IS_UQUOTA_ON(mp)) {
+				/* inode stays locked on return */
+				xfs_qm_dqdestroy(dqp);
+				return XFS_ERROR(ESRCH);
+			}
+			if (ip->i_udquot) {
+				xfs_qm_dqdestroy(dqp);
+				dqp = ip->i_udquot;
+				xfs_dqlock(dqp);
+				goto dqret;
+			}
+		} else {
+			if (!XFS_IS_OQUOTA_ON(mp)) {
+				/* inode stays locked on return */
+				xfs_qm_dqdestroy(dqp);
+				return XFS_ERROR(ESRCH);
+			}
+			if (ip->i_gdquot) {
+				xfs_qm_dqdestroy(dqp);
+				dqp = ip->i_gdquot;
+				xfs_dqlock(dqp);
+				goto dqret;
+			}
+		}
+	}
+
+	/*
+	 * Hashlock comes after ilock in lock order
+	 */
+	mutex_lock(&h->qh_lock);
+	if (version != h->qh_version) {
+		xfs_dquot_t *tmpdqp;
+		/*
+		 * Now, see if somebody else put the dquot in the
+		 * hashtable before us. This can happen because we didn't
+		 * keep the hashchain lock. We don't have to worry about
+		 * lock order between the two dquots here since dqp isn't
+		 * on any findable lists yet.
+		 */
+		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+			/*
+			 * Duplicate found. Just throw away the new dquot
+			 * and start over.
+			 */
+			xfs_qm_dqput(tmpdqp);
+			mutex_unlock(&h->qh_lock);
+			xfs_qm_dqdestroy(dqp);
+			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+			goto again;
+		}
+	}
+
+	/*
+	 * Put the dquot at the beginning of the hash-chain and mp's list
+	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+	 */
+	ASSERT(mutex_is_locked(&h->qh_lock));
+	dqp->q_hash = h;
+	list_add(&dqp->q_hashlist, &h->qh_list);
+	h->qh_version++;
+
+	/*
+	 * Attach this dquot to this filesystem's list of all dquots,
+	 * kept inside the mount structure in m_quotainfo field
+	 */
+	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+
+	/*
+	 * We return a locked dquot to the caller, with a reference taken
+	 */
+	xfs_dqlock(dqp);
+	dqp->q_nrefs = 1;
+
+	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
+	mp->m_quotainfo->qi_dquots++;
+	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+	mutex_unlock(&h->qh_lock);
+ dqret:
+	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	trace_xfs_dqget_miss(dqp);
+	*O_dqpp = dqp;
+	return (0);
+}
+
+
+/*
+ * Release a reference to the dquot (decrement ref-count)
+ * and unlock it. If there is a group quota attached to this
+ * dquot, carefully release that too without tripping over
+ * deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+	xfs_dquot_t	*dqp)
+{
+	xfs_dquot_t	*gdqp;
+
+	ASSERT(dqp->q_nrefs > 0);
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	trace_xfs_dqput(dqp);
+
+	if (dqp->q_nrefs != 1) {
+		dqp->q_nrefs--;
+		xfs_dqunlock(dqp);
+		return;
+	}
+
+	/*
+	 * drop the dqlock and acquire the freelist and dqlock
+	 * in the right order; but try to get it out-of-order first
+	 */
+	if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+		trace_xfs_dqput_wait(dqp);
+		xfs_dqunlock(dqp);
+		mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+		xfs_dqlock(dqp);
+	}
+
+	while (1) {
+		gdqp = NULL;
+
+		/* We can't depend on nrefs being == 1 here */
+		if (--dqp->q_nrefs == 0) {
+			trace_xfs_dqput_free(dqp);
+
+			list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+			xfs_Gqm->qm_dqfrlist_cnt++;
+
+			/*
+			 * If we just added a udquot to the freelist, then
+			 * we want to release the gdquot reference that
+			 * it (probably) has. Otherwise it'll keep the
+			 * gdquot from getting reclaimed.
+			 */
+			if ((gdqp = dqp->q_gdquot)) {
+				/*
+				 * Avoid a recursive dqput call
+				 */
+				xfs_dqlock(gdqp);
+				dqp->q_gdquot = NULL;
+			}
+		}
+		xfs_dqunlock(dqp);
+
+		/*
+		 * If we had a group quota inside the user quota as a hint,
+		 * release it now.
+		 */
+		if (! gdqp)
+			break;
+		dqp = gdqp;
+	}
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+}
+
+/*
+ * Release a dquot. Flush it if dirty, then dqput() it.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+	xfs_dquot_t	*dqp)
+{
+	if (!dqp)
+		return;
+
+	trace_xfs_dqrele(dqp);
+
+	xfs_dqlock(dqp);
+	/*
+	 * We don't care to flush it if the dquot is dirty here.
+	 * That will create stutters that we want to avoid.
+	 * Instead we do a delayed write when we try to reclaim
+	 * a dirty dquot. Also xfs_sync will take part of the burden...
+	 */
+	xfs_qm_dqput(dqp);
+}
+
+/*
+ * This is the dquot flushing I/O completion routine.  It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk.  It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes..
+ */
+STATIC void
+xfs_qm_dqflush_done(
+	struct xfs_buf		*bp,
+	struct xfs_log_item	*lip)
+{
+	xfs_dq_logitem_t	*qip = (struct xfs_dq_logitem *)lip;
+	xfs_dquot_t		*dqp = qip->qli_dquot;
+	struct xfs_ail		*ailp = lip->li_ailp;
+
+	/*
+	 * We only want to pull the item from the AIL if its
+	 * location in the log has not changed since we started the flush.
+	 * Thus, we only bother if the dquot's lsn has
+	 * not changed. First we check the lsn outside the lock
+	 * since it's cheaper, and then we recheck while
+	 * holding the lock before removing the dquot from the AIL.
+	 */
+	if ((lip->li_flags & XFS_LI_IN_AIL) &&
+	    lip->li_lsn == qip->qli_flush_lsn) {
+
+		/* xfs_trans_ail_delete() drops the AIL lock. */
+		spin_lock(&ailp->xa_lock);
+		if (lip->li_lsn == qip->qli_flush_lsn)
+			xfs_trans_ail_delete(ailp, lip);
+		else
+			spin_unlock(&ailp->xa_lock);
+	}
+
+	/*
+	 * Release the dq's flush lock since we're done with it.
+	 */
+	xfs_dqfunlock(dqp);
+}
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ */
+int
+xfs_qm_dqflush(
+	xfs_dquot_t		*dqp,
+	uint			flags)
+{
+	struct xfs_mount	*mp = dqp->q_mount;
+	struct xfs_buf		*bp;
+	struct xfs_disk_dquot	*ddqp;
+	int			error;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(!completion_done(&dqp->q_flush));
+
+	trace_xfs_dqflush(dqp);
+
+	/*
+	 * If not dirty, or it's pinned and we are not supposed to block, nada.
+	 */
+	if (!XFS_DQ_IS_DIRTY(dqp) ||
+	    (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+		xfs_dqfunlock(dqp);
+		return 0;
+	}
+	xfs_qm_dqunpin_wait(dqp);
+
+	/*
+	 * This may have been unpinned because the filesystem is shutting
+	 * down forcibly. If that's the case we must not write this dquot
+	 * to disk, because the log record didn't make it to disk!
+	 */
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		dqp->dq_flags &= ~XFS_DQ_DIRTY;
+		xfs_dqfunlock(dqp);
+		return XFS_ERROR(EIO);
+	}
+
+	/*
+	 * Get the buffer containing the on-disk dquot
+	 */
+	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+				   mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+	if (error) {
+		ASSERT(error != ENOENT);
+		xfs_dqfunlock(dqp);
+		return error;
+	}
+
+	/*
+	 * Calculate the location of the dquot inside the buffer.
+	 */
+	ddqp = bp->b_addr + dqp->q_bufoffset;
+
+	/*
+	 * A simple sanity check in case we got a corrupted dquot..
+	 */
+	error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+			   XFS_QMOPT_DOWARN, "dqflush (incore copy)");
+	if (error) {
+		xfs_buf_relse(bp);
+		xfs_dqfunlock(dqp);
+		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+		return XFS_ERROR(EIO);
+	}
+
+	/* This is the only portion of data that needs to persist */
+	memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+
+	/*
+	 * Clear the dirty field and remember the flush lsn for later use.
+	 */
+	dqp->dq_flags &= ~XFS_DQ_DIRTY;
+
+	xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+					&dqp->q_logitem.qli_item.li_lsn);
+
+	/*
+	 * Attach an iodone routine so that we can remove this dquot from the
+	 * AIL and release the flush lock once the dquot is synced to disk.
+	 */
+	xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
+				  &dqp->q_logitem.qli_item);
+
+	/*
+	 * If the buffer is pinned then push on the log so we won't
+	 * get stuck waiting in the write for too long.
+	 */
+	if (xfs_buf_ispinned(bp)) {
+		trace_xfs_dqflush_force(dqp);
+		xfs_log_force(mp, 0);
+	}
+
+	if (flags & SYNC_WAIT)
+		error = xfs_bwrite(mp, bp);
+	else
+		xfs_bdwrite(mp, bp);
+
+	trace_xfs_dqflush_done(dqp);
+
+	/*
+	 * dqp is still locked, but caller is free to unlock it now.
+	 */
+	return error;
+
+}
+
+int
+xfs_qm_dqlock_nowait(
+	xfs_dquot_t *dqp)
+{
+	return mutex_trylock(&dqp->q_qlock);
+}
+
+void
+xfs_dqlock(
+	xfs_dquot_t *dqp)
+{
+	mutex_lock(&dqp->q_qlock);
+}
+
+void
+xfs_dqunlock(
+	xfs_dquot_t *dqp)
+{
+	mutex_unlock(&(dqp->q_qlock));
+	if (dqp->q_logitem.qli_dquot == dqp) {
+		/* Once was dqp->q_mount, but might just have been cleared */
+		xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
+					(xfs_log_item_t*)&(dqp->q_logitem));
+	}
+}
+
+
+void
+xfs_dqunlock_nonotify(
+	xfs_dquot_t *dqp)
+{
+	mutex_unlock(&(dqp->q_qlock));
+}
+
+/*
+ * Lock two xfs_dquot structures.
+ *
+ * To avoid deadlocks we always lock the quota structure with
+ * the lowerd id first.
+ */
+void
+xfs_dqlock2(
+	xfs_dquot_t	*d1,
+	xfs_dquot_t	*d2)
+{
+	if (d1 && d2) {
+		ASSERT(d1 != d2);
+		if (be32_to_cpu(d1->q_core.d_id) >
+		    be32_to_cpu(d2->q_core.d_id)) {
+			mutex_lock(&d2->q_qlock);
+			mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
+		} else {
+			mutex_lock(&d1->q_qlock);
+			mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
+		}
+	} else if (d1) {
+		mutex_lock(&d1->q_qlock);
+	} else if (d2) {
+		mutex_lock(&d2->q_qlock);
+	}
+}
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+	xfs_dquot_t	*dqp)
+{
+	xfs_dqhash_t	*qh = dqp->q_hash;
+	xfs_mount_t	*mp = dqp->q_mount;
+
+	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
+	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+
+	xfs_dqlock(dqp);
+	/*
+	 * We really can't afford to purge a dquot that is
+	 * referenced, because these are hard refs.
+	 * It shouldn't happen in general because we went thru _all_ inodes in
+	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
+	 * However it is possible that we have dquots with temporary
+	 * references that are not attached to an inode. e.g. see xfs_setattr().
+	 */
+	if (dqp->q_nrefs != 0) {
+		xfs_dqunlock(dqp);
+		mutex_unlock(&dqp->q_hash->qh_lock);
+		return (1);
+	}
+
+	ASSERT(!list_empty(&dqp->q_freelist));
+
+	/*
+	 * If we're turning off quotas, we have to make sure that, for
+	 * example, we don't delete quota disk blocks while dquots are
+	 * in the process of getting written to those disk blocks.
+	 * This dquot might well be on AIL, and we can't leave it there
+	 * if we're turning off quotas. Basically, we need this flush
+	 * lock, and are willing to block on it.
+	 */
+	if (!xfs_dqflock_nowait(dqp)) {
+		/*
+		 * Block on the flush lock after nudging dquot buffer,
+		 * if it is incore.
+		 */
+		xfs_qm_dqflock_pushbuf_wait(dqp);
+	}
+
+	/*
+	 * XXXIf we're turning this type of quotas off, we don't care
+	 * about the dirty metadata sitting in this dquot. OTOH, if
+	 * we're unmounting, we do care, so we flush it and wait.
+	 */
+	if (XFS_DQ_IS_DIRTY(dqp)) {
+		int	error;
+
+		/* dqflush unlocks dqflock */
+		/*
+		 * Given that dqpurge is a very rare occurrence, it is OK
+		 * that we're holding the hashlist and mplist locks
+		 * across the disk write. But, ... XXXsup
+		 *
+		 * We don't care about getting disk errors here. We need
+		 * to purge this dquot anyway, so we go ahead regardless.
+		 */
+		error = xfs_qm_dqflush(dqp, SYNC_WAIT);
+		if (error)
+			xfs_warn(mp, "%s: dquot %p flush failed",
+				__func__, dqp);
+		xfs_dqflock(dqp);
+	}
+	ASSERT(atomic_read(&dqp->q_pincount) == 0);
+	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+	list_del_init(&dqp->q_hashlist);
+	qh->qh_version++;
+	list_del_init(&dqp->q_mplist);
+	mp->m_quotainfo->qi_dqreclaims++;
+	mp->m_quotainfo->qi_dquots--;
+	/*
+	 * XXX Move this to the front of the freelist, if we can get the
+	 * freelist lock.
+	 */
+	ASSERT(!list_empty(&dqp->q_freelist));
+
+	dqp->q_mount = NULL;
+	dqp->q_hash = NULL;
+	dqp->dq_flags = XFS_DQ_INACTIVE;
+	memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+	xfs_dqfunlock(dqp);
+	xfs_dqunlock(dqp);
+	mutex_unlock(&qh->qh_lock);
+	return (0);
+}
+
+
+/*
+ * Give the buffer a little push if it is incore and
+ * wait on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+	xfs_dquot_t	*dqp)
+{
+	xfs_mount_t	*mp = dqp->q_mount;
+	xfs_buf_t	*bp;
+
+	/*
+	 * Check to see if the dquot has been flushed delayed
+	 * write.  If so, grab its buffer and send it
+	 * out immediately.  We'll be able to acquire
+	 * the flush lock when the I/O completes.
+	 */
+	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+	if (!bp)
+		goto out_lock;
+
+	if (XFS_BUF_ISDELAYWRITE(bp)) {
+		if (xfs_buf_ispinned(bp))
+			xfs_log_force(mp, 0);
+		xfs_buf_delwri_promote(bp);
+		wake_up_process(bp->b_target->bt_task);
+	}
+	xfs_buf_relse(bp);
+out_lock:
+	xfs_dqflock(dqp);
+}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
new file mode 100644
index 0000000..34b7e94
--- /dev/null
+++ b/fs/xfs/xfs_dquot.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both those in its algorithms. They make every attempt
+ * to not be a bottleneck when quotas are on and have minimal impact, if any,
+ * when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+	struct list_head  qh_list;
+	struct mutex	  qh_lock;
+	uint		  qh_version;	/* ever increasing version */
+	uint		  qh_nelems;	/* number of dquots on the list */
+} xfs_dqhash_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The incore dquot structure
+ */
+typedef struct xfs_dquot {
+	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
+	struct list_head q_freelist;	/* global free list of dquots */
+	struct list_head q_mplist;	/* mount's list of dquots */
+	struct list_head q_hashlist;	/* gloabl hash list of dquots */
+	xfs_dqhash_t	*q_hash;	/* the hashchain header */
+	struct xfs_mount*q_mount;	/* filesystem this relates to */
+	struct xfs_trans*q_transp;	/* trans this belongs to currently */
+	uint		 q_nrefs;	/* # active refs from inodes */
+	xfs_daddr_t	 q_blkno;	/* blkno of dquot buffer */
+	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */
+	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */
+
+	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */
+	xfs_disk_dquot_t q_core;	/* actual usage & quotas */
+	xfs_dq_logitem_t q_logitem;	/* dquot log item */
+	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */
+	xfs_qcnt_t	 q_res_icount;	/* total inos allocd+reserved */
+	xfs_qcnt_t	 q_res_rtbcount;/* total realtime blks used+reserved */
+	struct mutex	 q_qlock;	/* quota lock */
+	struct completion q_flush;	/* flush completion queue */
+	atomic_t          q_pincount;	/* dquot pin count */
+	wait_queue_head_t q_pinwait;	/* dquot pinning wait queue */
+} xfs_dquot_t;
+
+/*
+ * Lock hierarchy for q_qlock:
+ *	XFS_QLOCK_NORMAL is the implicit default,
+ * 	XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ */
+enum {
+	XFS_QLOCK_NORMAL = 0,
+	XFS_QLOCK_NESTED,
+};
+
+#define XFS_DQHOLD(dqp)		((dqp)->q_nrefs++)
+
+/*
+ * Manage the q_flush completion queue embedded in the dquot.  This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
+ */
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+	wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+	return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+	complete(&dqp->q_flush);
+}
+
+#define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
+#define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP)
+#define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? \
+				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+				 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+				     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+				     (XFS_IS_OQUOTA_ON((d)->q_mount))))
+
+extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int		xfs_qm_dqpurge(xfs_dquot_t *);
+extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int		xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern void		xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
+					xfs_disk_dquot_t *);
+extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
+					xfs_disk_dquot_t *);
+extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+					xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void		xfs_qm_dqput(xfs_dquot_t *);
+extern void		xfs_dqlock(xfs_dquot_t *);
+extern void		xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void		xfs_dqunlock(xfs_dquot_t *);
+extern void		xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
new file mode 100644
index 0000000..9e0e2fa
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
+{
+	return container_of(lip, struct xfs_dq_logitem, qli_item);
+}
+
+/*
+ * returns the number of iovecs needed to log the given dquot item.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+	struct xfs_log_item	*lip)
+{
+	/*
+	 * we need only two iovecs, one for the format, one for the real thing
+	 */
+	return 2;
+}
+
+/*
+ * fills in the vector of log iovecs for the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+	struct xfs_log_item	*lip,
+	struct xfs_log_iovec	*logvec)
+{
+	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
+
+	logvec->i_addr = &qlip->qli_format;
+	logvec->i_len  = sizeof(xfs_dq_logformat_t);
+	logvec->i_type = XLOG_REG_TYPE_QFORMAT;
+	logvec++;
+	logvec->i_addr = &qlip->qli_dquot->q_core;
+	logvec->i_len  = sizeof(xfs_disk_dquot_t);
+	logvec->i_type = XLOG_REG_TYPE_DQUOT;
+
+	ASSERT(2 == lip->li_desc->lid_size);
+	qlip->qli_format.qlf_size = 2;
+
+}
+
+/*
+ * Increment the pin count of the given dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	atomic_inc(&dqp->q_pincount);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_dqwait_unpin() if the count goes to 0.	 The
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
+ */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+	struct xfs_log_item	*lip,
+	int			remove)
+{
+	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+	ASSERT(atomic_read(&dqp->q_pincount) > 0);
+	if (atomic_dec_and_test(&dqp->q_pincount))
+		wake_up(&dqp->q_pinwait);
+}
+
+/*
+ * Given the logitem, this writes the corresponding dquot entry to disk
+ * asynchronously. This is called with the dquot entry securely locked;
+ * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
+ * at the end.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+	int			error;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(!completion_done(&dqp->q_flush));
+
+	/*
+	 * Since we were able to lock the dquot's flush lock and
+	 * we found it on the AIL, the dquot must be dirty.  This
+	 * is because the dquot is removed from the AIL while still
+	 * holding the flush lock in xfs_dqflush_done().  Thus, if
+	 * we found it in the AIL and were able to obtain the flush
+	 * lock without sleeping, then there must not have been
+	 * anyone in the process of flushing the dquot.
+	 */
+	error = xfs_qm_dqflush(dqp, 0);
+	if (error)
+		xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+			__func__, error, dqp);
+	xfs_dqunlock(dqp);
+}
+
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+	/*
+	 * We always re-log the entire dquot when it becomes dirty,
+	 * so, the latest copy _is_ the only one that matters.
+	 */
+	return lsn;
+}
+
+/*
+ * This is called to wait for the given dquot to be unpinned.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+	struct xfs_dquot	*dqp)
+{
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	if (atomic_read(&dqp->q_pincount) == 0)
+		return;
+
+	/*
+	 * Give the log a push so we don't wait here too long.
+	 */
+	xfs_log_force(dqp->q_mount, 0);
+	wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.
+ *
+ * We must not be holding the AIL lock at this point. Calling incore() to
+ * search the buffer cache can be a time consuming thing, and AIL lock is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
+	struct xfs_dquot	*dqp = qlip->qli_dquot;
+	struct xfs_buf		*bp;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	/*
+	 * If flushlock isn't locked anymore, chances are that the
+	 * inode flush completed and the inode was taken off the AIL.
+	 * So, just get out.
+	 */
+	if (completion_done(&dqp->q_flush) ||
+	    !(lip->li_flags & XFS_LI_IN_AIL)) {
+		xfs_dqunlock(dqp);
+		return;
+	}
+
+	bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+			dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+	xfs_dqunlock(dqp);
+	if (!bp)
+		return;
+	if (XFS_BUF_ISDELAYWRITE(bp))
+		xfs_buf_delwri_promote(bp);
+	xfs_buf_relse(bp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item.  Don't sleep on the dquot lock or the flush lock.
+ * If the flush lock is already held, indicating that the dquot has
+ * been or is in the process of being flushed, then see if we can
+ * find the dquot's buffer in the buffer cache without sleeping.  If
+ * we can and it is marked delayed write, then we want to send it out.
+ * We delay doing so until the push routine, though, to avoid sleeping
+ * in any device strategy routines.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+	if (atomic_read(&dqp->q_pincount) > 0)
+		return XFS_ITEM_PINNED;
+
+	if (!xfs_qm_dqlock_nowait(dqp))
+		return XFS_ITEM_LOCKED;
+
+	if (!xfs_dqflock_nowait(dqp)) {
+		/*
+		 * dquot has already been flushed to the backing buffer,
+		 * leave it locked, pushbuf routine will unlock it.
+		 */
+		return XFS_ITEM_PUSHBUF;
+	}
+
+	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+	return XFS_ITEM_SUCCESS;
+}
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the fields of the dquot and dquot log item that
+ * are specific to the current transaction.  If the
+ * hold flags is set, do not unlock the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+	struct xfs_log_item	*lip)
+{
+	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	/*
+	 * Clear the transaction pointer in the dquot
+	 */
+	dqp->q_transp = NULL;
+
+	/*
+	 * dquots are never 'held' from getting unlocked at the end of
+	 * a transaction.  Their locking and unlocking is hidden inside the
+	 * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+	 * for the logitem.
+	 */
+	xfs_dqunlock(dqp);
+}
+
+/*
+ * this needs to stamp an lsn into the dquot, I think.
+ * rpc's that look at user dquot's would then have to
+ * push on the dependency recorded in the dquot
+ */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+}
+
+/*
+ * This is the ops vector for dquots
+ */
+static struct xfs_item_ops xfs_dquot_item_ops = {
+	.iop_size	= xfs_qm_dquot_logitem_size,
+	.iop_format	= xfs_qm_dquot_logitem_format,
+	.iop_pin	= xfs_qm_dquot_logitem_pin,
+	.iop_unpin	= xfs_qm_dquot_logitem_unpin,
+	.iop_trylock	= xfs_qm_dquot_logitem_trylock,
+	.iop_unlock	= xfs_qm_dquot_logitem_unlock,
+	.iop_committed	= xfs_qm_dquot_logitem_committed,
+	.iop_push	= xfs_qm_dquot_logitem_push,
+	.iop_pushbuf	= xfs_qm_dquot_logitem_pushbuf,
+	.iop_committing = xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+	struct xfs_dquot	*dqp)
+{
+	struct xfs_dq_logitem	*lp = &dqp->q_logitem;
+
+	xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
+					&xfs_dquot_item_ops);
+	lp->qli_dquot = dqp;
+	lp->qli_format.qlf_type = XFS_LI_DQUOT;
+	lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
+	lp->qli_format.qlf_blkno = dqp->q_blkno;
+	lp->qli_format.qlf_len = 1;
+	/*
+	 * This is just the offset of this dquot within its buffer
+	 * (which is currently 1 FSB and probably won't change).
+	 * Hence 32 bits for this offset should be just fine.
+	 * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+	 * here, and recompute it at recovery time.
+	 */
+	lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
+
+/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
+
+static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
+{
+	return container_of(lip, struct xfs_qoff_logitem, qql_item);
+}
+
+
+/*
+ * This returns the number of iovecs needed to log the given quotaoff item.
+ * We only need 1 iovec for an quotaoff item.  It just logs the
+ * quotaoff_log_format structure.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_size(
+	struct xfs_log_item	*lip)
+{
+	return 1;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given quotaoff log item. We use only 1 iovec, and we point that
+ * at the quotaoff_log_format structure embedded in the quotaoff item.
+ * It is at this point that we assert that all of the extent
+ * slots in the quotaoff item have been filled.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(
+	struct xfs_log_item	*lip,
+	struct xfs_log_iovec	*log_vector)
+{
+	struct xfs_qoff_logitem	*qflip = QOFF_ITEM(lip);
+
+	ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+	log_vector->i_addr = &qflip->qql_format;
+	log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+	log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
+	qflip->qql_format.qf_size = 1;
+}
+
+/*
+ * Pinning has no meaning for an quotaoff item, so just return.
+ */
+STATIC void
+xfs_qm_qoff_logitem_pin(
+	struct xfs_log_item	*lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for an quotaoff item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unpin(
+	struct xfs_log_item	*lip,
+	int			remove)
+{
+}
+
+/*
+ * Quotaoff items have no locking, so just return success.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_trylock(
+	struct xfs_log_item	*lip)
+{
+	return XFS_ITEM_LOCKED;
+}
+
+/*
+ * Quotaoff items have no locking or pushing, so return failure
+ * so that the caller doesn't bother with us.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unlock(
+	struct xfs_log_item	*lip)
+{
+}
+
+/*
+ * The quotaoff-start-item is logged only once and cannot be moved in the log,
+ * so simply return the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+	return lsn;
+}
+
+/*
+ * There isn't much you can do to push on an quotaoff item.  It is simply
+ * stuck waiting for the log to be flushed to disk.
+ */
+STATIC void
+xfs_qm_qoff_logitem_push(
+	struct xfs_log_item	*lip)
+{
+}
+
+
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		lsn)
+{
+	struct xfs_qoff_logitem	*qfe = QOFF_ITEM(lip);
+	struct xfs_qoff_logitem	*qfs = qfe->qql_start_lip;
+	struct xfs_ail		*ailp = qfs->qql_item.li_ailp;
+
+	/*
+	 * Delete the qoff-start logitem from the AIL.
+	 * xfs_trans_ail_delete() drops the AIL lock.
+	 */
+	spin_lock(&ailp->xa_lock);
+	xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
+
+	kmem_free(qfs);
+	kmem_free(qfe);
+	return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this.  I think we can
+ * just ignore it.  The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen?  Also, do we need different routines for
+ * quotaoff start and quotaoff end?  I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable).  If we do something else,
+ * then maybe we don't need two.
+ */
+STATIC void
+xfs_qm_qoff_logitem_committing(
+	struct xfs_log_item	*lip,
+	xfs_lsn_t		commit_lsn)
+{
+}
+
+static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+	.iop_size	= xfs_qm_qoff_logitem_size,
+	.iop_format	= xfs_qm_qoff_logitem_format,
+	.iop_pin	= xfs_qm_qoff_logitem_pin,
+	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
+	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
+	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
+	.iop_committed	= xfs_qm_qoffend_logitem_committed,
+	.iop_push	= xfs_qm_qoff_logitem_push,
+	.iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * This is the ops vector shared by all quotaoff-start log items.
+ */
+static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+	.iop_size	= xfs_qm_qoff_logitem_size,
+	.iop_format	= xfs_qm_qoff_logitem_format,
+	.iop_pin	= xfs_qm_qoff_logitem_pin,
+	.iop_unpin	= xfs_qm_qoff_logitem_unpin,
+	.iop_trylock	= xfs_qm_qoff_logitem_trylock,
+	.iop_unlock	= xfs_qm_qoff_logitem_unlock,
+	.iop_committed	= xfs_qm_qoff_logitem_committed,
+	.iop_push	= xfs_qm_qoff_logitem_push,
+	.iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate and initialize an quotaoff item of the correct quota type(s).
+ */
+struct xfs_qoff_logitem *
+xfs_qm_qoff_logitem_init(
+	struct xfs_mount	*mp,
+	struct xfs_qoff_logitem	*start,
+	uint			flags)
+{
+	struct xfs_qoff_logitem	*qf;
+
+	qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
+
+	xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
+			&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
+	qf->qql_item.li_mountp = mp;
+	qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
+	qf->qql_format.qf_flags = flags;
+	qf->qql_start_lip = start;
+	return qf;
+}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
new file mode 100644
index 0000000..5acae2a
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {
+	xfs_log_item_t		 qli_item;	   /* common portion */
+	struct xfs_dquot	*qli_dquot;	   /* dquot ptr */
+	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */
+	xfs_dq_logformat_t	 qli_format;	   /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {
+	xfs_log_item_t		 qql_item;	/* common portion */
+	struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+	xfs_qoff_logformat_t	 qql_format;	/* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void		   xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+					struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+					struct xfs_qoff_logitem *, uint);
+extern void		   xfs_trans_log_quotaoff_item(struct xfs_trans *,
+					struct xfs_qoff_logitem *);
+
+#endif	/* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
new file mode 100644
index 0000000..75e5d32
--- /dev/null
+++ b/fs/xfs/xfs_export.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_mount.h"
+#include "xfs_export.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+/*
+ * Note that we only accept fileids which are long enough rather than allow
+ * the parent generation number to default to zero.  XFS considers zero a
+ * valid generation number not an invalid/wildcard value.
+ */
+static int xfs_fileid_length(int fileid_type)
+{
+	switch (fileid_type) {
+	case FILEID_INO32_GEN:
+		return 2;
+	case FILEID_INO32_GEN_PARENT:
+		return 4;
+	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+		return 3;
+	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+		return 6;
+	}
+	return 255; /* invalid */
+}
+
+STATIC int
+xfs_fs_encode_fh(
+	struct dentry		*dentry,
+	__u32			*fh,
+	int			*max_len,
+	int			connectable)
+{
+	struct fid		*fid = (struct fid *)fh;
+	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fh;
+	struct inode		*inode = dentry->d_inode;
+	int			fileid_type;
+	int			len;
+
+	/* Directories don't need their parent encoded, they have ".." */
+	if (S_ISDIR(inode->i_mode) || !connectable)
+		fileid_type = FILEID_INO32_GEN;
+	else
+		fileid_type = FILEID_INO32_GEN_PARENT;
+
+	/*
+	 * If the the filesystem may contain 64bit inode numbers, we need
+	 * to use larger file handles that can represent them.
+	 *
+	 * While we only allocate inodes that do not fit into 32 bits any
+	 * large enough filesystem may contain them, thus the slightly
+	 * confusing looking conditional below.
+	 */
+	if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+	    (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
+		fileid_type |= XFS_FILEID_TYPE_64FLAG;
+
+	/*
+	 * Only encode if there is enough space given.  In practice
+	 * this means we can't export a filesystem with 64bit inodes
+	 * over NFSv2 with the subtree_check export option; the other
+	 * seven combinations work.  The real answer is "don't use v2".
+	 */
+	len = xfs_fileid_length(fileid_type);
+	if (*max_len < len) {
+		*max_len = len;
+		return 255;
+	}
+	*max_len = len;
+
+	switch (fileid_type) {
+	case FILEID_INO32_GEN_PARENT:
+		spin_lock(&dentry->d_lock);
+		fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+		fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
+		spin_unlock(&dentry->d_lock);
+		/*FALLTHRU*/
+	case FILEID_INO32_GEN:
+		fid->i32.ino = inode->i_ino;
+		fid->i32.gen = inode->i_generation;
+		break;
+	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+		spin_lock(&dentry->d_lock);
+		fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+		fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
+		spin_unlock(&dentry->d_lock);
+		/*FALLTHRU*/
+	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+		fid64->ino = inode->i_ino;
+		fid64->gen = inode->i_generation;
+		break;
+	}
+
+	return fileid_type;
+}
+
+STATIC struct inode *
+xfs_nfs_get_inode(
+	struct super_block	*sb,
+	u64			ino,
+	u32			generation)
+ {
+ 	xfs_mount_t		*mp = XFS_M(sb);
+	xfs_inode_t		*ip;
+	int			error;
+
+	/*
+	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
+	 */
+	if (ino == 0)
+		return ERR_PTR(-ESTALE);
+
+	/*
+	 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+	 * fine and not an indication of a corrupted filesystem as clients can
+	 * send invalid file handles and we have to handle it gracefully..
+	 */
+	error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
+	if (error) {
+		/*
+		 * EINVAL means the inode cluster doesn't exist anymore.
+		 * This implies the filehandle is stale, so we should
+		 * translate it here.
+		 * We don't use ESTALE directly down the chain to not
+		 * confuse applications using bulkstat that expect EINVAL.
+		 */
+		if (error == EINVAL || error == ENOENT)
+			error = ESTALE;
+		return ERR_PTR(-error);
+	}
+
+	if (ip->i_d.di_gen != generation) {
+		IRELE(ip);
+		return ERR_PTR(-ESTALE);
+	}
+
+	return VFS_I(ip);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		 int fh_len, int fileid_type)
+{
+	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fid;
+	struct inode		*inode = NULL;
+
+	if (fh_len < xfs_fileid_length(fileid_type))
+		return NULL;
+
+	switch (fileid_type) {
+	case FILEID_INO32_GEN_PARENT:
+	case FILEID_INO32_GEN:
+		inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
+		break;
+	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+		inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
+		break;
+	}
+
+	return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+		 int fh_len, int fileid_type)
+{
+	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fid;
+	struct inode		*inode = NULL;
+
+	switch (fileid_type) {
+	case FILEID_INO32_GEN_PARENT:
+		inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
+					      fid->i32.parent_gen);
+		break;
+	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+		inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
+					      fid64->parent_gen);
+		break;
+	}
+
+	return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_get_parent(
+	struct dentry		*child)
+{
+	int			error;
+	struct xfs_inode	*cip;
+
+	error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+	if (unlikely(error))
+		return ERR_PTR(-error);
+
+	return d_obtain_alias(VFS_I(cip));
+}
+
+STATIC int
+xfs_fs_nfs_commit_metadata(
+	struct inode		*inode)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_ipincount(ip)) {
+		error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
+				XFS_LOG_SYNC, NULL);
+	}
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	return error;
+}
+
+const struct export_operations xfs_export_operations = {
+	.encode_fh		= xfs_fs_encode_fh,
+	.fh_to_dentry		= xfs_fs_fh_to_dentry,
+	.fh_to_parent		= xfs_fs_fh_to_parent,
+	.get_parent		= xfs_fs_get_parent,
+	.commit_metadata	= xfs_fs_nfs_commit_metadata,
+};
diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h
new file mode 100644
index 0000000..3272b6a
--- /dev/null
+++ b/fs/xfs/xfs_export.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_EXPORT_H__
+#define __XFS_EXPORT_H__
+
+/*
+ * Common defines for code related to exporting XFS filesystems over NFS.
+ *
+ * The NFS fileid goes out on the wire as an array of
+ * 32bit unsigned ints in host order.  There are 5 possible
+ * formats.
+ *
+ * (1)	fileid_type=0x00
+ *	(no fileid data; handled by the generic code)
+ *
+ * (2)	fileid_type=0x01
+ *	inode-num
+ *	generation
+ *
+ * (3)	fileid_type=0x02
+ *	inode-num
+ *	generation
+ *	parent-inode-num
+ *	parent-generation
+ *
+ * (4)	fileid_type=0x81
+ *	inode-num-lo32
+ *	inode-num-hi32
+ *	generation
+ *
+ * (5)	fileid_type=0x82
+ *	inode-num-lo32
+ *	inode-num-hi32
+ *	generation
+ *	parent-inode-num-lo32
+ *	parent-inode-num-hi32
+ *	parent-generation
+ *
+ * Note, the NFS filehandle also includes an fsid portion which
+ * may have an inode number in it.  That number is hardcoded to
+ * 32bits and there is no way for XFS to intercept it.  In
+ * practice this means when exporting an XFS filesystem with 64bit
+ * inodes you should either export the mountpoint (rather than
+ * a subdirectory) or use the "fsid" export option.
+ */
+
+struct xfs_fid64 {
+	u64 ino;
+	u32 gen;
+	u64 parent_ino;
+	u32 parent_gen;
+} __attribute__((packed));
+
+/* This flag goes on the wire.  Don't play with it. */
+#define XFS_FILEID_TYPE_64FLAG	0x80	/* NFS fileid has 64bit inodes */
+
+#endif	/* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
new file mode 100644
index 0000000..7f7b424
--- /dev/null
+++ b/fs/xfs/xfs_file.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
+#include "xfs_trace.h"
+
+#include <linux/dcache.h>
+#include <linux/falloc.h>
+
+static const struct vm_operations_struct xfs_file_vm_ops;
+
+/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_lock(&VFS_I(ip)->i_mutex);
+	xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_iunlock(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+	struct xfs_inode	*ip,
+	int			type)
+{
+	xfs_ilock_demote(ip, type);
+	if (type & XFS_IOLOCK_EXCL)
+		mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
+ *	xfs_iozero
+ *
+ *	xfs_iozero clears the specified range of buffer supplied,
+ *	and marks all the affected blocks as valid and modified.  If
+ *	an affected block is not allocated, it will be allocated.  If
+ *	an affected block is not completely overwritten, and is not
+ *	valid before the operation, it will be read from disk before
+ *	being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+	struct xfs_inode	*ip,	/* inode			*/
+	loff_t			pos,	/* offset in file		*/
+	size_t			count)	/* size of data to zero		*/
+{
+	struct page		*page;
+	struct address_space	*mapping;
+	int			status;
+
+	mapping = VFS_I(ip)->i_mapping;
+	do {
+		unsigned offset, bytes;
+		void *fsdata;
+
+		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+		bytes = PAGE_CACHE_SIZE - offset;
+		if (bytes > count)
+			bytes = count;
+
+		status = pagecache_write_begin(NULL, mapping, pos, bytes,
+					AOP_FLAG_UNINTERRUPTIBLE,
+					&page, &fsdata);
+		if (status)
+			break;
+
+		zero_user(page, offset, bytes);
+
+		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+					page, fsdata);
+		WARN_ON(status <= 0); /* can't return less than zero! */
+		pos += bytes;
+		count -= bytes;
+		status = 0;
+	} while (count);
+
+	return (-status);
+}
+
+STATIC int
+xfs_file_fsync(
+	struct file		*file,
+	loff_t			start,
+	loff_t			end,
+	int			datasync)
+{
+	struct inode		*inode = file->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error = 0;
+	int			log_flushed = 0;
+
+	trace_xfs_file_fsync(ip);
+
+	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (error)
+		return error;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	xfs_iflags_clear(ip, XFS_ITRUNCATED);
+
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	xfs_ioend_wait(ip);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	if (mp->m_flags & XFS_MOUNT_BARRIER) {
+		/*
+		 * If we have an RT and/or log subvolume we need to make sure
+		 * to flush the write cache the device used for file data
+		 * first.  This is to ensure newly written file data make
+		 * it to disk before logging the new inode size in case of
+		 * an extending write.
+		 */
+		if (XFS_IS_REALTIME_INODE(ip))
+			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+		else if (mp->m_logdev_targp != mp->m_ddev_targp)
+			xfs_blkdev_issue_flush(mp->m_ddev_targp);
+	}
+
+	/*
+	 * We always need to make sure that the required inode state is safe on
+	 * disk.  The inode might be clean but we still might need to force the
+	 * log because of committed transactions that haven't hit the disk yet.
+	 * Likewise, there could be unflushed non-transactional changes to the
+	 * inode core that have to go to disk and this requires us to issue
+	 * a synchronous transaction to capture these changes correctly.
+	 *
+	 * This code relies on the assumption that if the i_update_core field
+	 * of the inode is clear and the inode is unpinned then it is clean
+	 * and no action is required.
+	 */
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	/*
+	 * First check if the VFS inode is marked dirty.  All the dirtying
+	 * of non-transactional updates no goes through mark_inode_dirty*,
+	 * which allows us to distinguish beteeen pure timestamp updates
+	 * and i_size updates which need to be caught for fdatasync.
+	 * After that also theck for the dirty state in the XFS inode, which
+	 * might gets cleared when the inode gets written out via the AIL
+	 * or xfs_iflush_cluster.
+	 */
+	if (((inode->i_state & I_DIRTY_DATASYNC) ||
+	    ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+	    ip->i_update_core) {
+		/*
+		 * Kick off a transaction to log the inode core to get the
+		 * updates.  The sync transaction will also force the log.
+		 */
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+		error = xfs_trans_reserve(tp, 0,
+				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			return -error;
+		}
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+		/*
+		 * Note - it's possible that we might have pushed ourselves out
+		 * of the way during trans_reserve which would flush the inode.
+		 * But there's no guarantee that the inode buffer has actually
+		 * gone out yet (it's delwri).	Plus the buffer could be pinned
+		 * anyway if it's part of an inode in another recent
+		 * transaction.	 So we play it safe and fire off the
+		 * transaction anyway.
+		 */
+		xfs_trans_ijoin(tp, ip);
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		xfs_trans_set_sync(tp);
+		error = _xfs_trans_commit(tp, 0, &log_flushed);
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	} else {
+		/*
+		 * Timestamps/size haven't changed since last inode flush or
+		 * inode transaction commit.  That means either nothing got
+		 * written or a transaction committed which caught the updates.
+		 * If the latter happened and the transaction hasn't hit the
+		 * disk yet, the inode will be still be pinned.  If it is,
+		 * force the log.
+		 */
+		if (xfs_ipincount(ip)) {
+			error = _xfs_log_force_lsn(mp,
+					ip->i_itemp->ili_last_lsn,
+					XFS_LOG_SYNC, &log_flushed);
+		}
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	}
+
+	/*
+	 * If we only have a single device, and the log force about was
+	 * a no-op we might have to flush the data device cache here.
+	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
+	 * an already allocated file and thus do not have any metadata to
+	 * commit.
+	 */
+	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+	    mp->m_logdev_targp == mp->m_ddev_targp &&
+	    !XFS_IS_REALTIME_INODE(ip) &&
+	    !log_flushed)
+		xfs_blkdev_issue_flush(mp->m_ddev_targp);
+
+	return -error;
+}
+
+STATIC ssize_t
+xfs_file_aio_read(
+	struct kiocb		*iocb,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			pos)
+{
+	struct file		*file = iocb->ki_filp;
+	struct inode		*inode = file->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	size_t			size = 0;
+	ssize_t			ret = 0;
+	int			ioflags = 0;
+	xfs_fsize_t		n;
+	unsigned long		seg;
+
+	XFS_STATS_INC(xs_read_calls);
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	if (unlikely(file->f_flags & O_DIRECT))
+		ioflags |= IO_ISDIRECT;
+	if (file->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	/* START copy & waste from filemap.c */
+	for (seg = 0; seg < nr_segs; seg++) {
+		const struct iovec *iv = &iovp[seg];
+
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		size += iv->iov_len;
+		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+			return XFS_ERROR(-EINVAL);
+	}
+	/* END copy & waste from filemap.c */
+
+	if (unlikely(ioflags & IO_ISDIRECT)) {
+		xfs_buftarg_t	*target =
+			XFS_IS_REALTIME_INODE(ip) ?
+				mp->m_rtdev_targp : mp->m_ddev_targp;
+		if ((iocb->ki_pos & target->bt_smask) ||
+		    (size & target->bt_smask)) {
+			if (iocb->ki_pos == ip->i_size)
+				return 0;
+			return -XFS_ERROR(EINVAL);
+		}
+	}
+
+	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+	if (n <= 0 || size == 0)
+		return 0;
+
+	if (n < size)
+		size = n;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	if (unlikely(ioflags & IO_ISDIRECT)) {
+		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
+		if (inode->i_mapping->nrpages) {
+			ret = -xfs_flushinval_pages(ip,
+					(iocb->ki_pos & PAGE_CACHE_MASK),
+					-1, FI_REMAPF_LOCKED);
+			if (ret) {
+				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+				return ret;
+			}
+		}
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+	} else
+		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
+
+	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
+	if (ret > 0)
+		XFS_STATS_ADD(xs_read_bytes, ret);
+
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_splice_read(
+	struct file		*infilp,
+	loff_t			*ppos,
+	struct pipe_inode_info	*pipe,
+	size_t			count,
+	unsigned int		flags)
+{
+	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
+	int			ioflags = 0;
+	ssize_t			ret;
+
+	XFS_STATS_INC(xs_read_calls);
+
+	if (infilp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+
+	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+	if (ret > 0)
+		XFS_STATS_ADD(xs_read_bytes, ret);
+
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+	return ret;
+}
+
+STATIC void
+xfs_aio_write_isize_update(
+	struct inode	*inode,
+	loff_t		*ppos,
+	ssize_t		bytes_written)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fsize_t		isize = i_size_read(inode);
+
+	if (bytes_written > 0)
+		XFS_STATS_ADD(xs_write_bytes, bytes_written);
+
+	if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
+					*ppos > isize))
+		*ppos = isize;
+
+	if (*ppos > ip->i_size) {
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+		if (*ppos > ip->i_size)
+			ip->i_size = *ppos;
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+	}
+}
+
+/*
+ * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
+ * part of the I/O may have been written to disk before the error occurred.  In
+ * this case the on-disk file size may have been adjusted beyond the in-memory
+ * file size and now needs to be truncated back.
+ */
+STATIC void
+xfs_aio_write_newsize_update(
+	struct xfs_inode	*ip)
+{
+	if (ip->i_new_size) {
+		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+		ip->i_new_size = 0;
+		if (ip->i_d.di_size > ip->i_size)
+			ip->i_d.di_size = ip->i_size;
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+	}
+}
+
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * couuld cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
+STATIC ssize_t
+xfs_file_splice_write(
+	struct pipe_inode_info	*pipe,
+	struct file		*outfilp,
+	loff_t			*ppos,
+	size_t			count,
+	unsigned int		flags)
+{
+	struct inode		*inode = outfilp->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fsize_t		new_size;
+	int			ioflags = 0;
+	ssize_t			ret;
+
+	XFS_STATS_INC(xs_write_calls);
+
+	if (outfilp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	new_size = *ppos + count;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (new_size > ip->i_size)
+		ip->i_new_size = new_size;
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+
+	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+
+	xfs_aio_write_isize_update(inode, ppos, ret);
+	xfs_aio_write_newsize_update(ip);
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF.  We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int				/* error (positive) */
+xfs_zero_last_block(
+	xfs_inode_t	*ip,
+	xfs_fsize_t	offset,
+	xfs_fsize_t	isize)
+{
+	xfs_fileoff_t	last_fsb;
+	xfs_mount_t	*mp = ip->i_mount;
+	int		nimaps;
+	int		zero_offset;
+	int		zero_len;
+	int		error = 0;
+	xfs_bmbt_irec_t	imap;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+	if (zero_offset == 0) {
+		/*
+		 * There are no extra bytes in the last block on disk to
+		 * zero, so return.
+		 */
+		return 0;
+	}
+
+	last_fsb = XFS_B_TO_FSBT(mp, isize);
+	nimaps = 1;
+	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
+			  &nimaps, NULL);
+	if (error) {
+		return error;
+	}
+	ASSERT(nimaps > 0);
+	/*
+	 * If the block underlying isize is just a hole, then there
+	 * is nothing to zero.
+	 */
+	if (imap.br_startblock == HOLESTARTBLOCK) {
+		return 0;
+	}
+	/*
+	 * Zero the part of the last block beyond the EOF, and write it
+	 * out sync.  We need to drop the ilock while we do this so we
+	 * don't deadlock when the buffer cache calls back to us.
+	 */
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	zero_len = mp->m_sb.sb_blocksize - zero_offset;
+	if (isize + zero_len > offset)
+		zero_len = offset - isize;
+	error = xfs_iozero(ip, isize, zero_len);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	ASSERT(error >= 0);
+	return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF.  This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file.  This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.  If fill is set,
+ * then any holes in the range are filled and zeroed.  If not, the holes
+ * are left alone as holes.
+ */
+
+int					/* error (positive) */
+xfs_zero_eof(
+	xfs_inode_t	*ip,
+	xfs_off_t	offset,		/* starting I/O offset */
+	xfs_fsize_t	isize)		/* current inode size */
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	xfs_fileoff_t	start_zero_fsb;
+	xfs_fileoff_t	end_zero_fsb;
+	xfs_fileoff_t	zero_count_fsb;
+	xfs_fileoff_t	last_fsb;
+	xfs_fileoff_t	zero_off;
+	xfs_fsize_t	zero_len;
+	int		nimaps;
+	int		error = 0;
+	xfs_bmbt_irec_t	imap;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+	ASSERT(offset > isize);
+
+	/*
+	 * First handle zeroing the block on which isize resides.
+	 * We only zero a part of that block so it is handled specially.
+	 */
+	error = xfs_zero_last_block(ip, offset, isize);
+	if (error) {
+		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+		return error;
+	}
+
+	/*
+	 * Calculate the range between the new size and the old
+	 * where blocks needing to be zeroed may exist.  To get the
+	 * block where the last byte in the file currently resides,
+	 * we need to subtract one from the size and truncate back
+	 * to a block boundary.  We subtract 1 in case the size is
+	 * exactly on a block boundary.
+	 */
+	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+	if (last_fsb == end_zero_fsb) {
+		/*
+		 * The size was only incremented on its last block.
+		 * We took care of that above, so just return.
+		 */
+		return 0;
+	}
+
+	ASSERT(start_zero_fsb <= end_zero_fsb);
+	while (start_zero_fsb <= end_zero_fsb) {
+		nimaps = 1;
+		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+		error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+				  0, NULL, 0, &imap, &nimaps, NULL);
+		if (error) {
+			ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+			return error;
+		}
+		ASSERT(nimaps > 0);
+
+		if (imap.br_state == XFS_EXT_UNWRITTEN ||
+		    imap.br_startblock == HOLESTARTBLOCK) {
+			/*
+			 * This loop handles initializing pages that were
+			 * partially initialized by the code below this
+			 * loop. It basically zeroes the part of the page
+			 * that sits on a hole and sets the page as P_HOLE
+			 * and calls remapf if it is a mapped file.
+			 */
+			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+			continue;
+		}
+
+		/*
+		 * There are blocks we need to zero.
+		 * Drop the inode lock while we're doing the I/O.
+		 * We'll still have the iolock to protect us.
+		 */
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+		if ((zero_off + zero_len) > offset)
+			zero_len = offset - zero_off;
+
+		error = xfs_iozero(ip, zero_off, zero_len);
+		if (error) {
+			goto out_lock;
+		}
+
+		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	}
+
+	return 0;
+
+out_lock:
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	ASSERT(error >= 0);
+	return error;
+}
+
+/*
+ * Common pre-write limit and setup checks.
+ *
+ * Returns with iolock held according to @iolock.
+ */
+STATIC ssize_t
+xfs_file_aio_write_checks(
+	struct file		*file,
+	loff_t			*pos,
+	size_t			*count,
+	int			*iolock)
+{
+	struct inode		*inode = file->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	xfs_fsize_t		new_size;
+	int			error = 0;
+
+	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
+	if (error) {
+		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
+		*iolock = 0;
+		return error;
+	}
+
+	new_size = *pos + *count;
+	if (new_size > ip->i_size)
+		ip->i_new_size = new_size;
+
+	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+		file_update_time(file);
+
+	/*
+	 * If the offset is beyond the size of the file, we need to zero any
+	 * blocks that fall between the existing EOF and the start of this
+	 * write.
+	 */
+	if (*pos > ip->i_size)
+		error = -xfs_zero_eof(ip, *pos, ip->i_size);
+
+	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+	if (error)
+		return error;
+
+	/*
+	 * If we're writing the file then make sure to clear the setuid and
+	 * setgid bits if the process is not being run by root.  This keeps
+	 * people from modifying setuid and setgid binaries.
+	 */
+	return file_remove_suid(file);
+
+}
+
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping.
+ *
+ * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
+ * until we're sure the bytes at the new EOF have been zeroed and/or the cached
+ * pages are flushed out.
+ *
+ * In most cases the direct IO writes will be done holding IOLOCK_SHARED
+ * allowing them to be done in parallel with reads and other direct IO writes.
+ * However, if the IO is not aligned to filesystem blocks, the direct IO layer
+ * needs to do sub-block zeroing and that requires serialisation against other
+ * direct IOs to the same block. In this case we need to serialise the
+ * submission of the unaligned IOs so that we don't get racing block zeroing in
+ * the dio layer.  To avoid the problem with aio, we also need to wait for
+ * outstanding IOs to complete so that unwritten extent conversion is completed
+ * before we try to map the overlapping block. This is currently implemented by
+ * hitting it with a big hammer (i.e. xfs_ioend_wait()).
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+	struct kiocb		*iocb,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			pos,
+	size_t			ocount,
+	int			*iolock)
+{
+	struct file		*file = iocb->ki_filp;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			ret = 0;
+	size_t			count = ocount;
+	int			unaligned_io = 0;
+	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
+					mp->m_rtdev_targp : mp->m_ddev_targp;
+
+	*iolock = 0;
+	if ((pos & target->bt_smask) || (count & target->bt_smask))
+		return -XFS_ERROR(EINVAL);
+
+	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+		unaligned_io = 1;
+
+	if (unaligned_io || mapping->nrpages || pos > ip->i_size)
+		*iolock = XFS_IOLOCK_EXCL;
+	else
+		*iolock = XFS_IOLOCK_SHARED;
+	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+	if (ret)
+		return ret;
+
+	if (mapping->nrpages) {
+		WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+							FI_REMAPF_LOCKED);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * If we are doing unaligned IO, wait for all other IO to drain,
+	 * otherwise demote the lock if we had to flush cached pages
+	 */
+	if (unaligned_io)
+		xfs_ioend_wait(ip);
+	else if (*iolock == XFS_IOLOCK_EXCL) {
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		*iolock = XFS_IOLOCK_SHARED;
+	}
+
+	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+	ret = generic_file_direct_write(iocb, iovp,
+			&nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+	/* No fallback to buffered IO on errors for XFS. */
+	ASSERT(ret < 0 || ret == count);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_write(
+	struct kiocb		*iocb,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			pos,
+	size_t			ocount,
+	int			*iolock)
+{
+	struct file		*file = iocb->ki_filp;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	ssize_t			ret;
+	int			enospc = 0;
+	size_t			count = ocount;
+
+	*iolock = XFS_IOLOCK_EXCL;
+	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+	if (ret)
+		return ret;
+
+	/* We can write back this queue in page reclaim */
+	current->backing_dev_info = mapping->backing_dev_info;
+
+write_retry:
+	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
+	ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+			pos, &iocb->ki_pos, count, ret);
+	/*
+	 * if we just got an ENOSPC, flush the inode now we aren't holding any
+	 * page locks and retry *once*
+	 */
+	if (ret == -ENOSPC && !enospc) {
+		ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+		if (ret)
+			return ret;
+		enospc = 1;
+		goto write_retry;
+	}
+	current->backing_dev_info = NULL;
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_aio_write(
+	struct kiocb		*iocb,
+	const struct iovec	*iovp,
+	unsigned long		nr_segs,
+	loff_t			pos)
+{
+	struct file		*file = iocb->ki_filp;
+	struct address_space	*mapping = file->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	ssize_t			ret;
+	int			iolock;
+	size_t			ocount = 0;
+
+	XFS_STATS_INC(xs_write_calls);
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+	if (ret)
+		return ret;
+
+	if (ocount == 0)
+		return 0;
+
+	xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	if (unlikely(file->f_flags & O_DIRECT))
+		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+						ocount, &iolock);
+	else
+		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
+						ocount, &iolock);
+
+	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
+
+	if (ret <= 0)
+		goto out_unlock;
+
+	/* Handle various SYNC-type writes */
+	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+		loff_t end = pos + ret - 1;
+		int error;
+
+		xfs_rw_iunlock(ip, iolock);
+		error = xfs_file_fsync(file, pos, end,
+				      (file->f_flags & __O_SYNC) ? 0 : 1);
+		xfs_rw_ilock(ip, iolock);
+		if (error)
+			ret = error;
+	}
+
+out_unlock:
+	xfs_aio_write_newsize_update(ip);
+	xfs_rw_iunlock(ip, iolock);
+	return ret;
+}
+
+STATIC long
+xfs_file_fallocate(
+	struct file	*file,
+	int		mode,
+	loff_t		offset,
+	loff_t		len)
+{
+	struct inode	*inode = file->f_path.dentry->d_inode;
+	long		error;
+	loff_t		new_size = 0;
+	xfs_flock64_t	bf;
+	xfs_inode_t	*ip = XFS_I(inode);
+	int		cmd = XFS_IOC_RESVSP;
+	int		attr_flags = XFS_ATTR_NOLOCK;
+
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
+
+	bf.l_whence = 0;
+	bf.l_start = offset;
+	bf.l_len = len;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		cmd = XFS_IOC_UNRESVSP;
+
+	/* check the new inode size is valid before allocating */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    offset + len > i_size_read(inode)) {
+		new_size = offset + len;
+		error = inode_newsize_ok(inode, new_size);
+		if (error)
+			goto out_unlock;
+	}
+
+	if (file->f_flags & O_DSYNC)
+		attr_flags |= XFS_ATTR_SYNC;
+
+	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
+	if (error)
+		goto out_unlock;
+
+	/* Change file size if needed */
+	if (new_size) {
+		struct iattr iattr;
+
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = new_size;
+		error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+	}
+
+out_unlock:
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+}
+
+
+STATIC int
+xfs_file_open(
+	struct inode	*inode,
+	struct file	*file)
+{
+	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+		return -EFBIG;
+	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+		return -EIO;
+	return 0;
+}
+
+STATIC int
+xfs_dir_open(
+	struct inode	*inode,
+	struct file	*file)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	int		mode;
+	int		error;
+
+	error = xfs_file_open(inode, file);
+	if (error)
+		return error;
+
+	/*
+	 * If there are any blocks, read-ahead block 0 as we're almost
+	 * certain to have the next operation be a read there.
+	 */
+	mode = xfs_ilock_map_shared(ip);
+	if (ip->i_d.di_nextents > 0)
+		xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+	xfs_iunlock(ip, mode);
+	return 0;
+}
+
+STATIC int
+xfs_file_release(
+	struct inode	*inode,
+	struct file	*filp)
+{
+	return -xfs_release(XFS_I(inode));
+}
+
+STATIC int
+xfs_file_readdir(
+	struct file	*filp,
+	void		*dirent,
+	filldir_t	filldir)
+{
+	struct inode	*inode = filp->f_path.dentry->d_inode;
+	xfs_inode_t	*ip = XFS_I(inode);
+	int		error;
+	size_t		bufsize;
+
+	/*
+	 * The Linux API doesn't pass down the total size of the buffer
+	 * we read into down to the filesystem.  With the filldir concept
+	 * it's not needed for correct information, but the XFS dir2 leaf
+	 * code wants an estimate of the buffer size to calculate it's
+	 * readahead window and size the buffers used for mapping to
+	 * physical blocks.
+	 *
+	 * Try to give it an estimate that's good enough, maybe at some
+	 * point we can change the ->readdir prototype to include the
+	 * buffer size.  For now we use the current glibc buffer size.
+	 */
+	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+
+	error = xfs_readdir(ip, dirent, bufsize,
+				(xfs_off_t *)&filp->f_pos, filldir);
+	if (error)
+		return -error;
+	return 0;
+}
+
+STATIC int
+xfs_file_mmap(
+	struct file	*filp,
+	struct vm_area_struct *vma)
+{
+	vma->vm_ops = &xfs_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
+
+	file_accessed(filp);
+	return 0;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
+}
+
+const struct file_operations xfs_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= xfs_file_aio_read,
+	.aio_write	= xfs_file_aio_write,
+	.splice_read	= xfs_file_splice_read,
+	.splice_write	= xfs_file_splice_write,
+	.unlocked_ioctl	= xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= xfs_file_compat_ioctl,
+#endif
+	.mmap		= xfs_file_mmap,
+	.open		= xfs_file_open,
+	.release	= xfs_file_release,
+	.fsync		= xfs_file_fsync,
+	.fallocate	= xfs_file_fallocate,
+};
+
+const struct file_operations xfs_dir_file_operations = {
+	.open		= xfs_dir_open,
+	.read		= generic_read_dir,
+	.readdir	= xfs_file_readdir,
+	.llseek		= generic_file_llseek,
+	.unlocked_ioctl	= xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= xfs_file_compat_ioctl,
+#endif
+	.fsync		= xfs_file_fsync,
+};
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+	.fault		= filemap_fault,
+	.page_mkwrite	= xfs_vm_page_mkwrite,
+};
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c
new file mode 100644
index 0000000..ed88ed1
--- /dev/null
+++ b/fs/xfs/xfs_fs_subr.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+
+/*
+ * note: all filemap functions return negative error codes. These
+ * need to be inverted before returning to the xfs core functions.
+ */
+void
+xfs_tosspages(
+	xfs_inode_t	*ip,
+	xfs_off_t	first,
+	xfs_off_t	last,
+	int		fiopt)
+{
+	/* can't toss partial tail pages, so mask them out */
+	last &= ~(PAGE_SIZE - 1);
+	truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
+}
+
+int
+xfs_flushinval_pages(
+	xfs_inode_t	*ip,
+	xfs_off_t	first,
+	xfs_off_t	last,
+	int		fiopt)
+{
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
+	int		ret = 0;
+
+	trace_xfs_pagecache_inval(ip, first, last);
+
+	xfs_iflags_clear(ip, XFS_ITRUNCATED);
+	ret = filemap_write_and_wait_range(mapping, first,
+				last == -1 ? LLONG_MAX : last);
+	if (!ret)
+		truncate_inode_pages_range(mapping, first, last);
+	return -ret;
+}
+
+int
+xfs_flush_pages(
+	xfs_inode_t	*ip,
+	xfs_off_t	first,
+	xfs_off_t	last,
+	uint64_t	flags,
+	int		fiopt)
+{
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
+	int		ret = 0;
+	int		ret2;
+
+	xfs_iflags_clear(ip, XFS_ITRUNCATED);
+	ret = -filemap_fdatawrite_range(mapping, first,
+				last == -1 ? LLONG_MAX : last);
+	if (flags & XBF_ASYNC)
+		return ret;
+	ret2 = xfs_wait_on_pages(ip, first, last);
+	if (!ret)
+		ret = ret2;
+	return ret;
+}
+
+int
+xfs_wait_on_pages(
+	xfs_inode_t	*ip,
+	xfs_off_t	first,
+	xfs_off_t	last)
+{
+	struct address_space *mapping = VFS_I(ip)->i_mapping;
+
+	if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+		return -filemap_fdatawait_range(mapping, first,
+					last == -1 ? ip->i_size - 1 : last);
+	}
+	return 0;
+}
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
new file mode 100644
index 0000000..76e81cf
--- /dev/null
+++ b/fs/xfs/xfs_globals.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sysctl.h"
+
+/*
+ * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
+ * other XFS code uses these values.  Times are measured in centisecs (i.e.
+ * 100ths of a second).
+ */
+xfs_param_t xfs_params = {
+			  /*	MIN		DFLT		MAX	*/
+	.sgid_inherit	= {	0,		0,		1	},
+	.symlink_mode	= {	0,		0,		1	},
+	.panic_mask	= {	0,		0,		255	},
+	.error_level	= {	0,		3,		11	},
+	.syncd_timer	= {	1*100,		30*100,		7200*100},
+	.stats_clear	= {	0,		0,		1	},
+	.inherit_sync	= {	0,		1,		1	},
+	.inherit_nodump	= {	0,		1,		1	},
+	.inherit_noatim = {	0,		1,		1	},
+	.xfs_buf_timer	= {	100/2,		1*100,		30*100	},
+	.xfs_buf_age	= {	1*100,		15*100,		7200*100},
+	.inherit_nosym	= {	0,		0,		1	},
+	.rotorstep	= {	1,		1,		255	},
+	.inherit_nodfrg	= {	0,		1,		1	},
+	.fstrm_timer	= {	1,		30*100,		3600*100},
+};
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
new file mode 100644
index 0000000..f7ce7de
--- /dev/null
+++ b/fs/xfs/xfs_ioctl.c
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ioctl.h"
+#include "xfs_rtalloc.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+#include "xfs_vnodeops.h"
+#include "xfs_discard.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
+#include "xfs_export.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/exportfs.h>
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ *    returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ *    returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ *    returns full handle for a path
+ */
+int
+xfs_find_handle(
+	unsigned int		cmd,
+	xfs_fsop_handlereq_t	*hreq)
+{
+	int			hsize;
+	xfs_handle_t		handle;
+	struct inode		*inode;
+	struct file		*file = NULL;
+	struct path		path;
+	int			error;
+	struct xfs_inode	*ip;
+
+	if (cmd == XFS_IOC_FD_TO_HANDLE) {
+		file = fget(hreq->fd);
+		if (!file)
+			return -EBADF;
+		inode = file->f_path.dentry->d_inode;
+	} else {
+		error = user_lpath((const char __user *)hreq->path, &path);
+		if (error)
+			return error;
+		inode = path.dentry->d_inode;
+	}
+	ip = XFS_I(inode);
+
+	/*
+	 * We can only generate handles for inodes residing on a XFS filesystem,
+	 * and only for regular files, directories or symbolic links.
+	 */
+	error = -EINVAL;
+	if (inode->i_sb->s_magic != XFS_SB_MAGIC)
+		goto out_put;
+
+	error = -EBADF;
+	if (!S_ISREG(inode->i_mode) &&
+	    !S_ISDIR(inode->i_mode) &&
+	    !S_ISLNK(inode->i_mode))
+		goto out_put;
+
+
+	memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
+
+	if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
+		/*
+		 * This handle only contains an fsid, zero the rest.
+		 */
+		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
+		hsize = sizeof(xfs_fsid_t);
+	} else {
+		int		lock_mode;
+
+		lock_mode = xfs_ilock_map_shared(ip);
+		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+					sizeof(handle.ha_fid.fid_len);
+		handle.ha_fid.fid_pad = 0;
+		handle.ha_fid.fid_gen = ip->i_d.di_gen;
+		handle.ha_fid.fid_ino = ip->i_ino;
+		xfs_iunlock_map_shared(ip, lock_mode);
+
+		hsize = XFS_HSIZE(handle);
+	}
+
+	error = -EFAULT;
+	if (copy_to_user(hreq->ohandle, &handle, hsize) ||
+	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
+		goto out_put;
+
+	error = 0;
+
+ out_put:
+	if (cmd == XFS_IOC_FD_TO_HANDLE)
+		fput(file);
+	else
+		path_put(&path);
+	return error;
+}
+
+/*
+ * No need to do permission checks on the various pathname components
+ * as the handle operations are privileged.
+ */
+STATIC int
+xfs_handle_acceptable(
+	void			*context,
+	struct dentry		*dentry)
+{
+	return 1;
+}
+
+/*
+ * Convert userspace handle data into a dentry.
+ */
+struct dentry *
+xfs_handle_to_dentry(
+	struct file		*parfilp,
+	void __user		*uhandle,
+	u32			hlen)
+{
+	xfs_handle_t		handle;
+	struct xfs_fid64	fid;
+
+	/*
+	 * Only allow handle opens under a directory.
+	 */
+	if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
+		return ERR_PTR(-ENOTDIR);
+
+	if (hlen != sizeof(xfs_handle_t))
+		return ERR_PTR(-EINVAL);
+	if (copy_from_user(&handle, uhandle, hlen))
+		return ERR_PTR(-EFAULT);
+	if (handle.ha_fid.fid_len !=
+	    sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
+		return ERR_PTR(-EINVAL);
+
+	memset(&fid, 0, sizeof(struct fid));
+	fid.ino = handle.ha_fid.fid_ino;
+	fid.gen = handle.ha_fid.fid_gen;
+
+	return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
+			FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
+			xfs_handle_acceptable, NULL);
+}
+
+STATIC struct dentry *
+xfs_handlereq_to_dentry(
+	struct file		*parfilp,
+	xfs_fsop_handlereq_t	*hreq)
+{
+	return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
+}
+
+int
+xfs_open_by_handle(
+	struct file		*parfilp,
+	xfs_fsop_handlereq_t	*hreq)
+{
+	const struct cred	*cred = current_cred();
+	int			error;
+	int			fd;
+	int			permflag;
+	struct file		*filp;
+	struct inode		*inode;
+	struct dentry		*dentry;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+
+	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	inode = dentry->d_inode;
+
+	/* Restrict xfs_open_by_handle to directories & regular files. */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+		error = -XFS_ERROR(EPERM);
+		goto out_dput;
+	}
+
+#if BITS_PER_LONG != 32
+	hreq->oflags |= O_LARGEFILE;
+#endif
+
+	/* Put open permission in namei format. */
+	permflag = hreq->oflags;
+	if ((permflag+1) & O_ACCMODE)
+		permflag++;
+	if (permflag & O_TRUNC)
+		permflag |= 2;
+
+	if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+	    (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+		error = -XFS_ERROR(EPERM);
+		goto out_dput;
+	}
+
+	if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		error = -XFS_ERROR(EACCES);
+		goto out_dput;
+	}
+
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+		error = -XFS_ERROR(EISDIR);
+		goto out_dput;
+	}
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		error = fd;
+		goto out_dput;
+	}
+
+	filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
+			   hreq->oflags, cred);
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	if (S_ISREG(inode->i_mode)) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+
+	fd_install(fd, filp);
+	return fd;
+
+ out_dput:
+	dput(dentry);
+	return error;
+}
+
+/*
+ * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
+ * unused first argument.
+ */
+STATIC int
+do_readlink(
+	char __user		*buffer,
+	int			buflen,
+	const char		*link)
+{
+        int len;
+
+	len = PTR_ERR(link);
+	if (IS_ERR(link))
+		goto out;
+
+	len = strlen(link);
+	if (len > (unsigned) buflen)
+		len = buflen;
+	if (copy_to_user(buffer, link, len))
+		len = -EFAULT;
+ out:
+	return len;
+}
+
+
+int
+xfs_readlink_by_handle(
+	struct file		*parfilp,
+	xfs_fsop_handlereq_t	*hreq)
+{
+	struct dentry		*dentry;
+	__u32			olen;
+	void			*link;
+	int			error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+
+	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	/* Restrict this handle operation to symlinks only. */
+	if (!S_ISLNK(dentry->d_inode->i_mode)) {
+		error = -XFS_ERROR(EINVAL);
+		goto out_dput;
+	}
+
+	if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
+		error = -XFS_ERROR(EFAULT);
+		goto out_dput;
+	}
+
+	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+	if (!link) {
+		error = -XFS_ERROR(ENOMEM);
+		goto out_dput;
+	}
+
+	error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+	if (error)
+		goto out_kfree;
+	error = do_readlink(hreq->ohandle, olen, link);
+	if (error)
+		goto out_kfree;
+
+ out_kfree:
+	kfree(link);
+ out_dput:
+	dput(dentry);
+	return error;
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+	struct file		*parfilp,
+	void			__user *arg)
+{
+	int			error;
+	struct fsdmidata	fsd;
+	xfs_fsop_setdm_handlereq_t dmhreq;
+	struct dentry		*dentry;
+
+	if (!capable(CAP_MKNOD))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+		error = -XFS_ERROR(EPERM);
+		goto out;
+	}
+
+	if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+		error = -XFS_ERROR(EFAULT);
+		goto out;
+	}
+
+	error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+				 fsd.fsd_dmstate);
+
+ out:
+	dput(dentry);
+	return error;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+	struct file		*parfilp,
+	void			__user *arg)
+{
+	int			error = -ENOMEM;
+	attrlist_cursor_kern_t	*cursor;
+	xfs_fsop_attrlist_handlereq_t al_hreq;
+	struct dentry		*dentry;
+	char			*kbuf;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+	if (al_hreq.buflen > XATTR_LIST_MAX)
+		return -XFS_ERROR(EINVAL);
+
+	/*
+	 * Reject flags, only allow namespaces.
+	 */
+	if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+		return -XFS_ERROR(EINVAL);
+
+	dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
+	if (!kbuf)
+		goto out_dput;
+
+	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+					al_hreq.flags, cursor);
+	if (error)
+		goto out_kfree;
+
+	if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
+		error = -EFAULT;
+
+ out_kfree:
+	kfree(kbuf);
+ out_dput:
+	dput(dentry);
+	return error;
+}
+
+int
+xfs_attrmulti_attr_get(
+	struct inode		*inode,
+	unsigned char		*name,
+	unsigned char		__user *ubuf,
+	__uint32_t		*len,
+	__uint32_t		flags)
+{
+	unsigned char		*kbuf;
+	int			error = EFAULT;
+
+	if (*len > XATTR_SIZE_MAX)
+		return EINVAL;
+	kbuf = kmalloc(*len, GFP_KERNEL);
+	if (!kbuf)
+		return ENOMEM;
+
+	error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
+	if (error)
+		goto out_kfree;
+
+	if (copy_to_user(ubuf, kbuf, *len))
+		error = EFAULT;
+
+ out_kfree:
+	kfree(kbuf);
+	return error;
+}
+
+int
+xfs_attrmulti_attr_set(
+	struct inode		*inode,
+	unsigned char		*name,
+	const unsigned char	__user *ubuf,
+	__uint32_t		len,
+	__uint32_t		flags)
+{
+	unsigned char		*kbuf;
+	int			error = EFAULT;
+
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		return EPERM;
+	if (len > XATTR_SIZE_MAX)
+		return EINVAL;
+
+	kbuf = memdup_user(ubuf, len);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+
+	return error;
+}
+
+int
+xfs_attrmulti_attr_remove(
+	struct inode		*inode,
+	unsigned char		*name,
+	__uint32_t		flags)
+{
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		return EPERM;
+	return xfs_attr_remove(XFS_I(inode), name, flags);
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+	struct file		*parfilp,
+	void			__user *arg)
+{
+	int			error;
+	xfs_attr_multiop_t	*ops;
+	xfs_fsop_attrmulti_handlereq_t am_hreq;
+	struct dentry		*dentry;
+	unsigned int		i, size;
+	unsigned char		*attr_name;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+		return -E2BIG;
+
+	dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	error = E2BIG;
+	size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
+	if (!size || size > 16 * PAGE_SIZE)
+		goto out_dput;
+
+	ops = memdup_user(am_hreq.ops, size);
+	if (IS_ERR(ops)) {
+		error = PTR_ERR(ops);
+		goto out_dput;
+	}
+
+	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+	if (!attr_name)
+		goto out_kfree_ops;
+
+	error = 0;
+	for (i = 0; i < am_hreq.opcount; i++) {
+		ops[i].am_error = strncpy_from_user((char *)attr_name,
+				ops[i].am_attrname, MAXNAMELEN);
+		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+			error = -ERANGE;
+		if (ops[i].am_error < 0)
+			break;
+
+		switch (ops[i].am_opcode) {
+		case ATTR_OP_GET:
+			ops[i].am_error = xfs_attrmulti_attr_get(
+					dentry->d_inode, attr_name,
+					ops[i].am_attrvalue, &ops[i].am_length,
+					ops[i].am_flags);
+			break;
+		case ATTR_OP_SET:
+			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+			if (ops[i].am_error)
+				break;
+			ops[i].am_error = xfs_attrmulti_attr_set(
+					dentry->d_inode, attr_name,
+					ops[i].am_attrvalue, ops[i].am_length,
+					ops[i].am_flags);
+			mnt_drop_write(parfilp->f_path.mnt);
+			break;
+		case ATTR_OP_REMOVE:
+			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+			if (ops[i].am_error)
+				break;
+			ops[i].am_error = xfs_attrmulti_attr_remove(
+					dentry->d_inode, attr_name,
+					ops[i].am_flags);
+			mnt_drop_write(parfilp->f_path.mnt);
+			break;
+		default:
+			ops[i].am_error = EINVAL;
+		}
+	}
+
+	if (copy_to_user(am_hreq.ops, ops, size))
+		error = XFS_ERROR(EFAULT);
+
+	kfree(attr_name);
+ out_kfree_ops:
+	kfree(ops);
+ out_dput:
+	dput(dentry);
+	return -error;
+}
+
+int
+xfs_ioc_space(
+	struct xfs_inode	*ip,
+	struct inode		*inode,
+	struct file		*filp,
+	int			ioflags,
+	unsigned int		cmd,
+	xfs_flock64_t		*bf)
+{
+	int			attr_flags = 0;
+	int			error;
+
+	/*
+	 * Only allow the sys admin to reserve space unless
+	 * unwritten extents are enabled.
+	 */
+	if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+	    !capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+
+	if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
+		return -XFS_ERROR(EPERM);
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -XFS_ERROR(EBADF);
+
+	if (!S_ISREG(inode->i_mode))
+		return -XFS_ERROR(EINVAL);
+
+	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+		attr_flags |= XFS_ATTR_NONBLOCK;
+
+	if (filp->f_flags & O_DSYNC)
+		attr_flags |= XFS_ATTR_SYNC;
+
+	if (ioflags & IO_INVIS)
+		attr_flags |= XFS_ATTR_DMI;
+
+	error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+	return -error;
+}
+
+STATIC int
+xfs_ioc_bulkstat(
+	xfs_mount_t		*mp,
+	unsigned int		cmd,
+	void			__user *arg)
+{
+	xfs_fsop_bulkreq_t	bulkreq;
+	int			count;	/* # of records returned */
+	xfs_ino_t		inlast;	/* last inode number */
+	int			done;
+	int			error;
+
+	/* done = 1 if there are more stats to get and if bulkstat */
+	/* should be called again (unused here, but used in dmapi) */
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+		return -XFS_ERROR(EFAULT);
+
+	if ((count = bulkreq.icount) <= 0)
+		return -XFS_ERROR(EINVAL);
+
+	if (bulkreq.ubuffer == NULL)
+		return -XFS_ERROR(EINVAL);
+
+	if (cmd == XFS_IOC_FSINUMBERS)
+		error = xfs_inumbers(mp, &inlast, &count,
+					bulkreq.ubuffer, xfs_inumbers_fmt);
+	else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+		error = xfs_bulkstat_single(mp, &inlast,
+						bulkreq.ubuffer, &done);
+	else	/* XFS_IOC_FSBULKSTAT */
+		error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+				     sizeof(xfs_bstat_t), bulkreq.ubuffer,
+				     &done);
+
+	if (error)
+		return -error;
+
+	if (bulkreq.ocount != NULL) {
+		if (copy_to_user(bulkreq.lastip, &inlast,
+						sizeof(xfs_ino_t)))
+			return -XFS_ERROR(EFAULT);
+
+		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+			return -XFS_ERROR(EFAULT);
+	}
+
+	return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+	xfs_mount_t		*mp,
+	void			__user *arg)
+{
+	xfs_fsop_geom_t         fsgeo;
+	int			error;
+
+	error = xfs_fs_geometry(mp, &fsgeo, 3);
+	if (error)
+		return -error;
+
+	/*
+	 * Caller should have passed an argument of type
+	 * xfs_fsop_geom_v1_t.  This is a proper subset of the
+	 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+	 */
+	if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+	xfs_mount_t		*mp,
+	void			__user *arg)
+{
+	xfs_fsop_geom_t		fsgeo;
+	int			error;
+
+	error = xfs_fs_geometry(mp, &fsgeo, 4);
+	if (error)
+		return -error;
+
+	if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+/*
+ * Linux extended inode flags interface.
+ */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+	unsigned int	flags,
+	unsigned int	start)
+{
+	unsigned int	xflags = start;
+
+	if (flags & FS_IMMUTABLE_FL)
+		xflags |= XFS_XFLAG_IMMUTABLE;
+	else
+		xflags &= ~XFS_XFLAG_IMMUTABLE;
+	if (flags & FS_APPEND_FL)
+		xflags |= XFS_XFLAG_APPEND;
+	else
+		xflags &= ~XFS_XFLAG_APPEND;
+	if (flags & FS_SYNC_FL)
+		xflags |= XFS_XFLAG_SYNC;
+	else
+		xflags &= ~XFS_XFLAG_SYNC;
+	if (flags & FS_NOATIME_FL)
+		xflags |= XFS_XFLAG_NOATIME;
+	else
+		xflags &= ~XFS_XFLAG_NOATIME;
+	if (flags & FS_NODUMP_FL)
+		xflags |= XFS_XFLAG_NODUMP;
+	else
+		xflags &= ~XFS_XFLAG_NODUMP;
+
+	return xflags;
+}
+
+STATIC unsigned int
+xfs_di2lxflags(
+	__uint16_t	di_flags)
+{
+	unsigned int	flags = 0;
+
+	if (di_flags & XFS_DIFLAG_IMMUTABLE)
+		flags |= FS_IMMUTABLE_FL;
+	if (di_flags & XFS_DIFLAG_APPEND)
+		flags |= FS_APPEND_FL;
+	if (di_flags & XFS_DIFLAG_SYNC)
+		flags |= FS_SYNC_FL;
+	if (di_flags & XFS_DIFLAG_NOATIME)
+		flags |= FS_NOATIME_FL;
+	if (di_flags & XFS_DIFLAG_NODUMP)
+		flags |= FS_NODUMP_FL;
+	return flags;
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+	xfs_inode_t		*ip,
+	int			attr,
+	void			__user *arg)
+{
+	struct fsxattr		fa;
+
+	memset(&fa, 0, sizeof(struct fsxattr));
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	fa.fsx_xflags = xfs_ip2xflags(ip);
+	fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+	fa.fsx_projid = xfs_get_projid(ip);
+
+	if (attr) {
+		if (ip->i_afp) {
+			if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+				fa.fsx_nextents = ip->i_afp->if_bytes /
+							sizeof(xfs_bmbt_rec_t);
+			else
+				fa.fsx_nextents = ip->i_d.di_anextents;
+		} else
+			fa.fsx_nextents = 0;
+	} else {
+		if (ip->i_df.if_flags & XFS_IFEXTENTS)
+			fa.fsx_nextents = ip->i_df.if_bytes /
+						sizeof(xfs_bmbt_rec_t);
+		else
+			fa.fsx_nextents = ip->i_d.di_nextents;
+	}
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (copy_to_user(arg, &fa, sizeof(fa)))
+		return -EFAULT;
+	return 0;
+}
+
+STATIC void
+xfs_set_diflags(
+	struct xfs_inode	*ip,
+	unsigned int		xflags)
+{
+	unsigned int		di_flags;
+
+	/* can't set PREALLOC this way, just preserve it */
+	di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+	if (xflags & XFS_XFLAG_IMMUTABLE)
+		di_flags |= XFS_DIFLAG_IMMUTABLE;
+	if (xflags & XFS_XFLAG_APPEND)
+		di_flags |= XFS_DIFLAG_APPEND;
+	if (xflags & XFS_XFLAG_SYNC)
+		di_flags |= XFS_DIFLAG_SYNC;
+	if (xflags & XFS_XFLAG_NOATIME)
+		di_flags |= XFS_DIFLAG_NOATIME;
+	if (xflags & XFS_XFLAG_NODUMP)
+		di_flags |= XFS_DIFLAG_NODUMP;
+	if (xflags & XFS_XFLAG_PROJINHERIT)
+		di_flags |= XFS_DIFLAG_PROJINHERIT;
+	if (xflags & XFS_XFLAG_NODEFRAG)
+		di_flags |= XFS_DIFLAG_NODEFRAG;
+	if (xflags & XFS_XFLAG_FILESTREAM)
+		di_flags |= XFS_DIFLAG_FILESTREAM;
+	if (S_ISDIR(ip->i_d.di_mode)) {
+		if (xflags & XFS_XFLAG_RTINHERIT)
+			di_flags |= XFS_DIFLAG_RTINHERIT;
+		if (xflags & XFS_XFLAG_NOSYMLINKS)
+			di_flags |= XFS_DIFLAG_NOSYMLINKS;
+		if (xflags & XFS_XFLAG_EXTSZINHERIT)
+			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+	} else if (S_ISREG(ip->i_d.di_mode)) {
+		if (xflags & XFS_XFLAG_REALTIME)
+			di_flags |= XFS_DIFLAG_REALTIME;
+		if (xflags & XFS_XFLAG_EXTSIZE)
+			di_flags |= XFS_DIFLAG_EXTSIZE;
+	}
+
+	ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = VFS_I(ip);
+	unsigned int		xflags = xfs_ip2xflags(ip);
+
+	if (xflags & XFS_XFLAG_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	else
+		inode->i_flags &= ~S_IMMUTABLE;
+	if (xflags & XFS_XFLAG_APPEND)
+		inode->i_flags |= S_APPEND;
+	else
+		inode->i_flags &= ~S_APPEND;
+	if (xflags & XFS_XFLAG_SYNC)
+		inode->i_flags |= S_SYNC;
+	else
+		inode->i_flags &= ~S_SYNC;
+	if (xflags & XFS_XFLAG_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	else
+		inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID	1
+#define FSX_EXTSIZE	2
+#define FSX_XFLAGS	4
+#define FSX_NONBLOCK	8
+
+STATIC int
+xfs_ioctl_setattr(
+	xfs_inode_t		*ip,
+	struct fsxattr		*fa,
+	int			mask)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	unsigned int		lock_flags = 0;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
+	struct xfs_dquot	*olddquot = NULL;
+	int			code;
+
+	trace_xfs_ioctl_setattr(ip);
+
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return XFS_ERROR(EROFS);
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * Disallow 32bit project ids when projid32bit feature is not enabled.
+	 */
+	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+			!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+		return XFS_ERROR(EINVAL);
+
+	/*
+	 * If disk quotas is on, we make sure that the dquots do exist on disk,
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't care to take a readlock to look at the ids
+	 * in inode here, because we can't hold it across the trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
+					 ip->i_d.di_gid, fa->fsx_projid,
+					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+		if (code)
+			return code;
+	}
+
+	/*
+	 * For the other attributes, we acquire the inode lock and
+	 * first do an error checking pass.
+	 */
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (code)
+		goto error_return;
+
+	lock_flags = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lock_flags);
+
+	/*
+	 * CAP_FOWNER overrides the following restrictions:
+	 *
+	 * The user ID of the calling process must be equal
+	 * to the file owner ID, except in cases where the
+	 * CAP_FSETID capability is applicable.
+	 */
+	if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+		code = XFS_ERROR(EPERM);
+		goto error_return;
+	}
+
+	/*
+	 * Do a quota reservation only if projid is actually going to change.
+	 */
+	if (mask & FSX_PROJID) {
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    XFS_IS_PQUOTA_ON(mp) &&
+		    xfs_get_projid(ip) != fa->fsx_projid) {
+			ASSERT(tp);
+			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
+				goto error_return;
+		}
+	}
+
+	if (mask & FSX_EXTSIZE) {
+		/*
+		 * Can't change extent size if any extents are allocated.
+		 */
+		if (ip->i_d.di_nextents &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     fa->fsx_extsize)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * Extent size must be a multiple of the appropriate block
+		 * size, if set at all. It must also be smaller than the
+		 * maximum extent size supported by the filesystem.
+		 *
+		 * Also, for non-realtime files, limit the extent size hint to
+		 * half the size of the AGs in the filesystem so alignment
+		 * doesn't result in extents larger than an AG.
+		 */
+		if (fa->fsx_extsize != 0) {
+			xfs_extlen_t    size;
+			xfs_fsblock_t   extsize_fsb;
+
+			extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+			if (extsize_fsb > MAXEXTLEN) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+
+			if (XFS_IS_REALTIME_INODE(ip) ||
+			    ((mask & FSX_XFLAGS) &&
+			    (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+				size = mp->m_sb.sb_rextsize <<
+				       mp->m_sb.sb_blocklog;
+			} else {
+				size = mp->m_sb.sb_blocksize;
+				if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+					code = XFS_ERROR(EINVAL);
+					goto error_return;
+				}
+			}
+
+			if (fa->fsx_extsize % size) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+	}
+
+
+	if (mask & FSX_XFLAGS) {
+		/*
+		 * Can't change realtime flag if any extents are allocated.
+		 */
+		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+		    (XFS_IS_REALTIME_INODE(ip)) !=
+		    (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * If realtime flag is set then must have realtime data.
+		 */
+		if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+			if ((mp->m_sb.sb_rblocks == 0) ||
+			    (mp->m_sb.sb_rextsize == 0) ||
+			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+
+		/*
+		 * Can't modify an immutable/append-only file unless
+		 * we have appropriate permission.
+		 */
+		if ((ip->i_d.di_flags &
+				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+		     (fa->fsx_xflags &
+				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+		    !capable(CAP_LINUX_IMMUTABLE)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+	}
+
+	xfs_trans_ijoin(tp, ip);
+
+	/*
+	 * Change file ownership.  Must be the owner or privileged.
+	 */
+	if (mask & FSX_PROJID) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID))
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (xfs_get_projid(ip) != fa->fsx_projid) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+				olddquot = xfs_qm_vop_chown(tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			xfs_set_projid(ip, fa->fsx_projid);
+
+			/*
+			 * We may have to rev the inode as well as
+			 * the superblock version number since projids didn't
+			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+			 */
+			if (ip->i_d.di_version == 1)
+				xfs_bump_ino_vers2(tp, ip);
+		}
+
+	}
+
+	if (mask & FSX_EXTSIZE)
+		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+	if (mask & FSX_XFLAGS) {
+		xfs_set_diflags(ip, fa->fsx_xflags);
+		xfs_diflags_to_linux(ip);
+	}
+
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 * This is slightly sub-optimal in that truncates require
+	 * two sync transactions instead of one for wsync filesystems.
+	 * One for the truncate and one for the timestamps since we
+	 * don't want to change the timestamps unless we're sure the
+	 * truncate worked.  Truncates are less than 1% of the laddis
+	 * mix so this probably isn't worth the trouble to optimize.
+	 */
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+	code = xfs_trans_commit(tp, 0);
+	xfs_iunlock(ip, lock_flags);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown.
+	 */
+	xfs_qm_dqrele(olddquot);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+
+	return code;
+
+ error_return:
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	xfs_trans_cancel(tp, 0);
+	if (lock_flags)
+		xfs_iunlock(ip, lock_flags);
+	return code;
+}
+
+STATIC int
+xfs_ioc_fssetxattr(
+	xfs_inode_t		*ip,
+	struct file		*filp,
+	void			__user *arg)
+{
+	struct fsxattr		fa;
+	unsigned int		mask;
+
+	if (copy_from_user(&fa, arg, sizeof(fa)))
+		return -EFAULT;
+
+	mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
+	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+		mask |= FSX_NONBLOCK;
+
+	return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_ioc_getxflags(
+	xfs_inode_t		*ip,
+	void			__user *arg)
+{
+	unsigned int		flags;
+
+	flags = xfs_di2lxflags(ip->i_d.di_flags);
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		return -EFAULT;
+	return 0;
+}
+
+STATIC int
+xfs_ioc_setxflags(
+	xfs_inode_t		*ip,
+	struct file		*filp,
+	void			__user *arg)
+{
+	struct fsxattr		fa;
+	unsigned int		flags;
+	unsigned int		mask;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+		      FS_NOATIME_FL | FS_NODUMP_FL | \
+		      FS_SYNC_FL))
+		return -EOPNOTSUPP;
+
+	mask = FSX_XFLAGS;
+	if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+		mask |= FSX_NONBLOCK;
+	fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+
+	return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+	struct getbmap __user	*base = *ap;
+
+	/* copy only getbmap portion (not getbmapx) */
+	if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+		return XFS_ERROR(EFAULT);
+
+	*ap += sizeof(struct getbmap);
+	return 0;
+}
+
+STATIC int
+xfs_ioc_getbmap(
+	struct xfs_inode	*ip,
+	int			ioflags,
+	unsigned int		cmd,
+	void			__user *arg)
+{
+	struct getbmapx		bmx;
+	int			error;
+
+	if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+		return -XFS_ERROR(EFAULT);
+
+	if (bmx.bmv_count < 2)
+		return -XFS_ERROR(EINVAL);
+
+	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+	if (ioflags & IO_INVIS)
+		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+
+	error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+			    (struct getbmap *)arg+1);
+	if (error)
+		return -error;
+
+	/* copy back header - only size of getbmap */
+	if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+	struct getbmapx __user	*base = *ap;
+
+	if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+		return XFS_ERROR(EFAULT);
+
+	*ap += sizeof(struct getbmapx);
+	return 0;
+}
+
+STATIC int
+xfs_ioc_getbmapx(
+	struct xfs_inode	*ip,
+	void			__user *arg)
+{
+	struct getbmapx		bmx;
+	int			error;
+
+	if (copy_from_user(&bmx, arg, sizeof(bmx)))
+		return -XFS_ERROR(EFAULT);
+
+	if (bmx.bmv_count < 2)
+		return -XFS_ERROR(EINVAL);
+
+	if (bmx.bmv_iflags & (~BMV_IF_VALID))
+		return -XFS_ERROR(EINVAL);
+
+	error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+			    (struct getbmapx *)arg+1);
+	if (error)
+		return -error;
+
+	/* copy back header */
+	if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
+		return -XFS_ERROR(EFAULT);
+
+	return 0;
+}
+
+/*
+ * Note: some of the ioctl's return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
+	struct file		*filp,
+	unsigned int		cmd,
+	unsigned long		p)
+{
+	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	void			__user *arg = (void __user *)p;
+	int			ioflags = 0;
+	int			error;
+
+	if (filp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	trace_xfs_file_ioctl(ip);
+
+	switch (cmd) {
+	case FITRIM:
+		return xfs_ioc_trim(mp, arg);
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP64:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP64:
+	case XFS_IOC_ZERO_RANGE: {
+		xfs_flock64_t		bf;
+
+		if (copy_from_user(&bf, arg, sizeof(bf)))
+			return -XFS_ERROR(EFAULT);
+		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+	}
+	case XFS_IOC_DIOINFO: {
+		struct dioattr	da;
+		xfs_buftarg_t	*target =
+			XFS_IS_REALTIME_INODE(ip) ?
+			mp->m_rtdev_targp : mp->m_ddev_targp;
+
+		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+		if (copy_to_user(arg, &da, sizeof(da)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_FSBULKSTAT_SINGLE:
+	case XFS_IOC_FSBULKSTAT:
+	case XFS_IOC_FSINUMBERS:
+		return xfs_ioc_bulkstat(mp, cmd, arg);
+
+	case XFS_IOC_FSGEOMETRY_V1:
+		return xfs_ioc_fsgeometry_v1(mp, arg);
+
+	case XFS_IOC_FSGEOMETRY:
+		return xfs_ioc_fsgeometry(mp, arg);
+
+	case XFS_IOC_GETVERSION:
+		return put_user(inode->i_generation, (int __user *)arg);
+
+	case XFS_IOC_FSGETXATTR:
+		return xfs_ioc_fsgetxattr(ip, 0, arg);
+	case XFS_IOC_FSGETXATTRA:
+		return xfs_ioc_fsgetxattr(ip, 1, arg);
+	case XFS_IOC_FSSETXATTR:
+		return xfs_ioc_fssetxattr(ip, filp, arg);
+	case XFS_IOC_GETXFLAGS:
+		return xfs_ioc_getxflags(ip, arg);
+	case XFS_IOC_SETXFLAGS:
+		return xfs_ioc_setxflags(ip, filp, arg);
+
+	case XFS_IOC_FSSETDM: {
+		struct fsdmidata	dmi;
+
+		if (copy_from_user(&dmi, arg, sizeof(dmi)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+				dmi.fsd_dmstate);
+		return -error;
+	}
+
+	case XFS_IOC_GETBMAP:
+	case XFS_IOC_GETBMAPA:
+		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+	case XFS_IOC_GETBMAPX:
+		return xfs_ioc_getbmapx(ip, arg);
+
+	case XFS_IOC_FD_TO_HANDLE:
+	case XFS_IOC_PATH_TO_HANDLE:
+	case XFS_IOC_PATH_TO_FSHANDLE: {
+		xfs_fsop_handlereq_t	hreq;
+
+		if (copy_from_user(&hreq, arg, sizeof(hreq)))
+			return -XFS_ERROR(EFAULT);
+		return xfs_find_handle(cmd, &hreq);
+	}
+	case XFS_IOC_OPEN_BY_HANDLE: {
+		xfs_fsop_handlereq_t	hreq;
+
+		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+			return -XFS_ERROR(EFAULT);
+		return xfs_open_by_handle(filp, &hreq);
+	}
+	case XFS_IOC_FSSETDM_BY_HANDLE:
+		return xfs_fssetdm_by_handle(filp, arg);
+
+	case XFS_IOC_READLINK_BY_HANDLE: {
+		xfs_fsop_handlereq_t	hreq;
+
+		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+			return -XFS_ERROR(EFAULT);
+		return xfs_readlink_by_handle(filp, &hreq);
+	}
+	case XFS_IOC_ATTRLIST_BY_HANDLE:
+		return xfs_attrlist_by_handle(filp, arg);
+
+	case XFS_IOC_ATTRMULTI_BY_HANDLE:
+		return xfs_attrmulti_by_handle(filp, arg);
+
+	case XFS_IOC_SWAPEXT: {
+		struct xfs_swapext	sxp;
+
+		if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+			return -XFS_ERROR(EFAULT);
+		error = xfs_swapext(&sxp);
+		return -error;
+	}
+
+	case XFS_IOC_FSCOUNTS: {
+		xfs_fsop_counts_t out;
+
+		error = xfs_fs_counts(mp, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_SET_RESBLKS: {
+		xfs_fsop_resblks_t inout;
+		__uint64_t	   in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (mp->m_flags & XFS_MOUNT_RDONLY)
+			return -XFS_ERROR(EROFS);
+
+		if (copy_from_user(&inout, arg, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+
+		/* input parameter is passed in resblks field of structure */
+		in = inout.resblks;
+		error = xfs_reserve_blocks(mp, &in, &inout);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &inout, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_GET_RESBLKS: {
+		xfs_fsop_resblks_t out;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_reserve_blocks(mp, NULL, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+
+		return 0;
+	}
+
+	case XFS_IOC_FSGROWFSDATA: {
+		xfs_growfs_data_t in;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_data(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSLOG: {
+		xfs_growfs_log_t in;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_log(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSRT: {
+		xfs_growfs_rt_t in;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_rt(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_GOINGDOWN: {
+		__uint32_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(in, (__uint32_t __user *)arg))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_fs_goingdown(mp, in);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_INJECTION: {
+		xfs_error_injection_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_errortag_add(in.errtag, mp);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_CLEARALL:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_errortag_clearall(mp, 1);
+		return -error;
+
+	default:
+		return -ENOTTY;
+	}
+}
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
new file mode 100644
index 0000000..d56173b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+
+extern int
+xfs_ioc_space(
+	struct xfs_inode	*ip,
+	struct inode		*inode,
+	struct file		*filp,
+	int			ioflags,
+	unsigned int		cmd,
+	xfs_flock64_t		*bf);
+
+extern int
+xfs_find_handle(
+	unsigned int		cmd,
+	xfs_fsop_handlereq_t	*hreq);
+
+extern int
+xfs_open_by_handle(
+	struct file		*parfilp,
+	xfs_fsop_handlereq_t	*hreq);
+
+extern int
+xfs_readlink_by_handle(
+	struct file		*parfilp,
+	xfs_fsop_handlereq_t	*hreq);
+
+extern int
+xfs_attrmulti_attr_get(
+	struct inode		*inode,
+	unsigned char		*name,
+	unsigned char		__user *ubuf,
+	__uint32_t		*len,
+	__uint32_t		flags);
+
+extern int
+xfs_attrmulti_attr_set(
+	struct inode		*inode,
+	unsigned char		*name,
+	const unsigned char	__user *ubuf,
+	__uint32_t		len,
+	__uint32_t		flags);
+
+extern int
+xfs_attrmulti_attr_remove(
+	struct inode		*inode,
+	unsigned char		*name,
+	__uint32_t		flags);
+
+extern struct dentry *
+xfs_handle_to_dentry(
+	struct file		*parfilp,
+	void __user		*uhandle,
+	u32			hlen);
+
+extern long
+xfs_file_ioctl(
+	struct file		*filp,
+	unsigned int		cmd,
+	unsigned long		p);
+
+extern long
+xfs_file_compat_ioctl(
+	struct file		*file,
+	unsigned int		cmd,
+	unsigned long		arg);
+
+#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
new file mode 100644
index 0000000..54e623b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl32.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_dfrag.h"
+#include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
+#include "xfs_ioctl32.h"
+#include "xfs_trace.h"
+
+#define  _NATIVE_IOC(cmd, type) \
+	  _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
+#ifdef BROKEN_X86_ALIGNMENT
+STATIC int
+xfs_compat_flock64_copyin(
+	xfs_flock64_t		*bf,
+	compat_xfs_flock64_t	__user *arg32)
+{
+	if (get_user(bf->l_type,	&arg32->l_type) ||
+	    get_user(bf->l_whence,	&arg32->l_whence) ||
+	    get_user(bf->l_start,	&arg32->l_start) ||
+	    get_user(bf->l_len,		&arg32->l_len) ||
+	    get_user(bf->l_sysid,	&arg32->l_sysid) ||
+	    get_user(bf->l_pid,		&arg32->l_pid) ||
+	    copy_from_user(bf->l_pad,	&arg32->l_pad,	4*sizeof(u32)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_compat_ioc_fsgeometry_v1(
+	struct xfs_mount	  *mp,
+	compat_xfs_fsop_geom_v1_t __user *arg32)
+{
+	xfs_fsop_geom_t		  fsgeo;
+	int			  error;
+
+	error = xfs_fs_geometry(mp, &fsgeo, 3);
+	if (error)
+		return -error;
+	/* The 32-bit variant simply has some padding at the end */
+	if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_compat_growfs_data_copyin(
+	struct xfs_growfs_data	 *in,
+	compat_xfs_growfs_data_t __user *arg32)
+{
+	if (get_user(in->newblocks, &arg32->newblocks) ||
+	    get_user(in->imaxpct,   &arg32->imaxpct))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_compat_growfs_rt_copyin(
+	struct xfs_growfs_rt	 *in,
+	compat_xfs_growfs_rt_t	__user *arg32)
+{
+	if (get_user(in->newblocks, &arg32->newblocks) ||
+	    get_user(in->extsize,   &arg32->extsize))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt_compat(
+	void			__user *ubuffer,
+	const xfs_inogrp_t	*buffer,
+	long			count,
+	long			*written)
+{
+	compat_xfs_inogrp_t	__user *p32 = ubuffer;
+	long			i;
+
+	for (i = 0; i < count; i++) {
+		if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+		    put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+		    put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+			return -XFS_ERROR(EFAULT);
+	}
+	*written = count * sizeof(*p32);
+	return 0;
+}
+
+#else
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#endif	/* BROKEN_X86_ALIGNMENT */
+
+STATIC int
+xfs_ioctl32_bstime_copyin(
+	xfs_bstime_t		*bstime,
+	compat_xfs_bstime_t	__user *bstime32)
+{
+	compat_time_t		sec32;	/* tv_sec differs on 64 vs. 32 */
+
+	if (get_user(sec32,		&bstime32->tv_sec)	||
+	    get_user(bstime->tv_nsec,	&bstime32->tv_nsec))
+		return -XFS_ERROR(EFAULT);
+	bstime->tv_sec = sec32;
+	return 0;
+}
+
+/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+	xfs_bstat_t		*bstat,
+	compat_xfs_bstat_t	__user *bstat32)
+{
+	if (get_user(bstat->bs_ino,	&bstat32->bs_ino)	||
+	    get_user(bstat->bs_mode,	&bstat32->bs_mode)	||
+	    get_user(bstat->bs_nlink,	&bstat32->bs_nlink)	||
+	    get_user(bstat->bs_uid,	&bstat32->bs_uid)	||
+	    get_user(bstat->bs_gid,	&bstat32->bs_gid)	||
+	    get_user(bstat->bs_rdev,	&bstat32->bs_rdev)	||
+	    get_user(bstat->bs_blksize,	&bstat32->bs_blksize)	||
+	    get_user(bstat->bs_size,	&bstat32->bs_size)	||
+	    xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+	    xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+	    xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+	    get_user(bstat->bs_blocks,	&bstat32->bs_size)	||
+	    get_user(bstat->bs_xflags,	&bstat32->bs_size)	||
+	    get_user(bstat->bs_extsize,	&bstat32->bs_extsize)	||
+	    get_user(bstat->bs_extents,	&bstat32->bs_extents)	||
+	    get_user(bstat->bs_gen,	&bstat32->bs_gen)	||
+	    get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+	    get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+	    get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask)	||
+	    get_user(bstat->bs_dmstate,	&bstat32->bs_dmstate)	||
+	    get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+STATIC int
+xfs_bstime_store_compat(
+	compat_xfs_bstime_t	__user *p32,
+	const xfs_bstime_t	*p)
+{
+	__s32			sec32;
+
+	sec32 = p->tv_sec;
+	if (put_user(sec32, &p32->tv_sec) ||
+	    put_user(p->tv_nsec, &p32->tv_nsec))
+		return -XFS_ERROR(EFAULT);
+	return 0;
+}
+
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
+STATIC int
+xfs_bulkstat_one_fmt_compat(
+	void			__user *ubuffer,
+	int			ubsize,
+	int			*ubused,
+	const xfs_bstat_t	*buffer)
+{
+	compat_xfs_bstat_t	__user *p32 = ubuffer;
+
+	if (ubsize < sizeof(*p32))
+		return XFS_ERROR(ENOMEM);
+
+	if (put_user(buffer->bs_ino,	  &p32->bs_ino)		||
+	    put_user(buffer->bs_mode,	  &p32->bs_mode)	||
+	    put_user(buffer->bs_nlink,	  &p32->bs_nlink)	||
+	    put_user(buffer->bs_uid,	  &p32->bs_uid)		||
+	    put_user(buffer->bs_gid,	  &p32->bs_gid)		||
+	    put_user(buffer->bs_rdev,	  &p32->bs_rdev)	||
+	    put_user(buffer->bs_blksize,  &p32->bs_blksize)	||
+	    put_user(buffer->bs_size,	  &p32->bs_size)	||
+	    xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+	    xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+	    xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+	    put_user(buffer->bs_blocks,	  &p32->bs_blocks)	||
+	    put_user(buffer->bs_xflags,	  &p32->bs_xflags)	||
+	    put_user(buffer->bs_extsize,  &p32->bs_extsize)	||
+	    put_user(buffer->bs_extents,  &p32->bs_extents)	||
+	    put_user(buffer->bs_gen,	  &p32->bs_gen)		||
+	    put_user(buffer->bs_projid,	  &p32->bs_projid)	||
+	    put_user(buffer->bs_projid_hi,	&p32->bs_projid_hi)	||
+	    put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)	||
+	    put_user(buffer->bs_dmstate,  &p32->bs_dmstate)	||
+	    put_user(buffer->bs_aextents, &p32->bs_aextents))
+		return XFS_ERROR(EFAULT);
+	if (ubused)
+		*ubused = sizeof(*p32);
+	return 0;
+}
+
+STATIC int
+xfs_bulkstat_one_compat(
+	xfs_mount_t	*mp,		/* mount point for filesystem */
+	xfs_ino_t	ino,		/* inode number to get data for */
+	void		__user *buffer,	/* buffer to place output in */
+	int		ubsize,		/* size of buffer */
+	int		*ubused,	/* bytes used by me */
+	int		*stat)		/* BULKSTAT_RV_... */
+{
+	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
+				    xfs_bulkstat_one_fmt_compat,
+				    ubused, stat);
+}
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_compat_ioc_bulkstat(
+	xfs_mount_t		  *mp,
+	unsigned int		  cmd,
+	compat_xfs_fsop_bulkreq_t __user *p32)
+{
+	u32			addr;
+	xfs_fsop_bulkreq_t	bulkreq;
+	int			count;	/* # of records returned */
+	xfs_ino_t		inlast;	/* last inode number */
+	int			done;
+	int			error;
+
+	/* done = 1 if there are more stats to get and if bulkstat */
+	/* should be called again (unused here, but used in dmapi) */
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -XFS_ERROR(EIO);
+
+	if (get_user(addr, &p32->lastip))
+		return -XFS_ERROR(EFAULT);
+	bulkreq.lastip = compat_ptr(addr);
+	if (get_user(bulkreq.icount, &p32->icount) ||
+	    get_user(addr, &p32->ubuffer))
+		return -XFS_ERROR(EFAULT);
+	bulkreq.ubuffer = compat_ptr(addr);
+	if (get_user(addr, &p32->ocount))
+		return -XFS_ERROR(EFAULT);
+	bulkreq.ocount = compat_ptr(addr);
+
+	if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+		return -XFS_ERROR(EFAULT);
+
+	if ((count = bulkreq.icount) <= 0)
+		return -XFS_ERROR(EINVAL);
+
+	if (bulkreq.ubuffer == NULL)
+		return -XFS_ERROR(EINVAL);
+
+	if (cmd == XFS_IOC_FSINUMBERS_32) {
+		error = xfs_inumbers(mp, &inlast, &count,
+				bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+	} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
+		int res;
+
+		error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+				sizeof(compat_xfs_bstat_t), 0, &res);
+	} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
+		error = xfs_bulkstat(mp, &inlast, &count,
+			xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+			bulkreq.ubuffer, &done);
+	} else
+		error = XFS_ERROR(EINVAL);
+	if (error)
+		return -error;
+
+	if (bulkreq.ocount != NULL) {
+		if (copy_to_user(bulkreq.lastip, &inlast,
+						sizeof(xfs_ino_t)))
+			return -XFS_ERROR(EFAULT);
+
+		if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+			return -XFS_ERROR(EFAULT);
+	}
+
+	return 0;
+}
+
+STATIC int
+xfs_compat_handlereq_copyin(
+	xfs_fsop_handlereq_t		*hreq,
+	compat_xfs_fsop_handlereq_t	__user *arg32)
+{
+	compat_xfs_fsop_handlereq_t	hreq32;
+
+	if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	hreq->fd = hreq32.fd;
+	hreq->path = compat_ptr(hreq32.path);
+	hreq->oflags = hreq32.oflags;
+	hreq->ihandle = compat_ptr(hreq32.ihandle);
+	hreq->ihandlen = hreq32.ihandlen;
+	hreq->ohandle = compat_ptr(hreq32.ohandle);
+	hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+
+	return 0;
+}
+
+STATIC struct dentry *
+xfs_compat_handlereq_to_dentry(
+	struct file		*parfilp,
+	compat_xfs_fsop_handlereq_t *hreq)
+{
+	return xfs_handle_to_dentry(parfilp,
+			compat_ptr(hreq->ihandle), hreq->ihandlen);
+}
+
+STATIC int
+xfs_compat_attrlist_by_handle(
+	struct file		*parfilp,
+	void			__user *arg)
+{
+	int			error;
+	attrlist_cursor_kern_t	*cursor;
+	compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+	struct dentry		*dentry;
+	char			*kbuf;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&al_hreq, arg,
+			   sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+	if (al_hreq.buflen > XATTR_LIST_MAX)
+		return -XFS_ERROR(EINVAL);
+
+	/*
+	 * Reject flags, only allow namespaces.
+	 */
+	if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+		return -XFS_ERROR(EINVAL);
+
+	dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	error = -ENOMEM;
+	kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+	if (!kbuf)
+		goto out_dput;
+
+	cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+	error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+					al_hreq.flags, cursor);
+	if (error)
+		goto out_kfree;
+
+	if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+		error = -EFAULT;
+
+ out_kfree:
+	kfree(kbuf);
+ out_dput:
+	dput(dentry);
+	return error;
+}
+
+STATIC int
+xfs_compat_attrmulti_by_handle(
+	struct file				*parfilp,
+	void					__user *arg)
+{
+	int					error;
+	compat_xfs_attr_multiop_t		*ops;
+	compat_xfs_fsop_attrmulti_handlereq_t	am_hreq;
+	struct dentry				*dentry;
+	unsigned int				i, size;
+	unsigned char				*attr_name;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&am_hreq, arg,
+			   sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	/* overflow check */
+	if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+		return -E2BIG;
+
+	dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	error = E2BIG;
+	size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+	if (!size || size > 16 * PAGE_SIZE)
+		goto out_dput;
+
+	ops = memdup_user(compat_ptr(am_hreq.ops), size);
+	if (IS_ERR(ops)) {
+		error = PTR_ERR(ops);
+		goto out_dput;
+	}
+
+	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+	if (!attr_name)
+		goto out_kfree_ops;
+
+	error = 0;
+	for (i = 0; i < am_hreq.opcount; i++) {
+		ops[i].am_error = strncpy_from_user((char *)attr_name,
+				compat_ptr(ops[i].am_attrname),
+				MAXNAMELEN);
+		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+			error = -ERANGE;
+		if (ops[i].am_error < 0)
+			break;
+
+		switch (ops[i].am_opcode) {
+		case ATTR_OP_GET:
+			ops[i].am_error = xfs_attrmulti_attr_get(
+					dentry->d_inode, attr_name,
+					compat_ptr(ops[i].am_attrvalue),
+					&ops[i].am_length, ops[i].am_flags);
+			break;
+		case ATTR_OP_SET:
+			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+			if (ops[i].am_error)
+				break;
+			ops[i].am_error = xfs_attrmulti_attr_set(
+					dentry->d_inode, attr_name,
+					compat_ptr(ops[i].am_attrvalue),
+					ops[i].am_length, ops[i].am_flags);
+			mnt_drop_write(parfilp->f_path.mnt);
+			break;
+		case ATTR_OP_REMOVE:
+			ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+			if (ops[i].am_error)
+				break;
+			ops[i].am_error = xfs_attrmulti_attr_remove(
+					dentry->d_inode, attr_name,
+					ops[i].am_flags);
+			mnt_drop_write(parfilp->f_path.mnt);
+			break;
+		default:
+			ops[i].am_error = EINVAL;
+		}
+	}
+
+	if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+		error = XFS_ERROR(EFAULT);
+
+	kfree(attr_name);
+ out_kfree_ops:
+	kfree(ops);
+ out_dput:
+	dput(dentry);
+	return -error;
+}
+
+STATIC int
+xfs_compat_fssetdm_by_handle(
+	struct file		*parfilp,
+	void			__user *arg)
+{
+	int			error;
+	struct fsdmidata	fsd;
+	compat_xfs_fsop_setdm_handlereq_t dmhreq;
+	struct dentry		*dentry;
+
+	if (!capable(CAP_MKNOD))
+		return -XFS_ERROR(EPERM);
+	if (copy_from_user(&dmhreq, arg,
+			   sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+		return -XFS_ERROR(EFAULT);
+
+	dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+		error = -XFS_ERROR(EPERM);
+		goto out;
+	}
+
+	if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+		error = -XFS_ERROR(EFAULT);
+		goto out;
+	}
+
+	error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+				 fsd.fsd_dmstate);
+
+out:
+	dput(dentry);
+	return error;
+}
+
+long
+xfs_file_compat_ioctl(
+	struct file		*filp,
+	unsigned		cmd,
+	unsigned long		p)
+{
+	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	void			__user *arg = (void __user *)p;
+	int			ioflags = 0;
+	int			error;
+
+	if (filp->f_mode & FMODE_NOCMTIME)
+		ioflags |= IO_INVIS;
+
+	trace_xfs_file_compat_ioctl(ip);
+
+	switch (cmd) {
+	/* No size or alignment issues on any arch */
+	case XFS_IOC_DIOINFO:
+	case XFS_IOC_FSGEOMETRY:
+	case XFS_IOC_FSGETXATTR:
+	case XFS_IOC_FSSETXATTR:
+	case XFS_IOC_FSGETXATTRA:
+	case XFS_IOC_FSSETDM:
+	case XFS_IOC_GETBMAP:
+	case XFS_IOC_GETBMAPA:
+	case XFS_IOC_GETBMAPX:
+	case XFS_IOC_FSCOUNTS:
+	case XFS_IOC_SET_RESBLKS:
+	case XFS_IOC_GET_RESBLKS:
+	case XFS_IOC_FSGROWFSLOG:
+	case XFS_IOC_GOINGDOWN:
+	case XFS_IOC_ERROR_INJECTION:
+	case XFS_IOC_ERROR_CLEARALL:
+		return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
+	/* These are handled fine if no alignment issues */
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP64:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP64:
+	case XFS_IOC_FSGEOMETRY_V1:
+	case XFS_IOC_FSGROWFSDATA:
+	case XFS_IOC_FSGROWFSRT:
+	case XFS_IOC_ZERO_RANGE:
+		return xfs_file_ioctl(filp, cmd, p);
+#else
+	case XFS_IOC_ALLOCSP_32:
+	case XFS_IOC_FREESP_32:
+	case XFS_IOC_ALLOCSP64_32:
+	case XFS_IOC_FREESP64_32:
+	case XFS_IOC_RESVSP_32:
+	case XFS_IOC_UNRESVSP_32:
+	case XFS_IOC_RESVSP64_32:
+	case XFS_IOC_UNRESVSP64_32:
+	case XFS_IOC_ZERO_RANGE_32: {
+		struct xfs_flock64	bf;
+
+		if (xfs_compat_flock64_copyin(&bf, arg))
+			return -XFS_ERROR(EFAULT);
+		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+	}
+	case XFS_IOC_FSGEOMETRY_V1_32:
+		return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+	case XFS_IOC_FSGROWFSDATA_32: {
+		struct xfs_growfs_data	in;
+
+		if (xfs_compat_growfs_data_copyin(&in, arg))
+			return -XFS_ERROR(EFAULT);
+		error = xfs_growfs_data(mp, &in);
+		return -error;
+	}
+	case XFS_IOC_FSGROWFSRT_32: {
+		struct xfs_growfs_rt	in;
+
+		if (xfs_compat_growfs_rt_copyin(&in, arg))
+			return -XFS_ERROR(EFAULT);
+		error = xfs_growfs_rt(mp, &in);
+		return -error;
+	}
+#endif
+	/* long changes size, but xfs only copiese out 32 bits */
+	case XFS_IOC_GETXFLAGS_32:
+	case XFS_IOC_SETXFLAGS_32:
+	case XFS_IOC_GETVERSION_32:
+		cmd = _NATIVE_IOC(cmd, long);
+		return xfs_file_ioctl(filp, cmd, p);
+	case XFS_IOC_SWAPEXT_32: {
+		struct xfs_swapext	  sxp;
+		struct compat_xfs_swapext __user *sxu = arg;
+
+		/* Bulk copy in up to the sx_stat field, then copy bstat */
+		if (copy_from_user(&sxp, sxu,
+				   offsetof(struct xfs_swapext, sx_stat)) ||
+		    xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+			return -XFS_ERROR(EFAULT);
+		error = xfs_swapext(&sxp);
+		return -error;
+	}
+	case XFS_IOC_FSBULKSTAT_32:
+	case XFS_IOC_FSBULKSTAT_SINGLE_32:
+	case XFS_IOC_FSINUMBERS_32:
+		return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+	case XFS_IOC_FD_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_HANDLE_32:
+	case XFS_IOC_PATH_TO_FSHANDLE_32: {
+		struct xfs_fsop_handlereq	hreq;
+
+		if (xfs_compat_handlereq_copyin(&hreq, arg))
+			return -XFS_ERROR(EFAULT);
+		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+		return xfs_find_handle(cmd, &hreq);
+	}
+	case XFS_IOC_OPEN_BY_HANDLE_32: {
+		struct xfs_fsop_handlereq	hreq;
+
+		if (xfs_compat_handlereq_copyin(&hreq, arg))
+			return -XFS_ERROR(EFAULT);
+		return xfs_open_by_handle(filp, &hreq);
+	}
+	case XFS_IOC_READLINK_BY_HANDLE_32: {
+		struct xfs_fsop_handlereq	hreq;
+
+		if (xfs_compat_handlereq_copyin(&hreq, arg))
+			return -XFS_ERROR(EFAULT);
+		return xfs_readlink_by_handle(filp, &hreq);
+	}
+	case XFS_IOC_ATTRLIST_BY_HANDLE_32:
+		return xfs_compat_attrlist_by_handle(filp, arg);
+	case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+		return xfs_compat_attrmulti_by_handle(filp, arg);
+	case XFS_IOC_FSSETDM_BY_HANDLE_32:
+		return xfs_compat_fssetdm_by_handle(filp, arg);
+	default:
+		return -XFS_ERROR(ENOIOCTLCMD);
+	}
+}
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
new file mode 100644
index 0000000..80f4060
--- /dev/null
+++ b/fs/xfs/xfs_ioctl32.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL32_H__
+#define __XFS_IOCTL32_H__
+
+#include <linux/compat.h>
+
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment.  We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32	FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32	FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32	FS_IOC32_GETVERSION
+
+/*
+ * On intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
+
+typedef struct compat_xfs_bstime {
+	compat_time_t	tv_sec;		/* seconds		*/
+	__s32		tv_nsec;	/* and nanoseconds	*/
+} compat_xfs_bstime_t;
+
+typedef struct compat_xfs_bstat {
+	__u64		bs_ino;		/* inode number			*/
+	__u16		bs_mode;	/* type and mode		*/
+	__u16		bs_nlink;	/* number of links		*/
+	__u32		bs_uid;		/* user id			*/
+	__u32		bs_gid;		/* group id			*/
+	__u32		bs_rdev;	/* device value			*/
+	__s32		bs_blksize;	/* block size			*/
+	__s64		bs_size;	/* file size			*/
+	compat_xfs_bstime_t bs_atime;	/* access time			*/
+	compat_xfs_bstime_t bs_mtime;	/* modify time			*/
+	compat_xfs_bstime_t bs_ctime;	/* inode change time		*/
+	int64_t		bs_blocks;	/* number of blocks		*/
+	__u32		bs_xflags;	/* extended flags		*/
+	__s32		bs_extsize;	/* extent size			*/
+	__s32		bs_extents;	/* number of extents		*/
+	__u32		bs_gen;		/* generation count		*/
+	__u16		bs_projid_lo;	/* lower part of project id	*/
+#define	bs_projid	bs_projid_lo	/* (previously just bs_projid)	*/
+	__u16		bs_projid_hi;	/* high part of project id	*/
+	unsigned char	bs_pad[12];	/* pad space, unused		*/
+	__u32		bs_dmevmask;	/* DMIG event mask		*/
+	__u16		bs_dmstate;	/* DMIG state info		*/
+	__u16		bs_aextents;	/* attribute number of extents	*/
+} __compat_packed compat_xfs_bstat_t;
+
+typedef struct compat_xfs_fsop_bulkreq {
+	compat_uptr_t	lastip;		/* last inode # pointer		*/
+	__s32		icount;		/* count of entries in buffer	*/
+	compat_uptr_t	ubuffer;	/* user buffer for inode desc.	*/
+	compat_uptr_t	ocount;		/* output count pointer		*/
+} compat_xfs_fsop_bulkreq_t;
+
+#define XFS_IOC_FSBULKSTAT_32 \
+	_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+	_IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+	_IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+typedef struct compat_xfs_fsop_handlereq {
+	__u32		fd;		/* fd for FD_TO_HANDLE		*/
+	compat_uptr_t	path;		/* user pathname		*/
+	__u32		oflags;		/* open flags			*/
+	compat_uptr_t	ihandle;	/* user supplied handle		*/
+	__u32		ihandlen;	/* user supplied length		*/
+	compat_uptr_t	ohandle;	/* user buffer for handle	*/
+	compat_uptr_t	ohandlen;	/* user buffer length		*/
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+	_IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+	_IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+	_IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+	_IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+	_IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+	__int64_t		sx_version;	/* version */
+	__int64_t		sx_fdtarget;	/* fd of target file */
+	__int64_t		sx_fdtmp;	/* fd of tmp file */
+	xfs_off_t		sx_offset;	/* offset into file */
+	xfs_off_t		sx_length;	/* leng from offset */
+	char			sx_pad[16];	/* pad space, unused */
+	compat_xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+
+#define XFS_IOC_SWAPEXT_32	_IOWR('X', 109, struct compat_xfs_swapext)
+
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+	struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+	struct xfs_attrlist_cursor	pos; /* opaque cookie, list offset */
+	__u32				flags;	/* which namespace to use */
+	__u32				buflen;	/* length of buffer supplied */
+	compat_uptr_t			buffer;	/* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+	_IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+	__u32		am_opcode;
+	__s32		am_error;
+	compat_uptr_t	am_attrname;
+	compat_uptr_t	am_attrvalue;
+	__u32		am_length;
+	__u32		am_flags;
+} compat_xfs_attr_multiop_t;
+
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+	struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+	__u32				opcount;/* count of following multiop */
+	/* ptr to compat_xfs_attr_multiop */
+	compat_uptr_t			ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+	_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+
+typedef struct compat_xfs_fsop_setdm_handlereq {
+	struct compat_xfs_fsop_handlereq hreq;	/* handle information   */
+	/* ptr to struct fsdmidata */
+	compat_uptr_t			data;	/* DMAPI data   */
+} compat_xfs_fsop_setdm_handlereq_t;
+
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+	_IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+	__s16		l_type;
+	__s16		l_whence;
+	__s64		l_start	__attribute__((packed));
+			/* len == 0 means until end of file */
+	__s64		l_len __attribute__((packed));
+	__s32		l_sysid;
+	__u32		l_pid;
+	__s32		l_pad[4];	/* reserve area */
+} compat_xfs_flock64_t;
+
+#define XFS_IOC_ALLOCSP_32	_IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32	_IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32	_IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32	_IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32	_IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32	_IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32	_IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32	_IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32	_IOW('X', 57, struct compat_xfs_flock64)
+
+typedef struct compat_xfs_fsop_geom_v1 {
+	__u32		blocksize;	/* filesystem (data) block size */
+	__u32		rtextsize;	/* realtime extent size		*/
+	__u32		agblocks;	/* fsblocks in an AG		*/
+	__u32		agcount;	/* number of allocation groups	*/
+	__u32		logblocks;	/* fsblocks in the log		*/
+	__u32		sectsize;	/* (data) sector size, bytes	*/
+	__u32		inodesize;	/* inode size in bytes		*/
+	__u32		imaxpct;	/* max allowed inode space(%)	*/
+	__u64		datablocks;	/* fsblocks in data subvolume	*/
+	__u64		rtblocks;	/* fsblocks in realtime subvol	*/
+	__u64		rtextents;	/* rt extents in realtime subvol*/
+	__u64		logstart;	/* starting fsblock of the log	*/
+	unsigned char	uuid[16];	/* unique id of the filesystem	*/
+	__u32		sunit;		/* stripe unit, fsblocks	*/
+	__u32		swidth;		/* stripe width, fsblocks	*/
+	__s32		version;	/* structure version		*/
+	__u32		flags;		/* superblock version flags	*/
+	__u32		logsectsize;	/* log sector size, bytes	*/
+	__u32		rtsectsize;	/* realtime sector size, bytes	*/
+	__u32		dirblocksize;	/* directory block size, bytes	*/
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+	_IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+
+typedef struct compat_xfs_inogrp {
+	__u64		xi_startino;	/* starting inode number	*/
+	__s32		xi_alloccount;	/* # bits set in allocmask	*/
+	__u64		xi_allocmask;	/* mask of allocated inodes	*/
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+	__u64		newblocks;	/* new data subvol size, fsblocks */
+	__u32		imaxpct;	/* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+
+typedef struct compat_xfs_growfs_rt {
+	__u64		newblocks;	/* new realtime size, fsblocks */
+	__u32		extsize;	/* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
+
+#endif /* BROKEN_X86_ALIGNMENT */
+
+#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
new file mode 100644
index 0000000..b9c172b
--- /dev/null
+++ b/fs/xfs/xfs_iops.c
@@ -0,0 +1,1210 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_acl.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/xattr.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/security.h>
+#include <linux/fiemap.h>
+#include <linux/slab.h>
+
+/*
+ * Bring the timestamps in the XFS inode uptodate.
+ *
+ * Used before writing the inode to disk.
+ */
+void
+xfs_synchronize_times(
+	xfs_inode_t	*ip)
+{
+	struct inode	*inode = VFS_I(ip);
+
+	ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+	ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+	ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+	ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+	ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+	ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+}
+
+/*
+ * If the linux inode is valid, mark it dirty.
+ * Used when committing a dirty inode into a transaction so that
+ * the inode will get written back by the linux code
+ */
+void
+xfs_mark_inode_dirty_sync(
+	xfs_inode_t	*ip)
+{
+	struct inode	*inode = VFS_I(ip);
+
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+		mark_inode_dirty_sync(inode);
+}
+
+void
+xfs_mark_inode_dirty(
+	xfs_inode_t	*ip)
+{
+	struct inode	*inode = VFS_I(ip);
+
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+		mark_inode_dirty(inode);
+}
+
+/*
+ * Hook in SELinux.  This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+xfs_init_security(
+	struct inode	*inode,
+	struct inode	*dir,
+	const struct qstr *qstr)
+{
+	struct xfs_inode *ip = XFS_I(inode);
+	size_t		length;
+	void		*value;
+	unsigned char	*name;
+	int		error;
+
+	error = security_inode_init_security(inode, dir, qstr, (char **)&name,
+					     &value, &length);
+	if (error) {
+		if (error == -EOPNOTSUPP)
+			return 0;
+		return -error;
+	}
+
+	error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
+
+	kfree(name);
+	kfree(value);
+	return error;
+}
+
+static void
+xfs_dentry_to_name(
+	struct xfs_name	*namep,
+	struct dentry	*dentry)
+{
+	namep->name = dentry->d_name.name;
+	namep->len = dentry->d_name.len;
+}
+
+STATIC void
+xfs_cleanup_inode(
+	struct inode	*dir,
+	struct inode	*inode,
+	struct dentry	*dentry)
+{
+	struct xfs_name	teardown;
+
+	/* Oh, the horror.
+	 * If we can't add the ACL or we fail in
+	 * xfs_init_security we must back out.
+	 * ENOSPC can hit here, among other things.
+	 */
+	xfs_dentry_to_name(&teardown, dentry);
+
+	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+	iput(inode);
+}
+
+STATIC int
+xfs_vn_mknod(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	int		mode,
+	dev_t		rdev)
+{
+	struct inode	*inode;
+	struct xfs_inode *ip = NULL;
+	struct posix_acl *default_acl = NULL;
+	struct xfs_name	name;
+	int		error;
+
+	/*
+	 * Irix uses Missed'em'V split, but doesn't want to see
+	 * the upper 5 bits of (14bit) major.
+	 */
+	if (S_ISCHR(mode) || S_ISBLK(mode)) {
+		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+			return -EINVAL;
+		rdev = sysv_encode_dev(rdev);
+	} else {
+		rdev = 0;
+	}
+
+	if (IS_POSIXACL(dir)) {
+		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(default_acl))
+			return PTR_ERR(default_acl);
+
+		if (!default_acl)
+			mode &= ~current_umask();
+	}
+
+	xfs_dentry_to_name(&name, dentry);
+	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+	if (unlikely(error))
+		goto out_free_acl;
+
+	inode = VFS_I(ip);
+
+	error = xfs_init_security(inode, dir, &dentry->d_name);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	if (default_acl) {
+		error = -xfs_inherit_acl(inode, default_acl);
+		default_acl = NULL;
+		if (unlikely(error))
+			goto out_cleanup_inode;
+	}
+
+
+	d_instantiate(dentry, inode);
+	return -error;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry);
+ out_free_acl:
+	posix_acl_release(default_acl);
+	return -error;
+}
+
+STATIC int
+xfs_vn_create(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	int		mode,
+	struct nameidata *nd)
+{
+	return xfs_vn_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+xfs_vn_mkdir(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	int		mode)
+{
+	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+xfs_vn_lookup(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	struct nameidata *nd)
+{
+	struct xfs_inode *cip;
+	struct xfs_name	name;
+	int		error;
+
+	if (dentry->d_name.len >= MAXNAMELEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	xfs_dentry_to_name(&name, dentry);
+	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
+	if (unlikely(error)) {
+		if (unlikely(error != ENOENT))
+			return ERR_PTR(-error);
+		d_add(dentry, NULL);
+		return NULL;
+	}
+
+	return d_splice_alias(VFS_I(cip), dentry);
+}
+
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	struct nameidata *nd)
+{
+	struct xfs_inode *ip;
+	struct xfs_name	xname;
+	struct xfs_name ci_name;
+	struct qstr	dname;
+	int		error;
+
+	if (dentry->d_name.len >= MAXNAMELEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	xfs_dentry_to_name(&xname, dentry);
+	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+	if (unlikely(error)) {
+		if (unlikely(error != ENOENT))
+			return ERR_PTR(-error);
+		/*
+		 * call d_add(dentry, NULL) here when d_drop_negative_children
+		 * is called in xfs_vn_mknod (ie. allow negative dentries
+		 * with CI filesystems).
+		 */
+		return NULL;
+	}
+
+	/* if exact match, just splice and exit */
+	if (!ci_name.name)
+		return d_splice_alias(VFS_I(ip), dentry);
+
+	/* else case-insensitive match... */
+	dname.name = ci_name.name;
+	dname.len = ci_name.len;
+	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
+	kmem_free(ci_name.name);
+	return dentry;
+}
+
+STATIC int
+xfs_vn_link(
+	struct dentry	*old_dentry,
+	struct inode	*dir,
+	struct dentry	*dentry)
+{
+	struct inode	*inode = old_dentry->d_inode;
+	struct xfs_name	name;
+	int		error;
+
+	xfs_dentry_to_name(&name, dentry);
+
+	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
+	if (unlikely(error))
+		return -error;
+
+	ihold(inode);
+	d_instantiate(dentry, inode);
+	return 0;
+}
+
+STATIC int
+xfs_vn_unlink(
+	struct inode	*dir,
+	struct dentry	*dentry)
+{
+	struct xfs_name	name;
+	int		error;
+
+	xfs_dentry_to_name(&name, dentry);
+
+	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+	if (error)
+		return error;
+
+	/*
+	 * With unlink, the VFS makes the dentry "negative": no inode,
+	 * but still hashed. This is incompatible with case-insensitive
+	 * mode, so invalidate (unhash) the dentry in CI-mode.
+	 */
+	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+		d_invalidate(dentry);
+	return 0;
+}
+
+STATIC int
+xfs_vn_symlink(
+	struct inode	*dir,
+	struct dentry	*dentry,
+	const char	*symname)
+{
+	struct inode	*inode;
+	struct xfs_inode *cip = NULL;
+	struct xfs_name	name;
+	int		error;
+	mode_t		mode;
+
+	mode = S_IFLNK |
+		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
+	xfs_dentry_to_name(&name, dentry);
+
+	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+	if (unlikely(error))
+		goto out;
+
+	inode = VFS_I(cip);
+
+	error = xfs_init_security(inode, dir, &dentry->d_name);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	d_instantiate(dentry, inode);
+	return 0;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry);
+ out:
+	return -error;
+}
+
+STATIC int
+xfs_vn_rename(
+	struct inode	*odir,
+	struct dentry	*odentry,
+	struct inode	*ndir,
+	struct dentry	*ndentry)
+{
+	struct inode	*new_inode = ndentry->d_inode;
+	struct xfs_name	oname;
+	struct xfs_name	nname;
+
+	xfs_dentry_to_name(&oname, odentry);
+	xfs_dentry_to_name(&nname, ndentry);
+
+	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+			   XFS_I(ndir), &nname, new_inode ?
+			   			XFS_I(new_inode) : NULL);
+}
+
+/*
+ * careful here - this function can get called recursively, so
+ * we need to be very careful about how much stack we use.
+ * uio is kmalloced for this reason...
+ */
+STATIC void *
+xfs_vn_follow_link(
+	struct dentry		*dentry,
+	struct nameidata	*nd)
+{
+	char			*link;
+	int			error = -ENOMEM;
+
+	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+	if (!link)
+		goto out_err;
+
+	error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+	if (unlikely(error))
+		goto out_kfree;
+
+	nd_set_link(nd, link);
+	return NULL;
+
+ out_kfree:
+	kfree(link);
+ out_err:
+	nd_set_link(nd, ERR_PTR(error));
+	return NULL;
+}
+
+STATIC void
+xfs_vn_put_link(
+	struct dentry	*dentry,
+	struct nameidata *nd,
+	void		*p)
+{
+	char		*s = nd_get_link(nd);
+
+	if (!IS_ERR(s))
+		kfree(s);
+}
+
+STATIC int
+xfs_vn_getattr(
+	struct vfsmount		*mnt,
+	struct dentry		*dentry,
+	struct kstat		*stat)
+{
+	struct inode		*inode = dentry->d_inode;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+
+	trace_xfs_getattr(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	stat->size = XFS_ISIZE(ip);
+	stat->dev = inode->i_sb->s_dev;
+	stat->mode = ip->i_d.di_mode;
+	stat->nlink = ip->i_d.di_nlink;
+	stat->uid = ip->i_d.di_uid;
+	stat->gid = ip->i_d.di_gid;
+	stat->ino = ip->i_ino;
+	stat->atime = inode->i_atime;
+	stat->mtime = inode->i_mtime;
+	stat->ctime = inode->i_ctime;
+	stat->blocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		stat->blksize = BLKDEV_IOSIZE;
+		stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+				   sysv_minor(ip->i_df.if_u2.if_rdev));
+		break;
+	default:
+		if (XFS_IS_REALTIME_INODE(ip)) {
+			/*
+			 * If the file blocks are being allocated from a
+			 * realtime volume, then return the inode's realtime
+			 * extent size or the realtime volume's extent size.
+			 */
+			stat->blksize =
+				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+		} else
+			stat->blksize = xfs_preferred_iosize(mp);
+		stat->rdev = 0;
+		break;
+	}
+
+	return 0;
+}
+
+int
+xfs_setattr_nonsize(
+	struct xfs_inode	*ip,
+	struct iattr		*iattr,
+	int			flags)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	int			mask = iattr->ia_valid;
+	xfs_trans_t		*tp;
+	int			error;
+	uid_t			uid = 0, iuid = 0;
+	gid_t			gid = 0, igid = 0;
+	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
+	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;
+
+	trace_xfs_setattr(ip);
+
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return XFS_ERROR(EROFS);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	error = -inode_change_ok(inode, iattr);
+	if (error)
+		return XFS_ERROR(error);
+
+	ASSERT((mask & ATTR_SIZE) == 0);
+
+	/*
+	 * If disk quotas is on, we make sure that the dquots do exist on disk,
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't care to take a readlock to look at the ids
+	 * in inode here, because we can't hold it across the trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+		uint	qflags = 0;
+
+		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+			uid = iattr->ia_uid;
+			qflags |= XFS_QMOPT_UQUOTA;
+		} else {
+			uid = ip->i_d.di_uid;
+		}
+		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+			gid = iattr->ia_gid;
+			qflags |= XFS_QMOPT_GQUOTA;
+		}  else {
+			gid = ip->i_d.di_gid;
+		}
+
+		/*
+		 * We take a reference when we initialize udqp and gdqp,
+		 * so it is important that we never blindly double trip on
+		 * the same variable. See xfs_create() for an example.
+		 */
+		ASSERT(udqp == NULL);
+		ASSERT(gdqp == NULL);
+		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+					 qflags, &udqp, &gdqp);
+		if (error)
+			return error;
+	}
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (error)
+		goto out_dqrele;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Change file ownership.  Must be the owner or privileged.
+	 */
+	if (mask & (ATTR_UID|ATTR_GID)) {
+		/*
+		 * These IDs could have changed since we last looked at them.
+		 * But, we're assured that if the ownership did change
+		 * while we didn't have the inode locked, inode's dquot(s)
+		 * would have changed also.
+		 */
+		iuid = ip->i_d.di_uid;
+		igid = ip->i_d.di_gid;
+		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+		/*
+		 * Do a quota reservation only if uid/gid is actually
+		 * going to change.
+		 */
+		if (XFS_IS_QUOTA_RUNNING(mp) &&
+		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+			ASSERT(tp);
+			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (error)	/* out of quota */
+				goto out_trans_cancel;
+		}
+	}
+
+	xfs_trans_ijoin(tp, ip);
+
+	/*
+	 * Change file ownership.  Must be the owner or privileged.
+	 */
+	if (mask & (ATTR_UID|ATTR_GID)) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID))
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (iuid != uid) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+				ASSERT(mask & ATTR_UID);
+				ASSERT(udqp);
+				olddquot1 = xfs_qm_vop_chown(tp, ip,
+							&ip->i_udquot, udqp);
+			}
+			ip->i_d.di_uid = uid;
+			inode->i_uid = uid;
+		}
+		if (igid != gid) {
+			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(!XFS_IS_PQUOTA_ON(mp));
+				ASSERT(mask & ATTR_GID);
+				ASSERT(gdqp);
+				olddquot2 = xfs_qm_vop_chown(tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			ip->i_d.di_gid = gid;
+			inode->i_gid = gid;
+		}
+	}
+
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= mode & ~S_IFMT;
+
+		inode->i_mode &= S_IFMT;
+		inode->i_mode |= mode & ~S_IFMT;
+	}
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & ATTR_ATIME) {
+		inode->i_atime = iattr->ia_atime;
+		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+	if (mask & ATTR_CTIME) {
+		inode->i_ctime = iattr->ia_ctime;
+		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+	if (mask & ATTR_MTIME) {
+		inode->i_mtime = iattr->ia_mtime;
+		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown.
+	 */
+	xfs_qm_dqrele(olddquot1);
+	xfs_qm_dqrele(olddquot2);
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+
+	if (error)
+		return XFS_ERROR(error);
+
+	/*
+	 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+	 * 	     update.  We could avoid this with linked transactions
+	 * 	     and passing down the transaction pointer all the way
+	 *	     to attr_set.  No previous user of the generic
+	 * 	     Posix ACL code seems to care about this issue either.
+	 */
+	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+		error = -xfs_acl_chmod(inode);
+		if (error)
+			return XFS_ERROR(error);
+	}
+
+	return 0;
+
+out_trans_cancel:
+	xfs_trans_cancel(tp, 0);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	return error;
+}
+
+/*
+ * Truncate file.  Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+	struct xfs_inode	*ip,
+	struct iattr		*iattr,
+	int			flags)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	int			mask = iattr->ia_valid;
+	struct xfs_trans	*tp;
+	int			error;
+	uint			lock_flags;
+	uint			commit_flags = 0;
+
+	trace_xfs_setattr(ip);
+
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return XFS_ERROR(EROFS);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	error = -inode_change_ok(inode, iattr);
+	if (error)
+		return XFS_ERROR(error);
+
+	ASSERT(S_ISREG(ip->i_d.di_mode));
+	ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+			ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+			ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+	lock_flags = XFS_ILOCK_EXCL;
+	if (!(flags & XFS_ATTR_NOLOCK))
+		lock_flags |= XFS_IOLOCK_EXCL;
+	xfs_ilock(ip, lock_flags);
+
+	/*
+	 * Short circuit the truncate case for zero length files.
+	 */
+	if (iattr->ia_size == 0 &&
+	    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+		if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+			goto out_unlock;
+
+		/*
+		 * Use the regular setattr path to update the timestamps.
+		 */
+		xfs_iunlock(ip, lock_flags);
+		iattr->ia_valid &= ~ATTR_SIZE;
+		return xfs_setattr_nonsize(ip, iattr, 0);
+	}
+
+	/*
+	 * Make sure that the dquots are attached to the inode.
+	 */
+	error = xfs_qm_dqattach_locked(ip, 0);
+	if (error)
+		goto out_unlock;
+
+	/*
+	 * Now we can make the changes.  Before we join the inode to the
+	 * transaction, take care of the part of the truncation that must be
+	 * done without the inode lock.  This needs to be done before joining
+	 * the inode to the transaction, because the inode cannot be unlocked
+	 * once it is a part of the transaction.
+	 */
+	if (iattr->ia_size > ip->i_size) {
+		/*
+		 * Do the first part of growing a file: zero any data in the
+		 * last block that is beyond the old EOF.  We need to do this
+		 * before the inode is joined to the transaction to modify
+		 * i_size.
+		 */
+		error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+		if (error)
+			goto out_unlock;
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	lock_flags &= ~XFS_ILOCK_EXCL;
+
+	/*
+	 * We are going to log the inode size change in this transaction so
+	 * any previous writes that are beyond the on disk EOF and the new
+	 * EOF that have not been written out need to be written here.  If we
+	 * do not write the data out, we expose ourselves to the null files
+	 * problem.
+	 *
+	 * Only flush from the on disk size to the smaller of the in memory
+	 * file size or the new size as that's the range we really care about
+	 * here and prevents waiting for other data not within the range we
+	 * care about here.
+	 */
+	if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+		error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+					XBF_ASYNC, FI_NONE);
+		if (error)
+			goto out_unlock;
+	}
+
+	/*
+	 * Wait for all I/O to complete.
+	 */
+	xfs_ioend_wait(ip);
+
+	error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+				     xfs_get_blocks);
+	if (error)
+		goto out_unlock;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+				 XFS_TRANS_PERM_LOG_RES,
+				 XFS_ITRUNCATE_LOG_COUNT);
+	if (error)
+		goto out_trans_cancel;
+
+	truncate_setsize(inode, iattr->ia_size);
+
+	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+	lock_flags |= XFS_ILOCK_EXCL;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	xfs_trans_ijoin(tp, ip);
+
+	/*
+	 * Only change the c/mtime if we are changing the size or we are
+	 * explicitly asked to change it.  This handles the semantic difference
+	 * between truncate() and ftruncate() as implemented in the VFS.
+	 *
+	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+	 * special case where we need to update the times despite not having
+	 * these flags set.  For all other operations the VFS set these flags
+	 * explicitly if it wants a timestamp update.
+	 */
+	if (iattr->ia_size != ip->i_size &&
+	    (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+		iattr->ia_ctime = iattr->ia_mtime =
+			current_fs_time(inode->i_sb);
+		mask |= ATTR_CTIME | ATTR_MTIME;
+	}
+
+	if (iattr->ia_size > ip->i_size) {
+		ip->i_d.di_size = iattr->ia_size;
+		ip->i_size = iattr->ia_size;
+	} else if (iattr->ia_size <= ip->i_size ||
+		   (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+		error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+		if (error)
+			goto out_trans_abort;
+
+		/*
+		 * Truncated "down", so we're removing references to old data
+		 * here - if we delay flushing for a long time, we expose
+		 * ourselves unduly to the notorious NULL files problem.  So,
+		 * we mark this inode and flush it when the file is closed,
+		 * and do not wait the usual (long) time for writeout.
+		 */
+		xfs_iflags_set(ip, XFS_ITRUNCATED);
+	}
+
+	if (mask & ATTR_CTIME) {
+		inode->i_ctime = iattr->ia_ctime;
+		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+	if (mask & ATTR_MTIME) {
+		inode->i_mtime = iattr->ia_mtime;
+		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+		ip->i_update_core = 1;
+	}
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+	if (lock_flags)
+		xfs_iunlock(ip, lock_flags);
+	return error;
+
+out_trans_abort:
+	commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+	xfs_trans_cancel(tp, commit_flags);
+	goto out_unlock;
+}
+
+STATIC int
+xfs_vn_setattr(
+	struct dentry	*dentry,
+	struct iattr	*iattr)
+{
+	if (iattr->ia_valid & ATTR_SIZE)
+		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
+}
+
+#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Call fiemap helper to fill in user data.
+ * Returns positive errors to xfs_getbmap.
+ */
+STATIC int
+xfs_fiemap_format(
+	void			**arg,
+	struct getbmapx		*bmv,
+	int			*full)
+{
+	int			error;
+	struct fiemap_extent_info *fieinfo = *arg;
+	u32			fiemap_flags = 0;
+	u64			logical, physical, length;
+
+	/* Do nothing for a hole */
+	if (bmv->bmv_block == -1LL)
+		return 0;
+
+	logical = BBTOB(bmv->bmv_offset);
+	physical = BBTOB(bmv->bmv_block);
+	length = BBTOB(bmv->bmv_length);
+
+	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
+		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
+	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+		fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
+		physical = 0;   /* no block yet */
+	}
+	if (bmv->bmv_oflags & BMV_OF_LAST)
+		fiemap_flags |= FIEMAP_EXTENT_LAST;
+
+	error = fiemap_fill_next_extent(fieinfo, logical, physical,
+					length, fiemap_flags);
+	if (error > 0) {
+		error = 0;
+		*full = 1;	/* user array now full */
+	}
+
+	return -error;
+}
+
+STATIC int
+xfs_vn_fiemap(
+	struct inode		*inode,
+	struct fiemap_extent_info *fieinfo,
+	u64			start,
+	u64			length)
+{
+	xfs_inode_t		*ip = XFS_I(inode);
+	struct getbmapx		bm;
+	int			error;
+
+	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+	if (error)
+		return error;
+
+	/* Set up bmap header for xfs internal routine */
+	bm.bmv_offset = BTOBB(start);
+	/* Special case for whole file */
+	if (length == FIEMAP_MAX_OFFSET)
+		bm.bmv_length = -1LL;
+	else
+		bm.bmv_length = BTOBB(length);
+
+	/* We add one because in getbmap world count includes the header */
+	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+					fieinfo->fi_extents_max + 1;
+	bm.bmv_count = min_t(__s32, bm.bmv_count,
+			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
+	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
+	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+		bm.bmv_iflags |= BMV_IF_ATTRFORK;
+	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+		bm.bmv_iflags |= BMV_IF_DELALLOC;
+
+	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+	if (error)
+		return -error;
+
+	return 0;
+}
+
+static const struct inode_operations xfs_inode_operations = {
+	.get_acl		= xfs_get_acl,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
+	.listxattr		= xfs_vn_listxattr,
+	.fiemap			= xfs_vn_fiemap,
+};
+
+static const struct inode_operations xfs_dir_inode_operations = {
+	.create			= xfs_vn_create,
+	.lookup			= xfs_vn_lookup,
+	.link			= xfs_vn_link,
+	.unlink			= xfs_vn_unlink,
+	.symlink		= xfs_vn_symlink,
+	.mkdir			= xfs_vn_mkdir,
+	/*
+	 * Yes, XFS uses the same method for rmdir and unlink.
+	 *
+	 * There are some subtile differences deeper in the code,
+	 * but we use S_ISDIR to check for those.
+	 */
+	.rmdir			= xfs_vn_unlink,
+	.mknod			= xfs_vn_mknod,
+	.rename			= xfs_vn_rename,
+	.get_acl		= xfs_get_acl,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
+	.listxattr		= xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_dir_ci_inode_operations = {
+	.create			= xfs_vn_create,
+	.lookup			= xfs_vn_ci_lookup,
+	.link			= xfs_vn_link,
+	.unlink			= xfs_vn_unlink,
+	.symlink		= xfs_vn_symlink,
+	.mkdir			= xfs_vn_mkdir,
+	/*
+	 * Yes, XFS uses the same method for rmdir and unlink.
+	 *
+	 * There are some subtile differences deeper in the code,
+	 * but we use S_ISDIR to check for those.
+	 */
+	.rmdir			= xfs_vn_unlink,
+	.mknod			= xfs_vn_mknod,
+	.rename			= xfs_vn_rename,
+	.get_acl		= xfs_get_acl,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
+	.listxattr		= xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_symlink_inode_operations = {
+	.readlink		= generic_readlink,
+	.follow_link		= xfs_vn_follow_link,
+	.put_link		= xfs_vn_put_link,
+	.get_acl		= xfs_get_acl,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.setxattr		= generic_setxattr,
+	.getxattr		= generic_getxattr,
+	.removexattr		= generic_removexattr,
+	.listxattr		= xfs_vn_listxattr,
+};
+
+STATIC void
+xfs_diflags_to_iflags(
+	struct inode		*inode,
+	struct xfs_inode	*ip)
+{
+	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+		inode->i_flags |= S_IMMUTABLE;
+	else
+		inode->i_flags &= ~S_IMMUTABLE;
+	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+		inode->i_flags |= S_APPEND;
+	else
+		inode->i_flags &= ~S_APPEND;
+	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+		inode->i_flags |= S_SYNC;
+	else
+		inode->i_flags &= ~S_SYNC;
+	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+		inode->i_flags |= S_NOATIME;
+	else
+		inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
+ */
+void
+xfs_setup_inode(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = &ip->i_vnode;
+
+	inode->i_ino = ip->i_ino;
+	inode->i_state = I_NEW;
+
+	inode_sb_list_add(inode);
+	/* make the inode look hashed for the writeback code */
+	hlist_add_fake(&inode->i_hash);
+
+	inode->i_mode	= ip->i_d.di_mode;
+	inode->i_nlink	= ip->i_d.di_nlink;
+	inode->i_uid	= ip->i_d.di_uid;
+	inode->i_gid	= ip->i_d.di_gid;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		inode->i_rdev =
+			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+			      sysv_minor(ip->i_df.if_u2.if_rdev));
+		break;
+	default:
+		inode->i_rdev = 0;
+		break;
+	}
+
+	inode->i_generation = ip->i_d.di_gen;
+	i_size_write(inode, ip->i_d.di_size);
+	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
+	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
+	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
+	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
+	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
+	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
+	xfs_diflags_to_iflags(inode, ip);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &xfs_inode_operations;
+		inode->i_fop = &xfs_file_operations;
+		inode->i_mapping->a_ops = &xfs_address_space_operations;
+		break;
+	case S_IFDIR:
+		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+			inode->i_op = &xfs_dir_ci_inode_operations;
+		else
+			inode->i_op = &xfs_dir_inode_operations;
+		inode->i_fop = &xfs_dir_file_operations;
+		break;
+	case S_IFLNK:
+		inode->i_op = &xfs_symlink_inode_operations;
+		if (!(ip->i_df.if_flags & XFS_IFINLINE))
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
+		break;
+	default:
+		inode->i_op = &xfs_inode_operations;
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
+	}
+
+	/*
+	 * If there is no attribute fork no ACL can exist on this inode,
+	 * and it can't have any file capabilities attached to it either.
+	 */
+	if (!XFS_IFORK_Q(ip)) {
+		inode_has_no_xattr(inode);
+		cache_no_acl(inode);
+	}
+
+	xfs_iflags_clear(ip, XFS_INEW);
+	barrier();
+
+	unlock_new_inode(inode);
+}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
new file mode 100644
index 0000000..ef41c92
--- /dev/null
+++ b/fs/xfs/xfs_iops.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+struct xfs_inode;
+
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+
+extern void xfs_setup_inode(struct xfs_inode *);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
new file mode 100644
index 0000000..1e8a45e
--- /dev/null
+++ b/fs/xfs/xfs_linux.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+
+/*
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS	1
+# define XFS_BIG_INUMS	1
+#else
+# define XFS_BIG_BLKNOS	0
+# define XFS_BIG_INUMS	0
+#endif
+
+#include "xfs_types.h"
+
+#include "kmem.h"
+#include "mrlock.h"
+#include "time.h"
+#include "uuid.h"
+
+#include <linux/semaphore.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/ctype.h>
+#include <linux/writeback.h>
+#include <linux/capability.h>
+#include <linux/list_sort.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include "xfs_vnode.h"
+#include "xfs_stats.h"
+#include "xfs_sysctl.h"
+#include "xfs_iops.h"
+#include "xfs_aops.h"
+#include "xfs_super.h"
+#include "xfs_buf.h"
+#include "xfs_message.h"
+
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+/*
+ * Feature macros (disable/enable)
+ */
+#ifdef CONFIG_SMP
+#define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#endif
+
+#define irix_sgid_inherit	xfs_params.sgid_inherit.val
+#define irix_symlink_mode	xfs_params.symlink_mode.val
+#define xfs_panic_mask		xfs_params.panic_mask.val
+#define xfs_error_level		xfs_params.error_level.val
+#define xfs_syncd_centisecs	xfs_params.syncd_timer.val
+#define xfs_stats_clear		xfs_params.stats_clear.val
+#define xfs_inherit_sync	xfs_params.inherit_sync.val
+#define xfs_inherit_nodump	xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime	xfs_params.inherit_noatim.val
+#define xfs_buf_timer_centisecs	xfs_params.xfs_buf_timer.val
+#define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
+#define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
+#define xfs_rotorstep		xfs_params.rotorstep.val
+#define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs	xfs_params.fstrm_timer.val
+
+#define current_cpu()		(raw_smp_processor_id())
+#define current_pid()		(current->pid)
+#define current_test_flags(f)	(current->flags & (f))
+#define current_set_flags_nested(sp, f)		\
+		(*(sp) = current->flags, current->flags |= (f))
+#define current_clear_flags_nested(sp, f)	\
+		(*(sp) = current->flags, current->flags &= ~(f))
+#define current_restore_flags_nested(sp, f)	\
+		(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
+
+#define spinlock_destroy(lock)
+
+#define NBBY		8		/* number of bits per byte */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define	BLKDEV_IOSHIFT		PAGE_CACHE_SHIFT
+#define	BLKDEV_IOSIZE		(1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define	BLKDEV_BB		BTOBB(BLKDEV_IOSIZE)
+
+#define ENOATTR		ENODATA		/* Attribute not found */
+#define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
+#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
+
+#define SYNCHRONIZE()	barrier()
+#define __return_address __builtin_return_address(0)
+
+#define XFS_PROJID_DEFAULT	0
+#define MAXPATHLEN	1024
+
+#define MIN(a,b)	(min(a,b))
+#define MAX(a,b)	(max(a,b))
+#define howmany(x, y)	(((x)+((y)-1))/(y))
+
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
+#define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
+#define xfs_stack_trace()	dump_stack()
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+	__u32	mod;
+
+	switch (n) {
+		case 4:
+			mod = *(__u32 *)a % b;
+			*(__u32 *)a = *(__u32 *)a / b;
+			return mod;
+		case 8:
+			{
+			unsigned long __upper, __low, __high, __mod;
+			__u64	c = *(__u64 *)a;
+			__upper = __high = c >> 32;
+			__low = c;
+			if (__high) {
+				__upper = __high % (b);
+				__high = __high / (b);
+			}
+			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+			asm("":"=A" (c):"a" (__low),"d" (__high));
+			*(__u64 *)a = c;
+			return __mod;
+			}
+	}
+
+	/* NOTREACHED */
+	return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+	switch (n) {
+		case 4:
+			return *(__u32 *)a % b;
+		case 8:
+			{
+			unsigned long __upper, __low, __high, __mod;
+			__u64	c = *(__u64 *)a;
+			__upper = __high = c >> 32;
+			__low = c;
+			if (__high) {
+				__upper = __high % (b);
+				__high = __high / (b);
+			}
+			asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+			asm("":"=A" (c):"a" (__low),"d" (__high));
+			return __mod;
+			}
+	}
+
+	/* NOTREACHED */
+	return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+	__u32	mod;
+
+	switch (n) {
+		case 4:
+			mod = *(__u32 *)a % b;
+			*(__u32 *)a = *(__u32 *)a / b;
+			return mod;
+		case 8:
+			mod = do_div(*(__u64 *)a, b);
+			return mod;
+	}
+
+	/* NOTREACHED */
+	return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+	switch (n) {
+		case 4:
+			return *(__u32 *)a % b;
+		case 8:
+			{
+			__u64	c = *(__u64 *)a;
+			return do_div(c, b);
+			}
+	}
+
+	/* NOTREACHED */
+	return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b)	xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b)	xfs_do_mod(&(a), (b), sizeof(a))
+
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+	x += y - 1;
+	do_div(x, y);
+	return(x * y);
+}
+
+static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+{
+	x += y - 1;
+	do_div(x, y);
+	return x;
+}
+
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
+#define ASSERT_ALWAYS(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef DEBUG
+#define ASSERT(expr)	((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#else /* DEBUG */
+
+#define ASSERT(expr)	\
+	(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef STATIC
+# define STATIC noinline
+#endif
+
+#endif /* DEBUG */
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
new file mode 100644
index 0000000..bd672de
--- /dev/null
+++ b/fs/xfs/xfs_message.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+
+/*
+ * XFS logging functions
+ */
+static void
+__xfs_printk(
+	const char		*level,
+	const struct xfs_mount	*mp,
+	struct va_format	*vaf)
+{
+	if (mp && mp->m_fsname) {
+		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+		return;
+	}
+	printk("%sXFS: %pV\n", level, vaf);
+}
+
+#define define_xfs_printk_level(func, kern_level)		\
+void func(const struct xfs_mount *mp, const char *fmt, ...)	\
+{								\
+	struct va_format	vaf;				\
+	va_list			args;				\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	__xfs_printk(kern_level, mp, &vaf);			\
+	va_end(args);						\
+}								\
+
+define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+define_xfs_printk_level(xfs_alert, KERN_ALERT);
+define_xfs_printk_level(xfs_crit, KERN_CRIT);
+define_xfs_printk_level(xfs_err, KERN_ERR);
+define_xfs_printk_level(xfs_warn, KERN_WARNING);
+define_xfs_printk_level(xfs_notice, KERN_NOTICE);
+define_xfs_printk_level(xfs_info, KERN_INFO);
+#ifdef DEBUG
+define_xfs_printk_level(xfs_debug, KERN_DEBUG);
+#endif
+
+void
+xfs_alert_tag(
+	const struct xfs_mount	*mp,
+	int			panic_tag,
+	const char		*fmt, ...)
+{
+	struct va_format	vaf;
+	va_list			args;
+	int			do_panic = 0;
+
+	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+		xfs_alert(mp, "Transforming an alert into a BUG.");
+		do_panic = 1;
+	}
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	__xfs_printk(KERN_ALERT, mp, &vaf);
+	va_end(args);
+
+	BUG_ON(do_panic);
+}
+
+void
+assfail(char *expr, char *file, int line)
+{
+	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+		expr, file, line);
+	BUG();
+}
+
+void
+xfs_hex_dump(void *p, int length)
+{
+	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
new file mode 100644
index 0000000..7fb7ea0
--- /dev/null
+++ b/fs/xfs/xfs_message.h
@@ -0,0 +1,39 @@
+#ifndef __XFS_MESSAGE_H
+#define __XFS_MESSAGE_H 1
+
+struct xfs_mount;
+
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
+			 const char *fmt, ...)
+        __attribute__ ((format (printf, 3, 4)));
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+
+#ifdef DEBUG
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+#else
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
+#endif
+
+extern void assfail(char *expr, char *f, int l);
+
+extern void xfs_hex_dump(void *p, int length);
+
+#endif	/* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
new file mode 100644
index 0000000..9a0aa76
--- /dev/null
+++ b/fs/xfs/xfs_qm.c
@@ -0,0 +1,2416 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+struct mutex	xfs_Gqm_lock;
+struct xfs_qm	*xfs_Gqm;
+uint		ndquot;
+
+kmem_zone_t	*qm_dqzone;
+kmem_zone_t	*qm_dqtrxzone;
+
+STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);
+
+STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
+
+static struct shrinker xfs_qm_shaker = {
+	.shrink = xfs_qm_shake,
+	.seeks = DEFAULT_SEEKS,
+};
+
+/*
+ * Initialize the XQM structure.
+ * Note that there is not one quota manager per file system.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+	xfs_dqhash_t	*udqhash, *gdqhash;
+	xfs_qm_t	*xqm;
+	size_t		hsize;
+	uint		i;
+
+	/*
+	 * Initialize the dquot hash tables.
+	 */
+	udqhash = kmem_zalloc_greedy(&hsize,
+				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
+	if (!udqhash)
+		goto out;
+
+	gdqhash = kmem_zalloc_large(hsize);
+	if (!gdqhash)
+		goto out_free_udqhash;
+
+	hsize /= sizeof(xfs_dqhash_t);
+	ndquot = hsize << 8;
+
+	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+	xqm->qm_dqhashmask = hsize - 1;
+	xqm->qm_usr_dqhtable = udqhash;
+	xqm->qm_grp_dqhtable = gdqhash;
+	ASSERT(xqm->qm_usr_dqhtable != NULL);
+	ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+	for (i = 0; i < hsize; i++) {
+		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+	}
+
+	/*
+	 * Freelist of all dquots of all file systems
+	 */
+	INIT_LIST_HEAD(&xqm->qm_dqfrlist);
+	xqm->qm_dqfrlist_cnt = 0;
+	mutex_init(&xqm->qm_dqfrlist_lock);
+
+	/*
+	 * dquot zone. we register our own low-memory callback.
+	 */
+	if (!qm_dqzone) {
+		xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+						"xfs_dquots");
+		qm_dqzone = xqm->qm_dqzone;
+	} else
+		xqm->qm_dqzone = qm_dqzone;
+
+	register_shrinker(&xfs_qm_shaker);
+
+	/*
+	 * The t_dqinfo portion of transactions.
+	 */
+	if (!qm_dqtrxzone) {
+		xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+						   "xfs_dqtrx");
+		qm_dqtrxzone = xqm->qm_dqtrxzone;
+	} else
+		xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+	atomic_set(&xqm->qm_totaldquots, 0);
+	xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+	xqm->qm_nrefs = 0;
+	return xqm;
+
+ out_free_udqhash:
+	kmem_free_large(udqhash);
+ out:
+	return NULL;
+}
+
+/*
+ * Destroy the global quota manager when its reference count goes to zero.
+ */
+STATIC void
+xfs_qm_destroy(
+	struct xfs_qm	*xqm)
+{
+	struct xfs_dquot *dqp, *n;
+	int		hsize, i;
+
+	ASSERT(xqm != NULL);
+	ASSERT(xqm->qm_nrefs == 0);
+	unregister_shrinker(&xfs_qm_shaker);
+	hsize = xqm->qm_dqhashmask + 1;
+	for (i = 0; i < hsize; i++) {
+		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
+		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+	}
+	kmem_free_large(xqm->qm_usr_dqhtable);
+	kmem_free_large(xqm->qm_grp_dqhtable);
+	xqm->qm_usr_dqhtable = NULL;
+	xqm->qm_grp_dqhtable = NULL;
+	xqm->qm_dqhashmask = 0;
+
+	/* frlist cleanup */
+	mutex_lock(&xqm->qm_dqfrlist_lock);
+	list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
+		xfs_dqlock(dqp);
+		list_del_init(&dqp->q_freelist);
+		xfs_Gqm->qm_dqfrlist_cnt--;
+		xfs_dqunlock(dqp);
+		xfs_qm_dqdestroy(dqp);
+	}
+	mutex_unlock(&xqm->qm_dqfrlist_lock);
+	mutex_destroy(&xqm->qm_dqfrlist_lock);
+	kmem_free(xqm);
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas. This isn't crucial information as the individual mount
+ * structures are pretty independent, but it helps the XQM keep a
+ * global view of what's going on.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+	struct xfs_mount *mp)
+{
+	/*
+	 * Need to lock the xfs_Gqm structure for things like this. For example,
+	 * the structure could disappear between the entry to this routine and
+	 * a HOLD operation if not locked.
+	 */
+	mutex_lock(&xfs_Gqm_lock);
+
+	if (!xfs_Gqm) {
+		xfs_Gqm = xfs_Gqm_init();
+		if (!xfs_Gqm) {
+			mutex_unlock(&xfs_Gqm_lock);
+			return ENOMEM;
+		}
+	}
+
+	/*
+	 * We can keep a list of all filesystems with quotas mounted for
+	 * debugging and statistical purposes, but ...
+	 * Just take a reference and get out.
+	 */
+	xfs_Gqm->qm_nrefs++;
+	mutex_unlock(&xfs_Gqm_lock);
+
+	return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time,
+ * so that we know when we need to destroy the entire quota manager.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+	struct xfs_mount *mp)
+{
+	xfs_dquot_t	*dqp, *n;
+
+	ASSERT(xfs_Gqm);
+	ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+	/*
+	 * Go thru the freelist and destroy all inactive dquots.
+	 */
+	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+	list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+		xfs_dqlock(dqp);
+		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+			ASSERT(dqp->q_mount == NULL);
+			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+			ASSERT(list_empty(&dqp->q_hashlist));
+			ASSERT(list_empty(&dqp->q_mplist));
+			list_del_init(&dqp->q_freelist);
+			xfs_Gqm->qm_dqfrlist_cnt--;
+			xfs_dqunlock(dqp);
+			xfs_qm_dqdestroy(dqp);
+		} else {
+			xfs_dqunlock(dqp);
+		}
+	}
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
+	/*
+	 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
+	 * be restarted.
+	 */
+	mutex_lock(&xfs_Gqm_lock);
+	if (--xfs_Gqm->qm_nrefs == 0) {
+		xfs_qm_destroy(xfs_Gqm);
+		xfs_Gqm = NULL;
+	}
+	mutex_unlock(&xfs_Gqm_lock);
+}
+
+/*
+ * Just destroy the quotainfo structure.
+ */
+void
+xfs_qm_unmount(
+	struct xfs_mount	*mp)
+{
+	if (mp->m_quotainfo) {
+		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+		xfs_qm_destroy_quotainfo(mp);
+	}
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo.  This is also responsible for
+ * running a quotacheck as necessary.  We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to inidicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+	xfs_mount_t	*mp)
+{
+	int		error = 0;
+	uint		sbf;
+
+	/*
+	 * If quotas on realtime volumes is not supported, we disable
+	 * quotas immediately.
+	 */
+	if (mp->m_sb.sb_rextents) {
+		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+		mp->m_qflags = 0;
+		goto write_changes;
+	}
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * Allocate the quotainfo structure inside the mount struct, and
+	 * create quotainode(s), and change/rev superblock if necessary.
+	 */
+	error = xfs_qm_init_quotainfo(mp);
+	if (error) {
+		/*
+		 * We must turn off quotas.
+		 */
+		ASSERT(mp->m_quotainfo == NULL);
+		mp->m_qflags = 0;
+		goto write_changes;
+	}
+	/*
+	 * If any of the quotas are not consistent, do a quotacheck.
+	 */
+	if (XFS_QM_NEED_QUOTACHECK(mp)) {
+		error = xfs_qm_quotacheck(mp);
+		if (error) {
+			/* Quotacheck failed and disabled quotas. */
+			return;
+		}
+	}
+	/* 
+	 * If one type of quotas is off, then it will lose its
+	 * quotachecked status, since we won't be doing accounting for
+	 * that type anymore.
+	 */
+	if (!XFS_IS_UQUOTA_ON(mp))
+		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
+		mp->m_qflags &= ~XFS_OQUOTA_CHKD;
+
+ write_changes:
+	/*
+	 * We actually don't have to acquire the m_sb_lock at all.
+	 * This can only be called from mount, and that's single threaded. XXX
+	 */
+	spin_lock(&mp->m_sb_lock);
+	sbf = mp->m_sb.sb_qflags;
+	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+	spin_unlock(&mp->m_sb_lock);
+
+	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+			/*
+			 * We could only have been turning quotas off.
+			 * We aren't in very good shape actually because
+			 * the incore structures are convinced that quotas are
+			 * off, but the on disk superblock doesn't know that !
+			 */
+			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+			xfs_alert(mp, "%s: Superblock update failed!",
+				__func__);
+		}
+	}
+
+	if (error) {
+		xfs_warn(mp, "Failed to initialize disk quotas.");
+		return;
+	}
+}
+
+/*
+ * Called from the vfsops layer.
+ */
+void
+xfs_qm_unmount_quotas(
+	xfs_mount_t	*mp)
+{
+	/*
+	 * Release the dquots that root inode, et al might be holding,
+	 * before we flush quotas and blow away the quotainfo structure.
+	 */
+	ASSERT(mp->m_rootip);
+	xfs_qm_dqdetach(mp->m_rootip);
+	if (mp->m_rbmip)
+		xfs_qm_dqdetach(mp->m_rbmip);
+	if (mp->m_rsumip)
+		xfs_qm_dqdetach(mp->m_rsumip);
+
+	/*
+	 * Release the quota inodes.
+	 */
+	if (mp->m_quotainfo) {
+		if (mp->m_quotainfo->qi_uquotaip) {
+			IRELE(mp->m_quotainfo->qi_uquotaip);
+			mp->m_quotainfo->qi_uquotaip = NULL;
+		}
+		if (mp->m_quotainfo->qi_gquotaip) {
+			IRELE(mp->m_quotainfo->qi_gquotaip);
+			mp->m_quotainfo->qi_gquotaip = NULL;
+		}
+	}
+}
+
+/*
+ * Flush all dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+STATIC int
+xfs_qm_dqflush_all(
+	struct xfs_mount	*mp,
+	int			sync_mode)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	int			recl;
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	if (!q)
+		return 0;
+again:
+	mutex_lock(&q->qi_dqlist_lock);
+	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+		xfs_dqlock(dqp);
+		if (! XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqunlock(dqp);
+			continue;
+		}
+
+		/* XXX a sentinel would be better */
+		recl = q->qi_dqreclaims;
+		if (!xfs_dqflock_nowait(dqp)) {
+			/*
+			 * If we can't grab the flush lock then check
+			 * to see if the dquot has been flushed delayed
+			 * write.  If so, grab its buffer and send it
+			 * out immediately.  We'll be able to acquire
+			 * the flush lock when the I/O completes.
+			 */
+			xfs_qm_dqflock_pushbuf_wait(dqp);
+		}
+		/*
+		 * Let go of the mplist lock. We don't want to hold it
+		 * across a disk write.
+		 */
+		mutex_unlock(&q->qi_dqlist_lock);
+		error = xfs_qm_dqflush(dqp, sync_mode);
+		xfs_dqunlock(dqp);
+		if (error)
+			return error;
+
+		mutex_lock(&q->qi_dqlist_lock);
+		if (recl != q->qi_dqreclaims) {
+			mutex_unlock(&q->qi_dqlist_lock);
+			/* XXX restart limit */
+			goto again;
+		}
+	}
+
+	mutex_unlock(&q->qi_dqlist_lock);
+	/* return ! busy */
+	return 0;
+}
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+	struct xfs_mount	*mp)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	struct xfs_dquot	*dqp, *gdqp;
+	int			nrecl;
+
+ again:
+	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+		xfs_dqlock(dqp);
+		if ((gdqp = dqp->q_gdquot)) {
+			xfs_dqlock(gdqp);
+			dqp->q_gdquot = NULL;
+		}
+		xfs_dqunlock(dqp);
+
+		if (gdqp) {
+			/*
+			 * Can't hold the mplist lock across a dqput.
+			 * XXXmust convert to marker based iterations here.
+			 */
+			nrecl = q->qi_dqreclaims;
+			mutex_unlock(&q->qi_dqlist_lock);
+			xfs_qm_dqput(gdqp);
+
+			mutex_lock(&q->qi_dqlist_lock);
+			if (nrecl != q->qi_dqreclaims)
+				goto again;
+		}
+	}
+}
+
+/*
+ * Go through all the incore dquots of this file system and take them
+ * off the mplist and hashlist, if the dquot type matches the dqtype
+ * parameter. This is used when turning off quota accounting for
+ * users and/or groups, as well as when the filesystem is unmounting.
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+	struct xfs_mount	*mp,
+	uint			flags)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	struct xfs_dquot	*dqp, *n;
+	uint			dqtype;
+	int			nrecl;
+	int			nmisses;
+
+	if (!q)
+		return 0;
+
+	dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+	dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
+	dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+	mutex_lock(&q->qi_dqlist_lock);
+
+	/*
+	 * In the first pass through all incore dquots of this filesystem,
+	 * we release the group dquot pointers the user dquots may be
+	 * carrying around as a hint. We need to do this irrespective of
+	 * what's being turned off.
+	 */
+	xfs_qm_detach_gdquots(mp);
+
+      again:
+	nmisses = 0;
+	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+	/*
+	 * Try to get rid of all of the unwanted dquots. The idea is to
+	 * get them off mplist and hashlist, but leave them on freelist.
+	 */
+	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
+		/*
+		 * It's OK to look at the type without taking dqlock here.
+		 * We're holding the mplist lock here, and that's needed for
+		 * a dqreclaim.
+		 */
+		if ((dqp->dq_flags & dqtype) == 0)
+			continue;
+
+		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+			nrecl = q->qi_dqreclaims;
+			mutex_unlock(&q->qi_dqlist_lock);
+			mutex_lock(&dqp->q_hash->qh_lock);
+			mutex_lock(&q->qi_dqlist_lock);
+
+			/*
+			 * XXXTheoretically, we can get into a very long
+			 * ping pong game here.
+			 * No one can be adding dquots to the mplist at
+			 * this point, but somebody might be taking things off.
+			 */
+			if (nrecl != q->qi_dqreclaims) {
+				mutex_unlock(&dqp->q_hash->qh_lock);
+				goto again;
+			}
+		}
+
+		/*
+		 * Take the dquot off the mplist and hashlist. It may remain on
+		 * freelist in INACTIVE state.
+		 */
+		nmisses += xfs_qm_dqpurge(dqp);
+	}
+	mutex_unlock(&q->qi_dqlist_lock);
+	return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		ndquots;
+
+	/*
+	 * Purge the dquot cache.
+	 * None of the dquots should really be busy at this point.
+	 */
+	if (mp->m_quotainfo) {
+		while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
+			delay(ndquots * 10);
+		}
+	}
+	return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+	xfs_inode_t	*ip,
+	xfs_dqid_t	id,
+	uint		type,
+	uint		doalloc,
+	xfs_dquot_t	*udqhint, /* hint */
+	xfs_dquot_t	**IO_idqpp)
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	error = 0;
+
+	/*
+	 * See if we already have it in the inode itself. IO_idqpp is
+	 * &i_udquot or &i_gdquot. This made the code look weird, but
+	 * made the logic a lot simpler.
+	 */
+	dqp = *IO_idqpp;
+	if (dqp) {
+		trace_xfs_dqattach_found(dqp);
+		return 0;
+	}
+
+	/*
+	 * udqhint is the i_udquot field in inode, and is non-NULL only
+	 * when the type arg is group/project. Its purpose is to save a
+	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+	 * the user dquot.
+	 */
+	if (udqhint) {
+		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
+		xfs_dqlock(udqhint);
+
+		/*
+		 * No need to take dqlock to look at the id.
+		 *
+		 * The ID can't change until it gets reclaimed, and it won't
+		 * be reclaimed as long as we have a ref from inode and we
+		 * hold the ilock.
+		 */
+		dqp = udqhint->q_gdquot;
+		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
+			xfs_dqlock(dqp);
+			XFS_DQHOLD(dqp);
+			ASSERT(*IO_idqpp == NULL);
+			*IO_idqpp = dqp;
+
+			xfs_dqunlock(dqp);
+			xfs_dqunlock(udqhint);
+			return 0;
+		}
+
+		/*
+		 * We can't hold a dquot lock when we call the dqget code.
+		 * We'll deadlock in no time, because of (not conforming to)
+		 * lock ordering - the inodelock comes before any dquot lock,
+		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
+		 */
+		xfs_dqunlock(udqhint);
+	}
+
+	/*
+	 * Find the dquot from somewhere. This bumps the
+	 * reference count of dquot and returns it locked.
+	 * This can return ENOENT if dquot didn't exist on
+	 * disk and we didn't ask it to allocate;
+	 * ESRCH if quotas got turned off suddenly.
+	 */
+	error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+	if (error)
+		return error;
+
+	trace_xfs_dqattach_get(dqp);
+
+	/*
+	 * dqget may have dropped and re-acquired the ilock, but it guarantees
+	 * that the dquot returned is the one that should go in the inode.
+	 */
+	*IO_idqpp = dqp;
+	xfs_dqunlock(dqp);
+	return 0;
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering constraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+	xfs_dquot_t	*udq,
+	xfs_dquot_t	*gdq)
+{
+	xfs_dquot_t	*tmp;
+
+	xfs_dqlock(udq);
+
+	if ((tmp = udq->q_gdquot)) {
+		if (tmp == gdq) {
+			xfs_dqunlock(udq);
+			return;
+		}
+
+		udq->q_gdquot = NULL;
+		/*
+		 * We can't keep any dqlocks when calling dqrele,
+		 * because the freelist lock comes before dqlocks.
+		 */
+		xfs_dqunlock(udq);
+		/*
+		 * we took a hard reference once upon a time in dqget,
+		 * so give it back when the udquot no longer points at it
+		 * dqput() does the unlocking of the dquot.
+		 */
+		xfs_qm_dqrele(tmp);
+
+		xfs_dqlock(udq);
+		xfs_dqlock(gdq);
+
+	} else {
+		ASSERT(XFS_DQ_IS_LOCKED(udq));
+		xfs_dqlock(gdq);
+	}
+
+	ASSERT(XFS_DQ_IS_LOCKED(udq));
+	ASSERT(XFS_DQ_IS_LOCKED(gdq));
+	/*
+	 * Somebody could have attached a gdquot here,
+	 * when we dropped the uqlock. If so, just do nothing.
+	 */
+	if (udq->q_gdquot == NULL) {
+		XFS_DQHOLD(gdq);
+		udq->q_gdquot = gdq;
+	}
+
+	xfs_dqunlock(gdq);
+	xfs_dqunlock(udq);
+}
+
+
+/*
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * Inode may get unlocked and relocked in here, and the caller must deal with
+ * the consequences.
+ */
+int
+xfs_qm_dqattach_locked(
+	xfs_inode_t	*ip,
+	uint		flags)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	uint		nquotas = 0;
+	int		error = 0;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
+	    !XFS_NOT_DQATTACHED(mp, ip) ||
+	    ip->i_ino == mp->m_sb.sb_uquotino ||
+	    ip->i_ino == mp->m_sb.sb_gquotino)
+		return 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+						flags & XFS_QMOPT_DQALLOC,
+						NULL, &ip->i_udquot);
+		if (error)
+			goto done;
+		nquotas++;
+	}
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	if (XFS_IS_OQUOTA_ON(mp)) {
+		error = XFS_IS_GQUOTA_ON(mp) ?
+			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+						flags & XFS_QMOPT_DQALLOC,
+						ip->i_udquot, &ip->i_gdquot) :
+			xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
+						flags & XFS_QMOPT_DQALLOC,
+						ip->i_udquot, &ip->i_gdquot);
+		/*
+		 * Don't worry about the udquot that we may have
+		 * attached above. It'll get detached, if not already.
+		 */
+		if (error)
+			goto done;
+		nquotas++;
+	}
+
+	/*
+	 * Attach this group quota to the user quota as a hint.
+	 * This WON'T, in general, result in a thrash.
+	 */
+	if (nquotas == 2) {
+		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+		ASSERT(ip->i_udquot);
+		ASSERT(ip->i_gdquot);
+
+		/*
+		 * We may or may not have the i_udquot locked at this point,
+		 * but this check is OK since we don't depend on the i_gdquot to
+		 * be accurate 100% all the time. It is just a hint, and this
+		 * will succeed in general.
+		 */
+		if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+			goto done;
+		/*
+		 * Attach i_gdquot to the gdquot hint inside the i_udquot.
+		 */
+		xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+	}
+
+ done:
+#ifdef DEBUG
+	if (!error) {
+		if (XFS_IS_UQUOTA_ON(mp))
+			ASSERT(ip->i_udquot);
+		if (XFS_IS_OQUOTA_ON(mp))
+			ASSERT(ip->i_gdquot);
+	}
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+#endif
+	return error;
+}
+
+int
+xfs_qm_dqattach(
+	struct xfs_inode	*ip,
+	uint			flags)
+{
+	int			error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_qm_dqattach_locked(ip, flags);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	return error;
+}
+
+/*
+ * Release dquots (and their references) if any.
+ * The inode should be locked EXCL except when this's called by
+ * xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+	xfs_inode_t	*ip)
+{
+	if (!(ip->i_udquot || ip->i_gdquot))
+		return;
+
+	trace_xfs_dquot_dqdetach(ip);
+
+	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+	if (ip->i_udquot) {
+		xfs_qm_dqrele(ip->i_udquot);
+		ip->i_udquot = NULL;
+	}
+	if (ip->i_gdquot) {
+		xfs_qm_dqrele(ip->i_gdquot);
+		ip->i_gdquot = NULL;
+	}
+}
+
+int
+xfs_qm_sync(
+	struct xfs_mount	*mp,
+	int			flags)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	int			recl, restarts;
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return 0;
+
+	restarts = 0;
+
+  again:
+	mutex_lock(&q->qi_dqlist_lock);
+	/*
+	 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
+	 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+	 * when we have the mplist lock, we know that dquots will be consistent
+	 * as long as we have it locked.
+	 */
+	if (!XFS_IS_QUOTA_ON(mp)) {
+		mutex_unlock(&q->qi_dqlist_lock);
+		return 0;
+	}
+	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+		/*
+		 * If this is vfs_sync calling, then skip the dquots that
+		 * don't 'seem' to be dirty. ie. don't acquire dqlock.
+		 * This is very similar to what xfs_sync does with inodes.
+		 */
+		if (flags & SYNC_TRYLOCK) {
+			if (!XFS_DQ_IS_DIRTY(dqp))
+				continue;
+			if (!xfs_qm_dqlock_nowait(dqp))
+				continue;
+		} else {
+			xfs_dqlock(dqp);
+		}
+
+		/*
+		 * Now, find out for sure if this dquot is dirty or not.
+		 */
+		if (! XFS_DQ_IS_DIRTY(dqp)) {
+			xfs_dqunlock(dqp);
+			continue;
+		}
+
+		/* XXX a sentinel would be better */
+		recl = q->qi_dqreclaims;
+		if (!xfs_dqflock_nowait(dqp)) {
+			if (flags & SYNC_TRYLOCK) {
+				xfs_dqunlock(dqp);
+				continue;
+			}
+			/*
+			 * If we can't grab the flush lock then if the caller
+			 * really wanted us to give this our best shot, so
+			 * see if we can give a push to the buffer before we wait
+			 * on the flush lock. At this point, we know that
+			 * even though the dquot is being flushed,
+			 * it has (new) dirty data.
+			 */
+			xfs_qm_dqflock_pushbuf_wait(dqp);
+		}
+		/*
+		 * Let go of the mplist lock. We don't want to hold it
+		 * across a disk write
+		 */
+		mutex_unlock(&q->qi_dqlist_lock);
+		error = xfs_qm_dqflush(dqp, flags);
+		xfs_dqunlock(dqp);
+		if (error && XFS_FORCED_SHUTDOWN(mp))
+			return 0;	/* Need to prevent umount failure */
+		else if (error)
+			return error;
+
+		mutex_lock(&q->qi_dqlist_lock);
+		if (recl != q->qi_dqreclaims) {
+			if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+				break;
+
+			mutex_unlock(&q->qi_dqlist_lock);
+			goto again;
+		}
+	}
+
+	mutex_unlock(&q->qi_dqlist_lock);
+	return 0;
+}
+
+/*
+ * The hash chains and the mplist use the same xfs_dqhash structure as
+ * their list head, but we can take the mplist qh_lock and one of the
+ * hash qh_locks at the same time without any problem as they aren't
+ * related.
+ */
+static struct lock_class_key xfs_quota_mplist_class;
+
+/*
+ * This initializes all the quota information that's kept in the
+ * mount structure
+ */
+STATIC int
+xfs_qm_init_quotainfo(
+	xfs_mount_t	*mp)
+{
+	xfs_quotainfo_t *qinf;
+	int		error;
+	xfs_dquot_t	*dqp;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * Tell XQM that we exist as soon as possible.
+	 */
+	if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+		return error;
+	}
+
+	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+	/*
+	 * See if quotainodes are setup, and if not, allocate them,
+	 * and change the superblock accordingly.
+	 */
+	if ((error = xfs_qm_init_quotainos(mp))) {
+		kmem_free(qinf);
+		mp->m_quotainfo = NULL;
+		return error;
+	}
+
+	INIT_LIST_HEAD(&qinf->qi_dqlist);
+	mutex_init(&qinf->qi_dqlist_lock);
+	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
+
+	qinf->qi_dqreclaims = 0;
+
+	/* mutex used to serialize quotaoffs */
+	mutex_init(&qinf->qi_quotaofflock);
+
+	/* Precalc some constants */
+	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+	ASSERT(qinf->qi_dqchunklen);
+	qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+	do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
+
+	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+	/*
+	 * We try to get the limits from the superuser's limits fields.
+	 * This is quite hacky, but it is standard quota practice.
+	 * We look at the USR dquot with id == 0 first, but if user quotas
+	 * are not enabled we goto the GRP dquot with id == 0.
+	 * We don't really care to keep separate default limits for user
+	 * and group quotas, at least not at this point.
+	 */
+	error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+			     XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+			     (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+				XFS_DQ_PROJ),
+			     XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+			     &dqp);
+	if (! error) {
+		xfs_disk_dquot_t	*ddqp = &dqp->q_core;
+
+		/*
+		 * The warnings and timers set the grace period given to
+		 * a user or group before he or she can not perform any
+		 * more writing. If it is zero, a default is used.
+		 */
+		qinf->qi_btimelimit = ddqp->d_btimer ?
+			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
+		qinf->qi_itimelimit = ddqp->d_itimer ?
+			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
+		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
+			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
+		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
+			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
+		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
+			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
+			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
+		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+ 
+		/*
+		 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+		 * we don't want this dquot cached. We haven't done a
+		 * quotacheck yet, and quotacheck doesn't like incore dquots.
+		 */
+		xfs_qm_dqdestroy(dqp);
+	} else {
+		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off.
+ * This purges the quota inodes, destroys locks and frees itself.
+ */
+void
+xfs_qm_destroy_quotainfo(
+	xfs_mount_t	*mp)
+{
+	xfs_quotainfo_t *qi;
+
+	qi = mp->m_quotainfo;
+	ASSERT(qi != NULL);
+	ASSERT(xfs_Gqm != NULL);
+
+	/*
+	 * Release the reference that XQM kept, so that we know
+	 * when the XQM structure should be freed. We cannot assume
+	 * that xfs_Gqm is non-null after this point.
+	 */
+	xfs_qm_rele_quotafs_ref(mp);
+
+	ASSERT(list_empty(&qi->qi_dqlist));
+	mutex_destroy(&qi->qi_dqlist_lock);
+
+	if (qi->qi_uquotaip) {
+		IRELE(qi->qi_uquotaip);
+		qi->qi_uquotaip = NULL; /* paranoia */
+	}
+	if (qi->qi_gquotaip) {
+		IRELE(qi->qi_gquotaip);
+		qi->qi_gquotaip = NULL;
+	}
+	mutex_destroy(&qi->qi_quotaofflock);
+	kmem_free(qi);
+	mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+	xfs_dqlist_t	*list,
+	char		*str,
+	int		n)
+{
+	mutex_init(&list->qh_lock);
+	INIT_LIST_HEAD(&list->qh_list);
+	list->qh_version = 0;
+	list->qh_nelems = 0;
+}
+
+STATIC void
+xfs_qm_list_destroy(
+	xfs_dqlist_t	*list)
+{
+	mutex_destroy(&(list->qh_lock));
+}
+
+/*
+ * Create an inode and return with a reference already taken, but unlocked
+ * This is how we create quota inodes
+ */
+STATIC int
+xfs_qm_qino_alloc(
+	xfs_mount_t	*mp,
+	xfs_inode_t	**ip,
+	__int64_t	sbfields,
+	uint		flags)
+{
+	xfs_trans_t	*tp;
+	int		error;
+	int		committed;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
+	if ((error = xfs_trans_reserve(tp,
+				      XFS_QM_QINOCREATE_SPACE_RES(mp),
+				      XFS_CREATE_LOG_RES(mp), 0,
+				      XFS_TRANS_PERM_LOG_RES,
+				      XFS_CREATE_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
+	if (error) {
+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+				 XFS_TRANS_ABORT);
+		return error;
+	}
+
+	/*
+	 * Make the changes in the superblock, and log those too.
+	 * sbfields arg may contain fields other than *QUOTINO;
+	 * VERSIONNUM for example.
+	 */
+	spin_lock(&mp->m_sb_lock);
+	if (flags & XFS_QMOPT_SBVERSION) {
+		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
+		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+		xfs_sb_version_addquota(&mp->m_sb);
+		mp->m_sb.sb_uquotino = NULLFSINO;
+		mp->m_sb.sb_gquotino = NULLFSINO;
+
+		/* qflags will get updated _after_ quotacheck */
+		mp->m_sb.sb_qflags = 0;
+	}
+	if (flags & XFS_QMOPT_UQUOTA)
+		mp->m_sb.sb_uquotino = (*ip)->i_ino;
+	else
+		mp->m_sb.sb_gquotino = (*ip)->i_ino;
+	spin_unlock(&mp->m_sb_lock);
+	xfs_mod_sb(tp, sbfields);
+
+	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
+		return error;
+	}
+	return 0;
+}
+
+
+STATIC void
+xfs_qm_reset_dqcounts(
+	xfs_mount_t	*mp,
+	xfs_buf_t	*bp,
+	xfs_dqid_t	id,
+	uint		type)
+{
+	xfs_disk_dquot_t	*ddq;
+	int			j;
+
+	trace_xfs_reset_dqcounts(bp, _RET_IP_);
+
+	/*
+	 * Reset all counters and timers. They'll be
+	 * started afresh by xfs_qm_quotacheck.
+	 */
+#ifdef DEBUG
+	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+	do_div(j, sizeof(xfs_dqblk_t));
+	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
+#endif
+	ddq = bp->b_addr;
+	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+		/*
+		 * Do a sanity check, and if needed, repair the dqblk. Don't
+		 * output any warnings because it's perfectly possible to
+		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
+		 */
+		(void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+				      "xfs_quotacheck");
+		ddq->d_bcount = 0;
+		ddq->d_icount = 0;
+		ddq->d_rtbcount = 0;
+		ddq->d_btimer = 0;
+		ddq->d_itimer = 0;
+		ddq->d_rtbtimer = 0;
+		ddq->d_bwarns = 0;
+		ddq->d_iwarns = 0;
+		ddq->d_rtbwarns = 0;
+		ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+	}
+}
+
+STATIC int
+xfs_qm_dqiter_bufs(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	firstid,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	blkcnt,
+	uint		flags)
+{
+	xfs_buf_t	*bp;
+	int		error;
+	int		type;
+
+	ASSERT(blkcnt > 0);
+	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
+	error = 0;
+
+	/*
+	 * Blkcnt arg can be a very big number, and might even be
+	 * larger than the log itself. So, we have to break it up into
+	 * manageable-sized transactions.
+	 * Note that we don't start a permanent transaction here; we might
+	 * not be able to get a log reservation for the whole thing up front,
+	 * and we don't really care to either, because we just discard
+	 * everything if we were to crash in the middle of this loop.
+	 */
+	while (blkcnt--) {
+		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+			      XFS_FSB_TO_DADDR(mp, bno),
+			      mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+		if (error)
+			break;
+
+		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
+		xfs_bdwrite(mp, bp);
+		/*
+		 * goto the next block.
+		 */
+		bno++;
+		firstid += mp->m_quotainfo->qi_dqperchunk;
+	}
+	return error;
+}
+
+/*
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
+ * caller supplied function for every chunk of dquots that we find.
+ */
+STATIC int
+xfs_qm_dqiterate(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*qip,
+	uint		flags)
+{
+	xfs_bmbt_irec_t		*map;
+	int			i, nmaps;	/* number of map entries */
+	int			error;		/* return value */
+	xfs_fileoff_t		lblkno;
+	xfs_filblks_t		maxlblkcnt;
+	xfs_dqid_t		firstid;
+	xfs_fsblock_t		rablkno;
+	xfs_filblks_t		rablkcnt;
+
+	error = 0;
+	/*
+	 * This looks racy, but we can't keep an inode lock across a
+	 * trans_reserve. But, this gets called during quotacheck, and that
+	 * happens only at mount time which is single threaded.
+	 */
+	if (qip->i_d.di_nblocks == 0)
+		return 0;
+
+	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+	lblkno = 0;
+	maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+	do {
+		nmaps = XFS_DQITER_MAP_SIZE;
+		/*
+		 * We aren't changing the inode itself. Just changing
+		 * some of its data. No new blocks are added here, and
+		 * the inode is never added to the transaction.
+		 */
+		xfs_ilock(qip, XFS_ILOCK_SHARED);
+		error = xfs_bmapi(NULL, qip, lblkno,
+				  maxlblkcnt - lblkno,
+				  XFS_BMAPI_METADATA,
+				  NULL,
+				  0, map, &nmaps, NULL);
+		xfs_iunlock(qip, XFS_ILOCK_SHARED);
+		if (error)
+			break;
+
+		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+		for (i = 0; i < nmaps; i++) {
+			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+			ASSERT(map[i].br_blockcount);
+
+
+			lblkno += map[i].br_blockcount;
+
+			if (map[i].br_startblock == HOLESTARTBLOCK)
+				continue;
+
+			firstid = (xfs_dqid_t) map[i].br_startoff *
+				mp->m_quotainfo->qi_dqperchunk;
+			/*
+			 * Do a read-ahead on the next extent.
+			 */
+			if ((i+1 < nmaps) &&
+			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+				rablkcnt =  map[i+1].br_blockcount;
+				rablkno = map[i+1].br_startblock;
+				while (rablkcnt--) {
+					xfs_buf_readahead(mp->m_ddev_targp,
+					       XFS_FSB_TO_DADDR(mp, rablkno),
+					       mp->m_quotainfo->qi_dqchunklen);
+					rablkno++;
+				}
+			}
+			/*
+			 * Iterate thru all the blks in the extent and
+			 * reset the counters of all the dquots inside them.
+			 */
+			if ((error = xfs_qm_dqiter_bufs(mp,
+						       firstid,
+						       map[i].br_startblock,
+						       map[i].br_blockcount,
+						       flags))) {
+				break;
+			}
+		}
+
+		if (error)
+			break;
+	} while (nmaps > 0);
+
+	kmem_free(map);
+
+	return error;
+}
+
+/*
+ * Called by dqusage_adjust in doing a quotacheck.
+ *
+ * Given the inode, and a dquot id this updates both the incore dqout as well
+ * as the buffer copy. This is so that once the quotacheck is done, we can
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
+ */
+STATIC int
+xfs_qm_quotacheck_dqadjust(
+	struct xfs_inode	*ip,
+	xfs_dqid_t		id,
+	uint			type,
+	xfs_qcnt_t		nblks,
+	xfs_qcnt_t		rtblks)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	error = xfs_qm_dqget(mp, ip, id, type,
+			     XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+	if (error) {
+		/*
+		 * Shouldn't be able to turn off quotas here.
+		 */
+		ASSERT(error != ESRCH);
+		ASSERT(error != ENOENT);
+		return error;
+	}
+
+	trace_xfs_dqadjust(dqp);
+
+	/*
+	 * Adjust the inode count and the block count to reflect this inode's
+	 * resource usage.
+	 */
+	be64_add_cpu(&dqp->q_core.d_icount, 1);
+	dqp->q_res_icount++;
+	if (nblks) {
+		be64_add_cpu(&dqp->q_core.d_bcount, nblks);
+		dqp->q_res_bcount += nblks;
+	}
+	if (rtblks) {
+		be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
+		dqp->q_res_rtbcount += rtblks;
+	}
+
+	/*
+	 * Set default limits, adjust timers (since we changed usages)
+	 *
+	 * There are no timers for the default values set in the root dquot.
+	 */
+	if (dqp->q_core.d_id) {
+		xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
+		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+	}
+
+	dqp->dq_flags |= XFS_DQ_DIRTY;
+	xfs_qm_dqput(dqp);
+	return 0;
+}
+
+STATIC int
+xfs_qm_get_rtblks(
+	xfs_inode_t	*ip,
+	xfs_qcnt_t	*O_rtblks)
+{
+	xfs_filblks_t	rtblks;			/* total rt blks */
+	xfs_extnum_t	idx;			/* extent record index */
+	xfs_ifork_t	*ifp;			/* inode fork pointer */
+	xfs_extnum_t	nextents;		/* number of extent entries */
+	int		error;
+
+	ASSERT(XFS_IS_REALTIME_INODE(ip));
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+			return error;
+	}
+	rtblks = 0;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	for (idx = 0; idx < nextents; idx++)
+		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+	*O_rtblks = (xfs_qcnt_t)rtblks;
+	return 0;
+}
+
+/*
+ * callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+	xfs_mount_t	*mp,		/* mount point for filesystem */
+	xfs_ino_t	ino,		/* inode number to get data for */
+	void		__user *buffer,	/* not used */
+	int		ubsize,		/* not used */
+	int		*ubused,	/* not used */
+	int		*res)		/* result code value */
+{
+	xfs_inode_t	*ip;
+	xfs_qcnt_t	nblks, rtblks = 0;
+	int		error;
+
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * rootino must have its resources accounted for, not so with the quota
+	 * inodes.
+	 */
+	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+		*res = BULKSTAT_RV_NOTHING;
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+	 * interface expects the inode to be exclusively locked because that's
+	 * the case in all other instances. It's OK that we do this because
+	 * quotacheck is done only at mount time.
+	 */
+	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+	if (error) {
+		*res = BULKSTAT_RV_NOTHING;
+		return error;
+	}
+
+	ASSERT(ip->i_delayed_blks == 0);
+
+	if (XFS_IS_REALTIME_INODE(ip)) {
+		/*
+		 * Walk thru the extent list and count the realtime blocks.
+		 */
+		error = xfs_qm_get_rtblks(ip, &rtblks);
+		if (error)
+			goto error0;
+	}
+
+	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+
+	/*
+	 * Add the (disk blocks and inode) resources occupied by this
+	 * inode to its dquots. We do this adjustment in the incore dquot,
+	 * and also copy the changes to its buffer.
+	 * We don't care about putting these changes in a transaction
+	 * envelope because if we crash in the middle of a 'quotacheck'
+	 * we have to start from the beginning anyway.
+	 * Once we're done, we'll log all the dquot bufs.
+	 *
+	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
+	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
+	 */
+	if (XFS_IS_UQUOTA_ON(mp)) {
+		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+						   XFS_DQ_USER, nblks, rtblks);
+		if (error)
+			goto error0;
+	}
+
+	if (XFS_IS_GQUOTA_ON(mp)) {
+		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+						   XFS_DQ_GROUP, nblks, rtblks);
+		if (error)
+			goto error0;
+	}
+
+	if (XFS_IS_PQUOTA_ON(mp)) {
+		error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
+						   XFS_DQ_PROJ, nblks, rtblks);
+		if (error)
+			goto error0;
+	}
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	IRELE(ip);
+	*res = BULKSTAT_RV_DIDONE;
+	return 0;
+
+error0:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	IRELE(ip);
+	*res = BULKSTAT_RV_GIVEUP;
+	return error;
+}
+
+/*
+ * Walk thru all the filesystem inodes and construct a consistent view
+ * of the disk quota world. If the quotacheck fails, disable quotas.
+ */
+int
+xfs_qm_quotacheck(
+	xfs_mount_t	*mp)
+{
+	int		done, count, error;
+	xfs_ino_t	lastino;
+	size_t		structsz;
+	xfs_inode_t	*uip, *gip;
+	uint		flags;
+
+	count = INT_MAX;
+	structsz = 1;
+	lastino = 0;
+	flags = 0;
+
+	ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	/*
+	 * There should be no cached dquots. The (simplistic) quotacheck
+	 * algorithm doesn't like that.
+	 */
+	ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
+
+	xfs_notice(mp, "Quotacheck needed: Please wait.");
+
+	/*
+	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
+	 * their counters to zero. We need a clean slate.
+	 * We don't log our changes till later.
+	 */
+	uip = mp->m_quotainfo->qi_uquotaip;
+	if (uip) {
+		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
+		if (error)
+			goto error_return;
+		flags |= XFS_UQUOTA_CHKD;
+	}
+
+	gip = mp->m_quotainfo->qi_gquotaip;
+	if (gip) {
+		error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+					XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+		if (error)
+			goto error_return;
+		flags |= XFS_OQUOTA_CHKD;
+	}
+
+	do {
+		/*
+		 * Iterate thru all the inodes in the file system,
+		 * adjusting the corresponding dquot counters in core.
+		 */
+		error = xfs_bulkstat(mp, &lastino, &count,
+				     xfs_qm_dqusage_adjust,
+				     structsz, NULL, &done);
+		if (error)
+			break;
+
+	} while (!done);
+
+	/*
+	 * We've made all the changes that we need to make incore.
+	 * Flush them down to disk buffers if everything was updated
+	 * successfully.
+	 */
+	if (!error)
+		error = xfs_qm_dqflush_all(mp, 0);
+
+	/*
+	 * We can get this error if we couldn't do a dquot allocation inside
+	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+	 * dirty dquots that might be cached, we just want to get rid of them
+	 * and turn quotaoff. The dquots won't be attached to any of the inodes
+	 * at this point (because we intentionally didn't in dqget_noattach).
+	 */
+	if (error) {
+		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+		goto error_return;
+	}
+
+	/*
+	 * We didn't log anything, because if we crashed, we'll have to
+	 * start the quotacheck from scratch anyway. However, we must make
+	 * sure that our dquot changes are secure before we put the
+	 * quotacheck'd stamp on the superblock. So, here we do a synchronous
+	 * flush.
+	 */
+	XFS_bflush(mp->m_ddev_targp);
+
+	/*
+	 * If one type of quotas is off, then it will lose its
+	 * quotachecked status, since we won't be doing accounting for
+	 * that type anymore.
+	 */
+	mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
+	mp->m_qflags |= flags;
+
+ error_return:
+	if (error) {
+		xfs_warn(mp,
+	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
+			error);
+		/*
+		 * We must turn off quotas.
+		 */
+		ASSERT(mp->m_quotainfo != NULL);
+		ASSERT(xfs_Gqm != NULL);
+		xfs_qm_destroy_quotainfo(mp);
+		if (xfs_mount_reset_sbqflags(mp)) {
+			xfs_warn(mp,
+				"Quotacheck: Failed to reset quota flags.");
+		}
+	} else
+		xfs_notice(mp, "Quotacheck: Done.");
+	return (error);
+}
+
+/*
+ * This is called after the superblock has been read in and we're ready to
+ * iget the quota inodes.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+	xfs_mount_t	*mp)
+{
+	xfs_inode_t	*uip, *gip;
+	int		error;
+	__int64_t	sbflags;
+	uint		flags;
+
+	ASSERT(mp->m_quotainfo);
+	uip = gip = NULL;
+	sbflags = 0;
+	flags = 0;
+
+	/*
+	 * Get the uquota and gquota inodes
+	 */
+	if (xfs_sb_version_hasquota(&mp->m_sb)) {
+		if (XFS_IS_UQUOTA_ON(mp) &&
+		    mp->m_sb.sb_uquotino != NULLFSINO) {
+			ASSERT(mp->m_sb.sb_uquotino > 0);
+			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+					     0, 0, &uip)))
+				return XFS_ERROR(error);
+		}
+		if (XFS_IS_OQUOTA_ON(mp) &&
+		    mp->m_sb.sb_gquotino != NULLFSINO) {
+			ASSERT(mp->m_sb.sb_gquotino > 0);
+			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+					     0, 0, &gip))) {
+				if (uip)
+					IRELE(uip);
+				return XFS_ERROR(error);
+			}
+		}
+	} else {
+		flags |= XFS_QMOPT_SBVERSION;
+		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+	}
+
+	/*
+	 * Create the two inodes, if they don't exist already. The changes
+	 * made above will get added to a transaction and logged in one of
+	 * the qino_alloc calls below.  If the device is readonly,
+	 * temporarily switch to read-write to do this.
+	 */
+	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+		if ((error = xfs_qm_qino_alloc(mp, &uip,
+					      sbflags | XFS_SB_UQUOTINO,
+					      flags | XFS_QMOPT_UQUOTA)))
+			return XFS_ERROR(error);
+
+		flags &= ~XFS_QMOPT_SBVERSION;
+	}
+	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+		flags |= (XFS_IS_GQUOTA_ON(mp) ?
+				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+		error = xfs_qm_qino_alloc(mp, &gip,
+					  sbflags | XFS_SB_GQUOTINO, flags);
+		if (error) {
+			if (uip)
+				IRELE(uip);
+
+			return XFS_ERROR(error);
+		}
+	}
+
+	mp->m_quotainfo->qi_uquotaip = uip;
+	mp->m_quotainfo->qi_gquotaip = gip;
+
+	return 0;
+}
+
+
+
+/*
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+	xfs_dquot_t	*dqpout;
+	xfs_dquot_t	*dqp;
+	int		restarts;
+	int		startagain;
+
+	restarts = 0;
+	dqpout = NULL;
+
+	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+again:
+	startagain = 0;
+	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+		struct xfs_mount *mp = dqp->q_mount;
+		xfs_dqlock(dqp);
+
+		/*
+		 * We are racing with dqlookup here. Naturally we don't
+		 * want to reclaim a dquot that lookup wants. We release the
+		 * freelist lock and start over, so that lookup will grab
+		 * both the dquot and the freelistlock.
+		 */
+		if (dqp->dq_flags & XFS_DQ_WANT) {
+			ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+
+			trace_xfs_dqreclaim_want(dqp);
+			XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+			restarts++;
+			startagain = 1;
+			goto dqunlock;
+		}
+
+		/*
+		 * If the dquot is inactive, we are assured that it is
+		 * not on the mplist or the hashlist, and that makes our
+		 * life easier.
+		 */
+		if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+			ASSERT(mp == NULL);
+			ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+			ASSERT(list_empty(&dqp->q_hashlist));
+			ASSERT(list_empty(&dqp->q_mplist));
+			list_del_init(&dqp->q_freelist);
+			xfs_Gqm->qm_dqfrlist_cnt--;
+			dqpout = dqp;
+			XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+			goto dqunlock;
+		}
+
+		ASSERT(dqp->q_hash);
+		ASSERT(!list_empty(&dqp->q_mplist));
+
+		/*
+		 * Try to grab the flush lock. If this dquot is in the process
+		 * of getting flushed to disk, we don't want to reclaim it.
+		 */
+		if (!xfs_dqflock_nowait(dqp))
+			goto dqunlock;
+
+		/*
+		 * We have the flush lock so we know that this is not in the
+		 * process of being flushed. So, if this is dirty, flush it
+		 * DELWRI so that we don't get a freelist infested with
+		 * dirty dquots.
+		 */
+		if (XFS_DQ_IS_DIRTY(dqp)) {
+			int	error;
+
+			trace_xfs_dqreclaim_dirty(dqp);
+
+			/*
+			 * We flush it delayed write, so don't bother
+			 * releasing the freelist lock.
+			 */
+			error = xfs_qm_dqflush(dqp, 0);
+			if (error) {
+				xfs_warn(mp, "%s: dquot %p flush failed",
+					__func__, dqp);
+			}
+			goto dqunlock;
+		}
+
+		/*
+		 * We're trying to get the hashlock out of order. This races
+		 * with dqlookup; so, we giveup and goto the next dquot if
+		 * we couldn't get the hashlock. This way, we won't starve
+		 * a dqlookup process that holds the hashlock that is
+		 * waiting for the freelist lock.
+		 */
+		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+			restarts++;
+			goto dqfunlock;
+		}
+
+		/*
+		 * This races with dquot allocation code as well as dqflush_all
+		 * and reclaim code. So, if we failed to grab the mplist lock,
+		 * giveup everything and start over.
+		 */
+		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
+			restarts++;
+			startagain = 1;
+			goto qhunlock;
+		}
+
+		ASSERT(dqp->q_nrefs == 0);
+		list_del_init(&dqp->q_mplist);
+		mp->m_quotainfo->qi_dquots--;
+		mp->m_quotainfo->qi_dqreclaims++;
+		list_del_init(&dqp->q_hashlist);
+		dqp->q_hash->qh_version++;
+		list_del_init(&dqp->q_freelist);
+		xfs_Gqm->qm_dqfrlist_cnt--;
+		dqpout = dqp;
+		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+qhunlock:
+		mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
+		xfs_dqfunlock(dqp);
+dqunlock:
+		xfs_dqunlock(dqp);
+		if (dqpout)
+			break;
+		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+			break;
+		if (startagain) {
+			mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+			goto again;
+		}
+	}
+	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+	return dqpout;
+}
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function ...
+ */
+STATIC int
+xfs_qm_shake_freelist(
+	int	howmany)
+{
+	int		nreclaimed = 0;
+	xfs_dquot_t	*dqp;
+
+	if (howmany <= 0)
+		return 0;
+
+	while (nreclaimed < howmany) {
+		dqp = xfs_qm_dqreclaim_one();
+		if (!dqp)
+			return nreclaimed;
+		xfs_qm_dqdestroy(dqp);
+		nreclaimed++;
+	}
+	return nreclaimed;
+}
+
+/*
+ * The kmem_shake interface is invoked when memory is running low.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_shake(
+	struct shrinker	*shrink,
+	struct shrink_control *sc)
+{
+	int	ndqused, nfree, n;
+	gfp_t gfp_mask = sc->gfp_mask;
+
+	if (!kmem_shake_allow(gfp_mask))
+		return 0;
+	if (!xfs_Gqm)
+		return 0;
+
+	nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
+	/* incore dquots in all f/s's */
+	ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+	ASSERT(ndqused >= 0);
+
+	if (nfree <= ndqused && nfree < ndquot)
+		return 0;
+
+	ndqused *= xfs_Gqm->qm_dqfree_ratio;	/* target # of free dquots */
+	n = nfree - ndqused - ndquot;		/* # over target */
+
+	return xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Return a new incore dquot. Depending on the number of
+ * dquots in the system, we either allocate a new one on the kernel heap,
+ * or reclaim a free one.
+ * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
+ * to reclaim an existing one from the freelist.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+	xfs_dquot_t **O_dqpp)
+{
+	xfs_dquot_t	*dqp;
+
+	/*
+	 * Check against high water mark to see if we want to pop
+	 * a nincompoop dquot off the freelist.
+	 */
+	if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+		/*
+		 * Try to recycle a dquot from the freelist.
+		 */
+		if ((dqp = xfs_qm_dqreclaim_one())) {
+			XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+			/*
+			 * Just zero the core here. The rest will get
+			 * reinitialized by caller. XXX we shouldn't even
+			 * do this zero ...
+			 */
+			memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+			*O_dqpp = dqp;
+			return B_FALSE;
+		}
+		XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+	}
+
+	/*
+	 * Allocate a brand new dquot on the kernel heap and return it
+	 * to the caller to initialize.
+	 */
+	ASSERT(xfs_Gqm->qm_dqzone != NULL);
+	*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+	atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+	return B_TRUE;
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to
+ * disk. flags parameter indicates which fields have changed.
+ */
+int
+xfs_qm_write_sb_changes(
+	xfs_mount_t	*mp,
+	__int64_t	flags)
+{
+	xfs_trans_t	*tp;
+	int		error;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+	if ((error = xfs_trans_reserve(tp, 0,
+				      mp->m_sb.sb_sectsize + 128, 0,
+				      0,
+				      XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_mod_sb(tp, flags);
+	error = xfs_trans_commit(tp, 0);
+
+	return error;
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid, gid and prid make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file.
+ * This also attaches dquot(s) to the given inode after locking it,
+ * and returns the dquots corresponding to the uid and/or gid.
+ *
+ * in	: inode (unlocked)
+ * out	: udquot, gdquot with references taken and unlocked
+ */
+int
+xfs_qm_vop_dqalloc(
+	struct xfs_inode	*ip,
+	uid_t			uid,
+	gid_t			gid,
+	prid_t			prid,
+	uint			flags,
+	struct xfs_dquot	**O_udqpp,
+	struct xfs_dquot	**O_gdqpp)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_dquot	*uq, *gq;
+	int			error;
+	uint			lockflags;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return 0;
+
+	lockflags = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lockflags);
+
+	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
+		gid = ip->i_d.di_gid;
+
+	/*
+	 * Attach the dquot(s) to this inode, doing a dquot allocation
+	 * if necessary. The dquot(s) will not be locked.
+	 */
+	if (XFS_NOT_DQATTACHED(mp, ip)) {
+		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+		if (error) {
+			xfs_iunlock(ip, lockflags);
+			return error;
+		}
+	}
+
+	uq = gq = NULL;
+	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+		if (ip->i_d.di_uid != uid) {
+			/*
+			 * What we need is the dquot that has this uid, and
+			 * if we send the inode to dqget, the uid of the inode
+			 * takes priority over what's sent in the uid argument.
+			 * We must unlock inode here before calling dqget if
+			 * we're not sending the inode, because otherwise
+			 * we'll deadlock by doing trans_reserve while
+			 * holding ilock.
+			 */
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+						 XFS_DQ_USER,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &uq))) {
+				ASSERT(error != ENOENT);
+				return error;
+			}
+			/*
+			 * Get the ilock in the right order.
+			 */
+			xfs_dqunlock(uq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			/*
+			 * Take an extra reference, because we'll return
+			 * this to caller
+			 */
+			ASSERT(ip->i_udquot);
+			uq = ip->i_udquot;
+			xfs_dqlock(uq);
+			XFS_DQHOLD(uq);
+			xfs_dqunlock(uq);
+		}
+	}
+	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+		if (ip->i_d.di_gid != gid) {
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+						 XFS_DQ_GROUP,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &gq))) {
+				if (uq)
+					xfs_qm_dqrele(uq);
+				ASSERT(error != ENOENT);
+				return error;
+			}
+			xfs_dqunlock(gq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			ASSERT(ip->i_gdquot);
+			gq = ip->i_gdquot;
+			xfs_dqlock(gq);
+			XFS_DQHOLD(gq);
+			xfs_dqunlock(gq);
+		}
+	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+		if (xfs_get_projid(ip) != prid) {
+			xfs_iunlock(ip, lockflags);
+			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+						 XFS_DQ_PROJ,
+						 XFS_QMOPT_DQALLOC |
+						 XFS_QMOPT_DOWARN,
+						 &gq))) {
+				if (uq)
+					xfs_qm_dqrele(uq);
+				ASSERT(error != ENOENT);
+				return (error);
+			}
+			xfs_dqunlock(gq);
+			lockflags = XFS_ILOCK_SHARED;
+			xfs_ilock(ip, lockflags);
+		} else {
+			ASSERT(ip->i_gdquot);
+			gq = ip->i_gdquot;
+			xfs_dqlock(gq);
+			XFS_DQHOLD(gq);
+			xfs_dqunlock(gq);
+		}
+	}
+	if (uq)
+		trace_xfs_dquot_dqalloc(ip);
+
+	xfs_iunlock(ip, lockflags);
+	if (O_udqpp)
+		*O_udqpp = uq;
+	else if (uq)
+		xfs_qm_dqrele(uq);
+	if (O_gdqpp)
+		*O_gdqpp = gq;
+	else if (gq)
+		xfs_qm_dqrele(gq);
+	return 0;
+}
+
+/*
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dquot_t	**IO_olddq,
+	xfs_dquot_t	*newdq)
+{
+	xfs_dquot_t	*prevdq;
+	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
+				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+	/* old dquot */
+	prevdq = *IO_olddq;
+	ASSERT(prevdq);
+	ASSERT(prevdq != newdq);
+
+	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
+
+	/* the sparkling new dquot */
+	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+	/*
+	 * Take an extra reference, because the inode
+	 * is going to keep this dquot pointer even
+	 * after the trans_commit.
+	 */
+	xfs_dqlock(newdq);
+	XFS_DQHOLD(newdq);
+	xfs_dqunlock(newdq);
+	*IO_olddq = newdq;
+
+	return prevdq;
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
+ */
+int
+xfs_qm_vop_chown_reserve(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_dquot_t	*udqp,
+	xfs_dquot_t	*gdqp,
+	uint		flags)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	uint		delblks, blkflags, prjflags = 0;
+	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq;
+	int		error;
+
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	delblks = ip->i_delayed_blks;
+	delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+	blkflags = XFS_IS_REALTIME_INODE(ip) ?
+			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+
+	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
+		delblksudq = udqp;
+		/*
+		 * If there are delayed allocation blocks, then we have to
+		 * unreserve those from the old dquot, and add them to the
+		 * new dquot.
+		 */
+		if (delblks) {
+			ASSERT(ip->i_udquot);
+			unresudq = ip->i_udquot;
+		}
+	}
+	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+		if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
+		     xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
+			prjflags = XFS_QMOPT_ENOSPC;
+
+		if (prjflags ||
+		    (XFS_IS_GQUOTA_ON(ip->i_mount) &&
+		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
+			delblksgdq = gdqp;
+			if (delblks) {
+				ASSERT(ip->i_gdquot);
+				unresgdq = ip->i_gdquot;
+			}
+		}
+	}
+
+	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+				flags | blkflags | prjflags)))
+		return (error);
+
+	/*
+	 * Do the delayed blks reservations/unreservations now. Since, these
+	 * are done without the help of a transaction, if a reservation fails
+	 * its previous reservations won't be automatically undone by trans
+	 * code. So, we have to do it manually here.
+	 */
+	if (delblks) {
+		/*
+		 * Do the reservations first. Unreservation can't fail.
+		 */
+		ASSERT(delblksudq || delblksgdq);
+		ASSERT(unresudq || unresgdq);
+		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+				flags | blkflags | prjflags)))
+			return (error);
+		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+				blkflags);
+	}
+
+	return (0);
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+	struct xfs_inode	**i_tab)
+{
+	struct xfs_mount	*mp = i_tab[0]->i_mount;
+	int			i;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return 0;
+
+	for (i = 0; (i < 4 && i_tab[i]); i++) {
+		struct xfs_inode	*ip = i_tab[i];
+		int			error;
+
+		/*
+		 * Watch out for duplicate entries in the table.
+		 */
+		if (i == 0 || ip != i_tab[i-1]) {
+			if (XFS_NOT_DQATTACHED(mp, ip)) {
+				error = xfs_qm_dqattach(ip, 0);
+				if (error)
+					return error;
+			}
+		}
+	}
+	return 0;
+}
+
+void
+xfs_qm_vop_create_dqattach(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	struct xfs_dquot	*udqp,
+	struct xfs_dquot	*gdqp)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+	if (udqp) {
+		xfs_dqlock(udqp);
+		XFS_DQHOLD(udqp);
+		xfs_dqunlock(udqp);
+		ASSERT(ip->i_udquot == NULL);
+		ip->i_udquot = udqp;
+		ASSERT(XFS_IS_UQUOTA_ON(mp));
+		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+	}
+	if (gdqp) {
+		xfs_dqlock(gdqp);
+		XFS_DQHOLD(gdqp);
+		xfs_dqunlock(gdqp);
+		ASSERT(ip->i_gdquot == NULL);
+		ip->i_gdquot = gdqp;
+		ASSERT(XFS_IS_OQUOTA_ON(mp));
+		ASSERT((XFS_IS_GQUOTA_ON(mp) ?
+			ip->i_d.di_gid : xfs_get_projid(ip)) ==
+				be32_to_cpu(gdqp->q_core.d_id));
+		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+	}
+}
+
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
new file mode 100644
index 0000000..43b9abe
--- /dev/null
+++ b/fs/xfs/xfs_qm.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern uint		ndquot;
+extern struct mutex	xfs_Gqm_lock;
+extern struct xfs_qm	*xfs_Gqm;
+extern kmem_zone_t	*qm_dqzone;
+extern kmem_zone_t	*qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
+ * iterate over the mountpt's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS	7
+
+/*
+ * Ditto, for xfs_qm_dqreclaim_one.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS	4
+
+/*
+ * Ideal ratio of free to in use dquots. Quota manager makes an attempt
+ * to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO		2
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH		((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K blk contains 30
+ * (136 * 30 = 4080) dquots. It's probably not worth trying to make
+ * this more dynamic.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1
+
+typedef xfs_dqhash_t	xfs_dqlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
+	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
+	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
+	struct list_head qm_dqfrlist;	 /* freelist of dquots */
+	struct mutex	 qm_dqfrlist_lock;
+	int		 qm_dqfrlist_cnt;
+	atomic_t	 qm_totaldquots; /* total incore dquots */
+	uint		 qm_nrefs;	 /* file systems with quota on */
+	int		 qm_dqfree_ratio;/* ratio of free to inuse dquots */
+	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
+	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems.
+ * The mount structure keeps a pointer to this.
+ */
+typedef struct xfs_quotainfo {
+	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
+	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
+	struct list_head qi_dqlist;	 /* all dquots in filesys */
+	struct mutex	 qi_dqlist_lock;
+	int		 qi_dquots;
+	int		 qi_dqreclaims;	 /* a change here indicates
+					    a removal in the dqlist */
+	time_t		 qi_btimelimit;	 /* limit for blks timer */
+	time_t		 qi_itimelimit;	 /* limit for inodes timer */
+	time_t		 qi_rtbtimelimit;/* limit for rt blks timer */
+	xfs_qwarncnt_t	 qi_bwarnlimit;	 /* limit for blks warnings */
+	xfs_qwarncnt_t	 qi_iwarnlimit;	 /* limit for inodes warnings */
+	xfs_qwarncnt_t	 qi_rtbwarnlimit;/* limit for rt blks warnings */
+	struct mutex	 qi_quotaofflock;/* to serialize quotaoff */
+	xfs_filblks_t	 qi_dqchunklen;	 /* # BBs in a chunk of dqs */
+	uint		 qi_dqperchunk;	 /* # ondisk dqs in above chunk */
+	xfs_qcnt_t	 qi_bhardlimit;	 /* default data blk hard limit */
+	xfs_qcnt_t	 qi_bsoftlimit;	 /* default data blk soft limit */
+	xfs_qcnt_t	 qi_ihardlimit;	 /* default inode count hard limit */
+	xfs_qcnt_t	 qi_isoftlimit;	 /* default inode count soft limit */
+	xfs_qcnt_t	 qi_rtbhardlimit;/* default realtime blk hard limit */
+	xfs_qcnt_t	 qi_rtbsoftlimit;/* default realtime blk soft limit */
+} xfs_quotainfo_t;
+
+
+extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+			xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void	xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void	xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS		2
+typedef struct xfs_dquot_acct {
+	xfs_dqtrx_t	dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+	xfs_dqtrx_t	dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT	(7 * 24*60*60)          /* 1 week */
+#define XFS_QM_RTBTIMELIMIT	(7 * 24*60*60)          /* 1 week */
+#define XFS_QM_ITIMELIMIT	(7 * 24*60*60)          /* 1 week */
+
+#define XFS_QM_BWARNLIMIT	5
+#define XFS_QM_IWARNLIMIT	5
+#define XFS_QM_RTBWARNLIMIT	5
+
+extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int		xfs_qm_quotacheck(xfs_mount_t *);
+extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+
+/* dquot stuff */
+extern boolean_t	xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int		xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* quota ops */
+extern int		xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+extern int		xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+extern int		xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+					fs_disk_quota_t *);
+extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
+
+#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
new file mode 100644
index 0000000..a0a829a
--- /dev/null
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+
+STATIC void
+xfs_fill_statvfs_from_dquot(
+	struct kstatfs		*statp,
+	xfs_disk_dquot_t	*dp)
+{
+	__uint64_t		limit;
+
+	limit = dp->d_blk_softlimit ?
+		be64_to_cpu(dp->d_blk_softlimit) :
+		be64_to_cpu(dp->d_blk_hardlimit);
+	if (limit && statp->f_blocks > limit) {
+		statp->f_blocks = limit;
+		statp->f_bfree = statp->f_bavail =
+			(statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
+			 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
+	}
+
+	limit = dp->d_ino_softlimit ?
+		be64_to_cpu(dp->d_ino_softlimit) :
+		be64_to_cpu(dp->d_ino_hardlimit);
+	if (limit && statp->f_files > limit) {
+		statp->f_files = limit;
+		statp->f_ffree =
+			(statp->f_files > be64_to_cpu(dp->d_icount)) ?
+			 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
+	}
+}
+
+
+/*
+ * Directory tree accounting is implemented using project quotas, where
+ * the project identifier is inherited from parent directories.
+ * A statvfs (df, etc.) of a directory that is using project quota should
+ * return a statvfs of the project, not the entire filesystem.
+ * This makes such trees appear as if they are filesystems in themselves.
+ */
+void
+xfs_qm_statvfs(
+	xfs_inode_t		*ip,
+	struct kstatfs		*statp)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_dquot_t		*dqp;
+
+	if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
+		xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
+		xfs_qm_dqput(dqp);
+	}
+}
+
+int
+xfs_qm_newmount(
+	xfs_mount_t	*mp,
+	uint		*needquotamount,
+	uint		*quotaflags)
+{
+	uint		quotaondisk;
+	uint		uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
+
+	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
+				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
+
+	if (quotaondisk) {
+		uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+		pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
+		gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+	}
+
+	/*
+	 * If the device itself is read-only, we can't allow
+	 * the user to change the state of quota on the mount -
+	 * this would generate a transaction on the ro device,
+	 * which would lead to an I/O error and shutdown
+	 */
+
+	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
+	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+	    (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
+	    xfs_dev_is_read_only(mp, "changing quota state")) {
+		xfs_warn(mp, "please mount with%s%s%s%s.",
+			(!quotaondisk ? "out quota" : ""),
+			(uquotaondisk ? " usrquota" : ""),
+			(pquotaondisk ? " prjquota" : ""),
+			(gquotaondisk ? " grpquota" : ""));
+		return XFS_ERROR(EPERM);
+	}
+
+	if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+		/*
+		 * Call mount_quotas at this point only if we won't have to do
+		 * a quotacheck.
+		 */
+		if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+			/*
+			 * If an error occurred, qm_mount_quotas code
+			 * has already disabled quotas. So, just finish
+			 * mounting, and get on with the boring life
+			 * without disk quotas.
+			 */
+			xfs_qm_mount_quotas(mp);
+		} else {
+			/*
+			 * Clear the quota flags, but remember them. This
+			 * is so that the quota code doesn't get invoked
+			 * before we're ready. This can happen when an
+			 * inode goes inactive and wants to free blocks,
+			 * or via xfs_log_mount_finish.
+			 */
+			*needquotamount = B_TRUE;
+			*quotaflags = mp->m_qflags;
+			mp->m_qflags = 0;
+		}
+	}
+
+	return 0;
+}
+
+void __init
+xfs_qm_init(void)
+{
+	printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
+	mutex_init(&xfs_Gqm_lock);
+	xfs_qm_init_procfs();
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+	xfs_qm_cleanup_procfs();
+	if (qm_dqzone)
+		kmem_zone_destroy(qm_dqzone);
+	if (qm_dqtrxzone)
+		kmem_zone_destroy(qm_dqtrxzone);
+}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
new file mode 100644
index 0000000..8671a0b
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+static int xqm_proc_show(struct seq_file *m, void *v)
+{
+	/* maximum; incore; ratio free to inuse; freelist */
+	seq_printf(m, "%d\t%d\t%d\t%u\n",
+			ndquot,
+			xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+			xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+			xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
+	return 0;
+}
+
+static int xqm_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xqm_proc_show, NULL);
+}
+
+static const struct file_operations xqm_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= xqm_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
+	/* quota performance statistics */
+	seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
+			xqmstats.xs_qm_dqreclaims,
+			xqmstats.xs_qm_dqreclaim_misses,
+			xqmstats.xs_qm_dquot_dups,
+			xqmstats.xs_qm_dqcachemisses,
+			xqmstats.xs_qm_dqcachehits,
+			xqmstats.xs_qm_dqwants,
+			xqmstats.xs_qm_dqshake_reclaims,
+			xqmstats.xs_qm_dqinact_reclaims);
+	return 0;
+}
+
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xqmstat_proc_show, NULL);
+}
+
+static const struct file_operations xqmstat_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= xqmstat_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void
+xfs_qm_init_procfs(void)
+{
+	proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
+	proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
+}
+
+void
+xfs_qm_cleanup_procfs(void)
+{
+	remove_proc_entry("fs/xfs/xqm", NULL);
+	remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h
new file mode 100644
index 0000000..5b964fc
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics
+ */
+struct xqmstats {
+	__uint32_t		xs_qm_dqreclaims;
+	__uint32_t		xs_qm_dqreclaim_misses;
+	__uint32_t		xs_qm_dquot_dups;
+	__uint32_t		xs_qm_dqcachemisses;
+	__uint32_t		xs_qm_dqcachehits;
+	__uint32_t		xs_qm_dqwants;
+	__uint32_t		xs_qm_dqshake_reclaims;
+	__uint32_t		xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count)	( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+
+# define XQM_STATS_INC(count)	do { } while (0)
+
+static inline void xfs_qm_init_procfs(void) { };
+static inline void xfs_qm_cleanup_procfs(void) { };
+
+#endif
+
+#endif	/* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
new file mode 100644
index 0000000..609246f
--- /dev/null
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/capability.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+					uint);
+STATIC uint	xfs_qm_export_flags(uint);
+STATIC uint	xfs_qm_export_qtype_flags(uint);
+STATIC void	xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+					fs_disk_quota_t *);
+
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots. Called only at unmount time.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+int
+xfs_qm_scall_quotaoff(
+	xfs_mount_t		*mp,
+	uint			flags)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	uint			dqtype;
+	int			error;
+	uint			inactivate_flags;
+	xfs_qoff_logitem_t	*qoffstart;
+	int			nculprits;
+
+	/*
+	 * No file system can have quotas enabled on disk but not in core.
+	 * Note that quota utilities (like quotaoff) _expect_
+	 * errno == EEXIST here.
+	 */
+	if ((mp->m_qflags & flags) == 0)
+		return XFS_ERROR(EEXIST);
+	error = 0;
+
+	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+	/*
+	 * We don't want to deal with two quotaoffs messing up each other,
+	 * so we're going to serialize it. quotaoff isn't exactly a performance
+	 * critical thing.
+	 * If quotaoff, then we must be dealing with the root filesystem.
+	 */
+	ASSERT(q);
+	mutex_lock(&q->qi_quotaofflock);
+
+	/*
+	 * If we're just turning off quota enforcement, change mp and go.
+	 */
+	if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+		mp->m_qflags &= ~(flags);
+
+		spin_lock(&mp->m_sb_lock);
+		mp->m_sb.sb_qflags = mp->m_qflags;
+		spin_unlock(&mp->m_sb_lock);
+		mutex_unlock(&q->qi_quotaofflock);
+
+		/* XXX what to do if error ? Revert back to old vals incore ? */
+		error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+		return (error);
+	}
+
+	dqtype = 0;
+	inactivate_flags = 0;
+	/*
+	 * If accounting is off, we must turn enforcement off, clear the
+	 * quota 'CHKD' certificate to make it known that we have to
+	 * do a quotacheck the next time this quota is turned on.
+	 */
+	if (flags & XFS_UQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_UQUOTA;
+		flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+		inactivate_flags |= XFS_UQUOTA_ACTIVE;
+	}
+	if (flags & XFS_GQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_GQUOTA;
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+		inactivate_flags |= XFS_GQUOTA_ACTIVE;
+	} else if (flags & XFS_PQUOTA_ACCT) {
+		dqtype |= XFS_QMOPT_PQUOTA;
+		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+		inactivate_flags |= XFS_PQUOTA_ACTIVE;
+	}
+
+	/*
+	 * Nothing to do?  Don't complain. This happens when we're just
+	 * turning off quota enforcement.
+	 */
+	if ((mp->m_qflags & flags) == 0)
+		goto out_unlock;
+
+	/*
+	 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+	 * and synchronously. If we fail to write, we should abort the
+	 * operation as it cannot be recovered safely if we crash.
+	 */
+	error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+	if (error)
+		goto out_unlock;
+
+	/*
+	 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+	 * to take care of the race between dqget and quotaoff. We don't take
+	 * any special locks to reset these bits. All processes need to check
+	 * these bits *after* taking inode lock(s) to see if the particular
+	 * quota type is in the process of being turned off. If *ACTIVE, it is
+	 * guaranteed that all dquot structures and all quotainode ptrs will all
+	 * stay valid as long as that inode is kept locked.
+	 *
+	 * There is no turning back after this.
+	 */
+	mp->m_qflags &= ~inactivate_flags;
+
+	/*
+	 * Give back all the dquot reference(s) held by inodes.
+	 * Here we go thru every single incore inode in this file system, and
+	 * do a dqrele on the i_udquot/i_gdquot that it may have.
+	 * Essentially, as long as somebody has an inode locked, this guarantees
+	 * that quotas will not be turned off. This is handy because in a
+	 * transaction once we lock the inode(s) and check for quotaon, we can
+	 * depend on the quota inodes (and other things) being valid as long as
+	 * we keep the lock(s).
+	 */
+	xfs_qm_dqrele_all_inodes(mp, flags);
+
+	/*
+	 * Next we make the changes in the quota flag in the mount struct.
+	 * This isn't protected by a particular lock directly, because we
+	 * don't want to take a mrlock every time we depend on quotas being on.
+	 */
+	mp->m_qflags &= ~(flags);
+
+	/*
+	 * Go through all the dquots of this file system and purge them,
+	 * according to what was turned off. We may not be able to get rid
+	 * of all dquots, because dquots can have temporary references that
+	 * are not attached to inodes. eg. xfs_setattr, xfs_create.
+	 * So, if we couldn't purge all the dquots from the filesystem,
+	 * we can't get rid of the incore data structures.
+	 */
+	while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
+		delay(10 * nculprits);
+
+	/*
+	 * Transactions that had started before ACTIVE state bit was cleared
+	 * could have logged many dquots, so they'd have higher LSNs than
+	 * the first QUOTAOFF log record does. If we happen to crash when
+	 * the tail of the log has gone past the QUOTAOFF record, but
+	 * before the last dquot modification, those dquots __will__
+	 * recover, and that's not good.
+	 *
+	 * So, we have QUOTAOFF start and end logitems; the start
+	 * logitem won't get overwritten until the end logitem appears...
+	 */
+	error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+	if (error) {
+		/* We're screwed now. Shutdown is the only option. */
+		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+		goto out_unlock;
+	}
+
+	/*
+	 * If quotas is completely disabled, close shop.
+	 */
+	if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+	    ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
+		mutex_unlock(&q->qi_quotaofflock);
+		xfs_qm_destroy_quotainfo(mp);
+		return (0);
+	}
+
+	/*
+	 * Release our quotainode references if we don't need them anymore.
+	 */
+	if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
+		IRELE(q->qi_uquotaip);
+		q->qi_uquotaip = NULL;
+	}
+	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
+		IRELE(q->qi_gquotaip);
+		q->qi_gquotaip = NULL;
+	}
+
+out_unlock:
+	mutex_unlock(&q->qi_quotaofflock);
+	return error;
+}
+
+STATIC int
+xfs_qm_scall_trunc_qfile(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	struct xfs_inode	*ip;
+	struct xfs_trans	*tp;
+	int			error;
+
+	if (ino == NULLFSINO)
+		return 0;
+
+	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
+	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+				  XFS_TRANS_PERM_LOG_RES,
+				  XFS_ITRUNCATE_LOG_COUNT);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+		goto out_put;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip);
+
+	error = xfs_itruncate_data(&tp, ip, 0);
+	if (error) {
+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+				     XFS_TRANS_ABORT);
+		goto out_unlock;
+	}
+
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+out_put:
+	IRELE(ip);
+	return error;
+}
+
+int
+xfs_qm_scall_trunc_qfiles(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		error = 0, error2 = 0;
+
+	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+		xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
+			__func__, flags, mp->m_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+
+	if (flags & XFS_DQ_USER)
+		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
+	if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+
+	return error ? error : error2;
+}
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem.  This takes
+ * effect immediately.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+int
+xfs_qm_scall_quotaon(
+	xfs_mount_t	*mp,
+	uint		flags)
+{
+	int		error;
+	uint		qf;
+	__int64_t	sbflags;
+
+	flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+	/*
+	 * Switching on quota accounting must be done at mount time.
+	 */
+	flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+	sbflags = 0;
+
+	if (flags == 0) {
+		xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
+			__func__, mp->m_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/* No fs can turn on quotas with a delayed effect */
+	ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+	/*
+	 * Can't enforce without accounting. We check the superblock
+	 * qflags here instead of m_qflags because rootfs can have
+	 * quota acct on ondisk without m_qflags' knowing.
+	 */
+	if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+	    (flags & XFS_UQUOTA_ENFD))
+	    ||
+	    ((flags & XFS_PQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+	    (flags & XFS_GQUOTA_ACCT) == 0 &&
+	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+	    (flags & XFS_OQUOTA_ENFD))) {
+		xfs_debug(mp,
+			"%s: Can't enforce without acct, flags=%x sbflags=%x\n",
+			__func__, flags, mp->m_sb.sb_qflags);
+		return XFS_ERROR(EINVAL);
+	}
+	/*
+	 * If everything's up to-date incore, then don't waste time.
+	 */
+	if ((mp->m_qflags & flags) == flags)
+		return XFS_ERROR(EEXIST);
+
+	/*
+	 * Change sb_qflags on disk but not incore mp->qflags
+	 * if this is the root filesystem.
+	 */
+	spin_lock(&mp->m_sb_lock);
+	qf = mp->m_sb.sb_qflags;
+	mp->m_sb.sb_qflags = qf | flags;
+	spin_unlock(&mp->m_sb_lock);
+
+	/*
+	 * There's nothing to change if it's the same.
+	 */
+	if ((qf & flags) == flags && sbflags == 0)
+		return XFS_ERROR(EEXIST);
+	sbflags |= XFS_SB_QFLAGS;
+
+	if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+		return (error);
+	/*
+	 * If we aren't trying to switch on quota enforcement, we are done.
+	 */
+	if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+	     ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+	     (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
+	    (flags & XFS_ALL_QUOTA_ENFD) == 0)
+		return (0);
+
+	if (! XFS_IS_QUOTA_RUNNING(mp))
+		return XFS_ERROR(ESRCH);
+
+	/*
+	 * Switch on quota enforcement in core.
+	 */
+	mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+	mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+	mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+
+	return (0);
+}
+
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ */
+int
+xfs_qm_scall_getqstat(
+	struct xfs_mount	*mp,
+	struct fs_quota_stat	*out)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	struct xfs_inode	*uip, *gip;
+	boolean_t		tempuqip, tempgqip;
+
+	uip = gip = NULL;
+	tempuqip = tempgqip = B_FALSE;
+	memset(out, 0, sizeof(fs_quota_stat_t));
+
+	out->qs_version = FS_QSTAT_VERSION;
+	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
+		out->qs_uquota.qfs_ino = NULLFSINO;
+		out->qs_gquota.qfs_ino = NULLFSINO;
+		return (0);
+	}
+	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+							(XFS_ALL_QUOTA_ACCT|
+							 XFS_ALL_QUOTA_ENFD));
+	out->qs_pad = 0;
+	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+	if (q) {
+		uip = q->qi_uquotaip;
+		gip = q->qi_gquotaip;
+	}
+	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+					0, 0, &uip) == 0)
+			tempuqip = B_TRUE;
+	}
+	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+					0, 0, &gip) == 0)
+			tempgqip = B_TRUE;
+	}
+	if (uip) {
+		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+		if (tempuqip)
+			IRELE(uip);
+	}
+	if (gip) {
+		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+		if (tempgqip)
+			IRELE(gip);
+	}
+	if (q) {
+		out->qs_incoredqs = q->qi_dquots;
+		out->qs_btimelimit = q->qi_btimelimit;
+		out->qs_itimelimit = q->qi_itimelimit;
+		out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+		out->qs_bwarnlimit = q->qi_bwarnlimit;
+		out->qs_iwarnlimit = q->qi_iwarnlimit;
+	}
+	return 0;
+}
+
+#define XFS_DQ_MASK \
+	(FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
+/*
+ * Adjust quota limits, and start/stop timers accordingly.
+ */
+int
+xfs_qm_scall_setqlim(
+	xfs_mount_t		*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	fs_disk_quota_t		*newlim)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	xfs_disk_dquot_t	*ddq;
+	xfs_dquot_t		*dqp;
+	xfs_trans_t		*tp;
+	int			error;
+	xfs_qcnt_t		hard, soft;
+
+	if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+		return EINVAL;
+	if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+		return 0;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return (error);
+	}
+
+	/*
+	 * We don't want to race with a quotaoff so take the quotaoff lock.
+	 * (We don't hold an inode lock, so there's nothing else to stop
+	 * a quotaoff from happening). (XXXThis doesn't currently happen
+	 * because we take the vfslock before calling xfs_qm_sysent).
+	 */
+	mutex_lock(&q->qi_quotaofflock);
+
+	/*
+	 * Get the dquot (locked), and join it to the transaction.
+	 * Allocate the dquot if this doesn't exist.
+	 */
+	if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+		xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+		ASSERT(error != ENOENT);
+		goto out_unlock;
+	}
+	xfs_trans_dqjoin(tp, dqp);
+	ddq = &dqp->q_core;
+
+	/*
+	 * Make sure that hardlimits are >= soft limits before changing.
+	 */
+	hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+			be64_to_cpu(ddq->d_blk_hardlimit);
+	soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+			be64_to_cpu(ddq->d_blk_softlimit);
+	if (hard == 0 || hard >= soft) {
+		ddq->d_blk_hardlimit = cpu_to_be64(hard);
+		ddq->d_blk_softlimit = cpu_to_be64(soft);
+		if (id == 0) {
+			q->qi_bhardlimit = hard;
+			q->qi_bsoftlimit = soft;
+		}
+	} else {
+		xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
+	}
+	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+			be64_to_cpu(ddq->d_rtb_hardlimit);
+	soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+			be64_to_cpu(ddq->d_rtb_softlimit);
+	if (hard == 0 || hard >= soft) {
+		ddq->d_rtb_hardlimit = cpu_to_be64(hard);
+		ddq->d_rtb_softlimit = cpu_to_be64(soft);
+		if (id == 0) {
+			q->qi_rtbhardlimit = hard;
+			q->qi_rtbsoftlimit = soft;
+		}
+	} else {
+		xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+	}
+
+	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+		(xfs_qcnt_t) newlim->d_ino_hardlimit :
+			be64_to_cpu(ddq->d_ino_hardlimit);
+	soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+		(xfs_qcnt_t) newlim->d_ino_softlimit :
+			be64_to_cpu(ddq->d_ino_softlimit);
+	if (hard == 0 || hard >= soft) {
+		ddq->d_ino_hardlimit = cpu_to_be64(hard);
+		ddq->d_ino_softlimit = cpu_to_be64(soft);
+		if (id == 0) {
+			q->qi_ihardlimit = hard;
+			q->qi_isoftlimit = soft;
+		}
+	} else {
+		xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
+	}
+
+	/*
+	 * Update warnings counter(s) if requested
+	 */
+	if (newlim->d_fieldmask & FS_DQ_BWARNS)
+		ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
+	if (newlim->d_fieldmask & FS_DQ_IWARNS)
+		ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
+	if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+		ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
+
+	if (id == 0) {
+		/*
+		 * Timelimits for the super user set the relative time
+		 * the other users can be over quota for this file system.
+		 * If it is zero a default is used.  Ditto for the default
+		 * soft and hard limit values (already done, above), and
+		 * for warnings.
+		 */
+		if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+			q->qi_btimelimit = newlim->d_btimer;
+			ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
+		}
+		if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+			q->qi_itimelimit = newlim->d_itimer;
+			ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
+		}
+		if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+			q->qi_rtbtimelimit = newlim->d_rtbtimer;
+			ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
+		}
+		if (newlim->d_fieldmask & FS_DQ_BWARNS)
+			q->qi_bwarnlimit = newlim->d_bwarns;
+		if (newlim->d_fieldmask & FS_DQ_IWARNS)
+			q->qi_iwarnlimit = newlim->d_iwarns;
+		if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+			q->qi_rtbwarnlimit = newlim->d_rtbwarns;
+	} else {
+		/*
+		 * If the user is now over quota, start the timelimit.
+		 * The user will not be 'warned'.
+		 * Note that we keep the timers ticking, whether enforcement
+		 * is on or off. We don't really want to bother with iterating
+		 * over all ondisk dquots and turning the timers on/off.
+		 */
+		xfs_qm_adjust_dqtimers(mp, ddq);
+	}
+	dqp->dq_flags |= XFS_DQ_DIRTY;
+	xfs_trans_log_dquot(tp, dqp);
+
+	error = xfs_trans_commit(tp, 0);
+	xfs_qm_dqrele(dqp);
+
+ out_unlock:
+	mutex_unlock(&q->qi_quotaofflock);
+	return error;
+}
+
+int
+xfs_qm_scall_getquota(
+	xfs_mount_t	*mp,
+	xfs_dqid_t	id,
+	uint		type,
+	fs_disk_quota_t *out)
+{
+	xfs_dquot_t	*dqp;
+	int		error;
+
+	/*
+	 * Try to get the dquot. We don't want it allocated on disk, so
+	 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+	 * exist, we'll get ENOENT back.
+	 */
+	if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
+		return (error);
+	}
+
+	/*
+	 * If everything's NULL, this dquot doesn't quite exist as far as
+	 * our utility programs are concerned.
+	 */
+	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+		xfs_qm_dqput(dqp);
+		return XFS_ERROR(ENOENT);
+	}
+	/*
+	 * Convert the disk dquot to the exportable format
+	 */
+	xfs_qm_export_dquot(mp, &dqp->q_core, out);
+	xfs_qm_dqput(dqp);
+	return (error ? XFS_ERROR(EFAULT) : 0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+	xfs_mount_t		*mp,
+	xfs_qoff_logitem_t	*startqoff,
+	uint			flags)
+{
+	xfs_trans_t		*tp;
+	int			error;
+	xfs_qoff_logitem_t	*qoffi;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+
+	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
+				      0, 0, XFS_DEFAULT_LOG_COUNT))) {
+		xfs_trans_cancel(tp, 0);
+		return (error);
+	}
+
+	qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+					flags & XFS_ALL_QUOTA_ACCT);
+	xfs_trans_log_quotaoff_item(tp, qoffi);
+
+	/*
+	 * We have to make sure that the transaction is secure on disk before we
+	 * return and actually stop quota accounting. So, make it synchronous.
+	 * We don't care about quotoff's performance.
+	 */
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0);
+	return (error);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff(
+	xfs_mount_t	       *mp,
+	xfs_qoff_logitem_t     **qoffstartp,
+	uint		       flags)
+{
+	xfs_trans_t	       *tp;
+	int			error;
+	xfs_qoff_logitem_t     *qoffi=NULL;
+	uint			oldsbqflag=0;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+	if ((error = xfs_trans_reserve(tp, 0,
+				      sizeof(xfs_qoff_logitem_t) * 2 +
+				      mp->m_sb.sb_sectsize + 128,
+				      0,
+				      0,
+				      XFS_DEFAULT_LOG_COUNT))) {
+		goto error0;
+	}
+
+	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+	xfs_trans_log_quotaoff_item(tp, qoffi);
+
+	spin_lock(&mp->m_sb_lock);
+	oldsbqflag = mp->m_sb.sb_qflags;
+	mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+	spin_unlock(&mp->m_sb_lock);
+
+	xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+	/*
+	 * We have to make sure that the transaction is secure on disk before we
+	 * return and actually stop quota accounting. So, make it synchronous.
+	 * We don't care about quotoff's performance.
+	 */
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0);
+
+error0:
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		/*
+		 * No one else is modifying sb_qflags, so this is OK.
+		 * We still hold the quotaofflock.
+		 */
+		spin_lock(&mp->m_sb_lock);
+		mp->m_sb.sb_qflags = oldsbqflag;
+		spin_unlock(&mp->m_sb_lock);
+	}
+	*qoffstartp = qoffi;
+	return (error);
+}
+
+
+/*
+ * Translate an internal style on-disk-dquot to the exportable format.
+ * The main differences are that the counters/limits are all in Basic
+ * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
+ * to be converted to the native endianness.
+ */
+STATIC void
+xfs_qm_export_dquot(
+	xfs_mount_t		*mp,
+	xfs_disk_dquot_t	*src,
+	struct fs_disk_quota	*dst)
+{
+	memset(dst, 0, sizeof(*dst));
+	dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
+	dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
+	dst->d_id = be32_to_cpu(src->d_id);
+	dst->d_blk_hardlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+	dst->d_blk_softlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
+	dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
+	dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
+	dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
+	dst->d_icount = be64_to_cpu(src->d_icount);
+	dst->d_btimer = be32_to_cpu(src->d_btimer);
+	dst->d_itimer = be32_to_cpu(src->d_itimer);
+	dst->d_iwarns = be16_to_cpu(src->d_iwarns);
+	dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+	dst->d_rtb_hardlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+	dst->d_rtb_softlimit =
+		XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
+	dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
+	dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
+	dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
+
+	/*
+	 * Internally, we don't reset all the timers when quota enforcement
+	 * gets turned off. No need to confuse the user level code,
+	 * so return zeroes in that case.
+	 */
+	if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
+	    (!XFS_IS_OQUOTA_ENFORCED(mp) &&
+			(src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
+		dst->d_btimer = 0;
+		dst->d_itimer = 0;
+		dst->d_rtbtimer = 0;
+	}
+
+#ifdef DEBUG
+	if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
+	     (XFS_IS_OQUOTA_ENFORCED(mp) &&
+			(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
+	    dst->d_id != 0) {
+		if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+		    (dst->d_blk_softlimit > 0)) {
+			ASSERT(dst->d_btimer != 0);
+		}
+		if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+		    (dst->d_ino_softlimit > 0)) {
+			ASSERT(dst->d_itimer != 0);
+		}
+	}
+#endif
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+	uint flags)
+{
+	/*
+	 * Can't be more than one, or none.
+	 */
+	ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
+		(FS_PROJ_QUOTA | FS_USER_QUOTA));
+	ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
+		(FS_PROJ_QUOTA | FS_GROUP_QUOTA));
+	ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
+		(FS_USER_QUOTA | FS_GROUP_QUOTA));
+	ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
+
+	return (flags & XFS_DQ_USER) ?
+		FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+			FS_PROJ_QUOTA : FS_GROUP_QUOTA;
+}
+
+STATIC uint
+xfs_qm_export_flags(
+	uint flags)
+{
+	uint uflags;
+
+	uflags = 0;
+	if (flags & XFS_UQUOTA_ACCT)
+		uflags |= FS_QUOTA_UDQ_ACCT;
+	if (flags & XFS_PQUOTA_ACCT)
+		uflags |= FS_QUOTA_PDQ_ACCT;
+	if (flags & XFS_GQUOTA_ACCT)
+		uflags |= FS_QUOTA_GDQ_ACCT;
+	if (flags & XFS_UQUOTA_ENFD)
+		uflags |= FS_QUOTA_UDQ_ENFD;
+	if (flags & (XFS_OQUOTA_ENFD)) {
+		uflags |= (flags & XFS_GQUOTA_ACCT) ?
+			FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
+	}
+	return (uflags);
+}
+
+
+STATIC int
+xfs_dqrele_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
+{
+	/* skip quota inodes */
+	if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
+	    ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
+		ASSERT(ip->i_udquot == NULL);
+		ASSERT(ip->i_gdquot == NULL);
+		return 0;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+		xfs_qm_dqrele(ip->i_udquot);
+		ip->i_udquot = NULL;
+	}
+	if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+		xfs_qm_dqrele(ip->i_gdquot);
+		ip->i_gdquot = NULL;
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return 0;
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ *
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+	struct xfs_mount *mp,
+	uint		 flags)
+{
+	ASSERT(mp->m_quotainfo);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+}
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h
new file mode 100644
index 0000000..94a3d92
--- /dev/null
+++ b/fs/xfs/xfs_quota_priv.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE	10
+
+/*
+ * Hash into a bucket in the dquot hash table, based on <mp, id>.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+				 (__psunsigned_t)(id)) & \
+				(xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
+				     (xfs_Gqm->qm_usr_dqhtable + \
+				      XFS_DQ_HASHVAL(mp, id)) : \
+				     (xfs_Gqm->qm_grp_dqhtable + \
+				      XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+	!dqp->q_core.d_blk_hardlimit && \
+	!dqp->q_core.d_blk_softlimit && \
+	!dqp->q_core.d_rtb_hardlimit && \
+	!dqp->q_core.d_rtb_softlimit && \
+	!dqp->q_core.d_ino_hardlimit && \
+	!dqp->q_core.d_ino_softlimit && \
+	!dqp->q_core.d_bcount && \
+	!dqp->q_core.d_rtbcount && \
+	!dqp->q_core.d_icount)
+
+#define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
+
+#endif	/* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
new file mode 100644
index 0000000..7e76f53
--- /dev/null
+++ b/fs/xfs/xfs_quotaops.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_qm.h"
+#include <linux/quota.h>
+
+
+STATIC int
+xfs_quota_type(int type)
+{
+	switch (type) {
+	case USRQUOTA:
+		return XFS_DQ_USER;
+	case GRPQUOTA:
+		return XFS_DQ_GROUP;
+	default:
+		return XFS_DQ_PROJ;
+	}
+}
+
+STATIC int
+xfs_fs_get_xstate(
+	struct super_block	*sb,
+	struct fs_quota_stat	*fqs)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	if (!XFS_IS_QUOTA_RUNNING(mp))
+		return -ENOSYS;
+	return -xfs_qm_scall_getqstat(mp, fqs);
+}
+
+STATIC int
+xfs_fs_set_xstate(
+	struct super_block	*sb,
+	unsigned int		uflags,
+	int			op)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	unsigned int		flags = 0;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+	if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
+		return -ENOSYS;
+
+	if (uflags & FS_QUOTA_UDQ_ACCT)
+		flags |= XFS_UQUOTA_ACCT;
+	if (uflags & FS_QUOTA_PDQ_ACCT)
+		flags |= XFS_PQUOTA_ACCT;
+	if (uflags & FS_QUOTA_GDQ_ACCT)
+		flags |= XFS_GQUOTA_ACCT;
+	if (uflags & FS_QUOTA_UDQ_ENFD)
+		flags |= XFS_UQUOTA_ENFD;
+	if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
+		flags |= XFS_OQUOTA_ENFD;
+
+	switch (op) {
+	case Q_XQUOTAON:
+		return -xfs_qm_scall_quotaon(mp, flags);
+	case Q_XQUOTAOFF:
+		if (!XFS_IS_QUOTA_ON(mp))
+			return -EINVAL;
+		return -xfs_qm_scall_quotaoff(mp, flags);
+	case Q_XQUOTARM:
+		if (XFS_IS_QUOTA_ON(mp))
+			return -EINVAL;
+		return -xfs_qm_scall_trunc_qfiles(mp, flags);
+	}
+
+	return -EINVAL;
+}
+
+STATIC int
+xfs_fs_get_dqblk(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	if (!XFS_IS_QUOTA_RUNNING(mp))
+		return -ENOSYS;
+	if (!XFS_IS_QUOTA_ON(mp))
+		return -ESRCH;
+
+	return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
+}
+
+STATIC int
+xfs_fs_set_dqblk(
+	struct super_block	*sb,
+	int			type,
+	qid_t			id,
+	struct fs_disk_quota	*fdq)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+	if (!XFS_IS_QUOTA_RUNNING(mp))
+		return -ENOSYS;
+	if (!XFS_IS_QUOTA_ON(mp))
+		return -ESRCH;
+
+	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
+}
+
+const struct quotactl_ops xfs_quotactl_operations = {
+	.get_xstate		= xfs_fs_get_xstate,
+	.set_xstate		= xfs_fs_set_xstate,
+	.get_dqblk		= xfs_fs_get_dqblk,
+	.set_dqblk		= xfs_fs_set_dqblk,
+};
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
new file mode 100644
index 0000000..76fdc58
--- /dev/null
+++ b/fs/xfs/xfs_stats.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+static int xfs_stat_proc_show(struct seq_file *m, void *v)
+{
+	int		c, i, j, val;
+	__uint64_t	xs_xstrat_bytes = 0;
+	__uint64_t	xs_write_bytes = 0;
+	__uint64_t	xs_read_bytes = 0;
+
+	static const struct xstats_entry {
+		char	*desc;
+		int	endpoint;
+	} xstats[] = {
+		{ "extent_alloc",	XFSSTAT_END_EXTENT_ALLOC	},
+		{ "abt",		XFSSTAT_END_ALLOC_BTREE		},
+		{ "blk_map",		XFSSTAT_END_BLOCK_MAPPING	},
+		{ "bmbt",		XFSSTAT_END_BLOCK_MAP_BTREE	},
+		{ "dir",		XFSSTAT_END_DIRECTORY_OPS	},
+		{ "trans",		XFSSTAT_END_TRANSACTIONS	},
+		{ "ig",			XFSSTAT_END_INODE_OPS		},
+		{ "log",		XFSSTAT_END_LOG_OPS		},
+		{ "push_ail",		XFSSTAT_END_TAIL_PUSHING	},
+		{ "xstrat",		XFSSTAT_END_WRITE_CONVERT	},
+		{ "rw",			XFSSTAT_END_READ_WRITE_OPS	},
+		{ "attr",		XFSSTAT_END_ATTRIBUTE_OPS	},
+		{ "icluster",		XFSSTAT_END_INODE_CLUSTER	},
+		{ "vnodes",		XFSSTAT_END_VNODE_OPS		},
+		{ "buf",		XFSSTAT_END_BUF			},
+		{ "abtb2",		XFSSTAT_END_ABTB_V2		},
+		{ "abtc2",		XFSSTAT_END_ABTC_V2		},
+		{ "bmbt2",		XFSSTAT_END_BMBT_V2		},
+		{ "ibt2",		XFSSTAT_END_IBT_V2		},
+	};
+
+	/* Loop over all stats groups */
+	for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
+		seq_printf(m, "%s", xstats[i].desc);
+		/* inner loop does each group */
+		while (j < xstats[i].endpoint) {
+			val = 0;
+			/* sum over all cpus */
+			for_each_possible_cpu(c)
+				val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+			seq_printf(m, " %u", val);
+			j++;
+		}
+		seq_putc(m, '\n');
+	}
+	/* extra precision counters */
+	for_each_possible_cpu(i) {
+		xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+		xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+		xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+	}
+
+	seq_printf(m, "xpc %Lu %Lu %Lu\n",
+			xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+	seq_printf(m, "debug %u\n",
+#if defined(DEBUG)
+		1);
+#else
+		0);
+#endif
+	return 0;
+}
+
+static int xfs_stat_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xfs_stat_proc_show, NULL);
+}
+
+static const struct file_operations xfs_stat_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= xfs_stat_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+int
+xfs_init_procfs(void)
+{
+	if (!proc_mkdir("fs/xfs", NULL))
+		goto out;
+
+	if (!proc_create("fs/xfs/stat", 0, NULL,
+			 &xfs_stat_proc_fops))
+		goto out_remove_entry;
+	return 0;
+
+ out_remove_entry:
+	remove_proc_entry("fs/xfs", NULL);
+ out:
+	return -ENOMEM;
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+	remove_proc_entry("fs/xfs/stat", NULL);
+	remove_proc_entry("fs/xfs", NULL);
+}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
new file mode 100644
index 0000000..736854b
--- /dev/null
+++ b/fs/xfs/xfs_stats.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC	4
+	__uint32_t		xs_allocx;
+	__uint32_t		xs_allocb;
+	__uint32_t		xs_freex;
+	__uint32_t		xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
+	__uint32_t		xs_abt_lookup;
+	__uint32_t		xs_abt_compare;
+	__uint32_t		xs_abt_insrec;
+	__uint32_t		xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
+	__uint32_t		xs_blk_mapr;
+	__uint32_t		xs_blk_mapw;
+	__uint32_t		xs_blk_unmap;
+	__uint32_t		xs_add_exlist;
+	__uint32_t		xs_del_exlist;
+	__uint32_t		xs_look_exlist;
+	__uint32_t		xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
+	__uint32_t		xs_bmbt_lookup;
+	__uint32_t		xs_bmbt_compare;
+	__uint32_t		xs_bmbt_insrec;
+	__uint32_t		xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
+	__uint32_t		xs_dir_lookup;
+	__uint32_t		xs_dir_create;
+	__uint32_t		xs_dir_remove;
+	__uint32_t		xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
+	__uint32_t		xs_trans_sync;
+	__uint32_t		xs_trans_async;
+	__uint32_t		xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
+	__uint32_t		xs_ig_attempts;
+	__uint32_t		xs_ig_found;
+	__uint32_t		xs_ig_frecycle;
+	__uint32_t		xs_ig_missed;
+	__uint32_t		xs_ig_dup;
+	__uint32_t		xs_ig_reclaims;
+	__uint32_t		xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
+	__uint32_t		xs_log_writes;
+	__uint32_t		xs_log_blocks;
+	__uint32_t		xs_log_noiclogs;
+	__uint32_t		xs_log_force;
+	__uint32_t		xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
+	__uint32_t		xs_try_logspace;
+	__uint32_t		xs_sleep_logspace;
+	__uint32_t		xs_push_ail;
+	__uint32_t		xs_push_ail_success;
+	__uint32_t		xs_push_ail_pushbuf;
+	__uint32_t		xs_push_ail_pinned;
+	__uint32_t		xs_push_ail_locked;
+	__uint32_t		xs_push_ail_flushing;
+	__uint32_t		xs_push_ail_restarts;
+	__uint32_t		xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
+	__uint32_t		xs_xstrat_quick;
+	__uint32_t		xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
+	__uint32_t		xs_write_calls;
+	__uint32_t		xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
+	__uint32_t		xs_attr_get;
+	__uint32_t		xs_attr_set;
+	__uint32_t		xs_attr_remove;
+	__uint32_t		xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
+	__uint32_t		xs_iflush_count;
+	__uint32_t		xs_icluster_flushcnt;
+	__uint32_t		xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
+	__uint32_t		vn_active;	/* # vnodes not on free lists */
+	__uint32_t		vn_alloc;	/* # times vn_alloc called */
+	__uint32_t		vn_get;		/* # times vn_get called */
+	__uint32_t		vn_hold;	/* # times vn_hold called */
+	__uint32_t		vn_rele;	/* # times vn_rele called */
+	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
+	__uint32_t		vn_remove;	/* # times vn_remove called */
+	__uint32_t		vn_free;	/* # times vn_free called */
+#define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
+	__uint32_t		xb_get;
+	__uint32_t		xb_create;
+	__uint32_t		xb_get_locked;
+	__uint32_t		xb_get_locked_waited;
+	__uint32_t		xb_busy_locked;
+	__uint32_t		xb_miss_locked;
+	__uint32_t		xb_page_retries;
+	__uint32_t		xb_page_found;
+	__uint32_t		xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2		(XFSSTAT_END_BUF+15)
+	__uint32_t		xs_abtb_2_lookup;
+	__uint32_t		xs_abtb_2_compare;
+	__uint32_t		xs_abtb_2_insrec;
+	__uint32_t		xs_abtb_2_delrec;
+	__uint32_t		xs_abtb_2_newroot;
+	__uint32_t		xs_abtb_2_killroot;
+	__uint32_t		xs_abtb_2_increment;
+	__uint32_t		xs_abtb_2_decrement;
+	__uint32_t		xs_abtb_2_lshift;
+	__uint32_t		xs_abtb_2_rshift;
+	__uint32_t		xs_abtb_2_split;
+	__uint32_t		xs_abtb_2_join;
+	__uint32_t		xs_abtb_2_alloc;
+	__uint32_t		xs_abtb_2_free;
+	__uint32_t		xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2		(XFSSTAT_END_ABTB_V2+15)
+	__uint32_t		xs_abtc_2_lookup;
+	__uint32_t		xs_abtc_2_compare;
+	__uint32_t		xs_abtc_2_insrec;
+	__uint32_t		xs_abtc_2_delrec;
+	__uint32_t		xs_abtc_2_newroot;
+	__uint32_t		xs_abtc_2_killroot;
+	__uint32_t		xs_abtc_2_increment;
+	__uint32_t		xs_abtc_2_decrement;
+	__uint32_t		xs_abtc_2_lshift;
+	__uint32_t		xs_abtc_2_rshift;
+	__uint32_t		xs_abtc_2_split;
+	__uint32_t		xs_abtc_2_join;
+	__uint32_t		xs_abtc_2_alloc;
+	__uint32_t		xs_abtc_2_free;
+	__uint32_t		xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2		(XFSSTAT_END_ABTC_V2+15)
+	__uint32_t		xs_bmbt_2_lookup;
+	__uint32_t		xs_bmbt_2_compare;
+	__uint32_t		xs_bmbt_2_insrec;
+	__uint32_t		xs_bmbt_2_delrec;
+	__uint32_t		xs_bmbt_2_newroot;
+	__uint32_t		xs_bmbt_2_killroot;
+	__uint32_t		xs_bmbt_2_increment;
+	__uint32_t		xs_bmbt_2_decrement;
+	__uint32_t		xs_bmbt_2_lshift;
+	__uint32_t		xs_bmbt_2_rshift;
+	__uint32_t		xs_bmbt_2_split;
+	__uint32_t		xs_bmbt_2_join;
+	__uint32_t		xs_bmbt_2_alloc;
+	__uint32_t		xs_bmbt_2_free;
+	__uint32_t		xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2		(XFSSTAT_END_BMBT_V2+15)
+	__uint32_t		xs_ibt_2_lookup;
+	__uint32_t		xs_ibt_2_compare;
+	__uint32_t		xs_ibt_2_insrec;
+	__uint32_t		xs_ibt_2_delrec;
+	__uint32_t		xs_ibt_2_newroot;
+	__uint32_t		xs_ibt_2_killroot;
+	__uint32_t		xs_ibt_2_increment;
+	__uint32_t		xs_ibt_2_decrement;
+	__uint32_t		xs_ibt_2_lshift;
+	__uint32_t		xs_ibt_2_rshift;
+	__uint32_t		xs_ibt_2_split;
+	__uint32_t		xs_ibt_2_join;
+	__uint32_t		xs_ibt_2_alloc;
+	__uint32_t		xs_ibt_2_free;
+	__uint32_t		xs_ibt_2_moves;
+/* Extra precision counters */
+	__uint64_t		xs_xstrat_bytes;
+	__uint64_t		xs_write_bytes;
+	__uint64_t		xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/*
+ * We don't disable preempt, not too worried about poking the
+ * wrong CPU's stat for now (also aggregated before reporting).
+ */
+#define XFS_STATS_INC(v)	(per_cpu(xfsstats, current_cpu()).v++)
+#define XFS_STATS_DEC(v)	(per_cpu(xfsstats, current_cpu()).v--)
+#define XFS_STATS_ADD(v, inc)	(per_cpu(xfsstats, current_cpu()).v += (inc))
+
+extern int xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else	/* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static inline int xfs_init_procfs(void)
+{
+	return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
+
+#endif	/* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
new file mode 100644
index 0000000..9a72dda
--- /dev/null
+++ b/fs/xfs/xfs_super.c
@@ -0,0 +1,1773 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_fsops.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
+#include "xfs_sync.h"
+#include "xfs_trace.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/mempool.h>
+#include <linux/writeback.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/parser.h>
+
+static const struct super_operations xfs_super_operations;
+static kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
+
+#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
+#define MNTOPT_LOGDEV	"logdev"	/* log device */
+#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
+#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
+#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
+#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
+#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
+#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
+#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
+#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
+#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
+#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER	"barrier"	/* use writer barriers for log write and
+					 * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
+#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
+#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
+					 * in stat(). */
+#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
+#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
+#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
+#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
+#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
+#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
+#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
+#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
+#define MNTOPT_DELAYLOG    "delaylog"	/* Delayed logging enabled */
+#define MNTOPT_NODELAYLOG  "nodelaylog"	/* Delayed logging disabled */
+#define MNTOPT_DISCARD	   "discard"	/* Discard unused blocks */
+#define MNTOPT_NODISCARD   "nodiscard"	/* Do not discard unused blocks */
+
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+	Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_barrier, "barrier"},
+	{Opt_nobarrier, "nobarrier"},
+	{Opt_err, NULL}
+};
+
+
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+	int	last, shift_left_factor = 0;
+	char	*value = s;
+
+	last = strlen(value) - 1;
+	if (value[last] == 'K' || value[last] == 'k') {
+		shift_left_factor = 10;
+		value[last] = '\0';
+	}
+	if (value[last] == 'M' || value[last] == 'm') {
+		shift_left_factor = 20;
+		value[last] = '\0';
+	}
+	if (value[last] == 'G' || value[last] == 'g') {
+		shift_left_factor = 30;
+		value[last] = '\0';
+	}
+
+	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
+STATIC int
+xfs_parseargs(
+	struct xfs_mount	*mp,
+	char			*options)
+{
+	struct super_block	*sb = mp->m_super;
+	char			*this_char, *value, *eov;
+	int			dsunit = 0;
+	int			dswidth = 0;
+	int			iosize = 0;
+	__uint8_t		iosizelog = 0;
+
+	/*
+	 * set up the mount name first so all the errors will refer to the
+	 * correct device.
+	 */
+	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+	if (!mp->m_fsname)
+		return ENOMEM;
+	mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+	/*
+	 * Copy binary VFS mount flags we are interested in.
+	 */
+	if (sb->s_flags & MS_RDONLY)
+		mp->m_flags |= XFS_MOUNT_RDONLY;
+	if (sb->s_flags & MS_DIRSYNC)
+		mp->m_flags |= XFS_MOUNT_DIRSYNC;
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		mp->m_flags |= XFS_MOUNT_WSYNC;
+
+	/*
+	 * Set some default flags that could be cleared by the mount option
+	 * parsing.
+	 */
+	mp->m_flags |= XFS_MOUNT_BARRIER;
+	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+	mp->m_flags |= XFS_MOUNT_DELAYLOG;
+
+	/*
+	 * These can be overridden by the mount option parsing.
+	 */
+	mp->m_logbufs = -1;
+	mp->m_logbsize = -1;
+
+	if (!options)
+		goto done;
+
+	while ((this_char = strsep(&options, ",")) != NULL) {
+		if (!*this_char)
+			continue;
+		if ((value = strchr(this_char, '=')) != NULL)
+			*value++ = 0;
+
+		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			mp->m_logbufs = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+			if (!mp->m_logname)
+				return ENOMEM;
+		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
+			xfs_warn(mp, "%s option not allowed on this system",
+				this_char);
+			return EINVAL;
+		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+			if (!mp->m_rtname)
+				return ENOMEM;
+		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			iosize = simple_strtoul(value, &eov, 10);
+			iosizelog = ffs(iosize) - 1;
+		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			iosize = suffix_strtoul(value, &eov, 10);
+			iosizelog = ffs(iosize) - 1;
+		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
+			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+			mp->m_flags |= XFS_MOUNT_GRPID;
+		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+			mp->m_flags &= ~XFS_MOUNT_GRPID;
+		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+			mp->m_flags |= XFS_MOUNT_WSYNC;
+		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+			mp->m_flags |= XFS_MOUNT_NORECOVERY;
+		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+			mp->m_flags |= XFS_MOUNT_NOALIGN;
+		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+			mp->m_flags |= XFS_MOUNT_SWALLOC;
+		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			dsunit = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+			if (!value || !*value) {
+				xfs_warn(mp, "%s option requires an argument",
+					this_char);
+				return EINVAL;
+			}
+			dswidth = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+#if !XFS_BIG_INUMS
+			xfs_warn(mp, "%s option not allowed on this system",
+				this_char);
+			return EINVAL;
+#endif
+		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+			mp->m_flags |= XFS_MOUNT_NOUUID;
+		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+			mp->m_flags |= XFS_MOUNT_BARRIER;
+		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+			mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+			mp->m_flags |= XFS_MOUNT_IKEEP;
+		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+			mp->m_flags &= ~XFS_MOUNT_IKEEP;
+		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+			mp->m_flags |= XFS_MOUNT_ATTR2;
+		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+			mp->m_flags &= ~XFS_MOUNT_ATTR2;
+			mp->m_flags |= XFS_MOUNT_NOATTR2;
+		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+			mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+					  XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+					  XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+					  XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+			   !strcmp(this_char, MNTOPT_UQUOTA) ||
+			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
+			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+					 XFS_UQUOTA_ENFD);
+		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+					 XFS_OQUOTA_ENFD);
+		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+					 XFS_OQUOTA_ENFD);
+		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
+			mp->m_flags |= XFS_MOUNT_DELAYLOG;
+		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
+			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+			mp->m_flags |= XFS_MOUNT_DISCARD;
+		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+			mp->m_flags &= ~XFS_MOUNT_DISCARD;
+		} else if (!strcmp(this_char, "ihashsize")) {
+			xfs_warn(mp,
+	"ihashsize no longer used, option is deprecated.");
+		} else if (!strcmp(this_char, "osyncisdsync")) {
+			xfs_warn(mp,
+	"osyncisdsync has no effect, option is deprecated.");
+		} else if (!strcmp(this_char, "osyncisosync")) {
+			xfs_warn(mp,
+	"osyncisosync has no effect, option is deprecated.");
+		} else if (!strcmp(this_char, "irixsgid")) {
+			xfs_warn(mp,
+	"irixsgid is now a sysctl(2) variable, option is deprecated.");
+		} else {
+			xfs_warn(mp, "unknown mount option [%s].", this_char);
+			return EINVAL;
+		}
+	}
+
+	/*
+	 * no recovery flag requires a read-only mount
+	 */
+	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		xfs_warn(mp, "no-recovery mounts must be read-only.");
+		return EINVAL;
+	}
+
+	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
+		xfs_warn(mp,
+	"sunit and swidth options incompatible with the noalign option");
+		return EINVAL;
+	}
+
+	if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
+	    !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
+		xfs_warn(mp,
+	"the discard option is incompatible with the nodelaylog option");
+		return EINVAL;
+	}
+
+#ifndef CONFIG_XFS_QUOTA
+	if (XFS_IS_QUOTA_RUNNING(mp)) {
+		xfs_warn(mp, "quota support not available in this kernel.");
+		return EINVAL;
+	}
+#endif
+
+	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
+		xfs_warn(mp, "cannot mount with both project and group quota");
+		return EINVAL;
+	}
+
+	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+		xfs_warn(mp, "sunit and swidth must be specified together");
+		return EINVAL;
+	}
+
+	if (dsunit && (dswidth % dsunit != 0)) {
+		xfs_warn(mp,
+	"stripe width (%d) must be a multiple of the stripe unit (%d)",
+			dswidth, dsunit);
+		return EINVAL;
+	}
+
+done:
+	if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+		/*
+		 * At this point the superblock has not been read
+		 * in, therefore we do not know the block size.
+		 * Before the mount call ends we will convert
+		 * these to FSBs.
+		 */
+		if (dsunit) {
+			mp->m_dalign = dsunit;
+			mp->m_flags |= XFS_MOUNT_RETERR;
+		}
+
+		if (dswidth)
+			mp->m_swidth = dswidth;
+	}
+
+	if (mp->m_logbufs != -1 &&
+	    mp->m_logbufs != 0 &&
+	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+		return XFS_ERROR(EINVAL);
+	}
+	if (mp->m_logbsize != -1 &&
+	    mp->m_logbsize !=  0 &&
+	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+	     !is_power_of_2(mp->m_logbsize))) {
+		xfs_warn(mp,
+			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+			mp->m_logbsize);
+		return XFS_ERROR(EINVAL);
+	}
+
+	if (iosizelog) {
+		if (iosizelog > XFS_MAX_IO_LOG ||
+		    iosizelog < XFS_MIN_IO_LOG) {
+			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+				iosizelog, XFS_MIN_IO_LOG,
+				XFS_MAX_IO_LOG);
+			return XFS_ERROR(EINVAL);
+		}
+
+		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+		mp->m_readio_log = iosizelog;
+		mp->m_writeio_log = iosizelog;
+	}
+
+	return 0;
+}
+
+struct proc_xfs_info {
+	int	flag;
+	char	*str;
+};
+
+STATIC int
+xfs_showargs(
+	struct xfs_mount	*mp,
+	struct seq_file		*m)
+{
+	static struct proc_xfs_info xfs_info_set[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
+		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
+		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
+		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
+		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
+		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
+		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
+		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
+		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
+		{ XFS_MOUNT_DELAYLOG,		"," MNTOPT_DELAYLOG },
+		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
+		{ 0, NULL }
+	};
+	static struct proc_xfs_info xfs_info_unset[] = {
+		/* the few simple ones we can get from the mount struct */
+		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
+		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
+		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
+		{ 0, NULL }
+	};
+	struct proc_xfs_info	*xfs_infop;
+
+	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+		if (mp->m_flags & xfs_infop->flag)
+			seq_puts(m, xfs_infop->str);
+	}
+	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+		if (!(mp->m_flags & xfs_infop->flag))
+			seq_puts(m, xfs_infop->str);
+	}
+
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+				(int)(1 << mp->m_writeio_log) >> 10);
+
+	if (mp->m_logbufs > 0)
+		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+	if (mp->m_logbsize > 0)
+		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+	if (mp->m_logname)
+		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+	if (mp->m_rtname)
+		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+	if (mp->m_dalign > 0)
+		seq_printf(m, "," MNTOPT_SUNIT "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+	if (mp->m_swidth > 0)
+		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+		seq_puts(m, "," MNTOPT_USRQUOTA);
+	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+		seq_puts(m, "," MNTOPT_UQUOTANOENF);
+
+	/* Either project or group quotas can be active, not both */
+
+	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+		if (mp->m_qflags & XFS_OQUOTA_ENFD)
+			seq_puts(m, "," MNTOPT_PRJQUOTA);
+		else
+			seq_puts(m, "," MNTOPT_PQUOTANOENF);
+	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+		if (mp->m_qflags & XFS_OQUOTA_ENFD)
+			seq_puts(m, "," MNTOPT_GRPQUOTA);
+		else
+			seq_puts(m, "," MNTOPT_GQUOTANOENF);
+	}
+
+	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+		seq_puts(m, "," MNTOPT_NOQUOTA);
+
+	return 0;
+}
+__uint64_t
+xfs_max_file_offset(
+	unsigned int		blockshift)
+{
+	unsigned int		pagefactor = 1;
+	unsigned int		bitshift = BITS_PER_LONG - 1;
+
+	/* Figure out maximum filesize, on Linux this can depend on
+	 * the filesystem blocksize (on 32 bit platforms).
+	 * __block_write_begin does this in an [unsigned] long...
+	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
+	 * So, for page sized blocks (4K on 32 bit platforms),
+	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+	 * but for smaller blocksizes it is less (bbits = log2 bsize).
+	 * Note1: get_block_t takes a long (implicit cast from above)
+	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+	 * can optionally convert the [unsigned] long from above into
+	 * an [unsigned] long long.
+	 */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBDAF)
+	ASSERT(sizeof(sector_t) == 8);
+	pagefactor = PAGE_CACHE_SIZE;
+	bitshift = BITS_PER_LONG;
+# else
+	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+	return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
+
+STATIC int
+xfs_blkdev_get(
+	xfs_mount_t		*mp,
+	const char		*name,
+	struct block_device	**bdevp)
+{
+	int			error = 0;
+
+	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+				    mp);
+	if (IS_ERR(*bdevp)) {
+		error = PTR_ERR(*bdevp);
+		xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
+	}
+
+	return -error;
+}
+
+STATIC void
+xfs_blkdev_put(
+	struct block_device	*bdev)
+{
+	if (bdev)
+		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+}
+
+void
+xfs_blkdev_issue_flush(
+	xfs_buftarg_t		*buftarg)
+{
+	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
+}
+
+STATIC void
+xfs_close_devices(
+	struct xfs_mount	*mp)
+{
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+		xfs_free_buftarg(mp, mp->m_logdev_targp);
+		xfs_blkdev_put(logdev);
+	}
+	if (mp->m_rtdev_targp) {
+		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+		xfs_free_buftarg(mp, mp->m_rtdev_targp);
+		xfs_blkdev_put(rtdev);
+	}
+	xfs_free_buftarg(mp, mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *	(1) device (partition) with data and internal log
+ *	(2) logical volume with data and log subvolumes.
+ *	(3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+	struct xfs_mount	*mp)
+{
+	struct block_device	*ddev = mp->m_super->s_bdev;
+	struct block_device	*logdev = NULL, *rtdev = NULL;
+	int			error;
+
+	/*
+	 * Open real time and log devices - order is important.
+	 */
+	if (mp->m_logname) {
+		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+		if (error)
+			goto out;
+	}
+
+	if (mp->m_rtname) {
+		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+		if (error)
+			goto out_close_logdev;
+
+		if (rtdev == ddev || rtdev == logdev) {
+			xfs_warn(mp,
+	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
+			error = EINVAL;
+			goto out_close_rtdev;
+		}
+	}
+
+	/*
+	 * Setup xfs_mount buffer target pointers
+	 */
+	error = ENOMEM;
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+	if (!mp->m_ddev_targp)
+		goto out_close_rtdev;
+
+	if (rtdev) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+							mp->m_fsname);
+		if (!mp->m_rtdev_targp)
+			goto out_free_ddev_targ;
+	}
+
+	if (logdev && logdev != ddev) {
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+							mp->m_fsname);
+		if (!mp->m_logdev_targp)
+			goto out_free_rtdev_targ;
+	} else {
+		mp->m_logdev_targp = mp->m_ddev_targp;
+	}
+
+	return 0;
+
+ out_free_rtdev_targ:
+	if (mp->m_rtdev_targp)
+		xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ out_free_ddev_targ:
+	xfs_free_buftarg(mp, mp->m_ddev_targp);
+ out_close_rtdev:
+	if (rtdev)
+		xfs_blkdev_put(rtdev);
+ out_close_logdev:
+	if (logdev && logdev != ddev)
+		xfs_blkdev_put(logdev);
+ out:
+	return error;
+}
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+	struct xfs_mount	*mp)
+{
+	int			error;
+
+	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+				    mp->m_sb.sb_sectsize);
+	if (error)
+		return error;
+
+	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+		unsigned int	log_sector_size = BBSIZE;
+
+		if (xfs_sb_version_hassector(&mp->m_sb))
+			log_sector_size = mp->m_sb.sb_logsectsize;
+		error = xfs_setsize_buftarg(mp->m_logdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    log_sector_size);
+		if (error)
+			return error;
+	}
+	if (mp->m_rtdev_targp) {
+		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+					    mp->m_sb.sb_blocksize,
+					    mp->m_sb.sb_sectsize);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+
+/* Catch misguided souls that try to use this interface on XFS */
+STATIC struct inode *
+xfs_fs_alloc_inode(
+	struct super_block	*sb)
+{
+	BUG();
+	return NULL;
+}
+
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
+STATIC void
+xfs_fs_destroy_inode(
+	struct inode		*inode)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	trace_xfs_destroy_inode(ip);
+
+	XFS_STATS_INC(vn_reclaim);
+
+	/* bad inode, get out here ASAP */
+	if (is_bad_inode(inode))
+		goto out_reclaim;
+
+	xfs_ioend_wait(ip);
+
+	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+	/*
+	 * We should never get here with one of the reclaim flags already set.
+	 */
+	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+	/*
+	 * We always use background reclaim here because even if the
+	 * inode is clean, it still may be under IO and hence we have
+	 * to take the flush lock. The background reclaim path handles
+	 * this more efficiently than we can here, so simply let background
+	 * reclaim tear down all inodes.
+	 */
+out_reclaim:
+	xfs_inode_set_reclaim_tag(ip);
+}
+
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
+STATIC void
+xfs_fs_inode_init_once(
+	void			*inode)
+{
+	struct xfs_inode	*ip = inode;
+
+	memset(ip, 0, sizeof(struct xfs_inode));
+
+	/* vfs inode */
+	inode_init_once(VFS_I(ip));
+
+	/* xfs inode */
+	atomic_set(&ip->i_iocount, 0);
+	atomic_set(&ip->i_pincount, 0);
+	spin_lock_init(&ip->i_flags_lock);
+	init_waitqueue_head(&ip->i_ipin_wait);
+	/*
+	 * Because we want to use a counting completion, complete
+	 * the flush completion once to allow a single access to
+	 * the flush completion without blocking.
+	 */
+	init_completion(&ip->i_flush);
+	complete(&ip->i_flush);
+
+	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+		     "xfsino", ip->i_ino);
+}
+
+/*
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
+ */
+STATIC void
+xfs_fs_dirty_inode(
+	struct inode	*inode,
+	int		flags)
+{
+	barrier();
+	XFS_I(inode)->i_update_core = 1;
+}
+
+STATIC int
+xfs_log_inode(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		/* we need to return with the lock hold shared */
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Note - it's possible that we might have pushed ourselves out of the
+	 * way during trans_reserve which would flush the inode.  But there's
+	 * no guarantee that the inode buffer has actually gone out yet (it's
+	 * delwri).  Plus the buffer could be pinned anyway if it's part of
+	 * an inode in another recent transaction.  So we play it safe and
+	 * fire off the transaction anyway.
+	 */
+	xfs_trans_ijoin(tp, ip);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	error = xfs_trans_commit(tp, 0);
+	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+	return error;
+}
+
+STATIC int
+xfs_fs_write_inode(
+	struct inode		*inode,
+	struct writeback_control *wbc)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = EAGAIN;
+
+	trace_xfs_write_inode(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	if (wbc->sync_mode == WB_SYNC_ALL) {
+		/*
+		 * Make sure the inode has made it it into the log.  Instead
+		 * of forcing it all the way to stable storage using a
+		 * synchronous transaction we let the log force inside the
+		 * ->sync_fs call do that for thus, which reduces the number
+		 * of synchronous log foces dramatically.
+		 */
+		xfs_ioend_wait(ip);
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		if (ip->i_update_core) {
+			error = xfs_log_inode(ip);
+			if (error)
+				goto out_unlock;
+		}
+	} else {
+		/*
+		 * We make this non-blocking if the inode is contended, return
+		 * EAGAIN to indicate to the caller that they did not succeed.
+		 * This prevents the flush path from blocking on inodes inside
+		 * another operation right now, they get caught later by
+		 * xfs_sync.
+		 */
+		if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+			goto out;
+
+		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+			goto out_unlock;
+
+		/*
+		 * Now we have the flush lock and the inode is not pinned, we
+		 * can check if the inode is really clean as we know that
+		 * there are no pending transaction completions, it is not
+		 * waiting on the delayed write queue and there is no IO in
+		 * progress.
+		 */
+		if (xfs_inode_clean(ip)) {
+			xfs_ifunlock(ip);
+			error = 0;
+			goto out_unlock;
+		}
+		error = xfs_iflush(ip, SYNC_TRYLOCK);
+	}
+
+ out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
+	/*
+	 * if we failed to write out the inode then mark
+	 * it dirty again so we'll try again later.
+	 */
+	if (error)
+		xfs_mark_inode_dirty_sync(ip);
+	return -error;
+}
+
+STATIC void
+xfs_fs_evict_inode(
+	struct inode		*inode)
+{
+	xfs_inode_t		*ip = XFS_I(inode);
+
+	trace_xfs_evict_inode(ip);
+
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+	XFS_STATS_INC(vn_rele);
+	XFS_STATS_INC(vn_remove);
+	XFS_STATS_DEC(vn_active);
+
+	/*
+	 * The iolock is used by the file system to coordinate reads,
+	 * writes, and block truncates.  Up to this point the lock
+	 * protected concurrent accesses by users of the inode.  But
+	 * from here forward we're doing some final processing of the
+	 * inode because we're done with it, and although we reuse the
+	 * iolock for protection it is really a distinct lock class
+	 * (in the lockdep sense) from before.  To keep lockdep happy
+	 * (and basically indicate what we are doing), we explicitly
+	 * re-init the iolock here.
+	 */
+	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+	lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+			&xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
+
+	xfs_inactive(ip);
+}
+
+STATIC void
+xfs_free_fsname(
+	struct xfs_mount	*mp)
+{
+	kfree(mp->m_fsname);
+	kfree(mp->m_rtname);
+	kfree(mp->m_logname);
+}
+
+STATIC void
+xfs_fs_put_super(
+	struct super_block	*sb)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	xfs_syncd_stop(mp);
+
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
+	XFS_bflush(mp->m_ddev_targp);
+
+	xfs_unmountfs(mp);
+	xfs_freesb(mp);
+	xfs_icsb_destroy_counters(mp);
+	xfs_close_devices(mp);
+	xfs_free_fsname(mp);
+	kfree(mp);
+}
+
+STATIC int
+xfs_fs_sync_fs(
+	struct super_block	*sb,
+	int			wait)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	int			error;
+
+	/*
+	 * Not much we can do for the first async pass.  Writing out the
+	 * superblock would be counter-productive as we are going to redirty
+	 * when writing out other data and metadata (and writing out a single
+	 * block is quite fast anyway).
+	 *
+	 * Try to asynchronously kick off quota syncing at least.
+	 */
+	if (!wait) {
+		xfs_qm_sync(mp, SYNC_TRYLOCK);
+		return 0;
+	}
+
+	error = xfs_quiesce_data(mp);
+	if (error)
+		return -error;
+
+	if (laptop_mode) {
+		/*
+		 * The disk must be active because we're syncing.
+		 * We schedule xfssyncd now (now that the disk is
+		 * active) instead of later (when it might not be).
+		 */
+		flush_delayed_work_sync(&mp->m_sync_work);
+	}
+
+	return 0;
+}
+
+STATIC int
+xfs_fs_statfs(
+	struct dentry		*dentry,
+	struct kstatfs		*statp)
+{
+	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
+	xfs_sb_t		*sbp = &mp->m_sb;
+	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
+	__uint64_t		fakeinos, id;
+	xfs_extlen_t		lsize;
+	__int64_t		ffree;
+
+	statp->f_type = XFS_SB_MAGIC;
+	statp->f_namelen = MAXNAMELEN - 1;
+
+	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+	statp->f_fsid.val[0] = (u32)id;
+	statp->f_fsid.val[1] = (u32)(id >> 32);
+
+	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+
+	spin_lock(&mp->m_sb_lock);
+	statp->f_bsize = sbp->sb_blocksize;
+	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+	statp->f_blocks = sbp->sb_dblocks - lsize;
+	statp->f_bfree = statp->f_bavail =
+				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	fakeinos = statp->f_bfree << sbp->sb_inopblog;
+	statp->f_files =
+	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	if (mp->m_maxicount)
+		statp->f_files = min_t(typeof(statp->f_files),
+					statp->f_files,
+					mp->m_maxicount);
+
+	/* make sure statp->f_ffree does not underflow */
+	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	statp->f_ffree = max_t(__int64_t, ffree, 0);
+
+	spin_unlock(&mp->m_sb_lock);
+
+	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+		xfs_qm_statvfs(ip, statp);
+	return 0;
+}
+
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+	__uint64_t resblks = 0;
+
+	mp->m_resblks_save = mp->m_resblks;
+	xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+	__uint64_t resblks;
+
+	if (mp->m_resblks_save) {
+		resblks = mp->m_resblks_save;
+		mp->m_resblks_save = 0;
+	} else
+		resblks = xfs_default_resblks(mp);
+
+	xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC int
+xfs_fs_remount(
+	struct super_block	*sb,
+	int			*flags,
+	char			*options)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	substring_t		args[MAX_OPT_ARGS];
+	char			*p;
+	int			error;
+
+	while ((p = strsep(&options, ",")) != NULL) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_barrier:
+			mp->m_flags |= XFS_MOUNT_BARRIER;
+			break;
+		case Opt_nobarrier:
+			mp->m_flags &= ~XFS_MOUNT_BARRIER;
+			break;
+		default:
+			/*
+			 * Logically we would return an error here to prevent
+			 * users from believing they might have changed
+			 * mount options using remount which can't be changed.
+			 *
+			 * But unfortunately mount(8) adds all options from
+			 * mtab and fstab to the mount arguments in some cases
+			 * so we can't blindly reject options, but have to
+			 * check for each specified option if it actually
+			 * differs from the currently set option and only
+			 * reject it if that's the case.
+			 *
+			 * Until that is implemented we return success for
+			 * every remount request, and silently ignore all
+			 * options that we can't actually change.
+			 */
+#if 0
+			xfs_info(mp,
+		"mount option \"%s\" not supported for remount\n", p);
+			return -EINVAL;
+#else
+			break;
+#endif
+		}
+	}
+
+	/* ro -> rw */
+	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+		mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+		/*
+		 * If this is the first remount to writeable state we
+		 * might have some superblock changes to update.
+		 */
+		if (mp->m_update_flags) {
+			error = xfs_mount_log_sb(mp, mp->m_update_flags);
+			if (error) {
+				xfs_warn(mp, "failed to write sb changes");
+				return error;
+			}
+			mp->m_update_flags = 0;
+		}
+
+		/*
+		 * Fill out the reserve pool if it is empty. Use the stashed
+		 * value if it is non-zero, otherwise go with the default.
+		 */
+		xfs_restore_resvblks(mp);
+	}
+
+	/* rw -> ro */
+	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+		/*
+		 * After we have synced the data but before we sync the
+		 * metadata, we need to free up the reserve block pool so that
+		 * the used block count in the superblock on disk is correct at
+		 * the end of the remount. Stash the current reserve pool size
+		 * so that if we get remounted rw, we can return it to the same
+		 * size.
+		 */
+
+		xfs_quiesce_data(mp);
+		xfs_save_resvblks(mp);
+		xfs_quiesce_attr(mp);
+		mp->m_flags |= XFS_MOUNT_RDONLY;
+	}
+
+	return 0;
+}
+
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of the metadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
+STATIC int
+xfs_fs_freeze(
+	struct super_block	*sb)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	xfs_save_resvblks(mp);
+	xfs_quiesce_attr(mp);
+	return -xfs_fs_log_dummy(mp);
+}
+
+STATIC int
+xfs_fs_unfreeze(
+	struct super_block	*sb)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	xfs_restore_resvblks(mp);
+	return 0;
+}
+
+STATIC int
+xfs_fs_show_options(
+	struct seq_file		*m,
+	struct vfsmount		*mnt)
+{
+	return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+	struct xfs_mount	*mp)
+{
+	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+	/* Fail a mount where the logbuf is smaller than the log stripe */
+	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+		if (mp->m_logbsize <= 0 &&
+		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
+			mp->m_logbsize = mp->m_sb.sb_logsunit;
+		} else if (mp->m_logbsize > 0 &&
+			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
+			xfs_warn(mp,
+		"logbuf size must be greater than or equal to log stripe size");
+			return XFS_ERROR(EINVAL);
+		}
+	} else {
+		/* Fail a mount if the logbuf is larger than 32K */
+		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
+			xfs_warn(mp,
+		"logbuf size for version 1 logs must be 16K or 32K");
+			return XFS_ERROR(EINVAL);
+		}
+	}
+
+	/*
+	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+	 * told by noattr2 to turn it off
+	 */
+	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
+		mp->m_flags |= XFS_MOUNT_ATTR2;
+
+	/*
+	 * prohibit r/w mounts of read-only filesystems
+	 */
+	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+		xfs_warn(mp,
+			"cannot mount a read-only filesystem as read-write");
+		return XFS_ERROR(EROFS);
+	}
+
+	return 0;
+}
+
+STATIC int
+xfs_fs_fill_super(
+	struct super_block	*sb,
+	void			*data,
+	int			silent)
+{
+	struct inode		*root;
+	struct xfs_mount	*mp = NULL;
+	int			flags = 0, error = ENOMEM;
+
+	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+	if (!mp)
+		goto out;
+
+	spin_lock_init(&mp->m_sb_lock);
+	mutex_init(&mp->m_growlock);
+	atomic_set(&mp->m_active_trans, 0);
+
+	mp->m_super = sb;
+	sb->s_fs_info = mp;
+
+	error = xfs_parseargs(mp, (char *)data);
+	if (error)
+		goto out_free_fsname;
+
+	sb_min_blocksize(sb, BBSIZE);
+	sb->s_xattr = xfs_xattr_handlers;
+	sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
+	sb->s_qcop = &xfs_quotactl_operations;
+#endif
+	sb->s_op = &xfs_super_operations;
+
+	if (silent)
+		flags |= XFS_MFSI_QUIET;
+
+	error = xfs_open_devices(mp);
+	if (error)
+		goto out_free_fsname;
+
+	error = xfs_icsb_init_counters(mp);
+	if (error)
+		goto out_close_devices;
+
+	error = xfs_readsb(mp, flags);
+	if (error)
+		goto out_destroy_counters;
+
+	error = xfs_finish_flags(mp);
+	if (error)
+		goto out_free_sb;
+
+	error = xfs_setup_devices(mp);
+	if (error)
+		goto out_free_sb;
+
+	error = xfs_filestream_mount(mp);
+	if (error)
+		goto out_free_sb;
+
+	/*
+	 * we must configure the block size in the superblock before we run the
+	 * full mount process as the mount process can lookup and cache inodes.
+	 * For the same reason we must also initialise the syncd and register
+	 * the inode cache shrinker so that inodes can be reclaimed during
+	 * operations like a quotacheck that iterate all inodes in the
+	 * filesystem.
+	 */
+	sb->s_magic = XFS_SB_MAGIC;
+	sb->s_blocksize = mp->m_sb.sb_blocksize;
+	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
+	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+	sb->s_time_gran = 1;
+	set_posix_acl_flag(sb);
+
+	error = xfs_mountfs(mp);
+	if (error)
+		goto out_filestream_unmount;
+
+	error = xfs_syncd_init(mp);
+	if (error)
+		goto out_unmount;
+
+	root = igrab(VFS_I(mp->m_rootip));
+	if (!root) {
+		error = ENOENT;
+		goto out_syncd_stop;
+	}
+	if (is_bad_inode(root)) {
+		error = EINVAL;
+		goto out_syncd_stop;
+	}
+	sb->s_root = d_alloc_root(root);
+	if (!sb->s_root) {
+		error = ENOMEM;
+		goto out_iput;
+	}
+
+	return 0;
+
+ out_filestream_unmount:
+	xfs_filestream_unmount(mp);
+ out_free_sb:
+	xfs_freesb(mp);
+ out_destroy_counters:
+	xfs_icsb_destroy_counters(mp);
+ out_close_devices:
+	xfs_close_devices(mp);
+ out_free_fsname:
+	xfs_free_fsname(mp);
+	kfree(mp);
+ out:
+	return -error;
+
+ out_iput:
+	iput(root);
+ out_syncd_stop:
+	xfs_syncd_stop(mp);
+ out_unmount:
+	/*
+	 * Blow away any referenced inode in the filestreams cache.
+	 * This can and will cause log traffic as inodes go inactive
+	 * here.
+	 */
+	xfs_filestream_unmount(mp);
+
+	XFS_bflush(mp->m_ddev_targp);
+
+	xfs_unmountfs(mp);
+	goto out_free_sb;
+}
+
+STATIC struct dentry *
+xfs_fs_mount(
+	struct file_system_type	*fs_type,
+	int			flags,
+	const char		*dev_name,
+	void			*data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+}
+
+static int
+xfs_fs_nr_cached_objects(
+	struct super_block	*sb)
+{
+	return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+	struct super_block	*sb,
+	int			nr_to_scan)
+{
+	xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
+static const struct super_operations xfs_super_operations = {
+	.alloc_inode		= xfs_fs_alloc_inode,
+	.destroy_inode		= xfs_fs_destroy_inode,
+	.dirty_inode		= xfs_fs_dirty_inode,
+	.write_inode		= xfs_fs_write_inode,
+	.evict_inode		= xfs_fs_evict_inode,
+	.put_super		= xfs_fs_put_super,
+	.sync_fs		= xfs_fs_sync_fs,
+	.freeze_fs		= xfs_fs_freeze,
+	.unfreeze_fs		= xfs_fs_unfreeze,
+	.statfs			= xfs_fs_statfs,
+	.remount_fs		= xfs_fs_remount,
+	.show_options		= xfs_fs_show_options,
+	.nr_cached_objects	= xfs_fs_nr_cached_objects,
+	.free_cached_objects	= xfs_fs_free_cached_objects,
+};
+
+static struct file_system_type xfs_fs_type = {
+	.owner			= THIS_MODULE,
+	.name			= "xfs",
+	.mount			= xfs_fs_mount,
+	.kill_sb		= kill_block_super,
+	.fs_flags		= FS_REQUIRES_DEV,
+};
+
+STATIC int __init
+xfs_init_zones(void)
+{
+
+	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+	if (!xfs_ioend_zone)
+		goto out;
+
+	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+						  xfs_ioend_zone);
+	if (!xfs_ioend_pool)
+		goto out_destroy_ioend_zone;
+
+	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+						"xfs_log_ticket");
+	if (!xfs_log_ticket_zone)
+		goto out_destroy_ioend_pool;
+
+	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+						"xfs_bmap_free_item");
+	if (!xfs_bmap_free_item_zone)
+		goto out_destroy_log_ticket_zone;
+
+	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+						"xfs_btree_cur");
+	if (!xfs_btree_cur_zone)
+		goto out_destroy_bmap_free_item_zone;
+
+	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+						"xfs_da_state");
+	if (!xfs_da_state_zone)
+		goto out_destroy_btree_cur_zone;
+
+	xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+	if (!xfs_dabuf_zone)
+		goto out_destroy_da_state_zone;
+
+	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+	if (!xfs_ifork_zone)
+		goto out_destroy_dabuf_zone;
+
+	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+	if (!xfs_trans_zone)
+		goto out_destroy_ifork_zone;
+
+	xfs_log_item_desc_zone =
+		kmem_zone_init(sizeof(struct xfs_log_item_desc),
+			       "xfs_log_item_desc");
+	if (!xfs_log_item_desc_zone)
+		goto out_destroy_trans_zone;
+
+	/*
+	 * The size of the zone allocated buf log item is the maximum
+	 * size possible under XFS.  This wastes a little bit of memory,
+	 * but it is much faster.
+	 */
+	xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+				(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
+				  NBWORD) * sizeof(int))), "xfs_buf_item");
+	if (!xfs_buf_item_zone)
+		goto out_destroy_log_item_desc_zone;
+
+	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+				 sizeof(xfs_extent_t))), "xfs_efd_item");
+	if (!xfs_efd_zone)
+		goto out_destroy_buf_item_zone;
+
+	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+				sizeof(xfs_extent_t))), "xfs_efi_item");
+	if (!xfs_efi_zone)
+		goto out_destroy_efd_zone;
+
+	xfs_inode_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+			xfs_fs_inode_init_once);
+	if (!xfs_inode_zone)
+		goto out_destroy_efi_zone;
+
+	xfs_ili_zone =
+		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+					KM_ZONE_SPREAD, NULL);
+	if (!xfs_ili_zone)
+		goto out_destroy_inode_zone;
+
+	return 0;
+
+ out_destroy_inode_zone:
+	kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+	kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+	kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+	kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+	kmem_zone_destroy(xfs_log_item_desc_zone);
+ out_destroy_trans_zone:
+	kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+	kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+	kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+	kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+	kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+	kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+	kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+	mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+	kmem_zone_destroy(xfs_ioend_zone);
+ out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+	kmem_zone_destroy(xfs_ili_zone);
+	kmem_zone_destroy(xfs_inode_zone);
+	kmem_zone_destroy(xfs_efi_zone);
+	kmem_zone_destroy(xfs_efd_zone);
+	kmem_zone_destroy(xfs_buf_item_zone);
+	kmem_zone_destroy(xfs_log_item_desc_zone);
+	kmem_zone_destroy(xfs_trans_zone);
+	kmem_zone_destroy(xfs_ifork_zone);
+	kmem_zone_destroy(xfs_dabuf_zone);
+	kmem_zone_destroy(xfs_da_state_zone);
+	kmem_zone_destroy(xfs_btree_cur_zone);
+	kmem_zone_destroy(xfs_bmap_free_item_zone);
+	kmem_zone_destroy(xfs_log_ticket_zone);
+	mempool_destroy(xfs_ioend_pool);
+	kmem_zone_destroy(xfs_ioend_zone);
+
+}
+
+STATIC int __init
+xfs_init_workqueues(void)
+{
+	/*
+	 * max_active is set to 8 to give enough concurency to allow
+	 * multiple work operations on each CPU to run. This allows multiple
+	 * filesystems to be running sync work concurrently, and scales with
+	 * the number of CPUs in the system.
+	 */
+	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_syncd_wq)
+		goto out;
+
+	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_ail_wq)
+		goto out_destroy_syncd;
+
+	return 0;
+
+out_destroy_syncd:
+	destroy_workqueue(xfs_syncd_wq);
+out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+	destroy_workqueue(xfs_ail_wq);
+	destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
+init_xfs_fs(void)
+{
+	int			error;
+
+	printk(KERN_INFO XFS_VERSION_STRING " with "
+			 XFS_BUILD_OPTIONS " enabled\n");
+
+	xfs_ioend_init();
+	xfs_dir_startup();
+
+	error = xfs_init_zones();
+	if (error)
+		goto out;
+
+	error = xfs_init_workqueues();
+	if (error)
+		goto out_destroy_zones;
+
+	error = xfs_mru_cache_init();
+	if (error)
+		goto out_destroy_wq;
+
+	error = xfs_filestream_init();
+	if (error)
+		goto out_mru_cache_uninit;
+
+	error = xfs_buf_init();
+	if (error)
+		goto out_filestream_uninit;
+
+	error = xfs_init_procfs();
+	if (error)
+		goto out_buf_terminate;
+
+	error = xfs_sysctl_register();
+	if (error)
+		goto out_cleanup_procfs;
+
+	vfs_initquota();
+
+	error = register_filesystem(&xfs_fs_type);
+	if (error)
+		goto out_sysctl_unregister;
+	return 0;
+
+ out_sysctl_unregister:
+	xfs_sysctl_unregister();
+ out_cleanup_procfs:
+	xfs_cleanup_procfs();
+ out_buf_terminate:
+	xfs_buf_terminate();
+ out_filestream_uninit:
+	xfs_filestream_uninit();
+ out_mru_cache_uninit:
+	xfs_mru_cache_uninit();
+ out_destroy_wq:
+	xfs_destroy_workqueues();
+ out_destroy_zones:
+	xfs_destroy_zones();
+ out:
+	return error;
+}
+
+STATIC void __exit
+exit_xfs_fs(void)
+{
+	vfs_exitquota();
+	unregister_filesystem(&xfs_fs_type);
+	xfs_sysctl_unregister();
+	xfs_cleanup_procfs();
+	xfs_buf_terminate();
+	xfs_filestream_uninit();
+	xfs_mru_cache_uninit();
+	xfs_destroy_workqueues();
+	xfs_destroy_zones();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
new file mode 100644
index 0000000..50a3266
--- /dev/null
+++ b/fs/xfs/xfs_super.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#include <linux/exportfs.h>
+
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota()	xfs_qm_init()
+# define vfs_exitquota()	xfs_qm_exit()
+#else
+# define vfs_initquota()	do { } while (0)
+# define vfs_exitquota()	do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING		"ACLs, "
+# define set_posix_acl_flag(sb)	((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)	do { } while (0)
+#endif
+
+#define XFS_SECURITY_STRING	"security attributes, "
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING	"realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING	"large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING	"large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef DEBUG
+# define XFS_DBG_STRING		"debug"
+#else
+# define XFS_DBG_STRING		"no debug"
+#endif
+
+#define XFS_VERSION_STRING	"SGI XFS"
+#define XFS_BUILD_OPTIONS	XFS_ACL_STRING \
+				XFS_SECURITY_STRING \
+				XFS_REALTIME_STRING \
+				XFS_BIGFS_STRING \
+				XFS_DBG_STRING /* DBG must be last */
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+
+extern const struct export_operations xfs_export_operations;
+extern const struct xattr_handler *xfs_xattr_handlers[];
+extern const struct quotactl_ops xfs_quotactl_operations;
+
+#define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
+
+#endif	/* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
new file mode 100644
index 0000000..4604f90
--- /dev/null
+++ b/fs/xfs/xfs_sync.c
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_fsops.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
+
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
+
+STATIC int
+xfs_inode_ag_walk_grab(
+	struct xfs_inode	*ip)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	ASSERT(rcu_read_lock_held());
+
+	/*
+	 * check for stale RCU freed inode
+	 *
+	 * If the inode has been reallocated, it doesn't matter if it's not in
+	 * the AG we are walking - we are walking for writeback, so if it
+	 * passes all the "valid inode" checks and is dirty, then we'll write
+	 * it back anyway.  If it has been reallocated and still being
+	 * initialised, the XFS_INEW check below will catch it.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (!ip->i_ino)
+		goto out_unlock_noent;
+
+	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+	if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+		goto out_unlock_noent;
+	spin_unlock(&ip->i_flags_lock);
+
+	/* nothing to sync during shutdown */
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return EFSCORRUPTED;
+
+	/* If we can't grab the inode, it must on it's way to reclaim. */
+	if (!igrab(inode))
+		return ENOENT;
+
+	if (is_bad_inode(inode)) {
+		IRELE(ip);
+		return ENOENT;
+	}
+
+	/* inode is valid */
+	return 0;
+
+out_unlock_noent:
+	spin_unlock(&ip->i_flags_lock);
+	return ENOENT;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags)
+{
+	uint32_t		first_index;
+	int			last_error = 0;
+	int			skipped;
+	int			done;
+	int			nr_found;
+
+restart:
+	done = 0;
+	skipped = 0;
+	first_index = 0;
+	nr_found = 0;
+	do {
+		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+		int		error = 0;
+		int		i;
+
+		rcu_read_lock();
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+					(void **)batch, first_index,
+					XFS_LOOKUP_BATCH);
+		if (!nr_found) {
+			rcu_read_unlock();
+			break;
+		}
+
+		/*
+		 * Grab the inodes before we drop the lock. if we found
+		 * nothing, nr == 0 and the loop will be skipped.
+		 */
+		for (i = 0; i < nr_found; i++) {
+			struct xfs_inode *ip = batch[i];
+
+			if (done || xfs_inode_ag_walk_grab(ip))
+				batch[i] = NULL;
+
+			/*
+			 * Update the index for the next lookup. Catch
+			 * overflows into the next AG range which can occur if
+			 * we have inodes in the last block of the AG and we
+			 * are currently pointing to the last inode.
+			 *
+			 * Because we may see inodes that are from the wrong AG
+			 * due to RCU freeing and reallocation, only update the
+			 * index if it lies in this AG. It was a race that lead
+			 * us to see this inode, so another lookup from the
+			 * same index will not find it again.
+			 */
+			if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+				continue;
+			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+				done = 1;
+		}
+
+		/* unlock now we've grabbed the inodes. */
+		rcu_read_unlock();
+
+		for (i = 0; i < nr_found; i++) {
+			if (!batch[i])
+				continue;
+			error = execute(batch[i], pag, flags);
+			IRELE(batch[i]);
+			if (error == EAGAIN) {
+				skipped++;
+				continue;
+			}
+			if (error && last_error != EFSCORRUPTED)
+				last_error = error;
+		}
+
+		/* bail out if the filesystem is corrupted.  */
+		if (error == EFSCORRUPTED)
+			break;
+
+		cond_resched();
+
+	} while (nr_found && !done);
+
+	if (skipped) {
+		delay(1);
+		goto restart;
+	}
+	return last_error;
+}
+
+int
+xfs_inode_ag_iterator(
+	struct xfs_mount	*mp,
+	int			(*execute)(struct xfs_inode *ip,
+					   struct xfs_perag *pag, int flags),
+	int			flags)
+{
+	struct xfs_perag	*pag;
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+
+	ag = 0;
+	while ((pag = xfs_perag_get(mp, ag))) {
+		ag = pag->pag_agno + 1;
+		error = xfs_inode_ag_walk(mp, pag, execute, flags);
+		xfs_perag_put(pag);
+		if (error) {
+			last_error = error;
+			if (error == EFSCORRUPTED)
+				break;
+		}
+	}
+	return XFS_ERROR(last_error);
+}
+
+STATIC int
+xfs_sync_inode_data(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
+{
+	struct inode		*inode = VFS_I(ip);
+	struct address_space *mapping = inode->i_mapping;
+	int			error = 0;
+
+	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		goto out_wait;
+
+	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+		if (flags & SYNC_TRYLOCK)
+			goto out_wait;
+		xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	}
+
+	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+				0 : XBF_ASYNC, FI_NONE);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+	if (flags & SYNC_WAIT)
+		xfs_ioend_wait(ip);
+	return error;
+}
+
+STATIC int
+xfs_sync_inode_attr(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			flags)
+{
+	int			error = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_inode_clean(ip))
+		goto out_unlock;
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(flags & SYNC_WAIT))
+			goto out_unlock;
+		xfs_iflock(ip);
+	}
+
+	if (xfs_inode_clean(ip)) {
+		xfs_ifunlock(ip);
+		goto out_unlock;
+	}
+
+	error = xfs_iflush(ip, flags);
+
+	/*
+	 * We don't want to try again on non-blocking flushes that can't run
+	 * again immediately. If an inode really must be written, then that's
+	 * what the SYNC_WAIT flag is for.
+	 */
+	if (error == EAGAIN) {
+		ASSERT(!(flags & SYNC_WAIT));
+		error = 0;
+	}
+
+ out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+STATIC int
+xfs_sync_data(
+	struct xfs_mount	*mp,
+	int			flags)
+{
+	int			error;
+
+	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+	if (error)
+		return XFS_ERROR(error);
+
+	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
+	return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+STATIC int
+xfs_sync_attr(
+	struct xfs_mount	*mp,
+	int			flags)
+{
+	ASSERT((flags & ~SYNC_WAIT) == 0);
+
+	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
+}
+
+STATIC int
+xfs_sync_fsdata(
+	struct xfs_mount	*mp)
+{
+	struct xfs_buf		*bp;
+
+	/*
+	 * If the buffer is pinned then push on the log so we won't get stuck
+	 * waiting in the write for someone, maybe ourselves, to flush the log.
+	 *
+	 * Even though we just pushed the log above, we did not have the
+	 * superblock buffer locked at that point so it can become pinned in
+	 * between there and here.
+	 */
+	bp = xfs_getsb(mp, 0);
+	if (xfs_buf_ispinned(bp))
+		xfs_log_force(mp, 0);
+
+	return xfs_bwrite(mp, bp);
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete.  Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+	struct xfs_mount	*mp)
+{
+	int			error, error2 = 0;
+
+	xfs_qm_sync(mp, SYNC_TRYLOCK);
+	xfs_qm_sync(mp, SYNC_WAIT);
+
+	/* force out the newly dirtied log buffers */
+	xfs_log_force(mp, XFS_LOG_SYNC);
+
+	/* write superblock and hoover up shutdown errors */
+	error = xfs_sync_fsdata(mp);
+
+	/* make sure all delwri buffers are written out */
+	xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+	/* mark the log as covered if needed */
+	if (xfs_log_need_covered(mp))
+		error2 = xfs_fs_log_dummy(mp);
+
+	/* flush data-only devices */
+	if (mp->m_rtdev_targp)
+		XFS_bflush(mp->m_rtdev_targp);
+
+	return error ? error : error2;
+}
+
+STATIC void
+xfs_quiesce_fs(
+	struct xfs_mount	*mp)
+{
+	int	count = 0, pincount;
+
+	xfs_reclaim_inodes(mp, 0);
+	xfs_flush_buftarg(mp->m_ddev_targp, 0);
+
+	/*
+	 * This loop must run at least twice.  The first instance of the loop
+	 * will flush most meta data but that will generate more meta data
+	 * (typically directory updates).  Which then must be flushed and
+	 * logged before we can write the unmount record. We also so sync
+	 * reclaim of inodes to catch any that the above delwri flush skipped.
+	 */
+	do {
+		xfs_reclaim_inodes(mp, SYNC_WAIT);
+		xfs_sync_attr(mp, SYNC_WAIT);
+		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+		if (!pincount) {
+			delay(50);
+			count++;
+		}
+	} while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
+void
+xfs_quiesce_attr(
+	struct xfs_mount	*mp)
+{
+	int	error = 0;
+
+	/* wait for all modifications to complete */
+	while (atomic_read(&mp->m_active_trans) > 0)
+		delay(100);
+
+	/* flush inodes and push all remaining buffers out to disk */
+	xfs_quiesce_fs(mp);
+
+	/*
+	 * Just warn here till VFS can correctly support
+	 * read-only remount without racing.
+	 */
+	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+	/* Push the superblock and write an unmount record */
+	error = xfs_log_sbcount(mp);
+	if (error)
+		xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
+				"Frozen image may not be consistent.");
+	xfs_log_unmount_write(mp);
+	xfs_unmountfs_writesb(mp);
+}
+
+static void
+xfs_syncd_queue_sync(
+	struct xfs_mount        *mp)
+{
+	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+				msecs_to_jiffies(xfs_syncd_centisecs * 10));
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle and not frozen.
+ */
+STATIC void
+xfs_sync_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_sync_work);
+	int		error;
+
+	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		/* dgc: errors ignored here */
+		if (mp->m_super->s_frozen == SB_UNFROZEN &&
+		    xfs_log_need_covered(mp))
+			error = xfs_fs_log_dummy(mp);
+		else
+			xfs_log_force(mp, 0);
+		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+		/* start pushing all the metadata that is currently dirty */
+		xfs_ail_push_all(mp->m_ail);
+	}
+
+	/* queue us up again */
+	xfs_syncd_queue_sync(mp);
+}
+
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+	struct xfs_mount        *mp)
+{
+
+	/*
+	 * We can have inodes enter reclaim after we've shut down the syncd
+	 * workqueue during unmount, so don't allow reclaim work to be queued
+	 * during unmount.
+	 */
+	if (!(mp->m_super->s_flags & MS_ACTIVE))
+		return;
+
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
+int
+xfs_syncd_init(
+	struct xfs_mount	*mp)
+{
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+	xfs_syncd_queue_sync(mp);
+	xfs_syncd_queue_reclaim(mp);
+
+	return 0;
+}
+
+void
+xfs_syncd_stop(
+	struct xfs_mount	*mp)
+{
+	cancel_delayed_work_sync(&mp->m_sync_work);
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	cancel_work_sync(&mp->m_flush_work);
+}
+
+void
+__xfs_inode_set_reclaim_tag(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*ip)
+{
+	radix_tree_tag_set(&pag->pag_ici_root,
+			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+
+	if (!pag->pag_ici_reclaimable) {
+		/* propagate the reclaim tag up into the perag radix tree */
+		spin_lock(&ip->i_mount->m_perag_lock);
+		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+				XFS_ICI_RECLAIM_TAG);
+		spin_unlock(&ip->i_mount->m_perag_lock);
+
+		/* schedule periodic background inode reclaim */
+		xfs_syncd_queue_reclaim(ip->i_mount);
+
+		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+							-1, _RET_IP_);
+	}
+	pag->pag_ici_reclaimable++;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+	xfs_inode_t	*ip)
+{
+	struct xfs_mount *mp = ip->i_mount;
+	struct xfs_perag *pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+	__xfs_inode_set_reclaim_tag(pag, ip);
+	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+STATIC void
+__xfs_inode_clear_reclaim(
+	xfs_perag_t	*pag,
+	xfs_inode_t	*ip)
+{
+	pag->pag_ici_reclaimable--;
+	if (!pag->pag_ici_reclaimable) {
+		/* clear the reclaim tag from the perag radix tree */
+		spin_lock(&ip->i_mount->m_perag_lock);
+		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+				XFS_ICI_RECLAIM_TAG);
+		spin_unlock(&ip->i_mount->m_perag_lock);
+		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+							-1, _RET_IP_);
+	}
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+	xfs_mount_t	*mp,
+	xfs_perag_t	*pag,
+	xfs_inode_t	*ip)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	__xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	ASSERT(rcu_read_lock_held());
+
+	/* quick check for stale RCU freed inode */
+	if (!ip->i_ino)
+		return 1;
+
+	/*
+	 * do some unlocked checks first to avoid unnecessary lock traffic.
+	 * The first is a flush lock check, the second is a already in reclaim
+	 * check. Only do these checks if we are not going to block on locks.
+	 */
+	if ((flags & SYNC_TRYLOCK) &&
+	    (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+		return 1;
+	}
+
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode.  Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 *
+	 * Due to RCU lookup, we may find inodes that have been freed and only
+	 * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
+	 * aren't candidates for reclaim at all, so we must check the
+	 * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+	    __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* not a reclaim candidate. */
+		spin_unlock(&ip->i_flags_lock);
+		return 1;
+	}
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	return 0;
+}
+
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *
+ *	inode state	     iflush ret		required action
+ *      ---------------      ----------         ---------------
+ *	bad			-		reclaim
+ *	shutdown		EIO		unpin and reclaim
+ *	clean, unpinned		0		reclaim
+ *	stale, unpinned		0		reclaim
+ *	clean, pinned(*)	0		requeue
+ *	stale, pinned		EAGAIN		requeue
+ *	dirty, delwri ok	0		requeue
+ *	dirty, delwri blocked	EAGAIN		requeue
+ *	dirty, sync flush	0		reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background relaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *	bad		=> reclaim
+ *	shutdown	=> unpin and reclaim
+ *	pinned, delwri	=> requeue
+ *	pinned, sync	=> unpin
+ *	stale		=> reclaim
+ *	clean		=> reclaim
+ *	dirty, delwri	=> flush and requeue
+ *	dirty, sync	=> flush, wait and reclaim
+ */
+STATIC int
+xfs_reclaim_inode(
+	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
+	int			sync_mode)
+{
+	int	error;
+
+restart:
+	error = 0;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(sync_mode & SYNC_WAIT))
+			goto out;
+		xfs_iflock(ip);
+	}
+
+	if (is_bad_inode(VFS_I(ip)))
+		goto reclaim;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_iunpin_wait(ip);
+		goto reclaim;
+	}
+	if (xfs_ipincount(ip)) {
+		if (!(sync_mode & SYNC_WAIT)) {
+			xfs_ifunlock(ip);
+			goto out;
+		}
+		xfs_iunpin_wait(ip);
+	}
+	if (xfs_iflags_test(ip, XFS_ISTALE))
+		goto reclaim;
+	if (xfs_inode_clean(ip))
+		goto reclaim;
+
+	/*
+	 * Now we have an inode that needs flushing.
+	 *
+	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+	 * reclaim as we can deadlock with inode cluster removal.
+	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
+	 * ip->i_lock, and we are doing the exact opposite here. As a result,
+	 * doing a blocking xfs_itobp() to get the cluster buffer will result
+	 * in an ABBA deadlock with xfs_ifree_cluster().
+	 *
+	 * As xfs_ifree_cluser() must gather all inodes that are active in the
+	 * cache to mark them stale, if we hit this case we don't actually want
+	 * to do IO here - we want the inode marked stale so we can simply
+	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+	 * just unlock the inode, back off and try again. Hopefully the next
+	 * pass through will see the stale flag set on the inode.
+	 */
+	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
+	if (sync_mode & SYNC_WAIT) {
+		if (error == EAGAIN) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			/* backoff longer than in xfs_ifree_cluster */
+			delay(2);
+			goto restart;
+		}
+		xfs_iflock(ip);
+		goto reclaim;
+	}
+
+	/*
+	 * When we have to flush an inode but don't have SYNC_WAIT set, we
+	 * flush the inode out using a delwri buffer and wait for the next
+	 * call into reclaim to find it in a clean state instead of waiting for
+	 * it now. We also don't return errors here - if the error is transient
+	 * then the next reclaim pass will flush the inode, and if the error
+	 * is permanent then the next sync reclaim will reclaim the inode and
+	 * pass on the error.
+	 */
+	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_warn(ip->i_mount,
+			"inode 0x%llx background reclaim flush failed with %d",
+			(long long)ip->i_ino, error);
+	}
+out:
+	xfs_iflags_clear(ip, XFS_IRECLAIM);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	/*
+	 * We could return EAGAIN here to make reclaim rescan the inode tree in
+	 * a short while. However, this just burns CPU time scanning the tree
+	 * waiting for IO to complete and xfssyncd never goes back to the idle
+	 * state. Instead, return 0 to let the next scheduled background reclaim
+	 * attempt to reclaim the inode again.
+	 */
+	return 0;
+
+reclaim:
+	xfs_ifunlock(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	XFS_STATS_INC(xs_ig_reclaims);
+	/*
+	 * Remove the inode from the per-AG radix tree.
+	 *
+	 * Because radix_tree_delete won't complain even if the item was never
+	 * added to the tree assert that it's been there before to catch
+	 * problems with the inode life time early on.
+	 */
+	spin_lock(&pag->pag_ici_lock);
+	if (!radix_tree_delete(&pag->pag_ici_root,
+				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+		ASSERT(0);
+	__xfs_inode_clear_reclaim(pag, ip);
+	spin_unlock(&pag->pag_ici_lock);
+
+	/*
+	 * Here we do an (almost) spurious inode lock in order to coordinate
+	 * with inode cache radix tree lookups.  This is because the lookup
+	 * can reference the inodes in the cache without taking references.
+	 *
+	 * We make that OK here by ensuring that we wait until the inode is
+	 * unlocked after the lookup before we go ahead and free it.  We get
+	 * both the ilock and the iolock because the code may need to drop the
+	 * ilock one but will still hold the iolock.
+	 */
+	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_qm_dqdetach(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+	xfs_inode_free(ip);
+	return error;
+
+}
+
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+	struct xfs_mount	*mp,
+	int			flags,
+	int			*nr_to_scan)
+{
+	struct xfs_perag	*pag;
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+	int			trylock = flags & SYNC_TRYLOCK;
+	int			skipped;
+
+restart:
+	ag = 0;
+	skipped = 0;
+	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+		unsigned long	first_index = 0;
+		int		done = 0;
+		int		nr_found = 0;
+
+		ag = pag->pag_agno + 1;
+
+		if (trylock) {
+			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+				skipped++;
+				xfs_perag_put(pag);
+				continue;
+			}
+			first_index = pag->pag_ici_reclaim_cursor;
+		} else
+			mutex_lock(&pag->pag_ici_reclaim_lock);
+
+		do {
+			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+			int	i;
+
+			rcu_read_lock();
+			nr_found = radix_tree_gang_lookup_tag(
+					&pag->pag_ici_root,
+					(void **)batch, first_index,
+					XFS_LOOKUP_BATCH,
+					XFS_ICI_RECLAIM_TAG);
+			if (!nr_found) {
+				done = 1;
+				rcu_read_unlock();
+				break;
+			}
+
+			/*
+			 * Grab the inodes before we drop the lock. if we found
+			 * nothing, nr == 0 and the loop will be skipped.
+			 */
+			for (i = 0; i < nr_found; i++) {
+				struct xfs_inode *ip = batch[i];
+
+				if (done || xfs_reclaim_inode_grab(ip, flags))
+					batch[i] = NULL;
+
+				/*
+				 * Update the index for the next lookup. Catch
+				 * overflows into the next AG range which can
+				 * occur if we have inodes in the last block of
+				 * the AG and we are currently pointing to the
+				 * last inode.
+				 *
+				 * Because we may see inodes that are from the
+				 * wrong AG due to RCU freeing and
+				 * reallocation, only update the index if it
+				 * lies in this AG. It was a race that lead us
+				 * to see this inode, so another lookup from
+				 * the same index will not find it again.
+				 */
+				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+								pag->pag_agno)
+					continue;
+				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+					done = 1;
+			}
+
+			/* unlock now we've grabbed the inodes. */
+			rcu_read_unlock();
+
+			for (i = 0; i < nr_found; i++) {
+				if (!batch[i])
+					continue;
+				error = xfs_reclaim_inode(batch[i], pag, flags);
+				if (error && last_error != EFSCORRUPTED)
+					last_error = error;
+			}
+
+			*nr_to_scan -= XFS_LOOKUP_BATCH;
+
+			cond_resched();
+
+		} while (nr_found && !done && *nr_to_scan > 0);
+
+		if (trylock && !done)
+			pag->pag_ici_reclaim_cursor = first_index;
+		else
+			pag->pag_ici_reclaim_cursor = 0;
+		mutex_unlock(&pag->pag_ici_reclaim_lock);
+		xfs_perag_put(pag);
+	}
+
+	/*
+	 * if we skipped any AG, and we still have scan count remaining, do
+	 * another pass this time using blocking reclaim semantics (i.e
+	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
+	 * ensure that when we get more reclaimers than AGs we block rather
+	 * than spin trying to execute reclaim.
+	 */
+	if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
+		trylock = 0;
+		goto restart;
+	}
+	return XFS_ERROR(last_error);
+}
+
+int
+xfs_reclaim_inodes(
+	xfs_mount_t	*mp,
+	int		mode)
+{
+	int		nr_to_scan = INT_MAX;
+
+	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+}
+
+/*
+ * Scan a certain number of inodes for reclaim.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
+ */
+void
+xfs_reclaim_inodes_nr(
+	struct xfs_mount	*mp,
+	int			nr_to_scan)
+{
+	/* kick background reclaimer and push the AIL */
+	xfs_syncd_queue_reclaim(mp);
+	xfs_ail_push_all(mp->m_ail);
+
+	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
+
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+	struct xfs_mount	*mp)
+{
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		ag = 0;
+	int			reclaimable = 0;
+
+	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+		ag = pag->pag_agno + 1;
+		reclaimable += pag->pag_ici_reclaimable;
+		xfs_perag_put(pag);
+	}
+	return reclaimable;
+}
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
new file mode 100644
index 0000000..941202e
--- /dev/null
+++ b/fs/xfs/xfs_sync.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+struct xfs_perag;
+
+#define SYNC_WAIT		0x0001	/* wait for i/o to complete */
+#define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
+
+extern struct workqueue_struct	*xfs_syncd_wq;	/* sync workqueue */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inodes(struct xfs_inode *ip);
+
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+				struct xfs_inode *ip);
+
+int xfs_sync_inode_grab(struct xfs_inode *ip);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+	int flags);
+
+#endif
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
new file mode 100644
index 0000000..ee2d2ad
--- /dev/null
+++ b/fs/xfs/xfs_sysctl.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include "xfs_error.h"
+
+static struct ctl_table_header *xfs_table_header;
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	void		__user *buffer,
+	size_t		*lenp,
+	loff_t		*ppos)
+{
+	int		c, ret, *valp = ctl->data;
+	__uint32_t	vn_active;
+
+	ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+
+	if (!ret && write && *valp) {
+		xfs_notice(NULL, "Clearing xfsstats");
+		for_each_possible_cpu(c) {
+			preempt_disable();
+			/* save vn_active, it's a universal truth! */
+			vn_active = per_cpu(xfsstats, c).vn_active;
+			memset(&per_cpu(xfsstats, c), 0,
+			       sizeof(struct xfsstats));
+			per_cpu(xfsstats, c).vn_active = vn_active;
+			preempt_enable();
+		}
+		xfs_stats_clear = 0;
+	}
+
+	return ret;
+}
+
+STATIC int
+xfs_panic_mask_proc_handler(
+	ctl_table	*ctl,
+	int		write,
+	void		__user *buffer,
+	size_t		*lenp,
+	loff_t		*ppos)
+{
+	int		ret, *valp = ctl->data;
+
+	ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+	if (!ret && write) {
+		xfs_panic_mask = *valp;
+#ifdef DEBUG
+		xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
+#endif
+	}
+	return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+static ctl_table xfs_table[] = {
+	{
+		.procname	= "irix_sgid_inherit",
+		.data		= &xfs_params.sgid_inherit.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.sgid_inherit.min,
+		.extra2		= &xfs_params.sgid_inherit.max
+	},
+	{
+		.procname	= "irix_symlink_mode",
+		.data		= &xfs_params.symlink_mode.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.symlink_mode.min,
+		.extra2		= &xfs_params.symlink_mode.max
+	},
+	{
+		.procname	= "panic_mask",
+		.data		= &xfs_params.panic_mask.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= xfs_panic_mask_proc_handler,
+		.extra1		= &xfs_params.panic_mask.min,
+		.extra2		= &xfs_params.panic_mask.max
+	},
+
+	{
+		.procname	= "error_level",
+		.data		= &xfs_params.error_level.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.error_level.min,
+		.extra2		= &xfs_params.error_level.max
+	},
+	{
+		.procname	= "xfssyncd_centisecs",
+		.data		= &xfs_params.syncd_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.syncd_timer.min,
+		.extra2		= &xfs_params.syncd_timer.max
+	},
+	{
+		.procname	= "inherit_sync",
+		.data		= &xfs_params.inherit_sync.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.inherit_sync.min,
+		.extra2		= &xfs_params.inherit_sync.max
+	},
+	{
+		.procname	= "inherit_nodump",
+		.data		= &xfs_params.inherit_nodump.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.inherit_nodump.min,
+		.extra2		= &xfs_params.inherit_nodump.max
+	},
+	{
+		.procname	= "inherit_noatime",
+		.data		= &xfs_params.inherit_noatim.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.inherit_noatim.min,
+		.extra2		= &xfs_params.inherit_noatim.max
+	},
+	{
+		.procname	= "xfsbufd_centisecs",
+		.data		= &xfs_params.xfs_buf_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.xfs_buf_timer.min,
+		.extra2		= &xfs_params.xfs_buf_timer.max
+	},
+	{
+		.procname	= "age_buffer_centisecs",
+		.data		= &xfs_params.xfs_buf_age.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.xfs_buf_age.min,
+		.extra2		= &xfs_params.xfs_buf_age.max
+	},
+	{
+		.procname	= "inherit_nosymlinks",
+		.data		= &xfs_params.inherit_nosym.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.inherit_nosym.min,
+		.extra2		= &xfs_params.inherit_nosym.max
+	},
+	{
+		.procname	= "rotorstep",
+		.data		= &xfs_params.rotorstep.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.rotorstep.min,
+		.extra2		= &xfs_params.rotorstep.max
+	},
+	{
+		.procname	= "inherit_nodefrag",
+		.data		= &xfs_params.inherit_nodfrg.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.inherit_nodfrg.min,
+		.extra2		= &xfs_params.inherit_nodfrg.max
+	},
+	{
+		.procname	= "filestream_centisecs",
+		.data		= &xfs_params.fstrm_timer.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &xfs_params.fstrm_timer.min,
+		.extra2		= &xfs_params.fstrm_timer.max,
+	},
+	/* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+	{
+		.procname	= "stats_clear",
+		.data		= &xfs_params.stats_clear.val,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= xfs_stats_clear_proc_handler,
+		.extra1		= &xfs_params.stats_clear.min,
+		.extra2		= &xfs_params.stats_clear.max
+	},
+#endif /* CONFIG_PROC_FS */
+
+	{}
+};
+
+static ctl_table xfs_dir_table[] = {
+	{
+		.procname	= "xfs",
+		.mode		= 0555,
+		.child		= xfs_table
+	},
+	{}
+};
+
+static ctl_table xfs_root_table[] = {
+	{
+		.procname	= "fs",
+		.mode		= 0555,
+		.child		= xfs_dir_table
+	},
+	{}
+};
+
+int
+xfs_sysctl_register(void)
+{
+	xfs_table_header = register_sysctl_table(xfs_root_table);
+	if (!xfs_table_header)
+		return -ENOMEM;
+	return 0;
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+	unregister_sysctl_table(xfs_table_header);
+}
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
new file mode 100644
index 0000000..b9937d4
--- /dev/null
+++ b/fs/xfs/xfs_sysctl.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+	int min;
+	int val;
+	int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+	xfs_sysctl_val_t sgid_inherit;	/* Inherit S_ISGID if process' GID is
+					 * not a member of parent dir GID. */
+	xfs_sysctl_val_t symlink_mode;	/* Link creat mode affected by umask */
+	xfs_sysctl_val_t panic_mask;	/* bitmask to cause panic on errors. */
+	xfs_sysctl_val_t error_level;	/* Degree of reporting for problems  */
+	xfs_sysctl_val_t syncd_timer;	/* Interval between xfssyncd wakeups */
+	xfs_sysctl_val_t stats_clear;	/* Reset all XFS statistics to zero. */
+	xfs_sysctl_val_t inherit_sync;	/* Inherit the "sync" inode flag. */
+	xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+	xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+	xfs_sysctl_val_t xfs_buf_timer;	/* Interval between xfsbufd wakeups. */
+	xfs_sysctl_val_t xfs_buf_age;	/* Metadata buffer age before flush. */
+	xfs_sysctl_val_t inherit_nosym;	/* Inherit the "nosymlinks" flag. */
+	xfs_sysctl_val_t rotorstep;	/* inode32 AG rotoring control knob */
+	xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+	xfs_sysctl_val_t fstrm_timer;	/* Filestream dir-AG assoc'n timeout. */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered.  These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0	No error reports
+ * 1	Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5	Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *	additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+	/* XFS_REFCACHE_SIZE = 1 */
+	/* XFS_REFCACHE_PURGE = 2 */
+	/* XFS_RESTRICT_CHOWN = 3 */
+	XFS_SGID_INHERIT = 4,
+	XFS_SYMLINK_MODE = 5,
+	XFS_PANIC_MASK = 6,
+	XFS_ERRLEVEL = 7,
+	XFS_SYNCD_TIMER = 8,
+	/* XFS_PROBE_DMAPI = 9 */
+	/* XFS_PROBE_IOOPS = 10 */
+	/* XFS_PROBE_QUOTA = 11 */
+	XFS_STATS_CLEAR = 12,
+	XFS_INHERIT_SYNC = 13,
+	XFS_INHERIT_NODUMP = 14,
+	XFS_INHERIT_NOATIME = 15,
+	XFS_BUF_TIMER = 16,
+	XFS_BUF_AGE = 17,
+	/* XFS_IO_BYPASS = 18 */
+	XFS_INHERIT_NOSYM = 19,
+	XFS_ROTORSTEP = 20,
+	XFS_INHERIT_NODFRG = 21,
+	XFS_FILESTREAM_TIMER = 22,
+};
+
+extern xfs_param_t	xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern int xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()		(0)
+# define xfs_sysctl_unregister()	do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
new file mode 100644
index 0000000..9010ce8
--- /dev/null
+++ b/fs/xfs/xfs_trace.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_mount.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_quota.h"
+#include "xfs_iomap.h"
+#include "xfs_aops.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_inode_item.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
new file mode 100644
index 0000000..690fc7a
--- /dev/null
+++ b/fs/xfs/xfs_trace.h
@@ -0,0 +1,1746 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xfs
+
+#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XFS_H
+
+#include <linux/tracepoint.h>
+
+struct xfs_agf;
+struct xfs_alloc_arg;
+struct xfs_attr_list_context;
+struct xfs_buf_log_item;
+struct xfs_da_args;
+struct xfs_da_node_entry;
+struct xfs_dquot;
+struct xlog_ticket;
+struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
+
+DECLARE_EVENT_CLASS(xfs_attr_list_class,
+	TP_PROTO(struct xfs_attr_list_context *ctx),
+	TP_ARGS(ctx),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(u32, hashval)
+		__field(u32, blkno)
+		__field(u32, offset)
+		__field(void *, alist)
+		__field(int, bufsize)
+		__field(int, count)
+		__field(int, firstu)
+		__field(int, dupcnt)
+		__field(int, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+		__entry->ino = ctx->dp->i_ino;
+		__entry->hashval = ctx->cursor->hashval;
+		__entry->blkno = ctx->cursor->blkno;
+		__entry->offset = ctx->cursor->offset;
+		__entry->alist = ctx->alist;
+		__entry->bufsize = ctx->bufsize;
+		__entry->count = ctx->count;
+		__entry->firstu = ctx->firstu;
+		__entry->flags = ctx->flags;
+	),
+	TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+		  "alist 0x%p size %u count %u firstu %u flags %d %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		   __entry->ino,
+		   __entry->hashval,
+		   __entry->blkno,
+		   __entry->offset,
+		   __entry->dupcnt,
+		   __entry->alist,
+		   __entry->bufsize,
+		   __entry->count,
+		   __entry->firstu,
+		   __entry->flags,
+		   __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
+	)
+)
+
+#define DEFINE_ATTR_LIST_EVENT(name) \
+DEFINE_EVENT(xfs_attr_list_class, name, \
+	TP_PROTO(struct xfs_attr_list_context *ctx), \
+	TP_ARGS(ctx))
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+
+DECLARE_EVENT_CLASS(xfs_perag_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+		 unsigned long caller_ip),
+	TP_ARGS(mp, agno, refcount, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(int, refcount)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->refcount = refcount;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->refcount,
+		  (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name)	\
+DEFINE_EVENT(xfs_perag_class, name,	\
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,	\
+		 unsigned long caller_ip),					\
+	TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+
+TRACE_EVENT(xfs_attr_list_node_descend,
+	TP_PROTO(struct xfs_attr_list_context *ctx,
+		 struct xfs_da_node_entry *btree),
+	TP_ARGS(ctx, btree),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(u32, hashval)
+		__field(u32, blkno)
+		__field(u32, offset)
+		__field(void *, alist)
+		__field(int, bufsize)
+		__field(int, count)
+		__field(int, firstu)
+		__field(int, dupcnt)
+		__field(int, flags)
+		__field(u32, bt_hashval)
+		__field(u32, bt_before)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+		__entry->ino = ctx->dp->i_ino;
+		__entry->hashval = ctx->cursor->hashval;
+		__entry->blkno = ctx->cursor->blkno;
+		__entry->offset = ctx->cursor->offset;
+		__entry->alist = ctx->alist;
+		__entry->bufsize = ctx->bufsize;
+		__entry->count = ctx->count;
+		__entry->firstu = ctx->firstu;
+		__entry->flags = ctx->flags;
+		__entry->bt_hashval = be32_to_cpu(btree->hashval);
+		__entry->bt_before = be32_to_cpu(btree->before);
+	),
+	TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+		  "alist 0x%p size %u count %u firstu %u flags %d %s "
+		  "node hashval %u, node before %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		   __entry->ino,
+		   __entry->hashval,
+		   __entry->blkno,
+		   __entry->offset,
+		   __entry->dupcnt,
+		   __entry->alist,
+		   __entry->bufsize,
+		   __entry->count,
+		   __entry->firstu,
+		   __entry->flags,
+		   __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
+		   __entry->bt_hashval,
+		   __entry->bt_before)
+);
+
+TRACE_EVENT(xfs_iext_insert,
+	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
+		 struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
+	TP_ARGS(ip, idx, r, state, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_extnum_t, idx)
+		__field(xfs_fileoff_t, startoff)
+		__field(xfs_fsblock_t, startblock)
+		__field(xfs_filblks_t, blockcount)
+		__field(xfs_exntst_t, state)
+		__field(int, bmap_state)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->idx = idx;
+		__entry->startoff = r->br_startoff;
+		__entry->startblock = r->br_startblock;
+		__entry->blockcount = r->br_blockcount;
+		__entry->state = r->br_state;
+		__entry->bmap_state = state;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+		  "offset %lld block %lld count %lld flag %d caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+		  (long)__entry->idx,
+		  __entry->startoff,
+		  (__int64_t)__entry->startblock,
+		  __entry->blockcount,
+		  __entry->state,
+		  (char *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_bmap_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+		 unsigned long caller_ip),
+	TP_ARGS(ip, idx, state, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_extnum_t, idx)
+		__field(xfs_fileoff_t, startoff)
+		__field(xfs_fsblock_t, startblock)
+		__field(xfs_filblks_t, blockcount)
+		__field(xfs_exntst_t, state)
+		__field(int, bmap_state)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		struct xfs_ifork	*ifp = (state & BMAP_ATTRFORK) ?
+						ip->i_afp : &ip->i_df;
+		struct xfs_bmbt_irec	r;
+
+		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->idx = idx;
+		__entry->startoff = r.br_startoff;
+		__entry->startblock = r.br_startblock;
+		__entry->blockcount = r.br_blockcount;
+		__entry->state = r.br_state;
+		__entry->bmap_state = state;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+		  "offset %lld block %lld count %lld flag %d caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+		  (long)__entry->idx,
+		  __entry->startoff,
+		  (__int64_t)__entry->startblock,
+		  __entry->blockcount,
+		  __entry->state,
+		  (char *)__entry->caller_ip)
+)
+
+#define DEFINE_BMAP_EVENT(name) \
+DEFINE_EVENT(xfs_bmap_class, name, \
+	TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+		 unsigned long caller_ip), \
+	TP_ARGS(ip, idx, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_remove);
+DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
+DEFINE_BMAP_EVENT(xfs_bmap_post_update);
+DEFINE_BMAP_EVENT(xfs_extlist);
+
+DECLARE_EVENT_CLASS(xfs_buf_class,
+	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+	TP_ARGS(bp, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_daddr_t, bno)
+		__field(size_t, buffer_length)
+		__field(int, hold)
+		__field(int, pincount)
+		__field(unsigned, lockval)
+		__field(unsigned, flags)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = bp->b_target->bt_dev;
+		__entry->bno = bp->b_bn;
+		__entry->buffer_length = bp->b_buffer_length;
+		__entry->hold = atomic_read(&bp->b_hold);
+		__entry->pincount = atomic_read(&bp->b_pin_count);
+		__entry->lockval = bp->b_sema.count;
+		__entry->flags = bp->b_flags;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+		  "lock %d flags %s caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->bno,
+		  __entry->buffer_length,
+		  __entry->hold,
+		  __entry->pincount,
+		  __entry->lockval,
+		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+		  (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_EVENT(name) \
+DEFINE_EVENT(xfs_buf_class, name, \
+	TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
+	TP_ARGS(bp, caller_ip))
+DEFINE_BUF_EVENT(xfs_buf_init);
+DEFINE_BUF_EVENT(xfs_buf_free);
+DEFINE_BUF_EVENT(xfs_buf_hold);
+DEFINE_BUF_EVENT(xfs_buf_rele);
+DEFINE_BUF_EVENT(xfs_buf_iodone);
+DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_bawrite);
+DEFINE_BUF_EVENT(xfs_buf_bdwrite);
+DEFINE_BUF_EVENT(xfs_buf_lock);
+DEFINE_BUF_EVENT(xfs_buf_lock_done);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
+DEFINE_BUF_EVENT(xfs_buf_unlock);
+DEFINE_BUF_EVENT(xfs_buf_iowait);
+DEFINE_BUF_EVENT(xfs_buf_iowait_done);
+DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
+DEFINE_BUF_EVENT(xfs_bdstrat_shut);
+DEFINE_BUF_EVENT(xfs_buf_item_relse);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
+
+/* not really buffer traces, but the buf provides useful information */
+DEFINE_BUF_EVENT(xfs_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_reset_dqcounts);
+DEFINE_BUF_EVENT(xfs_inode_item_push);
+
+/* pass flags explicitly */
+DECLARE_EVENT_CLASS(xfs_buf_flags_class,
+	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
+	TP_ARGS(bp, flags, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_daddr_t, bno)
+		__field(size_t, buffer_length)
+		__field(int, hold)
+		__field(int, pincount)
+		__field(unsigned, lockval)
+		__field(unsigned, flags)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = bp->b_target->bt_dev;
+		__entry->bno = bp->b_bn;
+		__entry->buffer_length = bp->b_buffer_length;
+		__entry->flags = flags;
+		__entry->hold = atomic_read(&bp->b_hold);
+		__entry->pincount = atomic_read(&bp->b_pin_count);
+		__entry->lockval = bp->b_sema.count;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+		  "lock %d flags %s caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->bno,
+		  __entry->buffer_length,
+		  __entry->hold,
+		  __entry->pincount,
+		  __entry->lockval,
+		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+		  (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_FLAGS_EVENT(name) \
+DEFINE_EVENT(xfs_buf_flags_class, name, \
+	TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
+	TP_ARGS(bp, flags, caller_ip))
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
+
+TRACE_EVENT(xfs_buf_ioerror,
+	TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+	TP_ARGS(bp, error, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_daddr_t, bno)
+		__field(size_t, buffer_length)
+		__field(unsigned, flags)
+		__field(int, hold)
+		__field(int, pincount)
+		__field(unsigned, lockval)
+		__field(int, error)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = bp->b_target->bt_dev;
+		__entry->bno = bp->b_bn;
+		__entry->buffer_length = bp->b_buffer_length;
+		__entry->hold = atomic_read(&bp->b_hold);
+		__entry->pincount = atomic_read(&bp->b_pin_count);
+		__entry->lockval = bp->b_sema.count;
+		__entry->error = error;
+		__entry->flags = bp->b_flags;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+		  "lock %d error %d flags %s caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->bno,
+		  __entry->buffer_length,
+		  __entry->hold,
+		  __entry->pincount,
+		  __entry->lockval,
+		  __entry->error,
+		  __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+		  (void *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_buf_item_class,
+	TP_PROTO(struct xfs_buf_log_item *bip),
+	TP_ARGS(bip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_daddr_t, buf_bno)
+		__field(size_t, buf_len)
+		__field(int, buf_hold)
+		__field(int, buf_pincount)
+		__field(int, buf_lockval)
+		__field(unsigned, buf_flags)
+		__field(unsigned, bli_recur)
+		__field(int, bli_refcount)
+		__field(unsigned, bli_flags)
+		__field(void *, li_desc)
+		__field(unsigned, li_flags)
+	),
+	TP_fast_assign(
+		__entry->dev = bip->bli_buf->b_target->bt_dev;
+		__entry->bli_flags = bip->bli_flags;
+		__entry->bli_recur = bip->bli_recur;
+		__entry->bli_refcount = atomic_read(&bip->bli_refcount);
+		__entry->buf_bno = bip->bli_buf->b_bn;
+		__entry->buf_len = bip->bli_buf->b_buffer_length;
+		__entry->buf_flags = bip->bli_buf->b_flags;
+		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
+		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
+		__entry->buf_lockval = bip->bli_buf->b_sema.count;
+		__entry->li_desc = bip->bli_item.li_desc;
+		__entry->li_flags = bip->bli_item.li_flags;
+	),
+	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+		  "lock %d flags %s recur %d refcount %d bliflags %s "
+		  "lidesc 0x%p liflags %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long long)__entry->buf_bno,
+		  __entry->buf_len,
+		  __entry->buf_hold,
+		  __entry->buf_pincount,
+		  __entry->buf_lockval,
+		  __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
+		  __entry->bli_recur,
+		  __entry->bli_refcount,
+		  __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
+		  __entry->li_desc,
+		  __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_BUF_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_buf_item_class, name, \
+	TP_PROTO(struct xfs_buf_log_item *bip), \
+	TP_ARGS(bip))
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
+
+DECLARE_EVENT_CLASS(xfs_lock_class,
+	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
+		 unsigned long caller_ip),
+	TP_ARGS(ip,  lock_flags, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, lock_flags)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->lock_flags = lock_flags;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
+		  (void *)__entry->caller_ip)
+)
+
+#define DEFINE_LOCK_EVENT(name) \
+DEFINE_EVENT(xfs_lock_class, name, \
+	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
+		 unsigned long caller_ip), \
+	TP_ARGS(ip,  lock_flags, caller_ip))
+DEFINE_LOCK_EVENT(xfs_ilock);
+DEFINE_LOCK_EVENT(xfs_ilock_nowait);
+DEFINE_LOCK_EVENT(xfs_ilock_demote);
+DEFINE_LOCK_EVENT(xfs_iunlock);
+
+DECLARE_EVENT_CLASS(xfs_inode_class,
+	TP_PROTO(struct xfs_inode *ip),
+	TP_ARGS(ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+	),
+	TP_printk("dev %d:%d ino 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino)
+)
+
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
+	TP_PROTO(struct xfs_inode *ip), \
+	TP_ARGS(ip))
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_get_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
+	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
+	TP_ARGS(ip, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, count)
+		__field(int, pincount)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->count = atomic_read(&VFS_I(ip)->i_count);
+		__entry->pincount = atomic_read(&ip->i_pincount);
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->count,
+		  __entry->pincount,
+		  (char *)__entry->caller_ip)
+)
+
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
+	TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
+	TP_ARGS(ip, caller_ip))
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+	TP_ARGS(dp, name),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, dp_ino)
+		__dynamic_array(char, name, name->len)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(dp)->i_sb->s_dev;
+		__entry->dp_ino = dp->i_ino;
+		memcpy(__get_str(name), name->name, name->len);
+	),
+	TP_printk("dev %d:%d dp ino 0x%llx name %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->dp_ino,
+		  __get_str(name))
+)
+
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+	TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+	TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+	TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+		 struct xfs_name *src_name, struct xfs_name *target_name),
+	TP_ARGS(src_dp, target_dp, src_name, target_name),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, src_dp_ino)
+		__field(xfs_ino_t, target_dp_ino)
+		__dynamic_array(char, src_name, src_name->len)
+		__dynamic_array(char, target_name, target_name->len)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+		__entry->src_dp_ino = src_dp->i_ino;
+		__entry->target_dp_ino = target_dp->i_ino;
+		memcpy(__get_str(src_name), src_name->name, src_name->len);
+		memcpy(__get_str(target_name), target_name->name, target_name->len);
+	),
+	TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+		  " src name %s target name %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->src_dp_ino,
+		  __entry->target_dp_ino,
+		  __get_str(src_name),
+		  __get_str(target_name))
+)
+
+DECLARE_EVENT_CLASS(xfs_dquot_class,
+	TP_PROTO(struct xfs_dquot *dqp),
+	TP_ARGS(dqp),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(u32, id)
+		__field(unsigned, flags)
+		__field(unsigned, nrefs)
+		__field(unsigned long long, res_bcount)
+		__field(unsigned long long, bcount)
+		__field(unsigned long long, icount)
+		__field(unsigned long long, blk_hardlimit)
+		__field(unsigned long long, blk_softlimit)
+		__field(unsigned long long, ino_hardlimit)
+		__field(unsigned long long, ino_softlimit)
+	), \
+	TP_fast_assign(
+		__entry->dev = dqp->q_mount->m_super->s_dev;
+		__entry->id = be32_to_cpu(dqp->q_core.d_id);
+		__entry->flags = dqp->dq_flags;
+		__entry->nrefs = dqp->q_nrefs;
+		__entry->res_bcount = dqp->q_res_bcount;
+		__entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
+		__entry->icount = be64_to_cpu(dqp->q_core.d_icount);
+		__entry->blk_hardlimit =
+			be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+		__entry->blk_softlimit =
+			be64_to_cpu(dqp->q_core.d_blk_softlimit);
+		__entry->ino_hardlimit =
+			be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+		__entry->ino_softlimit =
+			be64_to_cpu(dqp->q_core.d_ino_softlimit);
+	),
+	TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
+		  "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
+		  "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->id,
+		  __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
+		  __entry->nrefs,
+		  __entry->res_bcount,
+		  __entry->bcount,
+		  __entry->blk_hardlimit,
+		  __entry->blk_softlimit,
+		  __entry->icount,
+		  __entry->ino_hardlimit,
+		  __entry->ino_softlimit)
+)
+
+#define DEFINE_DQUOT_EVENT(name) \
+DEFINE_EVENT(xfs_dquot_class, name, \
+	TP_PROTO(struct xfs_dquot *dqp), \
+	TP_ARGS(dqp))
+DEFINE_DQUOT_EVENT(xfs_dqadjust);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqattach_found);
+DEFINE_DQUOT_EVENT(xfs_dqattach_get);
+DEFINE_DQUOT_EVENT(xfs_dqinit);
+DEFINE_DQUOT_EVENT(xfs_dqreuse);
+DEFINE_DQUOT_EVENT(xfs_dqalloc);
+DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
+DEFINE_DQUOT_EVENT(xfs_dqread);
+DEFINE_DQUOT_EVENT(xfs_dqread_fail);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
+DEFINE_DQUOT_EVENT(xfs_dqget_hit);
+DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqput);
+DEFINE_DQUOT_EVENT(xfs_dqput_wait);
+DEFINE_DQUOT_EVENT(xfs_dqput_free);
+DEFINE_DQUOT_EVENT(xfs_dqrele);
+DEFINE_DQUOT_EVENT(xfs_dqflush);
+DEFINE_DQUOT_EVENT(xfs_dqflush_force);
+DEFINE_DQUOT_EVENT(xfs_dqflush_done);
+
+DECLARE_EVENT_CLASS(xfs_loggrant_class,
+	TP_PROTO(struct log *log, struct xlog_ticket *tic),
+	TP_ARGS(log, tic),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned, trans_type)
+		__field(char, ocnt)
+		__field(char, cnt)
+		__field(int, curr_res)
+		__field(int, unit_res)
+		__field(unsigned int, flags)
+		__field(int, reserveq)
+		__field(int, writeq)
+		__field(int, grant_reserve_cycle)
+		__field(int, grant_reserve_bytes)
+		__field(int, grant_write_cycle)
+		__field(int, grant_write_bytes)
+		__field(int, curr_cycle)
+		__field(int, curr_block)
+		__field(xfs_lsn_t, tail_lsn)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->trans_type = tic->t_trans_type;
+		__entry->ocnt = tic->t_ocnt;
+		__entry->cnt = tic->t_cnt;
+		__entry->curr_res = tic->t_curr_res;
+		__entry->unit_res = tic->t_unit_res;
+		__entry->flags = tic->t_flags;
+		__entry->reserveq = list_empty(&log->l_reserveq);
+		__entry->writeq = list_empty(&log->l_writeq);
+		xlog_crack_grant_head(&log->l_grant_reserve_head,
+				&__entry->grant_reserve_cycle,
+				&__entry->grant_reserve_bytes);
+		xlog_crack_grant_head(&log->l_grant_write_head,
+				&__entry->grant_write_cycle,
+				&__entry->grant_write_bytes);
+		__entry->curr_cycle = log->l_curr_cycle;
+		__entry->curr_block = log->l_curr_block;
+		__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
+	),
+	TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+		  "t_unit_res %u t_flags %s reserveq %s "
+		  "writeq %s grant_reserve_cycle %d "
+		  "grant_reserve_bytes %d grant_write_cycle %d "
+		  "grant_write_bytes %d curr_cycle %d curr_block %d "
+		  "tail_cycle %d tail_block %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
+		  __entry->ocnt,
+		  __entry->cnt,
+		  __entry->curr_res,
+		  __entry->unit_res,
+		  __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
+		  __entry->reserveq ? "empty" : "active",
+		  __entry->writeq ? "empty" : "active",
+		  __entry->grant_reserve_cycle,
+		  __entry->grant_reserve_bytes,
+		  __entry->grant_write_cycle,
+		  __entry->grant_write_bytes,
+		  __entry->curr_cycle,
+		  __entry->curr_block,
+		  CYCLE_LSN(__entry->tail_lsn),
+		  BLOCK_LSN(__entry->tail_lsn)
+	)
+)
+
+#define DEFINE_LOGGRANT_EVENT(name) \
+DEFINE_EVENT(xfs_loggrant_class, name, \
+	TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+	TP_ARGS(log, tic))
+DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
+DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
+DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
+
+DECLARE_EVENT_CLASS(xfs_file_class,
+	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+	TP_ARGS(ip, count, offset, flags),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_fsize_t, size)
+		__field(xfs_fsize_t, new_size)
+		__field(loff_t, offset)
+		__field(size_t, count)
+		__field(int, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;
+		__entry->new_size = ip->i_new_size;
+		__entry->offset = offset;
+		__entry->count = count;
+		__entry->flags = flags;
+	),
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+		  "offset 0x%llx count 0x%zx ioflags %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->new_size,
+		  __entry->offset,
+		  __entry->count,
+		  __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
+)
+
+#define DEFINE_RW_EVENT(name)		\
+DEFINE_EVENT(xfs_file_class, name,	\
+	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),	\
+	TP_ARGS(ip, count, offset, flags))
+DEFINE_RW_EVENT(xfs_file_read);
+DEFINE_RW_EVENT(xfs_file_buffered_write);
+DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_splice_read);
+DEFINE_RW_EVENT(xfs_file_splice_write);
+
+DECLARE_EVENT_CLASS(xfs_page_class,
+	TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+	TP_ARGS(inode, page, off),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(pgoff_t, pgoff)
+		__field(loff_t, size)
+		__field(unsigned long, offset)
+		__field(int, delalloc)
+		__field(int, unwritten)
+	),
+	TP_fast_assign(
+		int delalloc = -1, unwritten = -1;
+
+		if (page_has_buffers(page))
+			xfs_count_page_state(page, &delalloc, &unwritten);
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->ino = XFS_I(inode)->i_ino;
+		__entry->pgoff = page_offset(page);
+		__entry->size = i_size_read(inode);
+		__entry->offset = off;
+		__entry->delalloc = delalloc;
+		__entry->unwritten = unwritten;
+	),
+	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+		  "delalloc %d unwritten %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->pgoff,
+		  __entry->size,
+		  __entry->offset,
+		  __entry->delalloc,
+		  __entry->unwritten)
+)
+
+#define DEFINE_PAGE_EVENT(name)		\
+DEFINE_EVENT(xfs_page_class, name,	\
+	TP_PROTO(struct inode *inode, struct page *page, unsigned long off),	\
+	TP_ARGS(inode, page, off))
+DEFINE_PAGE_EVENT(xfs_writepage);
+DEFINE_PAGE_EVENT(xfs_releasepage);
+DEFINE_PAGE_EVENT(xfs_invalidatepage);
+
+DECLARE_EVENT_CLASS(xfs_imap_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+		 int type, struct xfs_bmbt_irec *irec),
+	TP_ARGS(ip, offset, count, type, irec),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(loff_t, size)
+		__field(loff_t, new_size)
+		__field(loff_t, offset)
+		__field(size_t, count)
+		__field(int, type)
+		__field(xfs_fileoff_t, startoff)
+		__field(xfs_fsblock_t, startblock)
+		__field(xfs_filblks_t, blockcount)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;
+		__entry->new_size = ip->i_new_size;
+		__entry->offset = offset;
+		__entry->count = count;
+		__entry->type = type;
+		__entry->startoff = irec ? irec->br_startoff : 0;
+		__entry->startblock = irec ? irec->br_startblock : 0;
+		__entry->blockcount = irec ? irec->br_blockcount : 0;
+	),
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+		  "offset 0x%llx count %zd type %s "
+		  "startoff 0x%llx startblock %lld blockcount 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->new_size,
+		  __entry->offset,
+		  __entry->count,
+		  __print_symbolic(__entry->type, XFS_IO_TYPES),
+		  __entry->startoff,
+		  (__int64_t)__entry->startblock,
+		  __entry->blockcount)
+)
+
+#define DEFINE_IOMAP_EVENT(name)	\
+DEFINE_EVENT(xfs_imap_class, name,	\
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,	\
+		 int type, struct xfs_bmbt_irec *irec),		\
+	TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+	TP_ARGS(ip, offset, count),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(loff_t, isize)
+		__field(loff_t, disize)
+		__field(loff_t, new_size)
+		__field(loff_t, offset)
+		__field(size_t, count)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->isize = ip->i_size;
+		__entry->disize = ip->i_d.di_size;
+		__entry->new_size = ip->i_new_size;
+		__entry->offset = offset;
+		__entry->count = count;
+	),
+	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+		  "offset 0x%llx count %zd",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->isize,
+		  __entry->disize,
+		  __entry->new_size,
+		  __entry->offset,
+		  __entry->count)
+);
+
+#define DEFINE_SIMPLE_IO_EVENT(name)	\
+DEFINE_EVENT(xfs_simple_io_class, name,	\
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),	\
+	TP_ARGS(ip, offset, count))
+DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
+DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+
+DECLARE_EVENT_CLASS(xfs_itrunc_class,
+	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+	TP_ARGS(ip, new_size),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_fsize_t, size)
+		__field(xfs_fsize_t, new_size)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;
+		__entry->new_size = new_size;
+	),
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->new_size)
+)
+
+#define DEFINE_ITRUNC_EVENT(name) \
+DEFINE_EVENT(xfs_itrunc_class, name, \
+	TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
+	TP_ARGS(ip, new_size))
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+
+TRACE_EVENT(xfs_pagecache_inval,
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
+	TP_ARGS(ip, start, finish),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_fsize_t, size)
+		__field(xfs_off_t, start)
+		__field(xfs_off_t, finish)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;
+		__entry->start = start;
+		__entry->finish = finish;
+	),
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->start,
+		  __entry->finish)
+);
+
+TRACE_EVENT(xfs_bunmap,
+	TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
+		 int flags, unsigned long caller_ip),
+	TP_ARGS(ip, bno, len, flags, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(xfs_fsize_t, size)
+		__field(xfs_fileoff_t, bno)
+		__field(xfs_filblks_t, len)
+		__field(unsigned long, caller_ip)
+		__field(int, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->size = ip->i_d.di_size;
+		__entry->bno = bno;
+		__entry->len = len;
+		__entry->caller_ip = caller_ip;
+		__entry->flags = flags;
+	),
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
+		  "flags %s caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->bno,
+		  __entry->len,
+		  __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
+		  (void *)__entry->caller_ip)
+
+);
+
+DECLARE_EVENT_CLASS(xfs_busy_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len),
+	TP_ARGS(mp, agno, agbno, len),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len)
+);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len), \
+	TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
+
+TRACE_EVENT(xfs_alloc_busy_trim,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len,
+		 xfs_agblock_t tbno, xfs_extlen_t tlen),
+	TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(xfs_agblock_t, tbno)
+		__field(xfs_extlen_t, tlen)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->tbno = tbno;
+		__entry->tlen = tlen;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->tbno,
+		  __entry->tlen)
+);
+
+TRACE_EVENT(xfs_trans_commit_lsn,
+	TP_PROTO(struct xfs_trans *trans),
+	TP_ARGS(trans),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(struct xfs_trans *, tp)
+		__field(xfs_lsn_t, lsn)
+	),
+	TP_fast_assign(
+		__entry->dev = trans->t_mountp->m_super->s_dev;
+		__entry->tp = trans;
+		__entry->lsn = trans->t_commit_lsn;
+	),
+	TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tp,
+		  __entry->lsn)
+);
+
+TRACE_EVENT(xfs_agf,
+	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
+		 unsigned long caller_ip),
+	TP_ARGS(mp, agf, flags, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(int, flags)
+		__field(__u32, length)
+		__field(__u32, bno_root)
+		__field(__u32, cnt_root)
+		__field(__u32, bno_level)
+		__field(__u32, cnt_level)
+		__field(__u32, flfirst)
+		__field(__u32, fllast)
+		__field(__u32, flcount)
+		__field(__u32, freeblks)
+		__field(__u32, longest)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = be32_to_cpu(agf->agf_seqno),
+		__entry->flags = flags;
+		__entry->length = be32_to_cpu(agf->agf_length),
+		__entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
+		__entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
+		__entry->bno_level =
+				be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
+		__entry->cnt_level =
+				be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
+		__entry->flfirst = be32_to_cpu(agf->agf_flfirst),
+		__entry->fllast = be32_to_cpu(agf->agf_fllast),
+		__entry->flcount = be32_to_cpu(agf->agf_flcount),
+		__entry->freeblks = be32_to_cpu(agf->agf_freeblks),
+		__entry->longest = be32_to_cpu(agf->agf_longest);
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
+		  "levels b %u c %u flfirst %u fllast %u flcount %u "
+		  "freeblks %u longest %u caller %pf",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
+		  __entry->length,
+		  __entry->bno_root,
+		  __entry->cnt_root,
+		  __entry->bno_level,
+		  __entry->cnt_level,
+		  __entry->flfirst,
+		  __entry->fllast,
+		  __entry->flcount,
+		  __entry->freeblks,
+		  __entry->longest,
+		  (void *)__entry->caller_ip)
+);
+
+TRACE_EVENT(xfs_free_extent,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+		 xfs_extlen_t len, bool isfl, int haveleft, int haveright),
+	TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(int, isfl)
+		__field(int, haveleft)
+		__field(int, haveright)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->isfl = isfl;
+		__entry->haveleft = haveleft;
+		__entry->haveright = haveright;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->isfl,
+		  __entry->haveleft ?
+			(__entry->haveright ? "both" : "left") :
+			(__entry->haveright ? "right" : "none"))
+
+);
+
+DECLARE_EVENT_CLASS(xfs_alloc_class,
+	TP_PROTO(struct xfs_alloc_arg *args),
+	TP_ARGS(args),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, minlen)
+		__field(xfs_extlen_t, maxlen)
+		__field(xfs_extlen_t, mod)
+		__field(xfs_extlen_t, prod)
+		__field(xfs_extlen_t, minleft)
+		__field(xfs_extlen_t, total)
+		__field(xfs_extlen_t, alignment)
+		__field(xfs_extlen_t, minalignslop)
+		__field(xfs_extlen_t, len)
+		__field(short, type)
+		__field(short, otype)
+		__field(char, wasdel)
+		__field(char, wasfromfl)
+		__field(char, isfl)
+		__field(char, userdata)
+		__field(xfs_fsblock_t, firstblock)
+	),
+	TP_fast_assign(
+		__entry->dev = args->mp->m_super->s_dev;
+		__entry->agno = args->agno;
+		__entry->agbno = args->agbno;
+		__entry->minlen = args->minlen;
+		__entry->maxlen = args->maxlen;
+		__entry->mod = args->mod;
+		__entry->prod = args->prod;
+		__entry->minleft = args->minleft;
+		__entry->total = args->total;
+		__entry->alignment = args->alignment;
+		__entry->minalignslop = args->minalignslop;
+		__entry->len = args->len;
+		__entry->type = args->type;
+		__entry->otype = args->otype;
+		__entry->wasdel = args->wasdel;
+		__entry->wasfromfl = args->wasfromfl;
+		__entry->isfl = args->isfl;
+		__entry->userdata = args->userdata;
+		__entry->firstblock = args->firstblock;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+		  "prod %u minleft %u total %u alignment %u minalignslop %u "
+		  "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
+		  "userdata %d firstblock 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->minlen,
+		  __entry->maxlen,
+		  __entry->mod,
+		  __entry->prod,
+		  __entry->minleft,
+		  __entry->total,
+		  __entry->alignment,
+		  __entry->minalignslop,
+		  __entry->len,
+		  __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
+		  __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
+		  __entry->wasdel,
+		  __entry->wasfromfl,
+		  __entry->isfl,
+		  __entry->userdata,
+		  (unsigned long long)__entry->firstblock)
+)
+
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_alloc_class, name, \
+	TP_PROTO(struct xfs_alloc_arg *args), \
+	TP_ARGS(args))
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
+
+DECLARE_EVENT_CLASS(xfs_dir2_class,
+	TP_PROTO(struct xfs_da_args *args),
+	TP_ARGS(args),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__dynamic_array(char, name, args->namelen)
+		__field(int, namelen)
+		__field(xfs_dahash_t, hashval)
+		__field(xfs_ino_t, inumber)
+		__field(int, op_flags)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+		__entry->ino = args->dp->i_ino;
+		if (args->namelen)
+			memcpy(__get_str(name), args->name, args->namelen);
+		__entry->namelen = args->namelen;
+		__entry->hashval = args->hashval;
+		__entry->inumber = args->inumber;
+		__entry->op_flags = args->op_flags;
+	),
+	TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
+		  "inumber 0x%llx op_flags %s",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->namelen,
+		  __entry->namelen ? __get_str(name) : NULL,
+		  __entry->namelen,
+		  __entry->hashval,
+		  __entry->inumber,
+		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
+)
+
+#define DEFINE_DIR2_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_class, name, \
+	TP_PROTO(struct xfs_da_args *args), \
+	TP_ARGS(args))
+DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+	TP_PROTO(struct xfs_da_args *args, int idx),
+	TP_ARGS(args, idx),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, op_flags)
+		__field(int, idx)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+		__entry->ino = args->dp->i_ino;
+		__entry->op_flags = args->op_flags;
+		__entry->idx = idx;
+	),
+	TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+		  __entry->idx)
+)
+
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
+	TP_PROTO(struct xfs_da_args *args, int idx), \
+	TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
+
+TRACE_EVENT(xfs_dir2_leafn_moveents,
+	TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
+	TP_ARGS(args, src_idx, dst_idx, count),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(int, op_flags)
+		__field(int, src_idx)
+		__field(int, dst_idx)
+		__field(int, count)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+		__entry->ino = args->dp->i_ino;
+		__entry->op_flags = args->op_flags;
+		__entry->src_idx = src_idx;
+		__entry->dst_idx = dst_idx;
+		__entry->count = count;
+	),
+	TP_printk("dev %d:%d ino 0x%llx op_flags %s "
+		  "src_idx %d dst_idx %d count %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+		  __entry->src_idx,
+		  __entry->dst_idx,
+		  __entry->count)
+);
+
+#define XFS_SWAPEXT_INODES \
+	{ 0,	"target" }, \
+	{ 1,	"temp" }
+
+#define XFS_INODE_FORMAT_STR \
+	{ 0,	"invalid" }, \
+	{ 1,	"local" }, \
+	{ 2,	"extent" }, \
+	{ 3,	"btree" }
+
+DECLARE_EVENT_CLASS(xfs_swap_extent_class,
+	TP_PROTO(struct xfs_inode *ip, int which),
+	TP_ARGS(ip, which),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, which)
+		__field(xfs_ino_t, ino)
+		__field(int, format)
+		__field(int, nex)
+		__field(int, max_nex)
+		__field(int, broot_size)
+		__field(int, fork_off)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->which = which;
+		__entry->ino = ip->i_ino;
+		__entry->format = ip->i_d.di_format;
+		__entry->nex = ip->i_d.di_nextents;
+		__entry->max_nex = ip->i_df.if_ext_max;
+		__entry->broot_size = ip->i_df.if_broot_bytes;
+		__entry->fork_off = XFS_IFORK_BOFF(ip);
+	),
+	TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
+		  "Max in-fork extents %d, broot size %d, fork offset %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
+		  __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
+		  __entry->nex,
+		  __entry->max_nex,
+		  __entry->broot_size,
+		  __entry->fork_off)
+)
+
+#define DEFINE_SWAPEXT_EVENT(name) \
+DEFINE_EVENT(xfs_swap_extent_class, name, \
+	TP_PROTO(struct xfs_inode *ip, int which), \
+	TP_ARGS(ip, which))
+
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+	TP_PROTO(struct log *log, struct xlog_recover *trans,
+		struct xlog_recover_item *item, int pass),
+	TP_ARGS(log, trans, item, pass),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, item)
+		__field(xlog_tid_t, tid)
+		__field(int, type)
+		__field(int, pass)
+		__field(int, count)
+		__field(int, total)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->item = (unsigned long)item;
+		__entry->tid = trans->r_log_tid;
+		__entry->type = ITEM_TYPE(item);
+		__entry->pass = pass;
+		__entry->count = item->ri_cnt;
+		__entry->total = item->ri_total;
+	),
+	TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+		  "item region count/total %d/%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tid,
+		  __entry->pass,
+		  (void *)__entry->item,
+		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+		  __entry->count,
+		  __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+	TP_PROTO(struct log *log, struct xlog_recover *trans, \
+		struct xlog_recover_item *item, int pass), \
+	TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+	TP_ARGS(log, buf_f),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(__int64_t, blkno)
+		__field(unsigned short, len)
+		__field(unsigned short, flags)
+		__field(unsigned short, size)
+		__field(unsigned int, map_size)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->blkno = buf_f->blf_blkno;
+		__entry->len = buf_f->blf_len;
+		__entry->flags = buf_f->blf_flags;
+		__entry->size = buf_f->blf_size;
+		__entry->map_size = buf_f->blf_map_size;
+	),
+	TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+			"map_size %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->blkno,
+		  __entry->len,
+		  __entry->flags,
+		  __entry->size,
+		  __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+	TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+	TP_ARGS(log, in_f),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(unsigned short, size)
+		__field(int, fields)
+		__field(unsigned short, asize)
+		__field(unsigned short, dsize)
+		__field(__int64_t, blkno)
+		__field(int, len)
+		__field(int, boffset)
+	),
+	TP_fast_assign(
+		__entry->dev = log->l_mp->m_super->s_dev;
+		__entry->ino = in_f->ilf_ino;
+		__entry->size = in_f->ilf_size;
+		__entry->fields = in_f->ilf_fields;
+		__entry->asize = in_f->ilf_asize;
+		__entry->dsize = in_f->ilf_dsize;
+		__entry->blkno = in_f->ilf_blkno;
+		__entry->len = in_f->ilf_len;
+		__entry->boffset = in_f->ilf_boffset;
+	),
+	TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+			"dsize %d, blkno 0x%llx, len %d, boffset %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->size,
+		  __entry->fields,
+		  __entry->asize,
+		  __entry->dsize,
+		  __entry->blkno,
+		  __entry->len,
+		  __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+	TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
+DECLARE_EVENT_CLASS(xfs_discard_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len),
+	TP_ARGS(mp, agno, agbno, len),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len), \
+	TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
+#endif /* _TRACE_XFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE xfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
new file mode 100644
index 0000000..4d00ee6
--- /dev/null
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -0,0 +1,890 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, and it cannot be associated with any
+ * transaction.
+ */
+void
+xfs_trans_dqjoin(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	ASSERT(dqp->q_transp != tp);
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+	ASSERT(dqp->q_logitem.qli_dquot == dqp);
+
+	/*
+	 * Get a log_item_desc to point at the new item.
+	 */
+	xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
+
+	/*
+	 * Initialize d_transp so we can later determine if this dquot is
+	 * associated with this transaction.
+	 */
+	dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed.  The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	ASSERT(dqp->q_transp == tp);
+	ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+/*
+ * Carry forward whatever is left of the quota blk reservation to
+ * the spanky new transaction
+ */
+void
+xfs_trans_dup_dqinfo(
+	xfs_trans_t	*otp,
+	xfs_trans_t	*ntp)
+{
+	xfs_dqtrx_t	*oq, *nq;
+	int		i,j;
+	xfs_dqtrx_t	*oqa, *nqa;
+
+	if (!otp->t_dqinfo)
+		return;
+
+	xfs_trans_alloc_dqinfo(ntp);
+	oqa = otp->t_dqinfo->dqa_usrdquots;
+	nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+	/*
+	 * Because the quota blk reservation is carried forward,
+	 * it is also necessary to carry forward the DQ_DIRTY flag.
+	 */
+	if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+		ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			if (oqa[i].qt_dquot == NULL)
+				break;
+			oq = &oqa[i];
+			nq = &nqa[i];
+
+			nq->qt_dquot = oq->qt_dquot;
+			nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+			nq->qt_rtbcount_delta = 0;
+
+			/*
+			 * Transfer whatever is left of the reservations.
+			 */
+			nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+			oq->qt_blk_res = oq->qt_blk_res_used;
+
+			nq->qt_rtblk_res = oq->qt_rtblk_res -
+				oq->qt_rtblk_res_used;
+			oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+			nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+			oq->qt_ino_res = oq->qt_ino_res_used;
+
+		}
+		oqa = otp->t_dqinfo->dqa_grpdquots;
+		nqa = ntp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	uint		field,
+	long		delta)
+{
+	xfs_mount_t	*mp = tp->t_mountp;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) ||
+	    !XFS_IS_QUOTA_ON(mp) ||
+	    ip->i_ino == mp->m_sb.sb_uquotino ||
+	    ip->i_ino == mp->m_sb.sb_gquotino)
+		return;
+
+	if (tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+
+	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
+		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+	if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
+		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+}
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp)
+{
+	int		i;
+	xfs_dqtrx_t	*qa;
+
+	qa = XFS_QM_ISUDQ(dqp) ?
+		tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
+
+	for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+		if (qa[i].qt_dquot == NULL ||
+		    qa[i].qt_dquot == dqp)
+			return &qa[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+	xfs_trans_t	*tp,
+	xfs_dquot_t	*dqp,
+	uint		field,
+	long		delta)
+{
+	xfs_dqtrx_t	*qtrx;
+
+	ASSERT(tp);
+	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+	qtrx = NULL;
+
+	if (tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+	/*
+	 * Find either the first free slot or the slot that belongs
+	 * to this dquot.
+	 */
+	qtrx = xfs_trans_get_dqtrx(tp, dqp);
+	ASSERT(qtrx);
+	if (qtrx->qt_dquot == NULL)
+		qtrx->qt_dquot = dqp;
+
+	switch (field) {
+
+		/*
+		 * regular disk blk reservation
+		 */
+	      case XFS_TRANS_DQ_RES_BLKS:
+		qtrx->qt_blk_res += (ulong)delta;
+		break;
+
+		/*
+		 * inode reservation
+		 */
+	      case XFS_TRANS_DQ_RES_INOS:
+		qtrx->qt_ino_res += (ulong)delta;
+		break;
+
+		/*
+		 * disk blocks used.
+		 */
+	      case XFS_TRANS_DQ_BCOUNT:
+		if (qtrx->qt_blk_res && delta > 0) {
+			qtrx->qt_blk_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+		}
+		qtrx->qt_bcount_delta += delta;
+		break;
+
+	      case XFS_TRANS_DQ_DELBCOUNT:
+		qtrx->qt_delbcnt_delta += delta;
+		break;
+
+		/*
+		 * Inode Count
+		 */
+	      case XFS_TRANS_DQ_ICOUNT:
+		if (qtrx->qt_ino_res && delta > 0) {
+			qtrx->qt_ino_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+		}
+		qtrx->qt_icount_delta += delta;
+		break;
+
+		/*
+		 * rtblk reservation
+		 */
+	      case XFS_TRANS_DQ_RES_RTBLKS:
+		qtrx->qt_rtblk_res += (ulong)delta;
+		break;
+
+		/*
+		 * rtblk count
+		 */
+	      case XFS_TRANS_DQ_RTBCOUNT:
+		if (qtrx->qt_rtblk_res && delta > 0) {
+			qtrx->qt_rtblk_res_used += (ulong)delta;
+			ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+		}
+		qtrx->qt_rtbcount_delta += delta;
+		break;
+
+	      case XFS_TRANS_DQ_DELRTBCOUNT:
+		qtrx->qt_delrtb_delta += delta;
+		break;
+
+	      default:
+		ASSERT(0);
+	}
+	tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots (of one type - usr OR grp),
+ * involved in a transaction is 2 and that both usr and grp combined - 3.
+ * So, we don't attempt to make this very generic.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+	xfs_trans_t	*tp,
+	xfs_dqtrx_t	*q)
+{
+	ASSERT(q[0].qt_dquot != NULL);
+	if (q[1].qt_dquot == NULL) {
+		xfs_dqlock(q[0].qt_dquot);
+		xfs_trans_dqjoin(tp, q[0].qt_dquot);
+	} else {
+		ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+		xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+		xfs_trans_dqjoin(tp, q[0].qt_dquot);
+		xfs_trans_dqjoin(tp, q[1].qt_dquot);
+	}
+}
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go thru all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+	xfs_trans_t		*tp)
+{
+	int			i, j;
+	xfs_dquot_t		*dqp;
+	xfs_dqtrx_t		*qtrx, *qa;
+	xfs_disk_dquot_t	*d;
+	long			totalbdelta;
+	long			totalrtbdelta;
+
+	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+		return;
+
+	ASSERT(tp->t_dqinfo);
+	qa = tp->t_dqinfo->dqa_usrdquots;
+	for (j = 0; j < 2; j++) {
+		if (qa[0].qt_dquot == NULL) {
+			qa = tp->t_dqinfo->dqa_grpdquots;
+			continue;
+		}
+
+		/*
+		 * Lock all of the dquots and join them to the transaction.
+		 */
+		xfs_trans_dqlockedjoin(tp, qa);
+
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			qtrx = &qa[i];
+			/*
+			 * The array of dquots is filled
+			 * sequentially, not sparsely.
+			 */
+			if ((dqp = qtrx->qt_dquot) == NULL)
+				break;
+
+			ASSERT(XFS_DQ_IS_LOCKED(dqp));
+			ASSERT(dqp->q_transp == tp);
+
+			/*
+			 * adjust the actual number of blocks used
+			 */
+			d = &dqp->q_core;
+
+			/*
+			 * The issue here is - sometimes we don't make a blkquota
+			 * reservation intentionally to be fair to users
+			 * (when the amount is small). On the other hand,
+			 * delayed allocs do make reservations, but that's
+			 * outside of a transaction, so we have no
+			 * idea how much was really reserved.
+			 * So, here we've accumulated delayed allocation blks and
+			 * non-delay blks. The assumption is that the
+			 * delayed ones are always reserved (outside of a
+			 * transaction), and the others may or may not have
+			 * quota reservations.
+			 */
+			totalbdelta = qtrx->qt_bcount_delta +
+				qtrx->qt_delbcnt_delta;
+			totalrtbdelta = qtrx->qt_rtbcount_delta +
+				qtrx->qt_delrtb_delta;
+#ifdef DEBUG
+			if (totalbdelta < 0)
+				ASSERT(be64_to_cpu(d->d_bcount) >=
+				       -totalbdelta);
+
+			if (totalrtbdelta < 0)
+				ASSERT(be64_to_cpu(d->d_rtbcount) >=
+				       -totalrtbdelta);
+
+			if (qtrx->qt_icount_delta < 0)
+				ASSERT(be64_to_cpu(d->d_icount) >=
+				       -qtrx->qt_icount_delta);
+#endif
+			if (totalbdelta)
+				be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
+
+			if (qtrx->qt_icount_delta)
+				be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+			if (totalrtbdelta)
+				be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
+
+			/*
+			 * Get any default limits in use.
+			 * Start/reset the timer(s) if needed.
+			 */
+			if (d->d_id) {
+				xfs_qm_adjust_dqlimits(tp->t_mountp, d);
+				xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+			}
+
+			dqp->dq_flags |= XFS_DQ_DIRTY;
+			/*
+			 * add this to the list of items to get logged
+			 */
+			xfs_trans_log_dquot(tp, dqp);
+			/*
+			 * Take off what's left of the original reservation.
+			 * In case of delayed allocations, there's no
+			 * reservation that a transaction structure knows of.
+			 */
+			if (qtrx->qt_blk_res != 0) {
+				if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+					if (qtrx->qt_blk_res >
+					    qtrx->qt_blk_res_used)
+						dqp->q_res_bcount -= (xfs_qcnt_t)
+							(qtrx->qt_blk_res -
+							 qtrx->qt_blk_res_used);
+					else
+						dqp->q_res_bcount -= (xfs_qcnt_t)
+							(qtrx->qt_blk_res_used -
+							 qtrx->qt_blk_res);
+				}
+			} else {
+				/*
+				 * These blks were never reserved, either inside
+				 * a transaction or outside one (in a delayed
+				 * allocation). Also, this isn't always a
+				 * negative number since we sometimes
+				 * deliberately skip quota reservations.
+				 */
+				if (qtrx->qt_bcount_delta) {
+					dqp->q_res_bcount +=
+					      (xfs_qcnt_t)qtrx->qt_bcount_delta;
+				}
+			}
+			/*
+			 * Adjust the RT reservation.
+			 */
+			if (qtrx->qt_rtblk_res != 0) {
+				if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
+					if (qtrx->qt_rtblk_res >
+					    qtrx->qt_rtblk_res_used)
+					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
+						       (qtrx->qt_rtblk_res -
+							qtrx->qt_rtblk_res_used);
+					else
+					       dqp->q_res_rtbcount -= (xfs_qcnt_t)
+						       (qtrx->qt_rtblk_res_used -
+							qtrx->qt_rtblk_res);
+				}
+			} else {
+				if (qtrx->qt_rtbcount_delta)
+					dqp->q_res_rtbcount +=
+					    (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+			}
+
+			/*
+			 * Adjust the inode reservation.
+			 */
+			if (qtrx->qt_ino_res != 0) {
+				ASSERT(qtrx->qt_ino_res >=
+				       qtrx->qt_ino_res_used);
+				if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+					dqp->q_res_icount -= (xfs_qcnt_t)
+						(qtrx->qt_ino_res -
+						 qtrx->qt_ino_res_used);
+			} else {
+				if (qtrx->qt_icount_delta)
+					dqp->q_res_icount +=
+					    (xfs_qcnt_t)qtrx->qt_icount_delta;
+			}
+
+			ASSERT(dqp->q_res_bcount >=
+				be64_to_cpu(dqp->q_core.d_bcount));
+			ASSERT(dqp->q_res_icount >=
+				be64_to_cpu(dqp->q_core.d_icount));
+			ASSERT(dqp->q_res_rtbcount >=
+				be64_to_cpu(dqp->q_core.d_rtbcount));
+		}
+		/*
+		 * Do the group quotas next
+		 */
+		qa = tp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (ie. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+void
+xfs_trans_unreserve_and_mod_dquots(
+	xfs_trans_t		*tp)
+{
+	int			i, j;
+	xfs_dquot_t		*dqp;
+	xfs_dqtrx_t		*qtrx, *qa;
+	boolean_t		locked;
+
+	if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+		return;
+
+	qa = tp->t_dqinfo->dqa_usrdquots;
+
+	for (j = 0; j < 2; j++) {
+		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+			qtrx = &qa[i];
+			/*
+			 * We assume that the array of dquots is filled
+			 * sequentially, not sparsely.
+			 */
+			if ((dqp = qtrx->qt_dquot) == NULL)
+				break;
+			/*
+			 * Unreserve the original reservation. We don't care
+			 * about the number of blocks used field, or deltas.
+			 * Also we don't bother to zero the fields.
+			 */
+			locked = B_FALSE;
+			if (qtrx->qt_blk_res) {
+				xfs_dqlock(dqp);
+				locked = B_TRUE;
+				dqp->q_res_bcount -=
+					(xfs_qcnt_t)qtrx->qt_blk_res;
+			}
+			if (qtrx->qt_ino_res) {
+				if (!locked) {
+					xfs_dqlock(dqp);
+					locked = B_TRUE;
+				}
+				dqp->q_res_icount -=
+					(xfs_qcnt_t)qtrx->qt_ino_res;
+			}
+
+			if (qtrx->qt_rtblk_res) {
+				if (!locked) {
+					xfs_dqlock(dqp);
+					locked = B_TRUE;
+				}
+				dqp->q_res_rtbcount -=
+					(xfs_qcnt_t)qtrx->qt_rtblk_res;
+			}
+			if (locked)
+				xfs_dqunlock(dqp);
+
+		}
+		qa = tp->t_dqinfo->dqa_grpdquots;
+	}
+}
+
+STATIC void
+xfs_quota_warn(
+	struct xfs_mount	*mp,
+	struct xfs_dquot	*dqp,
+	int			type)
+{
+	/* no warnings for project quotas - we just return ENOSPC later */
+	if (dqp->dq_flags & XFS_DQ_PROJ)
+		return;
+	quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
+			   be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
+			   type);
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * Flags indicate if the dquot is to be locked here and also
+ * if the blk reservation is for RT or regular blocks.
+ * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
+ */
+STATIC int
+xfs_trans_dqresv(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dquot_t	*dqp,
+	long		nblks,
+	long		ninos,
+	uint		flags)
+{
+	xfs_qcnt_t	hardlimit;
+	xfs_qcnt_t	softlimit;
+	time_t		timer;
+	xfs_qwarncnt_t	warns;
+	xfs_qwarncnt_t	warnlimit;
+	xfs_qcnt_t	count;
+	xfs_qcnt_t	*resbcountp;
+	xfs_quotainfo_t	*q = mp->m_quotainfo;
+
+
+	xfs_dqlock(dqp);
+
+	if (flags & XFS_TRANS_DQ_RES_BLKS) {
+		hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+		if (!hardlimit)
+			hardlimit = q->qi_bhardlimit;
+		softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
+		if (!softlimit)
+			softlimit = q->qi_bsoftlimit;
+		timer = be32_to_cpu(dqp->q_core.d_btimer);
+		warns = be16_to_cpu(dqp->q_core.d_bwarns);
+		warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+		resbcountp = &dqp->q_res_bcount;
+	} else {
+		ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+		hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
+		if (!hardlimit)
+			hardlimit = q->qi_rtbhardlimit;
+		softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
+		if (!softlimit)
+			softlimit = q->qi_rtbsoftlimit;
+		timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+		warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
+		warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+		resbcountp = &dqp->q_res_rtbcount;
+	}
+
+	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+	    dqp->q_core.d_id &&
+	    ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
+	     (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
+	      (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
+		if (nblks > 0) {
+			/*
+			 * dquot is locked already. See if we'd go over the
+			 * hardlimit or exceed the timelimit if we allocate
+			 * nblks.
+			 */
+			if (hardlimit > 0ULL &&
+			    hardlimit <= nblks + *resbcountp) {
+				xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
+				goto error_return;
+			}
+			if (softlimit > 0ULL &&
+			    softlimit <= nblks + *resbcountp) {
+				if ((timer != 0 && get_seconds() > timer) ||
+				    (warns != 0 && warns >= warnlimit)) {
+					xfs_quota_warn(mp, dqp,
+						       QUOTA_NL_BSOFTLONGWARN);
+					goto error_return;
+				}
+
+				xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
+			}
+		}
+		if (ninos > 0) {
+			count = be64_to_cpu(dqp->q_core.d_icount);
+			timer = be32_to_cpu(dqp->q_core.d_itimer);
+			warns = be16_to_cpu(dqp->q_core.d_iwarns);
+			warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+			hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+			if (!hardlimit)
+				hardlimit = q->qi_ihardlimit;
+			softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+			if (!softlimit)
+				softlimit = q->qi_isoftlimit;
+
+			if (hardlimit > 0ULL && count >= hardlimit) {
+				xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
+				goto error_return;
+			}
+			if (softlimit > 0ULL && count >= softlimit) {
+				if  ((timer != 0 && get_seconds() > timer) ||
+				     (warns != 0 && warns >= warnlimit)) {
+					xfs_quota_warn(mp, dqp,
+						       QUOTA_NL_ISOFTLONGWARN);
+					goto error_return;
+				}
+				xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
+			}
+		}
+	}
+
+	/*
+	 * Change the reservation, but not the actual usage.
+	 * Note that q_res_bcount = q_core.d_bcount + resv
+	 */
+	(*resbcountp) += (xfs_qcnt_t)nblks;
+	if (ninos != 0)
+		dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+	/*
+	 * note the reservation amt in the trans struct too,
+	 * so that the transaction knows how much was reserved by
+	 * it against this particular dquot.
+	 * We don't do this when we are reserving for a delayed allocation,
+	 * because we don't have the luxury of a transaction envelope then.
+	 */
+	if (tp) {
+		ASSERT(tp->t_dqinfo);
+		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+		if (nblks != 0)
+			xfs_trans_mod_dquot(tp, dqp,
+					    flags & XFS_QMOPT_RESBLK_MASK,
+					    nblks);
+		if (ninos != 0)
+			xfs_trans_mod_dquot(tp, dqp,
+					    XFS_TRANS_DQ_RES_INOS,
+					    ninos);
+	}
+	ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
+	ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
+	ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
+
+	xfs_dqunlock(dqp);
+	return 0;
+
+error_return:
+	xfs_dqunlock(dqp);
+	if (flags & XFS_QMOPT_ENOSPC)
+		return ENOSPC;
+	return EDQUOT;
+}
+
+
+/*
+ * Given dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp/prj quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ *	   XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
+ *	   XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ *	   XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+	xfs_trans_t	*tp,
+	xfs_mount_t	*mp,
+	xfs_dquot_t	*udqp,
+	xfs_dquot_t	*gdqp,
+	long		nblks,
+	long		ninos,
+	uint		flags)
+{
+	int		resvd = 0, error;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return 0;
+
+	if (tp && tp->t_dqinfo == NULL)
+		xfs_trans_alloc_dqinfo(tp);
+
+	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+
+	if (udqp) {
+		error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
+					(flags & ~XFS_QMOPT_ENOSPC));
+		if (error)
+			return error;
+		resvd = 1;
+	}
+
+	if (gdqp) {
+		error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
+		if (error) {
+			/*
+			 * can't do it, so backout previous reservation
+			 */
+			if (resvd) {
+				flags |= XFS_QMOPT_FORCE_RES;
+				xfs_trans_dqresv(tp, mp, udqp,
+						 -nblks, -ninos, flags);
+			}
+			return error;
+		}
+	}
+
+	/*
+	 * Didn't change anything critical, so, no need to log
+	 */
+	return 0;
+}
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ */
+int
+xfs_trans_reserve_quota_nblks(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	long			nblks,
+	long			ninos,
+	uint			flags)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+		return 0;
+	if (XFS_IS_PQUOTA_ON(mp))
+		flags |= XFS_QMOPT_ENOSPC;
+
+	ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+	ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+				XFS_TRANS_DQ_RES_RTBLKS ||
+	       (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+				XFS_TRANS_DQ_RES_BLKS);
+
+	/*
+	 * Reserve nblks against these dquots, with trans as the mediator.
+	 */
+	return xfs_trans_reserve_quota_bydquots(tp, mp,
+						ip->i_udquot, ip->i_gdquot,
+						nblks, ninos, flags);
+}
+
+/*
+ * This routine is called to allocate a quotaoff log item.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+	xfs_trans_t		*tp,
+	xfs_qoff_logitem_t	*startqoff,
+	uint			flags)
+{
+	xfs_qoff_logitem_t	*q;
+
+	ASSERT(tp != NULL);
+
+	q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+	ASSERT(q != NULL);
+
+	/*
+	 * Get a log_item_desc to point at the new item.
+	 */
+	xfs_trans_add_item(tp, &q->qql_item);
+	return q;
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed.  The logitem must
+ * already be associated with the given transaction.
+ */
+void
+xfs_trans_log_quotaoff_item(
+	xfs_trans_t		*tp,
+	xfs_qoff_logitem_t	*qlp)
+{
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+	xfs_trans_t	*tp)
+{
+	tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+void
+xfs_trans_free_dqinfo(
+	xfs_trans_t	*tp)
+{
+	if (!tp->t_dqinfo)
+		return;
+	kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+	tp->t_dqinfo = NULL;
+}
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
new file mode 100644
index 0000000..7c220b4
--- /dev/null
+++ b/fs/xfs/xfs_vnode.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+#include "xfs_fs.h"
+
+struct file;
+struct xfs_inode;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Return values for xfs_inactive.  A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define	VN_INACTIVE_CACHE	0
+#define	VN_INACTIVE_NOCACHE	1
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISDIRECT	0x00004		/* bypass page cache */
+#define IO_INVIS	0x00020		/* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+	{ IO_ISDIRECT,	"DIRECT" }, \
+	{ IO_INVIS,	"INVIS"}
+
+/*
+ * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
+ */
+#define FI_NONE			0	/* none */
+#define FI_REMAPF		1	/* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
+					   Prevent VM access to the pages until
+					   the operation completes. */
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp)	mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp)	(vp->i_mapping->nrpages)
+#define VN_DIRTY(vp)	mapping_tagged(vp->i_mapping, \
+					PAGECACHE_TAG_DIRTY)
+
+
+#endif	/* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
new file mode 100644
index 0000000..87d3e03
--- /dev/null
+++ b/fs/xfs/xfs_xattr.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+static int
+xfs_xattr_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int xflags)
+{
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
+	int error, asize = size;
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	/* Convert Linux syscall to XFS internal ATTR flags */
+	if (!size) {
+		xflags |= ATTR_KERNOVAL;
+		value = NULL;
+	}
+
+	error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+	if (error)
+		return error;
+	return asize;
+}
+
+static int
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags, int xflags)
+{
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
+
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+
+	/* Convert Linux syscall to XFS internal ATTR flags */
+	if (flags & XATTR_CREATE)
+		xflags |= ATTR_CREATE;
+	if (flags & XATTR_REPLACE)
+		xflags |= ATTR_REPLACE;
+
+	if (!value)
+		return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
+	return -xfs_attr_set(ip, (unsigned char *)name,
+				(void *)value, size, xflags);
+}
+
+static const struct xattr_handler xfs_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.flags	= 0, /* no flags implies user namespace */
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.flags	= ATTR_ROOT,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.flags	= ATTR_SECURE,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
+};
+
+const struct xattr_handler *xfs_xattr_handlers[] = {
+	&xfs_xattr_user_handler,
+	&xfs_xattr_trusted_handler,
+	&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
+	&xfs_xattr_acl_access_handler,
+	&xfs_xattr_acl_default_handler,
+#endif
+	NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+	if (flags & XFS_ATTR_SECURE)
+		return sizeof("security");
+	else if (flags & XFS_ATTR_ROOT)
+		return sizeof("trusted");
+	else
+		return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+	if (flags & XFS_ATTR_SECURE)
+		return xfs_xattr_security_handler.prefix;
+	else if (flags & XFS_ATTR_ROOT)
+		return xfs_xattr_trusted_handler.prefix;
+	else
+		return xfs_xattr_user_handler.prefix;
+}
+
+static int
+xfs_xattr_put_listent(
+	struct xfs_attr_list_context *context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
+{
+	unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+	char *offset;
+	int arraytop;
+
+	ASSERT(context->count >= 0);
+
+	/*
+	 * Only show root namespace entries if we are actually allowed to
+	 * see them.
+	 */
+	if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+		return 0;
+
+	arraytop = context->count + prefix_len + namelen + 1;
+	if (arraytop > context->firstu) {
+		context->count = -1;	/* insufficient space */
+		return 1;
+	}
+	offset = (char *)context->alist + context->count;
+	strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+	offset += prefix_len;
+	strncpy(offset, (char *)name, namelen);			/* real name */
+	offset += namelen;
+	*offset = '\0';
+	context->count += prefix_len + namelen + 1;
+	return 0;
+}
+
+static int
+xfs_xattr_put_listent_sizes(
+	struct xfs_attr_list_context *context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
+{
+	context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+	return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+		size_t size, ssize_t *result)
+{
+	char *p = data + *result;
+
+	*result += len;
+	if (!size)
+		return 0;
+	if (*result > size)
+		return -ERANGE;
+
+	strcpy(p, name);
+	return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+	struct xfs_attr_list_context context;
+	struct attrlist_cursor_kern cursor = { 0 };
+	struct inode		*inode = dentry->d_inode;
+	int			error;
+
+	/*
+	 * First read the regular on-disk attributes.
+	 */
+	memset(&context, 0, sizeof(context));
+	context.dp = XFS_I(inode);
+	context.cursor = &cursor;
+	context.resynch = 1;
+	context.alist = data;
+	context.bufsize = size;
+	context.firstu = context.bufsize;
+
+	if (size)
+		context.put_listent = xfs_xattr_put_listent;
+	else
+		context.put_listent = xfs_xattr_put_listent_sizes;
+
+	xfs_attr_list_int(&context);
+	if (context.count < 0)
+		return -ERANGE;
+
+	/*
+	 * Then add the two synthetic ACL attributes.
+	 */
+	if (posix_acl_access_exists(inode)) {
+		error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+				strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+				data, size, &context.count);
+		if (error)
+			return error;
+	}
+
+	if (posix_acl_default_exists(inode)) {
+		error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+				strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+				data, size, &context.count);
+		if (error)
+			return error;
+	}
+
+	return context.count;
+}
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 3b8e028..e8bffbe 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a..362da65 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
-- 
cgit v1.1


From 63d635b21c00069b5ade7640bcbe8ab912dc65d1 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 2 Jun 2011 04:29:23 +0000
Subject: ixgbe: Fix FCOE memory leak for DDP packets

This patch is meant to fix a memory leak found via code review for FCOE.
Specifically on DDP flows the SKBs were being dropped without being
recycled, freed, or given to the stack.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ixgbe/ixgbe_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index e86297b..2279039 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1459,8 +1459,10 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		if (ixgbe_rx_is_fcoe(adapter, rx_desc)) {
 			ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb,
 						   staterr);
-			if (!ddp_bytes)
+			if (!ddp_bytes) {
+				dev_kfree_skb_any(skb);
 				goto next_desc;
+			}
 		}
 #endif /* IXGBE_FCOE */
 		ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc);
-- 
cgit v1.1


From 1d2101a712b3b7281a19ff6d7bfc16c2ce9d3998 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Fri, 22 Jul 2011 06:21:56 +0000
Subject: e1000e: Spurious interrupts & dropped packets with 82577/8/9 in
 half-duplex

On 82577/8/9 in half-duplex when a received packet is passed from the PHY
to the MAC, if too many preamble octects are stripped from the packet
before arriving at the MAC, it can be misintrepeted as an in-band message
rather than an actual frame.  For example, if the frame contents resembled
an interrupt request in-band message, it would trigger a false interrupt.
In most cases, the packet is just dropped.

By reducing the number of preamble octets stripped from the beginning of
the frame when passing it from the PHY to the MAC, the MAC will interpret
the frame properly.

An additional uses of the magic PHY_REG(770, 16) have been updated with a
define introduced with this patch.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ich8lan.c | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 4e36978..7525e37 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -163,6 +163,11 @@
 #define HV_KMRN_MODE_CTRL      PHY_REG(769, 16)
 #define HV_KMRN_MDIO_SLOW      0x0400
 
+/* KMRN FIFO Control and Status */
+#define HV_KMRN_FIFO_CTRLSTA                  PHY_REG(770, 16)
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK    0x7000
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT   12
+
 /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */
 /* Offset 04h HSFSTS */
 union ich8_hws_flash_status {
@@ -657,6 +662,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 	struct e1000_mac_info *mac = &hw->mac;
 	s32 ret_val;
 	bool link;
+	u16 phy_reg;
 
 	/*
 	 * We only want to go out to the PHY registers to see if Auto-Neg
@@ -689,16 +695,35 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 
 	mac->get_link_status = false;
 
-	if (hw->phy.type == e1000_phy_82578) {
-		ret_val = e1000_link_stall_workaround_hv(hw);
-		if (ret_val)
-			goto out;
-	}
-
-	if (hw->mac.type == e1000_pch2lan) {
+	switch (hw->mac.type) {
+	case e1000_pch2lan:
 		ret_val = e1000_k1_workaround_lv(hw);
 		if (ret_val)
 			goto out;
+		/* fall-thru */
+	case e1000_pchlan:
+		if (hw->phy.type == e1000_phy_82578) {
+			ret_val = e1000_link_stall_workaround_hv(hw);
+			if (ret_val)
+				goto out;
+		}
+
+		/*
+		 * Workaround for PCHx parts in half-duplex:
+		 * Set the number of preambles removed from the packet
+		 * when it is passed from the PHY to the MAC to prevent
+		 * the MAC from misinterpreting the packet type.
+		 */
+		e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg);
+		phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK;
+
+		if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD)
+			phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT);
+
+		e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg);
+		break;
+	default:
+		break;
 	}
 
 	/*
@@ -1355,7 +1380,7 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
 			return ret_val;
 
 		/* Preamble tuning for SSC */
-		ret_val = e1e_wphy(hw, PHY_REG(770, 16), 0xA204);
+		ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204);
 		if (ret_val)
 			return ret_val;
 	}
-- 
cgit v1.1


From 0ed013e28fe853244f4972cf18d8e2bd62eeb8fc Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Fri, 29 Jul 2011 05:52:56 +0000
Subject: e1000e: workaround for packet drop on 82579 at 100Mbps

The MAC can drop short packets when the PHY detects noise on the line at
100Mbps due to a timing issue.  Workaround the issue by increasing the PLL
counter so the PHY properly recognizes the synchronization pattern from the
MAC.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/ich8lan.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 7525e37..46a5277 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -137,8 +137,9 @@
 #define HV_PM_CTRL		PHY_REG(770, 17)
 
 /* PHY Low Power Idle Control */
-#define I82579_LPI_CTRL			PHY_REG(772, 20)
-#define I82579_LPI_CTRL_ENABLE_MASK	0x6000
+#define I82579_LPI_CTRL				PHY_REG(772, 20)
+#define I82579_LPI_CTRL_ENABLE_MASK		0x6000
+#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT	0x80
 
 /* EMI Registers */
 #define I82579_EMI_ADDR         0x10
@@ -1670,6 +1671,7 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
 	s32 ret_val = 0;
 	u16 status_reg = 0;
 	u32 mac_reg;
+	u16 phy_reg;
 
 	if (hw->mac.type != e1000_pch2lan)
 		goto out;
@@ -1684,12 +1686,19 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
 		mac_reg = er32(FEXTNVM4);
 		mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
 
-		if (status_reg & HV_M_STATUS_SPEED_1000)
+		ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg);
+		if (ret_val)
+			goto out;
+
+		if (status_reg & HV_M_STATUS_SPEED_1000) {
 			mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
-		else
+			phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+		} else {
 			mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
-
+			phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+		}
 		ew32(FEXTNVM4, mac_reg);
+		ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg);
 	}
 
 out:
-- 
cgit v1.1


From c6e7f51e73c1bc6044bce989ec503ef2e4758d55 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Fri, 29 Jul 2011 05:53:02 +0000
Subject: e1000e: workaround invalid Tx/Rx tail descriptor register write

When the Manageability Engine (ME) is enabled on 82579, it periodically
accesses some MAC CSR registers.  There is an arbiter in hardware which
prevents simultaneous access of these registers by the host software, i.e.
the driver.  There is a hardware bug in the aribter that signals a host
access of the registers later than it actually happens.  A write of the
Transmit or Receive Descriptor Tail register could result in an incorrect
value if the driver and ME perform simultaneous accesses which could result
in an access to an invalid memory address.  This would return an
Unsupported Request which could hang the hardware.  Workaround the issue by
checking the FWSM register bit24 which is set by ME before it accesses the
MAC CSR registers.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/e1000e/e1000.h   |  4 +++
 drivers/net/e1000e/ich8lan.c |  5 +++
 drivers/net/e1000e/netdev.c  | 80 +++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
index 35916f4..8533ad7 100644
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -155,6 +155,9 @@ struct e1000_info;
 #define HV_M_STATUS_SPEED_1000            0x0200
 #define HV_M_STATUS_LINK_UP               0x0040
 
+#define E1000_ICH_FWSM_PCIM2PCI		0x01000000 /* ME PCIm-to-PCI active */
+#define E1000_ICH_FWSM_PCIM2PCI_COUNT	2000
+
 /* Time to wait before putting the device into D3 if there's no link (in ms). */
 #define LINK_TIMEOUT		100
 
@@ -454,6 +457,7 @@ struct e1000_info {
 #define FLAG2_DISABLE_AIM                 (1 << 8)
 #define FLAG2_CHECK_PHY_HANG              (1 << 9)
 #define FLAG2_NO_DISABLE_RX               (1 << 10)
+#define FLAG2_PCIM2PCI_ARBITER_WA         (1 << 11)
 
 #define E1000_RX_DESC_PS(R, i)	    \
 	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
index 46a5277..54add27 100644
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -814,6 +814,11 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
 	    (adapter->hw.phy.type == e1000_phy_igp_3))
 		adapter->flags |= FLAG_LSC_GIG_SPEED_DROP;
 
+	/* Enable workaround for 82579 w/ ME enabled */
+	if ((adapter->hw.mac.type == e1000_pch2lan) &&
+	    (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+		adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
+
 	/* Disable EEE by default until IEEE802.3az spec is finalized */
 	if (adapter->flags2 & FLAG2_HAS_EEE)
 		adapter->hw.dev_spec.ich8lan.eee_disable = true;
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 362f703..2198e61 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -519,6 +519,63 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
 }
 
 /**
+ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa()
+ * @hw: pointer to the HW structure
+ * @tail: address of tail descriptor register
+ * @i: value to write to tail descriptor register
+ *
+ * When updating the tail register, the ME could be accessing Host CSR
+ * registers at the same time.  Normally, this is handled in h/w by an
+ * arbiter but on some parts there is a bug that acknowledges Host accesses
+ * later than it should which could result in the descriptor register to
+ * have an incorrect value.  Workaround this by checking the FWSM register
+ * which has bit 24 set while ME is accessing Host CSR registers, wait
+ * if it is set and try again a number of times.
+ **/
+static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail,
+					unsigned int i)
+{
+	unsigned int j = 0;
+
+	while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) &&
+	       (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI))
+		udelay(50);
+
+	writel(i, tail);
+
+	if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail)))
+		return E1000_ERR_SWFW_SYNC;
+
+	return 0;
+}
+
+static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+	u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (e1000e_update_tail_wa(hw, tail, i)) {
+		u32 rctl = er32(RCTL);
+		ew32(RCTL, rctl & ~E1000_RCTL_EN);
+		e_err("ME firmware caused invalid RDT - resetting\n");
+		schedule_work(&adapter->reset_task);
+	}
+}
+
+static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+	u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (e1000e_update_tail_wa(hw, tail, i)) {
+		u32 tctl = er32(TCTL);
+		ew32(TCTL, tctl & ~E1000_TCTL_EN);
+		e_err("ME firmware caused invalid TDT - resetting\n");
+		schedule_work(&adapter->reset_task);
+	}
+}
+
+/**
  * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
  * @adapter: address of board private structure
  **/
@@ -573,7 +630,10 @@ map_skb:
 			 * such as IA-64).
 			 */
 			wmb();
-			writel(i, adapter->hw.hw_addr + rx_ring->tail);
+			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+				e1000e_update_rdt_wa(adapter, i);
+			else
+				writel(i, adapter->hw.hw_addr + rx_ring->tail);
 		}
 		i++;
 		if (i == rx_ring->count)
@@ -673,7 +733,11 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
 			 * such as IA-64).
 			 */
 			wmb();
-			writel(i << 1, adapter->hw.hw_addr + rx_ring->tail);
+			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+				e1000e_update_rdt_wa(adapter, i << 1);
+			else
+				writel(i << 1,
+				       adapter->hw.hw_addr + rx_ring->tail);
 		}
 
 		i++;
@@ -756,7 +820,10 @@ check_page:
 		 * applicable for weak-ordered memory model archs,
 		 * such as IA-64). */
 		wmb();
-		writel(i, adapter->hw.hw_addr + rx_ring->tail);
+		if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+			e1000e_update_rdt_wa(adapter, i);
+		else
+			writel(i, adapter->hw.hw_addr + rx_ring->tail);
 	}
 }
 
@@ -4689,7 +4756,12 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
 	wmb();
 
 	tx_ring->next_to_use = i;
-	writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
+	if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+		e1000e_update_tdt_wa(adapter, i);
+	else
+		writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
 	/*
 	 * we need this if more than one processor can write to our tail
 	 * at a time, it synchronizes IO on IA64/Altix systems
-- 
cgit v1.1


From aaff12039ffd812d0c8bbff50b87b6f1f09bec3e Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 7 Aug 2011 15:20:18 +0200
Subject: firewire: core: handle ack_busy when fetching the Config ROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some older Panasonic made camcorders (Panasonic AG-EZ30 and NV-DX110,
Grundig Scenos DLC 2000) reject requests with ack_busy_X if a request is
sent immediately after they sent a response to a prior transaction.
This causes firewire-core to fail probing of the camcorder with "giving
up on config rom for node id ...".  Consequently, programs like kino or
dvgrab are unaware of the presence of a camcorder.

Such transaction failures happen also with the ieee1394 driver stack
(of the 2.4...2.6 kernel series until 2.6.36 inclusive) but with a lower
likelihood, such that kino or dvgrab are generally able to use these
camcorders via the older driver stack.  The cause for firewire-ohci's or
firewire-core's worse behavior is not yet known.  Gap count optimization
in firewire-core is not the cause.  Perhaps the slightly higher latency
of transaction completion in the older stack plays a role.  (ieee1394:
AR-resp DMA context tasklet -> packet completion ktread -> user process;
firewire-core: tasklet -> user process.)

This change introduces retries and delays after ack_busy_X into
firewire-core's Config ROM reader, such that at least firewire-core's
probing and /dev/fw* creation are successful.  This still leaves the
problem that userland processes are facing transaction failures.
gscanbus's built-in retry routines deal with them successfully, but
neither kino's nor dvgrab's do ever succeed.

But at least DV capture with "dvgrab -noavc -card 0" works now.  Live
video preview in kino works too, but not actual capture.

One way to prevent Configuration ROM reading failures in application
programs is to modify libraw1394 to synthesize read responses by means
of firewire-core's Configuration ROM cache.  This would only leave
CMP and FCP transaction failures as a potential problem source for
applications.

Reported-and-tested-by: Thomas Seilund <tps@netmaster.dk>
Reported-and-tested-by: René Fritz <rene@colorcube.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 95a4714..9f661e0 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = {
 static int read_rom(struct fw_device *device,
 		    int generation, int index, u32 *data)
 {
-	int rcode;
+	u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4;
+	int i, rcode;
 
 	/* device->node_id, accessed below, must not be older than generation */
 	smp_rmb();
 
-	rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST,
-			device->node_id, generation, device->max_speed,
-			(CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4,
-			data, 4);
+	for (i = 10; i < 100; i += 10) {
+		rcode = fw_run_transaction(device->card,
+				TCODE_READ_QUADLET_REQUEST, device->node_id,
+				generation, device->max_speed, offset, data, 4);
+		if (rcode != RCODE_BUSY)
+			break;
+		msleep(i);
+	}
 	be32_to_cpus(data);
 
 	return rcode;
-- 
cgit v1.1


From 441c850857148935babe000fc2ba1455fe54a6a9 Mon Sep 17 00:00:00 2001
From: Curt Wohlgemuth <curtw@google.com>
Date: Sat, 13 Aug 2011 11:25:18 -0400
Subject: ext4: Fix ext4_should_writeback_data() for no-journal mode

ext4_should_writeback_data() had an incorrect sequence of
tests to determine if it should return 0 or 1: in
particular, even in no-journal mode, 0 was being returned
for a non-regular-file inode.

This meant that, in non-journal mode, we would use
ext4_journalled_aops for directories, symlinks, and other
non-regular files.  However, calling journalled aop
callbacks when there is no valid handle, can cause problems.

This would cause a kernel crash with Jan Kara's commit
2d859db3e4 ("ext4: fix data corruption in inodes with
journalled data"), because we now dereference 'handle' in
ext4_journalled_write_end().

I also added BUG_ONs to check for a valid handle in the
obviously journal-only aops callbacks.

I tested this running xfstests with a scratch device in
these modes:

   - no-journal
   - data=ordered
   - data=writeback
   - data=journal

All work fine; the data=journal run has many failures and a
crash in xfstests 074, but this is no different from a
vanilla kernel.

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/ext4_jbd2.h | 4 ++--
 fs/ext4/inode.c     | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757..5802fa1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode)
 
 static inline int ext4_should_writeback_data(struct inode *inode)
 {
-	if (!S_ISREG(inode->i_mode))
-		return 0;
 	if (EXT4_JOURNAL(inode) == NULL)
 		return 1;
+	if (!S_ISREG(inode->i_mode))
+		return 0;
 	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
 		return 0;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d47264c..ad3a7ca 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -983,6 +983,8 @@ static int ext4_journalled_write_end(struct file *file,
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
+	BUG_ON(!ext4_handle_valid(handle));
+
 	if (copied < len) {
 		if (!PageUptodate(page))
 			copied = 0;
@@ -1699,6 +1701,8 @@ static int __ext4_journalled_writepage(struct page *page,
 		goto out;
 	}
 
+	BUG_ON(!ext4_handle_valid(handle));
+
 	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
 				do_journal_get_write_access);
 
-- 
cgit v1.1


From 2581fdc810889fdea97689cb62481201d579c796 Mon Sep 17 00:00:00 2001
From: Jiaying Zhang <jiayingz@google.com>
Date: Sat, 13 Aug 2011 12:17:13 -0400
Subject: ext4: call ext4_ioend_wait and ext4_flush_completed_IO in
 ext4_evict_inode

Flush inode's i_completed_io_list before calling ext4_io_wait to
prevent the following deadlock scenario: A page fault happens while
some process is writing inode A. During page fault,
shrink_icache_memory is called that in turn evicts another inode
B. Inode B has some pending io_end work so it calls ext4_ioend_wait()
that waits for inode B's i_ioend_count to become zero. However, inode
B's ioend work was queued behind some of inode A's ioend work on the
same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten
thread on that cpu is processing inode A's ioend work, it tries to
grab inode A's i_mutex lock. Since the i_mutex lock of inode A is
still hold before the page fault happened, we enter a deadlock.

Also moves ext4_flush_completed_IO and ext4_ioend_wait from
ext4_destroy_inode() to ext4_evict_inode(). During inode deleteion,
ext4_evict_inode() is called before ext4_destroy_inode() and in
ext4_evict_inode(), we may call ext4_truncate() without holding
i_mutex lock. As a result, there is a race between flush_completed_IO
that is called from ext4_ext_truncate() and ext4_end_io_work, which
may cause corruption on an io_end structure. This change moves
ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode()
to ext4_evict_inode() to resolve the race between ext4_truncate() and
ext4_end_io_work during inode deletion.

Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/inode.c | 6 ++++++
 fs/ext4/super.c | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ad3a7ca..7dd6981 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode)
 	int err;
 
 	trace_ext4_evict_inode(inode);
+
+	mutex_lock(&inode->i_mutex);
+	ext4_flush_completed_IO(inode);
+	mutex_unlock(&inode->i_mutex);
+	ext4_ioend_wait(inode);
+
 	if (inode->i_nlink) {
 		/*
 		 * When journalling data dirty buffers are tracked only in the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4687fea..44d0c8d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head)
 
 static void ext4_destroy_inode(struct inode *inode)
 {
-	ext4_ioend_wait(inode);
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 		ext4_msg(inode->i_sb, KERN_ERR,
 			 "Inode %lu (%p): orphan list check failed!",
-- 
cgit v1.1


From 32c80b32c053dc52712dedac5e4d0aa7c93fc353 Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Sat, 13 Aug 2011 12:30:59 -0400
Subject: ext4: Resolve the hang of direct i/o read in handling
 EXT4_IO_END_UNWRITTEN.

EXT4_IO_END_UNWRITTEN flag set and the increase of i_aiodio_unwritten
should be done simultaneously since ext4_end_io_nolock always clear
the flag and decrease the counter in the same time.

We don't increase i_aiodio_unwritten when setting
EXT4_IO_END_UNWRITTEN so it will go nagative and causes some process
to wait forever.

Part of the patch came from Eric in his e-mail, but it doesn't fix the
problem met by Michael actually.

http://marc.info/?l=linux-ext4&m=131316851417460&w=2

Reported-and-Tested-by: Michael Tokarev<mjt@tls.msk.ru>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/inode.c   | 9 ++++++++-
 fs/ext4/page-io.c | 6 ++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7dd6981..762e803 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2678,8 +2678,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 		goto out;
 	}
 
-	io_end->flag = EXT4_IO_END_UNWRITTEN;
+	/*
+	 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
+	 * but being more careful is always safe for the future change.
+	 */
 	inode = io_end->inode;
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+	}
 
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 430c401..78839af 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -334,8 +334,10 @@ submit_and_retry:
 	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
 	    (io_end->pages[io_end->num_io_pages-1] != io_page))
 		goto submit_and_retry;
-	if (buffer_uninit(bh))
-		io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
+	if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		io_end->flag |= EXT4_IO_END_UNWRITTEN;
+		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+	}
 	io->io_end->size += bh->b_size;
 	io->io_next_block++;
 	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-- 
cgit v1.1


From 9dd75f1f1a02d656a11a7b9b9e6c2759b9c1e946 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 13 Aug 2011 12:58:21 -0400
Subject: ext4: fix nomblk_io_submit option so it correctly converts uninit
 blocks

Bug discovered by Jan Kara:

Finally, commit 1449032be17abb69116dbc393f67ceb8bd034f92 returned back
the old IO submission code but apparently it forgot to return the old
handling of uninitialized buffers so we unconditionnaly call
block_write_full_page() without specifying end_io function. So AFAICS
we never convert unwritten extents to written in some cases. For
example when I mount the fs as: mount -t ext4 -o
nomblk_io_submit,dioread_nolock /dev/ubdb /mnt and do
        int fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600);
        char buf[1024];
        memset(buf, 'a', sizeof(buf));
        fallocate(fd, 0, 0, 16384);
        write(fd, buf, sizeof(buf));

I get a file full of zeros (after remounting the filesystem so that
pagecache is dropped) instead of seeing the first KB contain 'a's.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 762e803..c4da98a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1291,7 +1291,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
 				err = ext4_bio_write_page(&io_submit, page,
 							  len, mpd->wbc);
-			else
+			else if (buffer_uninit(page_bufs)) {
+				ext4_set_bh_endio(page_bufs, inode);
+				err = block_write_full_page_endio(page,
+					noalloc_get_block_write,
+					mpd->wbc, ext4_end_io_buffer_write);
+			} else
 				err = block_write_full_page(page,
 					noalloc_get_block_write, mpd->wbc);
 
-- 
cgit v1.1


From 4eb60d869fdad7acd098b53bfd1863c311d8933d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sat, 13 Aug 2011 09:02:43 -0700
Subject: Revert "iwlagn: sysfs couldn't find the priv pointer"

This reverts commit cc1a93e68f6c0d736b771f0746e8e4186f483fdc.

This fix introduced a bug: bad pointer in unload.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-pci.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c
index 69d4ec4..fb7e436 100644
--- a/drivers/net/wireless/iwlwifi/iwl-pci.c
+++ b/drivers/net/wireless/iwlwifi/iwl-pci.c
@@ -134,7 +134,6 @@ static void iwl_pci_apm_config(struct iwl_bus *bus)
 static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data)
 {
 	bus->drv_data = drv_data;
-	pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data);
 }
 
 static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[],
@@ -455,6 +454,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		pci_write_config_word(pdev, PCI_COMMAND, pci_cmd);
 	}
 
+	pci_set_drvdata(pdev, bus);
+
 	bus->dev = &pdev->dev;
 	bus->irq = pdev->irq;
 	bus->ops = &pci_ops;
@@ -493,12 +494,11 @@ static void iwl_pci_down(struct iwl_bus *bus)
 
 static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 {
-	struct iwl_priv *priv = pci_get_drvdata(pdev);
-	void *bus_specific = priv->bus->bus_specific;
+	struct iwl_bus *bus = pci_get_drvdata(pdev);
 
-	iwl_remove(priv);
+	iwl_remove(bus->drv_data);
 
-	iwl_pci_down(bus_specific);
+	iwl_pci_down(bus);
 }
 
 #ifdef CONFIG_PM
@@ -506,20 +506,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 static int iwl_pci_suspend(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
-	struct iwl_priv *priv = pci_get_drvdata(pdev);
+	struct iwl_bus *bus = pci_get_drvdata(pdev);
 
 	/* Before you put code here, think about WoWLAN. You cannot check here
 	 * whether WoWLAN is enabled or not, and your code will run even if
 	 * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx.
 	 */
 
-	return iwl_suspend(priv);
+	return iwl_suspend(bus->drv_data);
 }
 
 static int iwl_pci_resume(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
-	struct iwl_priv *priv = pci_get_drvdata(pdev);
+	struct iwl_bus *bus = pci_get_drvdata(pdev);
 
 	/* Before you put code here, think about WoWLAN. You cannot check here
 	 * whether WoWLAN is enabled or not, and your code will run even if
@@ -532,7 +532,7 @@ static int iwl_pci_resume(struct device *device)
 	 */
 	pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00);
 
-	return iwl_resume(priv);
+	return iwl_resume(bus->drv_data);
 }
 
 static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume);
-- 
cgit v1.1


From 16a9d06c753abc44f66f88e03bbecb3f1e45d71b Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sat, 13 Aug 2011 09:02:44 -0700
Subject: iwlagn: sysfs couldn't find the priv pointer

This bug has been introduced by:
d593411084a56124aa9d80aafa15db8463b2d8f7
Author: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date:   Mon Jul 11 10:48:51 2011 +0300

    iwlagn: simplify the bus architecture

Revert part of the buggy patch: dev_get_drvdata will now return
iwl_priv as it did before the patch.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwlwifi/iwl-pci.c | 39 +++++++++++++++-------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c
index fb7e436..2fdbffa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-pci.c
+++ b/drivers/net/wireless/iwlwifi/iwl-pci.c
@@ -134,6 +134,7 @@ static void iwl_pci_apm_config(struct iwl_bus *bus)
 static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data)
 {
 	bus->drv_data = drv_data;
+	pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data);
 }
 
 static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[],
@@ -454,8 +455,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		pci_write_config_word(pdev, PCI_COMMAND, pci_cmd);
 	}
 
-	pci_set_drvdata(pdev, bus);
-
 	bus->dev = &pdev->dev;
 	bus->irq = pdev->irq;
 	bus->ops = &pci_ops;
@@ -479,26 +478,22 @@ out_no_pci:
 	return err;
 }
 
-static void iwl_pci_down(struct iwl_bus *bus)
-{
-	struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific;
-
-	pci_disable_msi(pci_bus->pci_dev);
-	pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base);
-	pci_release_regions(pci_bus->pci_dev);
-	pci_disable_device(pci_bus->pci_dev);
-	pci_set_drvdata(pci_bus->pci_dev, NULL);
-
-	kfree(bus);
-}
-
 static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 {
-	struct iwl_bus *bus = pci_get_drvdata(pdev);
+	struct iwl_priv *priv = pci_get_drvdata(pdev);
+	struct iwl_bus *bus = priv->bus;
+	struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus);
+	struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus);
 
-	iwl_remove(bus->drv_data);
+	iwl_remove(priv);
 
-	iwl_pci_down(bus);
+	pci_disable_msi(pci_dev);
+	pci_iounmap(pci_dev, pci_bus->hw_base);
+	pci_release_regions(pci_dev);
+	pci_disable_device(pci_dev);
+	pci_set_drvdata(pci_dev, NULL);
+
+	kfree(bus);
 }
 
 #ifdef CONFIG_PM
@@ -506,20 +501,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev)
 static int iwl_pci_suspend(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
-	struct iwl_bus *bus = pci_get_drvdata(pdev);
+	struct iwl_priv *priv = pci_get_drvdata(pdev);
 
 	/* Before you put code here, think about WoWLAN. You cannot check here
 	 * whether WoWLAN is enabled or not, and your code will run even if
 	 * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx.
 	 */
 
-	return iwl_suspend(bus->drv_data);
+	return iwl_suspend(priv);
 }
 
 static int iwl_pci_resume(struct device *device)
 {
 	struct pci_dev *pdev = to_pci_dev(device);
-	struct iwl_bus *bus = pci_get_drvdata(pdev);
+	struct iwl_priv *priv = pci_get_drvdata(pdev);
 
 	/* Before you put code here, think about WoWLAN. You cannot check here
 	 * whether WoWLAN is enabled or not, and your code will run even if
@@ -532,7 +527,7 @@ static int iwl_pci_resume(struct device *device)
 	 */
 	pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00);
 
-	return iwl_resume(bus->drv_data);
+	return iwl_resume(priv);
 }
 
 static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume);
-- 
cgit v1.1


From f6957f88e59df5008f7b2169400be657f81cdb80 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 7 Aug 2011 23:15:47 +0000
Subject: vmxnet3: Don't enable vlan filters in promiscuous mode.

The vmxnet3 driver enables vlan filters if filtering is enabled for
any vlan.  In promiscuous mode the filter table is cleared to in
order to disable filtering.  However, if a vlan device is subsequently
created that vlan will be added to the filter, re-engaging it.  As a
result, not only do we not see all the vlans in promiscuous mode, we
don't even see vlans for which a filter was previously created.

CC: Scott J. Goldman <scottjg@vmware.com>
CC: Shreyas Bhatewara <sbhatewara@vmware.com>
CC: VMware PV-Drivers <pv-drivers@vmware.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1cbacb3..0959583 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1929,14 +1929,17 @@ static void
 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-	unsigned long flags;
 
-	VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
-	spin_lock_irqsave(&adapter->cmd_lock, flags);
-	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+	if (!(netdev->flags & IFF_PROMISC)) {
+		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+		unsigned long flags;
+
+		VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
+		spin_lock_irqsave(&adapter->cmd_lock, flags);
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+	}
 
 	set_bit(vid, adapter->active_vlans);
 }
@@ -1946,14 +1949,17 @@ static void
 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-	unsigned long flags;
 
-	VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
-	spin_lock_irqsave(&adapter->cmd_lock, flags);
-	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-			       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+	if (!(netdev->flags & IFF_PROMISC)) {
+		u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+		unsigned long flags;
+
+		VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
+		spin_lock_irqsave(&adapter->cmd_lock, flags);
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+	}
 
 	clear_bit(vid, adapter->active_vlans);
 }
-- 
cgit v1.1


From 75bc8ef528f7c4ea7e80384c5593487b6b3b535e Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Mon, 8 Aug 2011 02:34:07 +0000
Subject: usbnet/cdc_ncm: Don't use stack variables for DMA

The cdc_ncm driver still has a few places where stack variables are
passed to the cdc_ncm_do_request function.  This triggers a stack trace in
lib/dma-debug.c if the CONFIG_DEBUG_DMA_API option is set.

Adjust these calls to pass parameters that have been allocated with
kzalloc.

Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 47 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a03336e..f06fb78 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -228,23 +228,40 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 	if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
 
 		if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) {
-			struct usb_cdc_ncm_ndp_input_size ndp_in_sz;
+			struct usb_cdc_ncm_ndp_input_size *ndp_in_sz;
+
+			ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL);
+			if (!ndp_in_sz) {
+				err = -ENOMEM;
+				goto size_err;
+			}
+
 			err = usb_control_msg(ctx->udev,
 					usb_sndctrlpipe(ctx->udev, 0),
 					USB_CDC_SET_NTB_INPUT_SIZE,
 					USB_TYPE_CLASS | USB_DIR_OUT
 					 | USB_RECIP_INTERFACE,
-					0, iface_no, &ndp_in_sz, 8, 1000);
+					0, iface_no, ndp_in_sz, 8, 1000);
+			kfree(ndp_in_sz);
 		} else {
-			__le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+			__le32 *dwNtbInMaxSize;
+			dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize),
+					GFP_KERNEL);
+			if (!dwNtbInMaxSize) {
+				err = -ENOMEM;
+				goto size_err;
+			}
+			*dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+
 			err = usb_control_msg(ctx->udev,
 					usb_sndctrlpipe(ctx->udev, 0),
 					USB_CDC_SET_NTB_INPUT_SIZE,
 					USB_TYPE_CLASS | USB_DIR_OUT
 					 | USB_RECIP_INTERFACE,
-					0, iface_no, &dwNtbInMaxSize, 4, 1000);
+					0, iface_no, dwNtbInMaxSize, 4, 1000);
+			kfree(dwNtbInMaxSize);
 		}
-
+size_err:
 		if (err < 0)
 			pr_debug("Setting NTB Input Size failed\n");
 	}
@@ -325,19 +342,29 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 
 	/* set Max Datagram Size (MTU) */
 	if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) {
-		__le16 max_datagram_size;
+		__le16 *max_datagram_size;
 		u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+
+		max_datagram_size = kzalloc(sizeof(*max_datagram_size),
+				GFP_KERNEL);
+		if (!max_datagram_size) {
+			err = -ENOMEM;
+			goto max_dgram_err;
+		}
+
 		err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0),
 				USB_CDC_GET_MAX_DATAGRAM_SIZE,
 				USB_TYPE_CLASS | USB_DIR_IN
 				 | USB_RECIP_INTERFACE,
-				0, iface_no, &max_datagram_size,
+				0, iface_no, max_datagram_size,
 				2, 1000);
 		if (err < 0) {
 			pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n",
 						CDC_NCM_MIN_DATAGRAM_SIZE);
+			kfree(max_datagram_size);
 		} else {
-			ctx->max_datagram_size = le16_to_cpu(max_datagram_size);
+			ctx->max_datagram_size =
+				le16_to_cpu(*max_datagram_size);
 			/* Check Eth descriptor value */
 			if (eth_max_sz < CDC_NCM_MAX_DATAGRAM_SIZE) {
 				if (ctx->max_datagram_size > eth_max_sz)
@@ -360,8 +387,10 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
 						USB_TYPE_CLASS | USB_DIR_OUT
 						 | USB_RECIP_INTERFACE,
 						0,
-						iface_no, &max_datagram_size,
+						iface_no, max_datagram_size,
 						2, 1000);
+			kfree(max_datagram_size);
+max_dgram_err:
 			if (err < 0)
 				pr_debug("SET_MAX_DATAGRAM_SIZE failed\n");
 		}
-- 
cgit v1.1


From 951f2f960e5bbce20309de44626cf11d17847712 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Mon, 8 Aug 2011 06:28:50 +0000
Subject: drivers/net/can/sja1000/plx_pci.c: eliminate double free

In this code, the failure_cleanup label calls the function
plx_pci_del_card, which frees everything in the card->net_dev array.  dev
is placed in this array immediately after allocation, so the two subsequent
jumps to failure_cleanup should not also call free_sja1000dev, but the
second one does.

If plx_pci_check_sja1000 fails, then free_sja1000dev is also called on
dev.  Because dev is already in the card->net_dev array, this implies that
when plx_pci_del_card is later called, it may get freed again.  So that
entry is reset to NULL after the free.

Finally, if there is a problem with one channel, there will be a hole in the
array.  card->channels counts the number of channels that have succeeded,
and does not keep track of the index of the largest element in the array
that is valid.  So the loop in plx_pci_del_card is changed to go up to
PLX_PCI_MAX_CHAN, which is only 2.

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/sja1000/plx_pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c
index 231385b..c7f3d4e 100644
--- a/drivers/net/can/sja1000/plx_pci.c
+++ b/drivers/net/can/sja1000/plx_pci.c
@@ -408,7 +408,7 @@ static void plx_pci_del_card(struct pci_dev *pdev)
 	struct sja1000_priv *priv;
 	int i = 0;
 
-	for (i = 0; i < card->channels; i++) {
+	for (i = 0; i < PLX_PCI_MAX_CHAN; i++) {
 		dev = card->net_dev[i];
 		if (!dev)
 			continue;
@@ -536,7 +536,6 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
 			if (err) {
 				dev_err(&pdev->dev, "Registering device failed "
 					"(err=%d)\n", err);
-				free_sja1000dev(dev);
 				goto failure_cleanup;
 			}
 
@@ -549,6 +548,7 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
 			dev_err(&pdev->dev, "Channel #%d not detected\n",
 				i + 1);
 			free_sja1000dev(dev);
+			card->net_dev[i] = NULL;
 		}
 	}
 
-- 
cgit v1.1


From 6a27cdeddf48858089e3672f844615cbf0877ebf Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Date: Tue, 9 Aug 2011 20:15:50 +0000
Subject: net: sh_eth: Fix build by forgot including linux/interrupt.h

By a6b7a407865aab9f849dd99a71072b7cd1175116, remove interrupt.h
from netdevice.h. But this forget to revise sh_eth.

This fix the build failure.

error: expected '=', ',', ';', 'asm' or '__attribute__' before 'sh_eth_interrupt'
error: implicit declaration of function 'request_irq'
error: 'sh_eth_interrupt' undeclared (first use in this function)
error: (Each undeclared identifier is reported only once
	drivers/net/sh_eth.c:1386: error: for each function it appears in.)
error: 'IRQF_SHARED' undeclared (first use in this function)
error: implicit declaration of function 'free_irq'

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sh_eth.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index ad35c21..190f619 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/delay.h>
-- 
cgit v1.1


From 32f7fd44ce3aae9ad204fb167d793c335608568d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Aug 2011 01:19:48 +0000
Subject: gianfar: prevent buggy hw rx vlan tagging

On some buggy chips, "vlan tag present" flag is set which causes packet
loss. Fix this by checking if rx vlan accel is enabled in features.

Reported-by: Michael Guntsche <mguntsche@gmail.com>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 2659daa..31d5c57 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2710,8 +2710,13 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
 	/* Tell the skb what kind of packet this is */
 	skb->protocol = eth_type_trans(skb, dev);
 
-	/* Set vlan tag */
-	if (fcb->flags & RXFCB_VLN)
+	/*
+	 * There's need to check for NETIF_F_HW_VLAN_RX here.
+	 * Even if vlan rx accel is disabled, on some chips
+	 * RXFCB_VLN is pseudo randomly set.
+	 */
+	if (dev->features & NETIF_F_HW_VLAN_RX &&
+	    fcb->flags & RXFCB_VLN)
 		__vlan_hwaccel_put_tag(skb, fcb->vlctl);
 
 	/* Send the packet up the stack */
-- 
cgit v1.1


From b88cf73d9278a5838e3ac2b670ab3b4ff533ea17 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 11 Aug 2011 14:39:59 +0000
Subject: net: add missing entries to Documentation/networking/00-INDEX

A simple janitor duty patch that adds a one sentence overview to
00-INDEX for all files that lacked it.

- does not add entries for subdirectories
- does not modify existing entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/00-INDEX | 116 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 4edd78d..811252b 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -1,13 +1,21 @@
 00-INDEX
 	- this file
+3c359.txt
+	- information on the 3Com TokenLink Velocity XL (3c5359) driver.
 3c505.txt
 	- information on the 3Com EtherLink Plus (3c505) driver.
+3c509.txt
+	- information on the 3Com Etherlink III Series Ethernet cards.
 6pack.txt
 	- info on the 6pack protocol, an alternative to KISS for AX.25
 DLINK.txt
 	- info on the D-Link DE-600/DE-620 parallel port pocket adapters
 PLIP.txt
 	- PLIP: The Parallel Line Internet Protocol device driver
+README.ipw2100
+	- README for the Intel PRO/Wireless 2100 driver.
+README.ipw2200
+	- README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
 README.sb1000
 	- info on General Instrument/NextLevel SURFboard1000 cable modem.
 alias.txt
@@ -20,8 +28,12 @@ atm.txt
 	- info on where to get ATM programs and support for Linux.
 ax25.txt
 	- info on using AX.25 and NET/ROM code for Linux
+batman-adv.txt
+	- B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
 baycom.txt
 	- info on the driver for Baycom style amateur radio modems
+bonding.txt
+	- Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
 bridge.txt
 	- where to get user space programs for ethernet bridging with Linux.
 can.txt
@@ -34,32 +46,60 @@ cxacru.txt
 	- Conexant AccessRunner USB ADSL Modem
 cxacru-cf.py
 	- Conexant AccessRunner USB ADSL Modem configuration file parser
+cxgb.txt
+	- Release Notes for the Chelsio N210 Linux device driver.
+dccp.txt
+	- the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42).
 de4x5.txt
 	- the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver
 decnet.txt
 	- info on using the DECnet networking layer in Linux.
 depca.txt
 	- the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
+dl2k.txt
+	- README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko).
+dm9000.txt
+	- README for the Simtec DM9000 Network driver.
 dmfe.txt
 	- info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
+dns_resolver.txt
+	- The DNS resolver module allows kernel servies to make DNS queries.
+driver.txt
+	- Softnet driver issues.
 e100.txt
 	- info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
 	- info on Intel's E1000 line of gigabit ethernet boards
+e1000e.txt
+	- README for the Intel Gigabit Ethernet Driver (e1000e).
 eql.txt
 	- serial IP load balancing
 ewrk3.txt
 	- the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
+fib_trie.txt
+	- Level Compressed Trie (LC-trie) notes: a structure for routing.
 filter.txt
 	- Linux Socket Filtering
 fore200e.txt
 	- FORE Systems PCA-200E/SBA-200E ATM NIC driver info.
 framerelay.txt
 	- info on using Frame Relay/Data Link Connection Identifier (DLCI).
+gen_stats.txt
+	- Generic networking statistics for netlink users.
+generic_hdlc.txt
+	- The generic High Level Data Link Control (HDLC) layer.
 generic_netlink.txt
 	- info on Generic Netlink
+gianfar.txt
+	- Gianfar Ethernet Driver.
 ieee802154.txt
 	- Linux IEEE 802.15.4 implementation, API and drivers
+ifenslave.c
+	- Configure network interfaces for parallel routing (bonding).
+igb.txt
+	- README for the Intel Gigabit Ethernet Driver (igb).
+igbvf.txt
+	- README for the Intel Gigabit Ethernet Driver (igbvf).
 ip-sysctl.txt
 	- /proc/sys/net/ipv4/* variables
 ip_dynaddr.txt
@@ -68,41 +108,117 @@ ipddp.txt
 	- AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
 iphase.txt
 	- Interphase PCI ATM (i)Chip IA Linux driver info.
+ipv6.txt
+	- Options to the ipv6 kernel module.
+ipvs-sysctl.txt
+	- Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
 irda.txt
 	- where to get IrDA (infrared) utilities and info for Linux.
+ixgb.txt
+	- README for the Intel 10 Gigabit Ethernet Driver (ixgb).
+ixgbe.txt
+	- README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
+ixgbevf.txt
+	- README for the Intel Virtual Function (VF) Driver (ixgbevf).
+l2tp.txt
+	- User guide to the L2TP tunnel protocol.
 lapb-module.txt
 	- programming information of the LAPB module.
 ltpc.txt
 	- the Apple or Farallon LocalTalk PC card driver
+mac80211-injection.txt
+	- HOWTO use packet injection with mac80211
 multicast.txt
 	- Behaviour of cards under Multicast
+multiqueue.txt
+	- HOWTO for multiqueue network device support.
+netconsole.txt
+	- The network console module netconsole.ko: configuration and notes.
+netdev-features.txt
+	- Network interface "feature mess and how to get out from it alive".
 netdevices.txt
 	- info on network device driver functions exported to the kernel.
+netif-msg.txt
+	- Design of the network interface message level setting (NETIF_MSG_*).
+nfc.txt
+	- The Linux Near Field Communication (NFS) subsystem.
 olympic.txt
 	- IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+operstates.txt
+	- Overview of network interface operational states.
+packet_mmap.txt
+	- User guide to memory mapped packet socket rings (PACKET_[RT]X_RING).
+phonet.txt
+	- The Phonet packet protocol used in Nokia cellular modems.
+phy.txt
+	- The PHY abstraction layer.
+pktgen.txt
+	- User guide to the kernel packet generator (pktgen.ko).
 policy-routing.txt
 	- IP policy-based routing
+ppp_generic.txt
+	- Information about the generic PPP driver.
+proc_net_tcp.txt
+	- Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces.
+radiotap-headers.txt
+	- Background on radiotap headers.
 ray_cs.txt
 	- Raylink Wireless LAN card driver info.
+rds.txt
+	- Background on the reliable, ordered datagram delivery method RDS.
+regulatory.txt
+	- Overview of the Linux wireless regulatory infrastructure.
+rxrpc.txt
+	- Guide to the RxRPC protocol.
+s2io.txt
+	- Release notes for Neterion Xframe I/II 10GbE driver.
+scaling.txt
+	- Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS.
+sctp.txt
+	- Notes on the Linux kernel implementation of the SCTP protocol.
+secid.txt
+	- Explanation of the secid member in flow structures.
 skfp.txt
 	- SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
 smc9.txt
 	- the driver for SMC's 9000 series of Ethernet cards
 smctr.txt
 	- SMC TokenCard TokenRing Linux driver info.
+spider-net.txt
+	- README for the Spidernet Driver (as found in PS3 / Cell BE).
+stmmac.txt
+	- README for the STMicro Synopsys Ethernet driver.
+tc-actions-env-rules.txt
+	- rules for traffic control (tc) actions.
+timestamping.txt
+	- overview of network packet timestamping variants.
 tcp.txt
 	- short blurb on how TCP output takes place.
+tcp-thin.txt
+	- kernel tuning options for low rate 'thin' TCP streams.
 tlan.txt
 	- ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info.
 tms380tr.txt
 	- SysKonnect Token Ring ISA/PCI adapter driver info.
+tproxy.txt
+	- Transparent proxy support user guide.
 tuntap.txt
 	- TUN/TAP device driver, allowing user space Rx/Tx of packets.
+udplite.txt
+	- UDP-Lite protocol (RFC 3828) introduction.
 vortex.txt
 	- info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
+vxge.txt
+	- README for the Neterion X3100 PCIe Server Adapter.
 x25.txt
 	- general info on X.25 development.
 x25-iface.txt
 	- description of the X.25 Packet Layer to LAPB device interface.
+xfrm_proc.txt
+	- description of the statistics package for XFRM.
+xfrm_sync.txt
+	- sync patches for XFRM enable migration of an SA between hosts.
+xfrm_sysctl.txt
+	- description of the XFRM configuration options.
 z8530drv.txt
 	- info about Linux driver for Z8530 based HDLC cards for AX.25
-- 
cgit v1.1


From 320f24e482e6b390c608c6afec253405f9ab7436 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 11 Aug 2011 14:41:48 +0000
Subject: net: minor update to Documentation/networking/scaling.txt

Incorporate last comments about hyperthreading, interrupt coalescing and
the definition of cache domains into the network scaling document scaling.txt

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/scaling.txt | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
index 7254b4b..58fd741 100644
--- a/Documentation/networking/scaling.txt
+++ b/Documentation/networking/scaling.txt
@@ -52,7 +52,8 @@ module parameter for specifying the number of hardware queues to
 configure. In the bnx2x driver, for instance, this parameter is called
 num_queues. A typical RSS configuration would be to have one receive queue
 for each CPU if the device supports enough queues, or otherwise at least
-one for each cache domain at a particular cache level (L1, L2, etc.).
+one for each memory domain, where a memory domain is a set of CPUs that
+share a particular memory level (L1, L2, NUMA node, etc.).
 
 The indirection table of an RSS device, which resolves a queue by masked
 hash, is usually programmed by the driver at initialization. The
@@ -82,11 +83,17 @@ RSS should be enabled when latency is a concern or whenever receive
 interrupt processing forms a bottleneck. Spreading load between CPUs
 decreases queue length. For low latency networking, the optimal setting
 is to allocate as many queues as there are CPUs in the system (or the
-NIC maximum, if lower). Because the aggregate number of interrupts grows
-with each additional queue, the most efficient high-rate configuration
+NIC maximum, if lower). The most efficient high-rate configuration
 is likely the one with the smallest number of receive queues where no
-CPU that processes receive interrupts reaches 100% utilization. Per-cpu
-load can be observed using the mpstat utility.
+receive queue overflows due to a saturated CPU, because in default
+mode with interrupt coalescing enabled, the aggregate number of
+interrupts (and thus work) grows with each additional queue.
+
+Per-cpu load can be observed using the mpstat utility, but note that on
+processors with hyperthreading (HT), each hyperthread is represented as
+a separate CPU. For interrupt handling, HT has shown no benefit in
+initial tests, so limit the number of queues to the number of CPU cores
+in the system.
 
 
 RPS: Receive Packet Steering
@@ -145,7 +152,7 @@ the bitmap.
 == Suggested Configuration
 
 For a single queue device, a typical RPS configuration would be to set
-the rps_cpus to the CPUs in the same cache domain of the interrupting
+the rps_cpus to the CPUs in the same memory domain of the interrupting
 CPU. If NUMA locality is not an issue, this could also be all CPUs in
 the system. At high interrupt rate, it might be wise to exclude the
 interrupting CPU from the map since that already performs much work.
@@ -154,7 +161,7 @@ For a multi-queue system, if RSS is configured so that a hardware
 receive queue is mapped to each CPU, then RPS is probably redundant
 and unnecessary. If there are fewer hardware queues than CPUs, then
 RPS might be beneficial if the rps_cpus for each queue are the ones that
-share the same cache domain as the interrupting CPU for that queue.
+share the same memory domain as the interrupting CPU for that queue.
 
 
 RFS: Receive Flow Steering
@@ -326,7 +333,7 @@ The queue chosen for transmitting a particular flow is saved in the
 corresponding socket structure for the flow (e.g. a TCP connection).
 This transmit queue is used for subsequent packets sent on the flow to
 prevent out of order (ooo) packets. The choice also amortizes the cost
-of calling get_xps_queues() over all packets in the connection. To avoid
+of calling get_xps_queues() over all packets in the flow. To avoid
 ooo packets, the queue for a flow can subsequently only be changed if
 skb->ooo_okay is set for a packet in the flow. This flag indicates that
 there are no outstanding packets in the flow, so the transmit queue can
-- 
cgit v1.1


From 588dc91151d99e9307c2f9a8468453274fe43ecd Mon Sep 17 00:00:00 2001
From: Wang Shaoyan <wangshaoyan.pt@taobao.com>
Date: Thu, 11 Aug 2011 17:07:25 +0000
Subject: gianfar: reduce stack usage in gianfar_ethtool.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  drivers/net/gianfar_ethtool.c:765: warning: the frame size of 2048 bytes is larger than 1024 bytes

Signed-off-by: Wang Shaoyan <wangshaoyan.pt@taobao.com>
Reviewed-and-tested-by: Sebastian Pöhn <sebastian.poehn@belden.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar_ethtool.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index 6e35069..25a8c2a 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -686,10 +686,21 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 {
 	unsigned int last_rule_idx = priv->cur_filer_idx;
 	unsigned int cmp_rqfpr;
-	unsigned int local_rqfpr[MAX_FILER_IDX + 1];
-	unsigned int local_rqfcr[MAX_FILER_IDX + 1];
+	unsigned int *local_rqfpr;
+	unsigned int *local_rqfcr;
 	int i = 0x0, k = 0x0;
 	int j = MAX_FILER_IDX, l = 0x0;
+	int ret = 1;
+
+	local_rqfpr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+		GFP_KERNEL);
+	local_rqfcr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+		GFP_KERNEL);
+	if (!local_rqfpr || !local_rqfcr) {
+		pr_err("Out of memory\n");
+		ret = 0;
+		goto err;
+	}
 
 	switch (class) {
 	case TCP_V4_FLOW:
@@ -706,7 +717,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 		break;
 	default:
 		pr_err("Right now this class is not supported\n");
-		return 0;
+		ret = 0;
+		goto err;
 	}
 
 	for (i = 0; i < MAX_FILER_IDX + 1; i++) {
@@ -721,7 +733,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 
 	if (i == MAX_FILER_IDX + 1) {
 		pr_err("No parse rule found, can't create hash rules\n");
-		return 0;
+		ret = 0;
+		goto err;
 	}
 
 	/* If a match was found, then it begins the starting of a cluster rule
@@ -765,7 +778,10 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
 		priv->cur_filer_idx = priv->cur_filer_idx - 1;
 	}
 
-	return 1;
+err:
+	kfree(local_rqfcr);
+	kfree(local_rqfpr);
+	return ret;
 }
 
 static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd)
-- 
cgit v1.1


From a115c72802c37351b6d87dfb62938d2ad440eef4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 4 Aug 2011 13:23:38 +0900
Subject: ASoC: Move WM8962 CLKREG_OVD earlier

When the clocking registers are not overriden some of the registers are
not writable.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Cc: stable@kernel.org
---
 sound/soc/codecs/wm8962.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 60d740e..28650ed 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -2927,10 +2927,6 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec,
 					    WM8962_BIAS_ENA | 0x180);
 
 			msleep(5);
-
-			snd_soc_update_bits(codec, WM8962_CLOCKING2,
-					    WM8962_CLKREG_OVD,
-					    WM8962_CLKREG_OVD);
 		}
 
 		/* VMID 2*250k */
@@ -3868,6 +3864,10 @@ static int wm8962_probe(struct snd_soc_codec *codec)
 	 */
 	snd_soc_update_bits(codec, WM8962_CLOCKING2, WM8962_SYSCLK_ENA, 0);
 
+	/* Ensure we have soft control over all registers */
+	snd_soc_update_bits(codec, WM8962_CLOCKING2,
+			    WM8962_CLKREG_OVD, WM8962_CLKREG_OVD);
+
 	regulator_bulk_disable(ARRAY_SIZE(wm8962->supplies), wm8962->supplies);
 
 	if (pdata) {
-- 
cgit v1.1


From fd049755636a8b2cc084e088967dd566467ccebc Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Fri, 12 Aug 2011 17:52:59 +0300
Subject: ASoC: h1940: Fix compilation error due to missing header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add linux/types.h to fix this compilation error:

In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0,
                 from arch/arm/mach-s3c2410/include/mach/gpio.h:27,
                 from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5,
                 from include/linux/gpio.h:18,
                 from sound/soc/samsung/rx1950_uda1380.c:20:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function)
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here
arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/samsung/h1940_uda1380.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/samsung/h1940_uda1380.c b/sound/soc/samsung/h1940_uda1380.c
index 241f55d..c6c6589 100644
--- a/sound/soc/samsung/h1940_uda1380.c
+++ b/sound/soc/samsung/h1940_uda1380.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
-- 
cgit v1.1


From b8487928f5ca2976e4cb8d329943af849d2b6197 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Fri, 12 Aug 2011 17:53:00 +0300
Subject: ASoC: rx1950: Fix compilation error due to missing header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add linux/types.h to fix this compilation error:

In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0,
                 from arch/arm/mach-s3c2410/include/mach/gpio.h:27,
                 from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5,
                 from include/linux/gpio.h:18,
                 from sound/soc/samsung/rx1950_uda1380.c:20:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’
arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function)
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant
arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here
arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level:
arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’
arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/samsung/rx1950_uda1380.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/samsung/rx1950_uda1380.c b/sound/soc/samsung/rx1950_uda1380.c
index 1e574a5..bc8c167 100644
--- a/sound/soc/samsung/rx1950_uda1380.c
+++ b/sound/soc/samsung/rx1950_uda1380.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include <linux/types.h>
 #include <linux/gpio.h>
 
 #include <sound/soc.h>
-- 
cgit v1.1


From da6094ea7d3c2295473d8f5134279307255d6ebf Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Sun, 14 Aug 2011 11:31:16 +0200
Subject: ALSA: snd_usb_caiaq: track submitted output urbs

The snd_usb_caiaq driver currently assumes that output urbs are serviced
in time and doesn't track when and whether they are given back by the
USB core. That usually works fine, but due to temporary limitations of
the XHCI stack, we faced that urbs were submitted more than once with
this approach.

As it's no good practice to fire and forget urbs anyway, this patch
introduces a proper bit mask to track which requests have been submitted
and given back.

That alone however doesn't make the driver work in case the host
controller is broken and doesn't give back urbs at all, and the output
stream will stop once all pre-allocated output urbs are consumed. But
it does prevent crashes of the controller stack in such cases.

See http://bugzilla.kernel.org/show_bug.cgi?id=40702 for more details.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reported-and-tested-by: Matej Laitl <matej@laitl.cz>
Cc: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/caiaq/audio.c  | 31 +++++++++++++++++++++++++++----
 sound/usb/caiaq/device.h |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index aa52b3e..2cf87f5 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -139,8 +139,12 @@ static void stream_stop(struct snd_usb_caiaqdev *dev)
 
 	for (i = 0; i < N_URBS; i++) {
 		usb_kill_urb(dev->data_urbs_in[i]);
-		usb_kill_urb(dev->data_urbs_out[i]);
+
+		if (test_bit(i, &dev->outurb_active_mask))
+			usb_kill_urb(dev->data_urbs_out[i]);
 	}
+
+	dev->outurb_active_mask = 0;
 }
 
 static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream)
@@ -612,8 +616,8 @@ static void read_completed(struct urb *urb)
 {
 	struct snd_usb_caiaq_cb_info *info = urb->context;
 	struct snd_usb_caiaqdev *dev;
-	struct urb *out;
-	int frame, len, send_it = 0, outframe = 0;
+	struct urb *out = NULL;
+	int i, frame, len, send_it = 0, outframe = 0;
 	size_t offset = 0;
 
 	if (urb->status || !info)
@@ -624,7 +628,17 @@ static void read_completed(struct urb *urb)
 	if (!dev->streaming)
 		return;
 
-	out = dev->data_urbs_out[info->index];
+	/* find an unused output urb that is unused */
+	for (i = 0; i < N_URBS; i++)
+		if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) {
+			out = dev->data_urbs_out[i];
+			break;
+		}
+
+	if (!out) {
+		log("Unable to find an output urb to use\n");
+		goto requeue;
+	}
 
 	/* read the recently received packet and send back one which has
 	 * the same layout */
@@ -655,8 +669,12 @@ static void read_completed(struct urb *urb)
 		out->number_of_packets = outframe;
 		out->transfer_flags = URB_ISO_ASAP;
 		usb_submit_urb(out, GFP_ATOMIC);
+	} else {
+		struct snd_usb_caiaq_cb_info *oinfo = out->context;
+		clear_bit(oinfo->index, &dev->outurb_active_mask);
 	}
 
+requeue:
 	/* re-submit inbound urb */
 	for (frame = 0; frame < FRAMES_PER_URB; frame++) {
 		urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame;
@@ -678,6 +696,8 @@ static void write_completed(struct urb *urb)
 		dev->output_running = 1;
 		wake_up(&dev->prepare_wait_queue);
 	}
+
+	clear_bit(info->index, &dev->outurb_active_mask);
 }
 
 static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret)
@@ -829,6 +849,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
 	if (!dev->data_cb_info)
 		return -ENOMEM;
 
+	dev->outurb_active_mask = 0;
+	BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8));
+
 	for (i = 0; i < N_URBS; i++) {
 		dev->data_cb_info[i].dev = dev;
 		dev->data_cb_info[i].index = i;
diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h
index b2b3101..3f9c633 100644
--- a/sound/usb/caiaq/device.h
+++ b/sound/usb/caiaq/device.h
@@ -96,6 +96,7 @@ struct snd_usb_caiaqdev {
 	int input_panic, output_panic, warned;
 	char *audio_in_buf, *audio_out_buf;
 	unsigned int samplerates, bpp;
+	unsigned long outurb_active_mask;
 
 	struct snd_pcm_substream *sub_playback[MAX_STREAMS];
 	struct snd_pcm_substream *sub_capture[MAX_STREAMS];
-- 
cgit v1.1


From eade7b281c9fc18401b989c77d5e5e660b25a3b7 Mon Sep 17 00:00:00 2001
From: Daniel T Chen <crimsun@ubuntu.com>
Date: Sun, 14 Aug 2011 22:43:01 -0400
Subject: ALSA: ac97: Add HP Compaq dc5100 SFF(PT003AW) to Headphone Jack Sense
 whitelist

BugLink: https://bugs.launchpad.net/bugs/826081

The original reporter needs 'Headphone Jack Sense' enabled to have
audible audio, so add his PCI SSID to the whitelist.

Reported-and-tested-by: Muhammad Khurram Khan
Cc: <stable@kernel.org>
Signed-off-by: Daniel T Chen <crimsun@ubuntu.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ac97/ac97_patch.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
index 200c9a1..a872d0a 100644
--- a/sound/pci/ac97/ac97_patch.c
+++ b/sound/pci/ac97/ac97_patch.c
@@ -1909,6 +1909,7 @@ static unsigned int ad1981_jacks_whitelist[] = {
 	0x103c0944, /* HP nc6220 */
 	0x103c0934, /* HP nc8220 */
 	0x103c006d, /* HP nx9105 */
+	0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */
 	0x17340088, /* FSC Scenic-W */
 	0 /* end */
 };
-- 
cgit v1.1


From d2b4c7bd7eabfaa2e3e5b8107d5eeb56ac879813 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Sat, 13 Aug 2011 19:15:01 +0800
Subject: ASoC: soc-jack: Fix checking return value of request_any_context_irq

request_any_context_irq() returns a negative value on failure.
On success, it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.orG
---
 sound/soc/soc-jack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c
index 7c17b98..38b0013 100644
--- a/sound/soc/soc-jack.c
+++ b/sound/soc/soc-jack.c
@@ -327,7 +327,7 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count,
 					      IRQF_TRIGGER_FALLING,
 					      gpios[i].name,
 					      &gpios[i]);
-		if (ret)
+		if (ret < 0)
 			goto err;
 
 		if (gpios[i].wake) {
-- 
cgit v1.1


From 161d55c3ec4c7e26c96b11dc86caea0b3c9c6b0f Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Sat, 13 Aug 2011 11:33:08 +0800
Subject: ASoC: sta32x: Fix a memory leak if snd_soc_register_codec fails

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/codecs/sta32x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c
index 409d89d..fbd7eb9 100644
--- a/sound/soc/codecs/sta32x.c
+++ b/sound/soc/codecs/sta32x.c
@@ -857,6 +857,7 @@ static __devinit int sta32x_i2c_probe(struct i2c_client *i2c,
 	ret = snd_soc_register_codec(&i2c->dev, &sta32x_codec, &sta32x_dai, 1);
 	if (ret != 0) {
 		dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret);
+		kfree(sta32x);
 		return ret;
 	}
 
-- 
cgit v1.1


From bf545ed72f2eeac664695a8ea2199d9ddaef6020 Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 12 Aug 2011 18:04:10 -0400
Subject: ASoC: ad193x: fix registers definition

fix dac word len mask and adc tdm fmt shift value

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 sound/soc/codecs/ad193x.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h
index 9747b54..c1029d2 100644
--- a/sound/soc/codecs/ad193x.h
+++ b/sound/soc/codecs/ad193x.h
@@ -34,7 +34,7 @@
 #define AD193X_DAC_LEFT_HIGH    (1 << 3)
 #define AD193X_DAC_BCLK_INV     (1 << 7)
 #define AD193X_DAC_CTRL2        0x804
-#define AD193X_DAC_WORD_LEN_MASK	0xC
+#define AD193X_DAC_WORD_LEN_MASK        0x18
 #define AD193X_DAC_MASTER_MUTE  1
 #define AD193X_DAC_CHNL_MUTE    0x805
 #define AD193X_DACL1_MUTE       0
@@ -63,7 +63,7 @@
 #define AD193X_ADC_CTRL1        0x80f
 #define AD193X_ADC_SERFMT_MASK		0x60
 #define AD193X_ADC_SERFMT_STEREO	(0 << 5)
-#define AD193X_ADC_SERFMT_TDM		(1 << 2)
+#define AD193X_ADC_SERFMT_TDM		(1 << 5)
 #define AD193X_ADC_SERFMT_AUX		(2 << 5)
 #define AD193X_ADC_WORD_LEN_MASK	0x3
 #define AD193X_ADC_CTRL2        0x810
-- 
cgit v1.1


From 95c93d8525ebce1024bda7316f602ae45c36cd6f Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 12 Aug 2011 18:04:11 -0400
Subject: ASoC: ad193x: fix dac word len setting

dac word len value should left shift before setting

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Acked-by: Barry Song <21cnbao@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
---
 sound/soc/codecs/ad193x.c | 3 ++-
 sound/soc/codecs/ad193x.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c
index 2374ca5..f1a8be5 100644
--- a/sound/soc/codecs/ad193x.c
+++ b/sound/soc/codecs/ad193x.c
@@ -307,7 +307,8 @@ static int ad193x_hw_params(struct snd_pcm_substream *substream,
 	snd_soc_write(codec, AD193X_PLL_CLK_CTRL0, reg);
 
 	reg = snd_soc_read(codec, AD193X_DAC_CTRL2);
-	reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) | word_len;
+	reg = (reg & (~AD193X_DAC_WORD_LEN_MASK))
+		| (word_len << AD193X_DAC_WORD_LEN_SHFT);
 	snd_soc_write(codec, AD193X_DAC_CTRL2, reg);
 
 	reg = snd_soc_read(codec, AD193X_ADC_CTRL1);
diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h
index c1029d2..cccc2e8 100644
--- a/sound/soc/codecs/ad193x.h
+++ b/sound/soc/codecs/ad193x.h
@@ -34,6 +34,7 @@
 #define AD193X_DAC_LEFT_HIGH    (1 << 3)
 #define AD193X_DAC_BCLK_INV     (1 << 7)
 #define AD193X_DAC_CTRL2        0x804
+#define AD193X_DAC_WORD_LEN_SHFT        3
 #define AD193X_DAC_WORD_LEN_MASK        0x18
 #define AD193X_DAC_MASTER_MUTE  1
 #define AD193X_DAC_CHNL_MUTE    0x805
-- 
cgit v1.1


From 25ea524bed0202f823a0adcbbda68e86a22e3670 Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 12 Aug 2011 18:04:12 -0400
Subject: ASoC: ad193x: fix system clock

system clock is 24.576MHz instead of 12.288MHz

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/blackfin/bf5xx-ad193x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/blackfin/bf5xx-ad193x.c b/sound/soc/blackfin/bf5xx-ad193x.c
index d6651c0..a118a0f 100644
--- a/sound/soc/blackfin/bf5xx-ad193x.c
+++ b/sound/soc/blackfin/bf5xx-ad193x.c
@@ -56,7 +56,7 @@ static int bf5xx_ad193x_hw_params(struct snd_pcm_substream *substream,
 
 	switch (params_rate(params)) {
 	case 48000:
-		clk = 12288000;
+		clk = 24576000;
 		break;
 	}
 
-- 
cgit v1.1


From 396a2e79cdbd562bf7ea48132f8d3ba8304109b2 Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 12 Aug 2011 18:04:13 -0400
Subject: ASoC: Add spi hw read function for 16 addr 8 data mode for ad193x fix

[This will be used by the ad193x driver to fix the fact that the
original author of the driver put a bodge for their particular chip into
a the generic ASoC register I/O abstraction layer which looked like an
obvious bug which ended up getting fixed in 3.0.  Sadly there were no
comments documenting what was going on.  A minimally invasive correction
to the driver is to remove the register cache support and go direct to
the hardware all the time so we're adding a new feature -- broonie]

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/soc-io.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/sound/soc/soc-io.c b/sound/soc/soc-io.c
index cca490c..a62f7dd 100644
--- a/sound/soc/soc-io.c
+++ b/sound/soc/soc-io.c
@@ -205,6 +205,25 @@ static unsigned int snd_soc_16_8_read_i2c(struct snd_soc_codec *codec,
 #define snd_soc_16_8_read_i2c NULL
 #endif
 
+#if defined(CONFIG_SPI_MASTER)
+static unsigned int snd_soc_16_8_read_spi(struct snd_soc_codec *codec,
+		                          unsigned int r)
+{
+	struct spi_device *spi = codec->control_data;
+
+	const u16 reg = cpu_to_be16(r | 0x100);
+	u8 data;
+	int ret;
+
+	ret = spi_write_then_read(spi, &reg, 2, &data, 1);
+	if (ret < 0)
+		return 0;
+	return data;
+}
+#else
+#define snd_soc_16_8_read_spi NULL
+#endif
+
 static int snd_soc_16_8_write(struct snd_soc_codec *codec, unsigned int reg,
 			      unsigned int value)
 {
@@ -295,6 +314,7 @@ static struct {
 	int (*write)(struct snd_soc_codec *codec, unsigned int, unsigned int);
 	unsigned int (*read)(struct snd_soc_codec *, unsigned int);
 	unsigned int (*i2c_read)(struct snd_soc_codec *, unsigned int);
+	unsigned int (*spi_read)(struct snd_soc_codec *, unsigned int);
 } io_types[] = {
 	{
 		.addr_bits = 4, .data_bits = 12,
@@ -318,6 +338,7 @@ static struct {
 		.addr_bits = 16, .data_bits = 8,
 		.write = snd_soc_16_8_write,
 		.i2c_read = snd_soc_16_8_read_i2c,
+		.spi_read = snd_soc_16_8_read_spi,
 	},
 	{
 		.addr_bits = 16, .data_bits = 16,
@@ -383,6 +404,8 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec,
 #ifdef CONFIG_SPI_MASTER
 		codec->hw_write = do_spi_write;
 #endif
+		if (io_types[i].spi_read)
+			codec->hw_read = io_types[i].spi_read;
 
 		codec->control_data = container_of(codec->dev,
 						   struct spi_device,
-- 
cgit v1.1


From 0cc62e926324d4f3bd02d378baafbe73164fca35 Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 12 Aug 2011 18:04:14 -0400
Subject: ASoC: ad193x: remove cache support

asoc cache layer can't support this kind of spi registers well.
remove cache support and read/write registers directly

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/codecs/ad193x.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c
index f1a8be5..eedb6f5 100644
--- a/sound/soc/codecs/ad193x.c
+++ b/sound/soc/codecs/ad193x.c
@@ -27,11 +27,6 @@ struct ad193x_priv {
 	int sysclk;
 };
 
-/* ad193x register cache & default register settings */
-static const u8 ad193x_reg[AD193X_NUM_REGS] = {
-	0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0,
-};
-
 /*
  * AD193X volume/mute/de-emphasis etc. controls
  */
@@ -390,9 +385,6 @@ static int ad193x_probe(struct snd_soc_codec *codec)
 
 static struct snd_soc_codec_driver soc_codec_dev_ad193x = {
 	.probe = 	ad193x_probe,
-	.reg_cache_default = ad193x_reg,
-	.reg_cache_size = AD193X_NUM_REGS,
-	.reg_word_size = sizeof(u16),
 };
 
 #if defined(CONFIG_SPI_MASTER)
-- 
cgit v1.1


From 8a9af4fdf6d5eeb3200a088354d266a87e8260b0 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Tue, 9 Aug 2011 16:31:54 -0700
Subject: USB: Avoid NULL pointer deref in usb_hcd_alloc_bandwidth.

usb_ifnum_to_if() can return NULL if the USB device does not have a
configuration installed (usb_device->actconfig == NULL), or if we can't
find the interface number in the installed configuration.  Return an
error instead of crashing.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
---
 drivers/usb/core/hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 8669ba3..73cbbd8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1775,6 +1775,8 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
 		struct usb_interface *iface = usb_ifnum_to_if(udev,
 				cur_alt->desc.bInterfaceNumber);
 
+		if (!iface)
+			return -EINVAL;
 		if (iface->resetting_device) {
 			/*
 			 * The USB core just reset the device, so the xHCI host
-- 
cgit v1.1


From 75f25bd31d9315ab57e4fb5eba3340452febc48d Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 3 Aug 2011 13:17:01 +0800
Subject: cpupower: avoid using symlinks

Reference the source directly, don't create symlinks.

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/debug/x86_64/Makefile             | 8 ++++----
 tools/power/cpupower/debug/x86_64/centrino-decode.c    | 1 -
 tools/power/cpupower/debug/x86_64/powernow-k8-decode.c | 1 -
 3 files changed, 4 insertions(+), 6 deletions(-)
 delete mode 120000 tools/power/cpupower/debug/x86_64/centrino-decode.c
 delete mode 120000 tools/power/cpupower/debug/x86_64/powernow-k8-decode.c

diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile
index dbf1399..3326217 100644
--- a/tools/power/cpupower/debug/x86_64/Makefile
+++ b/tools/power/cpupower/debug/x86_64/Makefile
@@ -1,10 +1,10 @@
 default: all
 
-centrino-decode: centrino-decode.c
-	$(CC) $(CFLAGS) -o centrino-decode centrino-decode.c
+centrino-decode: ../i386/centrino-decode.c
+	$(CC) $(CFLAGS) -o $@ $<
 
-powernow-k8-decode: powernow-k8-decode.c
-	$(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c
+powernow-k8-decode: ../i386/powernow-k8-decode.c
+	$(CC) $(CFLAGS) -o $@ $<
 
 all: centrino-decode powernow-k8-decode
 
diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c
deleted file mode 120000
index 26fb3f1..0000000
--- a/tools/power/cpupower/debug/x86_64/centrino-decode.c
+++ /dev/null
@@ -1 +0,0 @@
-../i386/centrino-decode.c
\ No newline at end of file
diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c
deleted file mode 120000
index eb30c79..0000000
--- a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c
+++ /dev/null
@@ -1 +0,0 @@
-../i386/powernow-k8-decode.c
\ No newline at end of file
-- 
cgit v1.1


From 2dfc818b35cbea59188cc86e86e0a0efce2b0dbe Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 12 Aug 2011 01:11:35 +0200
Subject: cpupower: mperf monitor - Use TSC to calculate max frequency if
 possible

Which makes the implementation independent from cpufreq drivers.
Therefore this would also work on a Xen kernel where the hypervisor
is doing frequency switching and idle entering.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/Makefile                      |   2 +-
 .../cpupower/utils/idle_monitor/mperf_monitor.c    | 177 +++++++++++++++------
 2 files changed, 131 insertions(+), 48 deletions(-)

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 94c2cf0..11521d2 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -24,7 +24,7 @@
 
 # Set the following to `true' to make a unstripped, unoptimized
 # binary. Leave this set to `false' for production use.
-DEBUG ?=	false
+DEBUG ?=	true
 
 # make the build silent. Set this to something else to make it noisy again.
 V ?=		false
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index 63ca87a..5650ab5 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -22,12 +22,15 @@
 
 #define MSR_TSC	0x10
 
+#define MSR_AMD_HWCR 0xc0010015
+
 enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
 
 static int mperf_get_count_percent(unsigned int self_id, double *percent,
 				   unsigned int cpu);
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
 				unsigned int cpu);
+static struct timespec time_start, time_end;
 
 static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
 	{
@@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
 	},
 };
 
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The max frequency mperf is ticking at (in C0), either retrieved via:
+ *   1) calculated after measurements if we know TSC ticks at mperf/P0 frequency
+ *   2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
+ * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen)
+ */
+static unsigned long max_frequency;
+
 static unsigned long long tsc_at_measure_start;
 static unsigned long long tsc_at_measure_end;
-static unsigned long max_frequency;
 static unsigned long long *mperf_previous_count;
 static unsigned long long *aperf_previous_count;
 static unsigned long long *mperf_current_count;
 static unsigned long long *aperf_current_count;
+
 /* valid flag for all CPUs. If a MSR read failed it will be zero */
 static int *is_valid;
 
 static int mperf_get_tsc(unsigned long long *tsc)
 {
-	return read_msr(0, MSR_TSC, tsc);
+	int ret;
+	ret = read_msr(0, MSR_TSC, tsc);
+	if (ret)
+		dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+	return ret;
 }
 
 static int mperf_init_stats(unsigned int cpu)
@@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu)
 	return 0;
 }
 
-/*
- * get_average_perf()
- *
- * Returns the average performance (also considers boosted frequencies)
- *
- * Input:
- *   aperf_diff: Difference of the aperf register over a time period
- *   mperf_diff: Difference of the mperf register over the same time period
- *   max_freq:   Maximum frequency (P0)
- *
- * Returns:
- *   Average performance over the time period
- */
-static unsigned long get_average_perf(unsigned long long aperf_diff,
-				      unsigned long long mperf_diff)
-{
-	unsigned int perf_percent = 0;
-	if (((unsigned long)(-1) / 100) < aperf_diff) {
-		int shift_count = 7;
-		aperf_diff >>= shift_count;
-		mperf_diff >>= shift_count;
-	}
-	perf_percent = (aperf_diff * 100) / mperf_diff;
-	return (max_frequency * perf_percent) / 100;
-}
-
 static int mperf_get_count_percent(unsigned int id, double *percent,
 				   unsigned int cpu)
 {
 	unsigned long long aperf_diff, mperf_diff, tsc_diff;
+	unsigned long long timediff;
 
 	if (!is_valid[cpu])
 		return -1;
@@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 
 	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
 	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
-	tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
 
-	*percent = 100.0 * mperf_diff / tsc_diff;
-	dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
-	       mperf_cstates[id].name, mperf_diff, tsc_diff);
+	if (max_freq_mode == MAX_FREQ_TSC_REF) {
+		tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+		*percent = 100.0 * mperf_diff / tsc_diff;
+		dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+		       mperf_cstates[id].name, mperf_diff, tsc_diff);
+	} else if (max_freq_mode == MAX_FREQ_SYSFS) {
+		timediff = timespec_diff_us(time_start, time_end);
+		*percent = 100.0 * mperf_diff / timediff;
+		dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+		       mperf_cstates[id].name, mperf_diff, timediff);
+	} else
+		return -1;
 
 	if (id == Cx)
 		*percent = 100.0 - *percent;
@@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
 				unsigned int cpu)
 {
-	unsigned long long aperf_diff, mperf_diff;
+	unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
 
 	if (id != AVG_FREQ)
 		return 1;
@@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
 	mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
 	aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
 
-	/* Return MHz for now, might want to return KHz if column width is more
-	   generic */
-	*count = get_average_perf(aperf_diff, mperf_diff) / 1000;
-	dprint("%s: %llu\n", mperf_cstates[id].name, *count);
+	if (max_freq_mode == MAX_FREQ_TSC_REF) {
+		/* Calculate max_freq from TSC count */
+		tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+		time_diff = timespec_diff_us(time_start, time_end);
+		max_frequency = tsc_diff / time_diff;
+	}
 
+	*count = max_frequency * ((double)aperf_diff / mperf_diff);
+	dprint("%s: Average freq based on %s maximum frequency:\n",
+	       mperf_cstates[id].name,
+	       (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+	dprint("%max_frequency: %lu", max_frequency);
+	dprint("aperf_diff: %llu\n", aperf_diff);
+	dprint("mperf_diff: %llu\n", mperf_diff);
+	dprint("avg freq:   %llu\n", *count);
 	return 0;
 }
 
@@ -178,6 +188,7 @@ static int mperf_start(void)
 	int cpu;
 	unsigned long long dbg;
 
+	clock_gettime(CLOCK_REALTIME, &time_start);
 	mperf_get_tsc(&tsc_at_measure_start);
 
 	for (cpu = 0; cpu < cpu_count; cpu++)
@@ -193,32 +204,104 @@ static int mperf_stop(void)
 	unsigned long long dbg;
 	int cpu;
 
-	mperf_get_tsc(&tsc_at_measure_end);
-
 	for (cpu = 0; cpu < cpu_count; cpu++)
 		mperf_measure_stats(cpu);
 
+	mperf_get_tsc(&tsc_at_measure_end);
+	clock_gettime(CLOCK_REALTIME, &time_end);
+
 	mperf_get_tsc(&dbg);
 	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
 
 	return 0;
 }
 
-struct cpuidle_monitor mperf_monitor;
-
-struct cpuidle_monitor *mperf_register(void)
+/*
+ * Mperf register is defined to tick at P0 (maximum) frequency
+ *
+ * Instead of reading out P0 which can be tricky to read out from HW,
+ * we use TSC counter if it reliably ticks at P0/mperf frequency.
+ *
+ * Still try to fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without invariant TSC feature.
+ * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but
+ * it's still double checked (MSR_AMD_HWCR)).
+ *
+ * On these machines the user would still get useful mperf
+ * stats when acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
 {
+	int ret;
+	unsigned long long hwcr;
 	unsigned long min;
 
-	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
-		return NULL;
-
-	/* Assume min/max all the same on all cores */
+	if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)
+		goto use_sysfs;
+
+	if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+		/* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
+		 * freq.
+		 * A test whether hwcr is accessable/available would be:
+		 * (cpupower_cpu_info.family > 0x10 ||
+		 *   cpupower_cpu_info.family == 0x10 &&
+		 *   cpupower_cpu_info.model >= 0x2))
+		 * This should be the case for all aperf/mperf
+		 * capable AMD machines and is therefore safe to test here.
+		 * Compare with Linus kernel git commit: acf01734b1747b1ec4
+		 */
+		ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+		/*
+		 * If the MSR read failed, assume a Xen system that did
+		 * not explicitly provide access to it and assume TSC works
+		*/
+		if (ret != 0) {
+			dprint("TSC read 0x%x failed - assume TSC working\n",
+			       MSR_AMD_HWCR);
+			return 0;
+		} else if (1 & (hwcr >> 24)) {
+			max_freq_mode = MAX_FREQ_TSC_REF;
+			return 0;
+		} else { /* Use sysfs max frequency if available */ }
+	} else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+		/*
+		 * On Intel we assume mperf (in C0) is ticking at same
+		 * rate than TSC
+		 */
+		max_freq_mode = MAX_FREQ_TSC_REF;
+		return 0;
+	}
+use_sysfs:
 	if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
 		dprint("Cannot retrieve max freq from cpufreq kernel "
 		       "subsystem\n");
-		return NULL;
+		return -1;
 	}
+	max_freq_mode = MAX_FREQ_SYSFS;
+	return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) Average frequency a CPU resided in
+ *    This always works if the CPU has aperf/mperf capabilities
+ *
+ * 2) C0 and Cx (any sleep state) time a CPU resided in
+ *    Works if mperf timer stops ticking in sleep states which
+ *    seem to be the case on all current HW.
+ * Both is directly retrieved from HW registers and is independent
+ * from kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+		return NULL;
+
+	if (init_maxfreq_mode())
+		return NULL;
 
 	/* Free this at program termination */
 	is_valid = calloc(cpu_count, sizeof(int));
-- 
cgit v1.1


From 88f984e0e235f82a5d34f4a99244eeb14e1413e0 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 12 Aug 2011 01:11:36 +0200
Subject: cpupower: Do not show an empty Idle_Stats monitor if no idle driver
 is available

By taking error values of:
sysfs_get_idlestate_count(..);
into account.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index d048b96..bcd22a1 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
 	/* Assume idle state count is the same for all CPUs */
 	cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
 
-	if (cpuidle_sysfs_monitor.hw_states_num == 0)
+	if (cpuidle_sysfs_monitor.hw_states_num <= 0)
 		return NULL;
 
 	for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
-- 
cgit v1.1


From 7c74d2bc5a9d43d33d6f16c1e706147162e2bc52 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 12 Aug 2011 01:11:37 +0200
Subject: cpupower: Better detect offlined CPUs

Before, checking for offlined CPUs was done dirty and
it was checked whether topology parsing returned -1 values.
But this is a valid case on a Xen (and possibly other) kernels.

Do proper online/offline checking, also take CONFIG_HOTPLUG_CPU
option into account (no /sys/devices/../cpuX/online file).

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/utils/helpers/helpers.h       |  3 ++
 tools/power/cpupower/utils/helpers/sysfs.c         | 50 ++++++++++++++++++++++
 tools/power/cpupower/utils/helpers/sysfs.h         |  2 +
 tools/power/cpupower/utils/helpers/topology.c      |  5 ++-
 .../cpupower/utils/idle_monitor/cpupower-monitor.c | 10 +++--
 5 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 592ee36..7a83022 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -96,6 +96,9 @@ struct cpupower_topology {
 		int pkg;
 		int core;
 		int cpu;
+
+		/* flags */
+		unsigned int is_online:1;
 	} *core_info;
 };
 
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index 55e2466..c634302 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path,
 	return (unsigned int) numwrite;
 }
 
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     negative errno values in error case
+ */
+int sysfs_is_cpu_online(unsigned int cpu)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numread;
+	unsigned long long value;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+	struct stat statbuf;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+	if (stat(path, &statbuf) != 0)
+		return 0;
+
+	/*
+	 * kernel without CONFIG_HOTPLUG_CPU
+	 * -> cpuX directory exists, but not cpuX/online file
+	 */
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+	if (stat(path, &statbuf) != 0)
+		return 1;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -errno;
+
+	numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+	if (numread < 1) {
+		close(fd);
+		return -EIO;
+	}
+	linebuf[numread] = '\0';
+	close(fd);
+
+	value = strtoull(linebuf, &endp, 0);
+	if (value > 1 || value < 0)
+		return -EINVAL;
+
+	return value;
+}
+
 /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
 
 /*
diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h
index f9373e0..8cb797b 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.h
+++ b/tools/power/cpupower/utils/helpers/sysfs.h
@@ -7,6 +7,8 @@
 
 extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
 
+extern int sysfs_is_cpu_online(unsigned int cpu);
+
 extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
 						unsigned int idlestate);
 extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index 385ee5c..4eae2c4 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -41,6 +41,8 @@ struct cpuid_core_info {
 	unsigned int pkg;
 	unsigned int thread;
 	unsigned int cpu;
+	/* flags */
+	unsigned int is_online:1;
 };
 
 static int __compare(const void *t1, const void *t2)
@@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
 		return -ENOMEM;
 	cpu_top->pkgs = cpu_top->cores = 0;
 	for (cpu = 0; cpu < cpus; cpu++) {
+		cpu_top->core_info[cpu].cpu = cpu;
+		cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
 		cpu_top->core_info[cpu].pkg =
 			sysfs_topology_read_file(cpu, "physical_package_id");
 		if ((int)cpu_top->core_info[cpu].pkg != -1 &&
@@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
 			cpu_top->pkgs = cpu_top->core_info[cpu].pkg;
 		cpu_top->core_info[cpu].core =
 			sysfs_topology_read_file(cpu, "core_id");
-		cpu_top->core_info[cpu].cpu = cpu;
 	}
 	cpu_top->pkgs++;
 
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index ba4bf06..dd8e1ea 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -190,9 +190,13 @@ void print_results(int topology_depth, int cpu)
 			}
 		}
 	}
-	/* cpu offline */
-	if (cpu_top.core_info[cpu].pkg == -1 ||
-	    cpu_top.core_info[cpu].core == -1) {
+	/*
+	 * The monitor could still provide useful data, for example
+	 * AMD HW counters partly sit in PCI config space.
+	 * It's up to the monitor plug-in to check .is_online, this one
+	 * is just for additional info.
+	 */
+	if (!cpu_top.core_info[cpu].is_online) {
 		printf(_(" *is offline\n"));
 		return;
 	} else
-- 
cgit v1.1


From 9ee31f618a3c8209b2bd4bedd71fd5f2be7786bd Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Fri, 12 Aug 2011 01:11:38 +0200
Subject: cpupower: Make monitor command -c/--cpu aware

This allows for example:
cpupower -c 2-4,6 monitor -m Mperf
              |Mperf
PKG |CORE|CPU | C0   | Cx   | Freq
   0|   8|   4|  2.42| 97.58|  1353
   0|  16|   2| 14.38| 85.62|  1928
   0|  24|   6|  1.76| 98.24|  1442
   1|  16|   3| 15.53| 84.47|  1650

CPUs always get resorted for package, core then cpu id if it could get read out
(or however you name these topology levels...).
Still this is a nice way to keep the overview if a test binary is bound to
a specific CPU or if one wants to show all CPUs inside a package or similar.

Still missing: Do not measure not available cores to reduce the overhead
and achieve better results.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index dd8e1ea..6cb8d9e 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -149,6 +149,10 @@ void print_results(int topology_depth, int cpu)
 	unsigned long long result;
 	cstate_t s;
 
+	/* Be careful CPUs may got resorted for pkg value do not just use cpu */
+	if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+		return;
+
 	if (topology_depth > 2)
 		printf("%4d|", cpu_top.core_info[cpu].pkg);
 	if (topology_depth > 1)
@@ -389,6 +393,10 @@ int cmd_monitor(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
+	/* Default is: monitor all CPUs */
+	if (bitmask_isallclear(cpus_chosen))
+		bitmask_setall(cpus_chosen);
+
 	dprint("System has up to %d CPU cores\n", cpu_count);
 
 	for (num = 0; all_monitors[num]; num++) {
-- 
cgit v1.1


From 18adad1c57f820d38d05e3d5e3d548e286233b76 Mon Sep 17 00:00:00 2001
From: Gerard Braad <me@gbraad.nl>
Date: Tue, 16 Aug 2011 00:17:56 -0700
Subject: Input: wacom - add support for the Wacom Bamboo Pen (CTL-660/K)

Signed-off-by: Gerard Braad <me@gbraad.nl>
Reviewed-by: Chris Bagwell <chris@cnpbagwell.com>
Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/tablet/wacom_wac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 03ebcc8..c1c2f7b 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -1460,6 +1460,9 @@ static const struct wacom_features wacom_features_0xD3 =
 static const struct wacom_features wacom_features_0xD4 =
 	{ "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
 	  63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0xD5 =
+	{ "Wacom Bamboo Pen 6x8",     WACOM_PKGLEN_BBFUN, 21648, 13530, 1023,
+	  63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
 static const struct wacom_features wacom_features_0xD6 =
 	{ "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
 	  63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1564,6 +1567,7 @@ const struct usb_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0xD2) },
 	{ USB_DEVICE_WACOM(0xD3) },
 	{ USB_DEVICE_WACOM(0xD4) },
+	{ USB_DEVICE_WACOM(0xD5) },
 	{ USB_DEVICE_WACOM(0xD6) },
 	{ USB_DEVICE_WACOM(0xD7) },
 	{ USB_DEVICE_WACOM(0xD8) },
-- 
cgit v1.1


From a417ea4432db7fd1c91c19b129a3e3d2367b7ce4 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 16 Aug 2011 00:17:56 -0700
Subject: Input: wacom - add WAC_MSG_RETRIES define

Use WAC_MSG_RETRIES define instead of a numeric constant.

Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/tablet/wacom_sys.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 449c0a4..9879c73 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -49,6 +49,7 @@ struct hid_descriptor {
 #define USB_REQ_GET_REPORT	0x01
 #define USB_REQ_SET_REPORT	0x09
 #define WAC_HID_FEATURE_REPORT	0x03
+#define WAC_MSG_RETRIES		5
 
 static int usb_get_report(struct usb_interface *intf, unsigned char type,
 				unsigned char id, void *buf, int size)
@@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
 			report,
 			hid_desc->wDescriptorLength,
 			5000); /* 5 secs */
-	} while (result < 0 && limit++ < 5);
+	} while (result < 0 && limit++ < WAC_MSG_RETRIES);
 
 	/* No need to parse the Descriptor. It isn't an error though */
 	if (result < 0)
@@ -336,7 +337,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
 				error = usb_get_report(intf,
 					WAC_HID_FEATURE_REPORT, report_id,
 					rep_data, 3);
-		} while ((error < 0 || rep_data[1] != 4) && limit++ < 5);
+		} while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES);
 	} else if (features->type != TABLETPC) {
 		do {
 			rep_data[0] = 2;
@@ -347,7 +348,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
 				error = usb_get_report(intf,
 					WAC_HID_FEATURE_REPORT, report_id,
 					rep_data, 2);
-		} while ((error < 0 || rep_data[1] != 2) && limit++ < 5);
+		} while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES);
 	}
 
 	kfree(rep_data);
-- 
cgit v1.1


From 3b48c91cdf2d6827ce315b3b112310fa02198db0 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 16 Aug 2011 00:17:57 -0700
Subject: Input: wacom - report id 3 returns 4 bytes of data

Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/tablet/wacom_sys.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 9879c73..d27c9d9 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -320,23 +320,25 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
 	int limit = 0, report_id = 2;
 	int error = -ENOMEM;
 
-	rep_data = kmalloc(2, GFP_KERNEL);
+	rep_data = kmalloc(4, GFP_KERNEL);
 	if (!rep_data)
 		return error;
 
-	/* ask to report tablet data if it is 2FGT Tablet PC or
+	/* ask to report tablet data if it is MT Tablet PC or
 	 * not a Tablet PC */
 	if (features->type == TABLETPC2FG) {
 		do {
 			rep_data[0] = 3;
 			rep_data[1] = 4;
+			rep_data[2] = 0;
+			rep_data[3] = 0;
 			report_id = 3;
 			error = usb_set_report(intf, WAC_HID_FEATURE_REPORT,
-				report_id, rep_data, 2);
+				report_id, rep_data, 4);
 			if (error >= 0)
 				error = usb_get_report(intf,
 					WAC_HID_FEATURE_REPORT, report_id,
-					rep_data, 3);
+					rep_data, 4);
 		} while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES);
 	} else if (features->type != TABLETPC) {
 		do {
-- 
cgit v1.1


From c503ad466da44ca23c658986629bf7a2e2eabbb7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 16 Aug 2011 14:23:20 +0200
Subject: ALSA: hda - Fix duplicated capture-volume creation for ALC268 models

Fix the duplicated creation of capture-mixer elements for some static
ALC268 configurations.  The capture mixers must be put to cap_mixer field
instead of mixers array.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/alc268_quirks.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/sound/pci/hda/alc268_quirks.c b/sound/pci/hda/alc268_quirks.c
index be58bf2..2e5876c 100644
--- a/sound/pci/hda/alc268_quirks.c
+++ b/sound/pci/hda/alc268_quirks.c
@@ -476,8 +476,8 @@ static const struct snd_pci_quirk alc268_ssid_cfg_tbl[] = {
 
 static const struct alc_config_preset alc268_presets[] = {
 	[ALC267_QUANTA_IL1] = {
-		.mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer,
-			    alc268_capture_nosrc_mixer },
+		.mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_nosrc_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc267_quanta_il1_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -492,8 +492,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_3ST] = {
-		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
-			    alc268_beep_mixer },
+		.mixers = { alc268_base_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_alt_mixer,
 		.init_verbs = { alc268_base_init_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
 		.dac_nids = alc268_dac_nids,
@@ -507,8 +507,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.input_mux = &alc268_capture_source,
 	},
 	[ALC268_TOSHIBA] = {
-		.mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer,
-			    alc268_beep_mixer },
+		.mixers = { alc268_toshiba_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_alt_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_toshiba_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -525,8 +525,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_ACER] = {
-		.mixers = { alc268_acer_mixer, alc268_capture_alt_mixer,
-			    alc268_beep_mixer },
+		.mixers = { alc268_acer_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_alt_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_acer_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -543,8 +543,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_ACER_DMIC] = {
-		.mixers = { alc268_acer_dmic_mixer, alc268_capture_alt_mixer,
-			    alc268_beep_mixer },
+		.mixers = { alc268_acer_dmic_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_alt_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_acer_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -561,9 +561,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_ACER_ASPIRE_ONE] = {
-		.mixers = { alc268_acer_aspire_one_mixer,
-			    alc268_beep_mixer,
-			    alc268_capture_nosrc_mixer },
+		.mixers = { alc268_acer_aspire_one_mixer, alc268_beep_mixer},
+		.cap_mixer = alc268_capture_nosrc_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_acer_aspire_one_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -579,8 +578,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_DELL] = {
-		.mixers = { alc268_dell_mixer, alc268_beep_mixer,
-			    alc268_capture_nosrc_mixer },
+		.mixers = { alc268_dell_mixer, alc268_beep_mixer},
+		.cap_mixer = alc268_capture_nosrc_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_dell_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -596,8 +595,8 @@ static const struct alc_config_preset alc268_presets[] = {
 		.init_hook = alc_inithook,
 	},
 	[ALC268_ZEPTO] = {
-		.mixers = { alc268_base_mixer, alc268_capture_alt_mixer,
-			    alc268_beep_mixer },
+		.mixers = { alc268_base_mixer, alc268_beep_mixer },
+		.cap_mixer = alc268_capture_alt_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_toshiba_verbs },
 		.num_dacs = ARRAY_SIZE(alc268_dac_nids),
@@ -616,7 +615,8 @@ static const struct alc_config_preset alc268_presets[] = {
 	},
 #ifdef CONFIG_SND_DEBUG
 	[ALC268_TEST] = {
-		.mixers = { alc268_test_mixer, alc268_capture_mixer },
+		.mixers = { alc268_test_mixer },
+		.cap_mixer = alc268_capture_mixer,
 		.init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs,
 				alc268_volume_init_verbs,
 				alc268_beep_init_verbs },
-- 
cgit v1.1


From c2bceb3d7f145af5a0916bea700f2f9d380901ea Mon Sep 17 00:00:00 2001
From: Lionel Elie Mamane <lionel@mamane.lu>
Date: Sat, 13 Aug 2011 14:04:38 +0000
Subject: sit tunnels: propagate IPv6 transport class to IPv4 Type of Service

sit tunnels (IPv6 tunnel over IPv4) do not implement the "tos inherit"
case to copy the IPv6 transport class byte from the inner packet to
the IPv4 type of service byte in the outer packet. By contrast, ipip
tunnels and GRE tunnels do.

This patch, adapted from the similar code in net/ipv4/ipip.c and
net/ipv4/ip_gre.c, implements that.

This patch applies to 3.0.1, and has been tested on that version.

Signed-off-by: Lionel Elie Mamane <lionel@mamane.lu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 07bf108..00b15ac 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto tx_error;
 
+	if (tos == 1)
+		tos = ipv6_get_dsfield(iph6);
+
 	/* ISATAP (RFC4214) - must come before 6to4 */
 	if (dev->priv_flags & IFF_ISATAP) {
 		struct neighbour *neigh = NULL;
-- 
cgit v1.1


From 48df4a6fd8c40c0bbcbca2044f5f2bc75dcf6db1 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 12 Aug 2011 10:23:01 -0700
Subject: xhci: Handle zero-length isochronous packets.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For a long time, the xHCI driver has had this note:
	/* FIXME: Ignoring zero-length packets, can those happen? */

It turns out that, yes, there are drivers that need to queue zero-length
transfers for isochronous OUT transfers.  Without this patch, users will
see kernel hang messages when a driver attempts to enqueue an isochronous
URB with a zero length transfer (because count_isoc_trbs_needed will return
zero for that TD, xhci_td->last_trb will never be set, and updating the
dequeue pointer will cause an infinite loop).

Matěj ran into this issue when using an NI Audio4DJ USB soundcard
with the snd-usb-caiaq driver.  See
	https://bugzilla.kernel.org/show_bug.cgi?id=40702

Fix count_isoc_trbs_needed() to return 1 for zero-length transfers (thanks
Alan on the math help).  Update the various TRB field calculations to deal
with zero-length transfers.  We're still transferring one packet with a
zero-length data payload, so the total_packet_count should be 1. The
Transfer Burst Count (TBC) and Transfer Last Burst Packet Count (TLBPC)
fields should be set to zero.

This patch should be backported to kernels as old as 2.6.36.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Tested-by: Matěj Laitl <matej@laitl.cz>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: stable@kernel.org
---
 drivers/usb/host/xhci-ring.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index b2d654b..54139a2 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2684,6 +2684,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len,
 {
 	int packets_transferred;
 
+	/* One TRB with a zero-length data packet. */
+	if (running_total == 0 && trb_buff_len == 0)
+		return 0;
+
 	/* All the TRB queueing functions don't count the current TRB in
 	 * running_total.
 	 */
@@ -3125,20 +3129,15 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
 		struct urb *urb, int i)
 {
 	int num_trbs = 0;
-	u64 addr, td_len, running_total;
+	u64 addr, td_len;
 
 	addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
 	td_len = urb->iso_frame_desc[i].length;
 
-	running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
-	running_total &= TRB_MAX_BUFF_SIZE - 1;
-	if (running_total != 0)
-		num_trbs++;
-
-	while (running_total < td_len) {
+	num_trbs = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)),
+			TRB_MAX_BUFF_SIZE);
+	if (num_trbs == 0)
 		num_trbs++;
-		running_total += TRB_MAX_BUFF_SIZE;
-	}
 
 	return num_trbs;
 }
@@ -3250,9 +3249,11 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		addr = start_addr + urb->iso_frame_desc[i].offset;
 		td_len = urb->iso_frame_desc[i].length;
 		td_remain_len = td_len;
-		/* FIXME: Ignoring zero-length packets, can those happen? */
 		total_packet_count = roundup(td_len,
 				le16_to_cpu(urb->ep->desc.wMaxPacketSize));
+		/* A zero-length transfer still involves at least one packet. */
+		if (total_packet_count == 0)
+			total_packet_count++;
 		burst_count = xhci_get_burst_count(xhci, urb->dev, urb,
 				total_packet_count);
 		residue = xhci_get_last_burst_packet_count(xhci,
-- 
cgit v1.1


From eb39d34004888afcc0a44d9c36383cd69fa3b3b9 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 26 Jul 2011 16:59:00 -0700
Subject: target: Change TCM_NON_EXISTENT_LUN response to ASC=LOGICAL UNIT NOT
 SUPPORTED

This patch changes transport_send_check_condition_and_sense() for
TCM_NON_EXISTENT_LUN emulation to use 0x25 (LOGICAL UNIT NOT SUPPORTED)
instead of the original 0x20 (INVALID COMMAND OPERATION CODE).  This is
helpful to distinguish between TCM_UNSUPPORTED_SCSI_OPCODE ASC=0x20
exceptions.

Signed-off-by: Nicholas A. Bellinger <nab@risingtidesystems.com>
---
 drivers/target/target_core_transport.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 8976032..cc5a339 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -4726,6 +4726,13 @@ int transport_send_check_condition_and_sense(
 	 */
 	switch (reason) {
 	case TCM_NON_EXISTENT_LUN:
+		/* CURRENT ERROR */
+		buffer[offset] = 0x70;
+		/* ILLEGAL REQUEST */
+		buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
+		/* LOGICAL UNIT NOT SUPPORTED */
+		buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25;
+		break;
 	case TCM_UNSUPPORTED_SCSI_OPCODE:
 	case TCM_SECTOR_COUNT_TOO_MANY:
 		/* CURRENT ERROR */
-- 
cgit v1.1


From c331eb580a0a7906c0cdb8dbae3cfe99e3c0e555 Mon Sep 17 00:00:00 2001
From: Andrew Drake <adrake@adrake.org>
Date: Tue, 16 Aug 2011 11:07:39 -0700
Subject: Input: bcm5974 - Add support for newer MacBookPro8,2

New MacBook Pro devices reporting product name MacBookPro8,2 come with
newer/higher resolution touchpads than others with the same product
name with USB ID 05ac:0252. This patch adds support for these devices.

Signed-off-by: Andrew Drake <adrake@adrake.org>
Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
index 48d9ec1..da28018 100644
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c
@@ -71,6 +71,10 @@
 #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI	0x024c
 #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO	0x024d
 #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS	0x024e
+/* Macbook8,2 (unibody) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI	0x0252
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO	0x0253
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS	0x0254
 
 #define BCM5974_DEVICE(prod) {					\
 	.match_flags = (USB_DEVICE_ID_MATCH_DEVICE |		\
@@ -112,6 +116,10 @@ static const struct usb_device_id bcm5974_table[] = {
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
 	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
+	/* MacbookPro8,2 */
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
+	BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
 	/* Terminating entry */
 	{}
 };
@@ -314,6 +322,18 @@ static const struct bcm5974_config bcm5974_config_table[] = {
 		{ DIM_X, DIM_X / SN_COORD, -4620, 5140 },
 		{ DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
 	},
+	{
+		USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI,
+		USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO,
+		USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS,
+		HAS_INTEGRATED_BUTTON,
+		0x84, sizeof(struct bt_data),
+		0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+		{ DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+		{ DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+		{ DIM_X, DIM_X / SN_COORD, -4750, 5280 },
+		{ DIM_Y, DIM_Y / SN_COORD, -150, 6730 }
+	},
 	{}
 };
 
-- 
cgit v1.1


From 28ac293363368650963ee4c1e323c1ff502c121f Mon Sep 17 00:00:00 2001
From: Yufeng Shen <miletus@chromium.org>
Date: Tue, 16 Aug 2011 00:40:54 -0700
Subject: Input: atmel_mxt_ts - report pressure information from the driver

Atmel mxt1386 touch controller has the touch pressure information so
let's report it to the user space.

[dtor@mail.ru: added ABS_RESSURE reporting for ST emulation.]

Signed-off-by: Yufeng Shen <miletus@chromium.org>
Acked-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Acked-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/atmel_mxt_ts.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index ae00604..f5d6685 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -244,6 +244,7 @@ struct mxt_finger {
 	int x;
 	int y;
 	int area;
+	int pressure;
 };
 
 /* Each client has this additional data */
@@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
 					finger[id].x);
 			input_report_abs(input_dev, ABS_MT_POSITION_Y,
 					finger[id].y);
+			input_report_abs(input_dev, ABS_MT_PRESSURE,
+					finger[id].pressure);
 		} else {
 			finger[id].status = 0;
 		}
@@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
 	if (status != MXT_RELEASE) {
 		input_report_abs(input_dev, ABS_X, finger[single_id].x);
 		input_report_abs(input_dev, ABS_Y, finger[single_id].y);
+		input_report_abs(input_dev,
+				 ABS_PRESSURE, finger[single_id].pressure);
 	}
 
 	input_sync(input_dev);
@@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
 	int x;
 	int y;
 	int area;
+	int pressure;
 
 	/* Check the touch is present on the screen */
 	if (!(status & MXT_DETECT)) {
@@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
 		y = y >> 2;
 
 	area = message->message[4];
+	pressure = message->message[5];
 
 	dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id,
 		status & MXT_MOVE ? "moved" : "pressed",
@@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
 	finger[id].x = x;
 	finger[id].y = y;
 	finger[id].area = area;
+	finger[id].pressure = pressure;
 
 	mxt_input_report(data, id);
 }
@@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
 			     0, data->max_x, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y,
 			     0, data->max_y, 0, 0);
+	input_set_abs_params(input_dev, ABS_PRESSURE,
+			     0, 255, 0, 0);
 
 	/* For multi touch */
 	input_mt_init_slots(input_dev, MXT_MAX_FINGER);
@@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
 			     0, data->max_x, 0, 0);
 	input_set_abs_params(input_dev, ABS_MT_POSITION_Y,
 			     0, data->max_y, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_PRESSURE,
+			     0, 255, 0, 0);
 
 	input_set_drvdata(input_dev, data);
 	i2c_set_clientdata(client, data);
-- 
cgit v1.1


From 25b7679136fd85b1e5197e36a0ca126163e89590 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 17 Aug 2011 09:20:01 +0200
Subject: ASoC: Fix check for symmetric rate enforcement

The ASoC core tries to not enforce symmetric rates when
two streams open simultaneously. It does so by checking
rtd->rate being zero. This works exactly once after booting
because it is not set to zero again when the streams close.
Fix this by setting rtd->rate when no active stream is left.

[This leads to lots of warnings about not enforcing the symmetry in some
situations as there's a race in the userspace API where we know we've
got two applications but don't know what rates they want to set.
-- broonie ]

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/soc-pcm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index b575939..2879c88 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -290,6 +290,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
 	codec_dai->active--;
 	codec->active--;
 
+	if (!cpu_dai->active && !codec_dai->active)
+		rtd->rate = 0;
+
 	/* Muting the DAC suppresses artifacts caused during digital
 	 * shutdown, for example from stopping clocks.
 	 */
-- 
cgit v1.1


From ccbcdf7cf1b5f6c6db30d84095b9c6c53043af55 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@novell.com>
Date: Tue, 16 Aug 2011 15:07:41 +0100
Subject: xen/x86: replace order-based range checking of M2P table by linear
 one

The order-based approach is not only less efficient (requiring a shift
and a compare, typical generated code looking like this

	mov	eax, [machine_to_phys_order]
	mov	ecx, eax
	shr	ebx, cl
	test	ebx, ebx
	jnz	...

whereas a direct check requires just a compare, like in

	cmp	ebx, [machine_to_phys_nr]
	jae	...

), but also slightly dangerous in the 32-on-64 case - the element
address calculation can wrap if the next power of two boundary is
sufficiently far away from the actual upper limit of the table, and
hence can result in user space addresses being accessed (with it being
unknown what may actually be mapped there).

Additionally, the elimination of the mistaken use of fls() here (should
have been __fls()) fixes a latent issue on x86-64 that would trigger
if the code was run on a system with memory extending beyond the 44-bit
boundary.

CC: stable@kernel.org
Signed-off-by: Jan Beulich <jbeulich@novell.com>
[v1: Based on Jeremy's feedback]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/page.h |  4 ++--
 arch/x86/xen/enlighten.c        |  4 ++--
 arch/x86/xen/mmu.c              | 12 ++++++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64a619d..7ff4669 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -39,7 +39,7 @@ typedef struct xpaddr {
     ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
 
 extern unsigned long *machine_to_phys_mapping;
-extern unsigned int   machine_to_phys_order;
+extern unsigned long  machine_to_phys_nr;
 
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return mfn;
 
-	if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+	if (unlikely(mfn >= machine_to_phys_nr)) {
 		pfn = ~0;
 		goto try_override;
 	}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528..b960429 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
 
 unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
 EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int   machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long  machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
 
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f987bde..24abc1f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
-	unsigned long machine_to_phys_nr_ents;
 
 	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
 		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
-		machine_to_phys_nr_ents = mapping.max_mfn + 1;
+		machine_to_phys_nr = mapping.max_mfn + 1;
 	} else {
-		machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
 	}
-	machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+	if ((machine_to_phys_mapping + machine_to_phys_nr)
+	    < machine_to_phys_mapping)
+		machine_to_phys_nr = (unsigned long *)NULL
+				     - machine_to_phys_mapping;
+#endif
 }
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.1


From 8919bc13e8d92c5b082c5c0321567383a071f5bc Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Mon, 15 Aug 2011 05:25:40 +0000
Subject: net_sched: fix port mirror/redirect stats reporting

When a redirected or mirrored packet is dropped by the target
device we need to record statistics.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 102fc21..e051398 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
 	skb2->skb_iif = skb->dev->ifindex;
 	skb2->dev = dev;
-	dev_queue_xmit(skb2);
-	err = 0;
+	err = dev_queue_xmit(skb2);
 
 out:
 	if (err) {
-- 
cgit v1.1


From ba3211ccd043fae3713793334d64d75bd0a1d029 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?=
 <panweiping3@gmail.com>
Date: Mon, 15 Aug 2011 15:57:35 +0000
Subject: bonding:reset backup and inactive flag of slave

Eduard Sinelnikov (eduard.sinelnikov@gmail.com) found that if we change
bonding mode from active backup to round robin, some slaves are still keeping
"backup", and won't transmit packets.

As Jay Vosburgh(fubar@us.ibm.com) pointed out that we can work around that by
removing the bond_is_active_slave() check, because the "backup" flag is only
meaningful for active backup mode.

But if we just simply ignore the bond_is_active_slave() check,
the transmission will work fine, but we can't maintain the correct value of
"backup" flag for each slaves, though it is meaningless for other mode than
active backup.

I'd like to reset "backup" and "inactive" flag in bond_open,
thus we can keep the correct value of them.

As for bond_is_active_slave(), I'd like to prepare another patch to handle it.

V2:
Use C style comment.
Move read_lock(&bond->curr_slave_lock).
Replace restore with reset, for active backup mode, it means "restore",
but for other modes, it means "reset".

Signed-off-by: Weiping Pan <panweiping3@gmail.com>
Reviewed-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 38a83ac..43f2ea5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3419,9 +3419,27 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
 static int bond_open(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+	int i;
 
 	bond->kill_timers = 0;
 
+	/* reset slave->backup and slave->inactive */
+	read_lock(&bond->lock);
+	if (bond->slave_cnt > 0) {
+		read_lock(&bond->curr_slave_lock);
+		bond_for_each_slave(bond, slave, i) {
+			if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+				&& (slave != bond->curr_active_slave)) {
+				bond_set_slave_inactive_flags(slave);
+			} else {
+				bond_set_slave_active_flags(slave);
+			}
+		}
+		read_unlock(&bond->curr_slave_lock);
+	}
+	read_unlock(&bond->lock);
+
 	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
 
 	if (bond_is_lb(bond)) {
-- 
cgit v1.1


From 9a75a97296c43c34add7dca8275496186e1b4ae9 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 15 Aug 2011 22:33:34 +0000
Subject: via-velocity: remove non-tagged packet filtering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's undesired to filter untagged packets at any time. So simply remove this.

Reported-by: Stephan Bärwolf <stephan.baerwolf@tu-ilmenau.de>
Tested-by: Stephan Bärwolf <stephan.baerwolf@tu-ilmenau.de>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/via-velocity.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index deb1eca..7c5336c 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -515,10 +515,6 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
 	mac_set_cam_mask(regs, vptr->mCAMmask);
 
 	/* Enable VCAMs */
-
-	if (test_bit(0, vptr->active_vlans))
-		WORD_REG_BITS_ON(MCFG_RTGOPT, &regs->MCFG);
-
 	for_each_set_bit(vid, vptr->active_vlans, VLAN_N_VID) {
 		mac_set_vlan_cam(regs, i, (u8 *) &vid);
 		vptr->vCAMmask[i / 8] |= 0x1 << (i % 8);
-- 
cgit v1.1


From 9331db4f00cfee8a79d2147ac83723ef436b9759 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 17 Aug 2011 23:50:37 -0700
Subject: forcedeth: call vlan_mode only if hw supports vlans

If hw does not support vlans, dont call nv_vlan_mode because it has no point.
I believe that this should fix issues on older non-vlan supportive
chips (like Ingo has).

Reported-ty: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/forcedeth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index e55df30..6d5fbd4 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -5615,7 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		goto out_error;
 	}
 
-	nv_vlan_mode(dev, dev->features);
+	if (id->driver_data & DEV_HAS_VLAN)
+		nv_vlan_mode(dev, dev->features);
 
 	netif_carrier_off(dev);
 
-- 
cgit v1.1


From 77e57297b4ff3f602ba5105398d342a4b4a54774 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 23 May 2011 14:39:17 +0200
Subject: perf list: Fix exit value

This patch fixes an issue with the exit value of perf list:

$ perf list; echo $?
129

perf list returns an error exit code even though there is no error.

There was a stray exit(129) in print_events(). This patch removes this
exit().

$ perf list; echo $?
0

$ perf list hw sw
  cpu-cycles OR cycles                               [Hardware event]
  stalled-cycles-frontend OR idle-cycles-frontend    [Hardware event]
  stalled-cycles-backend OR idle-cycles-backend      [Hardware event]
  instructions                                       [Hardware event]
  cache-references                                   [Hardware event]
  cache-misses                                       [Hardware event]
  branch-instructions OR branches                    [Hardware event]
  branch-misses                                      [Hardware event]
  bus-cycles                                         [Hardware event]

  cpu-clock                                          [Software event]
  task-clock                                         [Software event]
  page-faults OR faults                              [Software event]
  minor-faults                                       [Software event]
  major-faults                                       [Software event]
  context-switches OR cs                             [Software event]
  cpu-migrations OR migrations                       [Software event]
  alignment-faults                                   [Software event]
  emulation-faults                                   [Software event]
$ echo $?
0

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20110523123917.GA31060@quad
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4ea7e19..d93f3ce 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1097,6 +1097,4 @@ void print_events(const char *event_glob)
 	printf("\n");
 
 	print_tracepoint_events(NULL, NULL);
-
-	exit(129);
 }
-- 
cgit v1.1


From cc2d86b04d9ac28a6be6cb05da6ea8f014fd5aa0 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Tue, 7 Jun 2011 18:19:36 +0200
Subject: perf evlist: Fix missing event name init for default event

When no event is given to perf record, perf top, a default event is
initialized (cycles). However, perf_evlist__add_default() was not
setting the symbolic name for the event. Perf top worked simply because
it was reconstructing the name from the event code. But it should not
have to do this. This patch initializes the evsel->name field properly.

This second version improves the code flow on the non error path.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20110607161936.GA8163@quad
Signed-off-by: Stephane Eranian <eranian@google.com>
[committer note: Use perf_evsel__delete() instead of plain free()]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e03e7bc..c12bd47 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,10 +85,19 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
 	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 
 	if (evsel == NULL)
-		return -ENOMEM;
+		goto error;
+
+	/* use strdup() because free(evsel) assumes name is allocated */
+	evsel->name = strdup("cycles");
+	if (!evsel->name)
+		goto error_free;
 
 	perf_evlist__add(evlist, evsel);
 	return 0;
+error_free:
+	perf_evsel__delete(evsel);
+error:
+	return -ENOMEM;
 }
 
 void perf_evlist__disable(struct perf_evlist *evlist)
-- 
cgit v1.1


From 777d1d71db622a5e1ff703495741c3d257b532e5 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Sat, 23 Jul 2011 04:10:43 +0200
Subject: perf tools: Fix error handling of unknown events

There was a problem with the parse_events() code not printing the
correct event name when an event was unknown and starting with an 'r'.
The source of the problem was the way raw notation was parsed.

Without the patch:
	$ perf stat -e retired_foo
	invalid event modifier: 'tired_foo'

With the patch:
	$ perf stat -e retired_foo
	invalid or unsupported event: 'retired_foo'

This also covers the case where the name of the event was not printed at
all when perf was linked with libpfm4.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20110723021043.GA20178@quad
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d93f3ce..928918b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -697,7 +697,11 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
 		return EVT_FAILED;
 	n = hex2u64(str + 1, &config);
 	if (n > 0) {
-		*strp = str + n + 1;
+		const char *end = str + n + 1;
+		if (*end != '\0' && *end != ',' && *end != ':')
+			return EVT_FAILED;
+
+		*strp = end;
 		attr->type = PERF_TYPE_RAW;
 		attr->config = config;
 		return EVT_HANDLED;
-- 
cgit v1.1


From 195bcbf5078d74c8e00d68f04eb8695196fb31e8 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Thu, 18 Aug 2011 07:37:21 -0400
Subject: perf tools: Fix build against newer glibc

Upstream glibc commit 295e904 added a definition for __attribute_const__
to cdefs.h.  This causes the following error when building perf:

util/include/linux/compiler.h:8:0: error: "__attribute_const__"
redefined [-Werror] /usr/include/sys/cdefs.h:226:0: note: this is the
location of the previous definition

Wrap __attribute_const__ in #ifndef as we do for __always_inline.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110818113720.GL2227@zod.bos.redhat.com
Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
index 791f9dd..547628e 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -5,7 +5,9 @@
 #define __always_inline	inline
 #endif
 #define __user
+#ifndef __attribute_const__
 #define __attribute_const__
+#endif
 
 #define __used		__attribute__((__unused__))
 
-- 
cgit v1.1


From d53e8365eaacfdb29253b39d186109f5b4fcc08d Mon Sep 17 00:00:00 2001
From: Geunsik Lim <geunsik.lim@samsung.com>
Date: Thu, 18 Aug 2011 16:44:57 +0900
Subject: MAINTAINERS: Fix list of perf events source files

Recent changes made kernel/perf_event.c be split and moved to
kernel/events/.

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Kosina <trivial@kernel.org>
Cc: Joe Perches <joe@perches.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1313653497-27263-1-git-send-email-leemgs1@gmail.com
Signed-off-by: Geunsik Lim <geunsik.lim@samsung.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1e55e1e..7110675 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4971,7 +4971,7 @@ M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@elte.hu>
 M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:	Supported
-F:	kernel/perf_event*.c
+F:	kernel/events/*
 F:	include/linux/perf_event.h
 F:	arch/*/kernel/perf_event*.c
 F:	arch/*/kernel/*/perf_event*.c
-- 
cgit v1.1


From 43bece79796c2a39ec98998fd3f1071f04f3d8c3 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Wed, 17 Aug 2011 18:42:07 +0800
Subject: perf tools: Add group event scheduling option to perf record/stat

Group event scheduling command line option is missing in perf
record/stat.

Add it to perf record/stat, which is same as in perf top.

Reported-by: Andi Kleen <andi@firstfloor.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1313577727.2754.5.camel@hp6530s
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 4 +++-
 tools/perf/builtin-stat.c   | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f6426b4..6b0519f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -45,7 +45,7 @@ static int			freq				=   1000;
 static int			output;
 static int			pipe_output			=      0;
 static const char		*output_name			= NULL;
-static int			group				=      0;
+static bool			group				=  false;
 static int			realtime_prio			=      0;
 static bool			nodelay				=  false;
 static bool			raw_samples			=  false;
@@ -753,6 +753,8 @@ const struct option record_options[] = {
 		    "child tasks do not inherit counters"),
 	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
 	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
+	OPT_BOOLEAN(0, "group", &group,
+		    "put the counters into a counter group"),
 	OPT_BOOLEAN('g', "call-graph", &call_graph,
 		    "do call-graph (stack chain/backtrace) recording"),
 	OPT_INCR('v', "verbose", &verbose,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1ad04ce..5deb17d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -193,6 +193,7 @@ static int			big_num_opt			=  -1;
 static const char		*cpu_list;
 static const char		*csv_sep			= NULL;
 static bool			csv_output			= false;
+static bool			group				= false;
 
 static volatile int done = 0;
 
@@ -280,14 +281,14 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 	attr->inherit = !no_inherit;
 
 	if (system_wide)
-		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
+		return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group);
 
 	if (target_pid == -1 && target_tid == -1) {
 		attr->disabled = 1;
 		attr->enable_on_exec = 1;
 	}
 
-	return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
+	return perf_evsel__open_per_thread(evsel, evsel_list->threads, group);
 }
 
 /*
@@ -1043,6 +1044,8 @@ static const struct option options[] = {
 		    "stat events on existing thread id"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
+	OPT_BOOLEAN('g', "group", &group,
+		    "put the counters into a counter group"),
 	OPT_BOOLEAN('c', "scale", &scale,
 		    "scale/normalize counters"),
 	OPT_INCR('v', "verbose", &verbose,
-- 
cgit v1.1


From 3fe45aeaf2033c9eaa5028ed5ba68b466008876f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 18 Aug 2011 15:13:17 +0200
Subject: ALSA: hda - Add "PCM" volume to vmaster slave list

The new parser may use "PCM" volume, but it was missing the vmaster
slave list, thus "Master" volume didn't control it.

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=41342

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9a1aa09..fcb11af 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1784,6 +1784,7 @@ static const char * const alc_slave_vols[] = {
 	"Speaker Playback Volume",
 	"Mono Playback Volume",
 	"Line-Out Playback Volume",
+	"PCM Playback Volume",
 	NULL,
 };
 
@@ -1798,6 +1799,7 @@ static const char * const alc_slave_sws[] = {
 	"Mono Playback Switch",
 	"IEC958 Playback Switch",
 	"Line-Out Playback Switch",
+	"PCM Playback Switch",
 	NULL,
 };
 
-- 
cgit v1.1


From e574044acbad7421879270a80acd337459c94cc8 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@bitmer.com>
Date: Thu, 18 Aug 2011 15:02:47 +0300
Subject: ASoC: omap: Fix build errors in ams-delta

Fix "error: too few arguments to function 'ams_delta_set_bias_level'"
build errors in ams-delta.c that were introduced after commit d4c6005 ("ASoC:
Add context parameter to card DAPM callbacks") by adding dapm context
to ams_delta_set_bias_level calls.

Signed-off-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/omap/ams-delta.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/soc/omap/ams-delta.c b/sound/soc/omap/ams-delta.c
index 30fe0d0..0aa475f 100644
--- a/sound/soc/omap/ams-delta.c
+++ b/sound/soc/omap/ams-delta.c
@@ -514,7 +514,7 @@ static int ams_delta_cx20442_init(struct snd_soc_pcm_runtime *rtd)
 	}
 
 	/* Set codec bias level */
-	ams_delta_set_bias_level(card, SND_SOC_BIAS_STANDBY);
+	ams_delta_set_bias_level(card, dapm, SND_SOC_BIAS_STANDBY);
 
 	/* Add hook switch - can be used to control the codec from userspace
 	 * even if line discipline fails */
@@ -649,7 +649,9 @@ static void __exit ams_delta_module_exit(void)
 			ams_delta_hook_switch_gpios);
 
 	/* Keep modem power on */
-	ams_delta_set_bias_level(&ams_delta_audio_card, SND_SOC_BIAS_STANDBY);
+	ams_delta_set_bias_level(&ams_delta_audio_card,
+				 &ams_delta_audio_card.rtd[0].codec->dapm,
+				 SND_SOC_BIAS_STANDBY);
 
 	platform_device_unregister(cx20442_platform_device);
 	platform_device_unregister(ams_delta_audio_platform_device);
-- 
cgit v1.1


From c5114cd59d2664f258b0d021d79b1532d94bdc2b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 18 Aug 2011 21:29:27 -0700
Subject: vlan: reset headers on accel emulation path

It's after all necessary to do reset headers here. The reason is we
cannot depend that it gets reseted in __netif_receive_skb once skb is
reinjected. For incoming vlanids without vlan_dev, vlan_do_receive()
returns false with skb != NULL and __netif_reveive_skb continues, skb is
not reinjected.

This might be good material for 3.0-stable as well

Reported-by: Mike Auty <mike.auty@gmail.com>
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 5f27f8e..f1f2f7b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	if (unlikely(!skb))
 		goto err_free;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 	return skb;
 
 err_free:
-- 
cgit v1.1


From 2d5b2c5ca0d3ebe707386b3add365496460cf918 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 18 Aug 2011 21:30:37 -0700
Subject: net: netdev-features.txt update to Documentation/networking/00-INDEX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update netdev-features.txt entry in 00-INDEX to incorporate
feedback by Michał Mirosław.

v2: restored tabs that were inadvertently changed to spaces in v1.
sorry for the error.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/00-INDEX | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 811252b..bbce121 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -135,7 +135,7 @@ multiqueue.txt
 netconsole.txt
 	- The network console module netconsole.ko: configuration and notes.
 netdev-features.txt
-	- Network interface "feature mess and how to get out from it alive".
+	- Network interface features API description.
 netdevices.txt
 	- info on network device driver functions exported to the kernel.
 netif-msg.txt
-- 
cgit v1.1


From 38b65190c6ab0be8ce7cff69e734ca5b5e7fa309 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 19 Aug 2011 07:55:10 +0200
Subject: ALSA: usb-audio - Fix missing mixer dB information

The recent fix for testing dB range at the mixer creation time seems
to cause regressions in some devices.  In such devices, reading the dB
info at probing time gives an error, thus both dBmin and dBmax are still
zero, and TLV flag isn't set although the later read of dB info succeeds.

This patch adds a workaround for such a case by assuming that the later
read will succeed.  In future, a similar test should be performed in a
case where a wrong dB range is seen even in the later read.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Cc: <stable@kernel.org>
---
 sound/usb/mixer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index c04d7c7..cdd19d7 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -152,6 +152,7 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p,
 	if (p && p->dB) {
 		cval->dBmin = p->dB->min;
 		cval->dBmax = p->dB->max;
+		cval->initialized = 1;
 	}
 }
 
@@ -1092,7 +1093,7 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
 				" Switch" : " Volume");
 		if (control == UAC_FU_VOLUME) {
 			check_mapped_dB(map, cval);
-			if (cval->dBmin < cval->dBmax) {
+			if (cval->dBmin < cval->dBmax || !cval->initialized) {
 				kctl->tlv.c = mixer_vol_tlv;
 				kctl->vd[0].access |= 
 					SNDRV_CTL_ELEM_ACCESS_TLV_READ |
-- 
cgit v1.1


From 98e77438aed3cd3343cbb86825127b1d9d2bea33 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <dbaluta@ixiacom.com>
Date: Fri, 19 Aug 2011 03:19:07 -0700
Subject: ipv6: Fix ipv6_getsockopt for IPV6_2292PKTOPTIONS

IPV6_2292PKTOPTIONS is broken for 32-bit applications running
in COMPAT mode on 64-bit kernels.

The same problem was fixed for IPv4 with the patch:
ipv4: Fix ip_getsockopt for IP_PKTOPTIONS,
commit dd23198e58cd35259dd09e8892bbdb90f1d57748

Signed-off-by: Sorin Dumitru <sdumitru@ixiacom.com>
Signed-off-by: Daniel Baluta <dbaluta@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9cb191e..147ede38 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 }
 
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
-		    char __user *optval, int __user *optlen)
+		    char __user *optval, int __user *optlen, unsigned flags)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 
 		msg.msg_control = optval;
 		msg.msg_controllen = len;
-		msg.msg_flags = 0;
+		msg.msg_flags = flags;
 
 		lock_sock(sk);
 		skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	if(level != SOL_IPV6)
 		return -ENOPROTOOPT;
 
-	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
 #ifdef CONFIG_NETFILTER
 	/* we need to exclude all possible ENOPROTOOPTs except default case */
 	if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
 			ipv6_getsockopt);
 
-	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+				 MSG_CMSG_COMPAT);
 #ifdef CONFIG_NETFILTER
 	/* we need to exclude all possible ENOPROTOOPTs except default case */
 	if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
-- 
cgit v1.1


From bb0822954aab7d23a3f902c2a103ee0242f6046e Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 16 Aug 2011 13:37:14 -0600
Subject: squeeze max-pause area and drop pass-good area

Revert the pass-good area introduced in ffd1f609ab10 ("writeback:
introduce max-pause and pass-good dirty limits") and make the max-pause
area smaller and safe.

This fixes ~30% performance regression in the ext3 data=writeback
fio_mmap_randwrite_64k/fio_mmap_randrw_64k test cases, where there are
12 JBOD disks, on each disk runs 8 concurrent tasks doing reads+writes.

Using deadline scheduler also has a regression, but not that big as CFQ,
so this suggests we have some write starvation.

The test logs show that

- the disks are sometimes under utilized

- global dirty pages sometimes rush high to the pass-good area for
  several hundred seconds, while in the mean time some bdi dirty pages
  drop to very low value (bdi_dirty << bdi_thresh).  Then suddenly the
  global dirty pages dropped under global dirty threshold and bdi_dirty
  rush very high (for example, 2 times higher than bdi_thresh). During
  which time balance_dirty_pages() is not called at all.

So the problems are

1) The random writes progress so slow that they break the assumption of
   the max-pause logic that "8 pages per 200ms is typically more than
   enough to curb heavy dirtiers".

2) The max-pause logic ignored task_bdi_thresh and thus opens the possibility
   for some bdi's to over dirty pages, leading to (bdi_dirty >> bdi_thresh)
   and then (bdi_thresh >> bdi_dirty) for others.

3) The higher max-pause/pass-good thresholds somehow leads to the bad
   swing of dirty pages.

The fix is to allow the task to slightly dirty over task_bdi_thresh, but
no way to exceed bdi_dirty and/or global dirty_thresh.

Tests show that it fixed the JBOD regression completely (both behavior
and performance), while still being able to cut down large pause times
in balance_dirty_pages() for single-disk cases.

Reported-by: Li Shaohua <shaohua.li@intel.com>
Tested-by: Li Shaohua <shaohua.li@intel.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/writeback.h | 11 -----------
 mm/page-writeback.c       | 15 ++-------------
 2 files changed, 2 insertions(+), 24 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index f1bfa12e..2b8963f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -12,15 +12,6 @@
  *
  *	(thresh - thresh/DIRTY_FULL_SCOPE, thresh)
  *
- * The 1/16 region above the global dirty limit will be put to maximum pauses:
- *
- *	(limit, limit + limit/DIRTY_MAXPAUSE_AREA)
- *
- * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put
- * to loops:
- *
- *	(limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA)
- *
  * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
  * time) for the dirty pages to drop, unless written enough pages.
  *
@@ -31,8 +22,6 @@
  */
 #define DIRTY_SCOPE		8
 #define DIRTY_FULL_SCOPE	(DIRTY_SCOPE / 2)
-#define DIRTY_MAXPAUSE_AREA		16
-#define DIRTY_PASSGOOD_AREA		8
 
 /*
  * 4MB minimal write chunk size
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d196074..0e309cd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * 200ms is typically more than enough to curb heavy dirtiers;
 		 * (b) the pause time limit makes the dirtiers more responsive.
 		 */
-		if (nr_dirty < dirty_thresh +
-			       dirty_thresh / DIRTY_MAXPAUSE_AREA &&
+		if (nr_dirty < dirty_thresh &&
+		    bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 &&
 		    time_after(jiffies, start_time + MAX_PAUSE))
 			break;
-		/*
-		 * pass-good area. When some bdi gets blocked (eg. NFS server
-		 * not responding), or write bandwidth dropped dramatically due
-		 * to concurrent reads, or dirty threshold suddenly dropped and
-		 * the dirty pages cannot be brought down anytime soon (eg. on
-		 * slow USB stick), at least let go of the good bdi's.
-		 */
-		if (nr_dirty < dirty_thresh +
-			       dirty_thresh / DIRTY_PASSGOOD_AREA &&
-		    bdi_dirty < bdi_thresh)
-			break;
 
 		/*
 		 * Increase the delay for each loop, up to our previous
-- 
cgit v1.1


From 63b37de12889b7b96463b7d6de6d3f3704486b91 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 16 Aug 2011 15:36:21 -0400
Subject: cpupower: fix Makefile typo

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 11521d2..edb021c 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -35,7 +35,7 @@ NLS ?=		true
 
 # Set the following to 'true' to build/install the
 # cpufreq-bench benchmarking tool
-CPUFRQ_BENCH ?= true
+CPUFREQ_BENCH ?= true
 
 # Prefix to the directories we're installing to
 DESTDIR ?=
@@ -139,7 +139,7 @@ ifeq ($(strip $(NLS)),true)
 	COMPILE_NLS += create-gmo
 endif
 
-ifeq ($(strip $(CPUFRQ_BENCH)),true)
+ifeq ($(strip $(CPUFREQ_BENCH)),true)
 	INSTALL_BENCH += install-bench
 	COMPILE_BENCH += compile-bench
 endif
-- 
cgit v1.1


From 47c336307a3680cfdf4adbe718d79f3fe66702ea Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 19 Aug 2011 17:00:02 +0200
Subject: cpupower: make NLS truly optional

Loosely based on a patch for cpufrequtils, submittted by
Sergey Dryabzhinsky <sergey.dryabzhinsky@gmail.com> and

signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/Makefile                | 1 +
 tools/power/cpupower/utils/helpers/helpers.h | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index edb021c..e8a03ac 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -137,6 +137,7 @@ CFLAGS +=	-pipe
 ifeq ($(strip $(NLS)),true)
 	INSTALL_NLS += install-gmo
 	COMPILE_NLS += create-gmo
+	CFLAGS += -DNLS
 endif
 
 ifeq ($(strip $(CPUFREQ_BENCH)),true)
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 7a83022..2747e73 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -16,11 +16,20 @@
 #include "helpers/bitmask.h"
 
 /* Internationalization ****************************/
+#ifdef NLS
+
 #define _(String) gettext(String)
 #ifndef gettext_noop
 #define gettext_noop(String) String
 #endif
 #define N_(String) gettext_noop(String)
+
+#else /* !NLS */
+
+#define _(String) String
+#define N_(String) String
+
+#endif
 /* Internationalization ****************************/
 
 extern int run_as_root;
-- 
cgit v1.1


From 498ca793d90aef8ad38a852a969c257f62832738 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sat, 6 Aug 2011 18:11:43 +0200
Subject: cpupower: use man(1) when calling "cpupower help subcommand"

Instead of printing something non-formatted to stdout, call
man(1) to show the man page for the proper subcommand.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
---
 tools/power/cpupower/man/cpupower-frequency-info.1 |  6 +-
 tools/power/cpupower/man/cpupower-frequency-set.1  |  8 +-
 tools/power/cpupower/man/cpupower.1                | 14 ++--
 tools/power/cpupower/utils/builtin.h               |  7 --
 tools/power/cpupower/utils/cpufreq-info.c          | 42 +---------
 tools/power/cpupower/utils/cpufreq-set.c           | 29 +------
 tools/power/cpupower/utils/cpuidle-info.c          | 24 +-----
 tools/power/cpupower/utils/cpupower-info.c         | 20 +----
 tools/power/cpupower/utils/cpupower-set.c          | 25 +-----
 tools/power/cpupower/utils/cpupower.c              | 91 ++++++++++++----------
 .../cpupower/utils/idle_monitor/cpupower-monitor.c | 48 ++++--------
 11 files changed, 86 insertions(+), 228 deletions(-)

diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 3194811..bb60a8d 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -1,10 +1,10 @@
-.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-info \- Utility to retrieve cpufreq kernel information
+cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
 .SH "SYNTAX"
 .LP 
-cpufreq\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
 A small tool which prints out cpufreq information helpful to developers and interested users.
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1
index 26e3e13..685f469 100644
--- a/tools/power/cpupower/man/cpupower-frequency-set.1
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -1,13 +1,13 @@
-.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" ""
 .SH "NAME"
 .LP 
-cpufreq\-set \- A small tool which allows to modify cpufreq settings.
+cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
 .SH "SYNTAX"
 .LP 
-cpufreq\-set [\fIoptions\fP]
+cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
 .SH "DESCRIPTION"
 .LP 
-cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
+cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
 .SH "OPTIONS"
 .LP 
 .TP 
diff --git a/tools/power/cpupower/man/cpupower.1 b/tools/power/cpupower/man/cpupower.1
index 78c20fe..baf741d 100644
--- a/tools/power/cpupower/man/cpupower.1
+++ b/tools/power/cpupower/man/cpupower.1
@@ -3,7 +3,7 @@
 cpupower \- Shows and sets processor power related values
 .SH SYNOPSIS
 .ft B
-.B cpupower [ \-c cpulist ] subcommand [ARGS]
+.B cpupower [ \-c cpulist ] <command> [ARGS]
 
 .B cpupower \-v|\-\-version
 
@@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values
 \fBcpupower \fP is a collection of tools to examine and tune power saving
 related features of your processor.
 
-The manpages of the subcommands (cpupower\-<subcommand>(1)) provide detailed
+The manpages of the commands (cpupower\-<command>(1)) provide detailed
 descriptions of supported features. Run \fBcpupower help\fP to get an overview
-of supported subcommands.
+of supported commands.
 
 .SH Options
 .PP
 \-\-help, \-h
 .RS 4
-Shows supported subcommands and general usage.
+Shows supported commands and general usage.
 .RE
 .PP
 \-\-cpu cpulist,  \-c cpulist
 .RS 4
 Only show or set values for specific cores.
-This option is not supported by all subcommands, details can be found in the
-manpages of the subcommands.
+This option is not supported by all commands, details can be found in the
+manpages of the commands.
 
-Some subcommands access all cores (typically the *\-set commands), some only
+Some commands access all cores (typically the *\-set commands), some only
 the first core (typically the *\-info commands) by default.
 
 The syntax for <cpulist> is based on how the kernel exports CPU bitmasks via
diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h
index c870ffb..c10496f 100644
--- a/tools/power/cpupower/utils/builtin.h
+++ b/tools/power/cpupower/utils/builtin.h
@@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv);
 extern int cmd_idle_info(int argc, const char **argv);
 extern int cmd_monitor(int argc, const char **argv);
 
-extern void set_help(void);
-extern void info_help(void);
-extern void freq_set_help(void);
-extern void freq_info_help(void);
-extern void idle_info_help(void);
-extern void monitor_help(void);
-
 #endif
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index 5a1d25f..28953c9 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human)
 	return 0;
 }
 
-void freq_info_help(void)
-{
-	printf(_("Usage: cpupower freqinfo [options]\n"));
-	printf(_("Options:\n"));
-	printf(_("  -e, --debug          Prints out debug information [default]\n"));
-	printf(_("  -f, --freq           Get frequency the CPU currently runs at, according\n"
-	       "                       to the cpufreq core *\n"));
-	printf(_("  -w, --hwfreq         Get frequency the CPU currently runs at, by reading\n"
-	       "                       it from hardware (only available to root) *\n"));
-	printf(_("  -l, --hwlimits       Determine the minimum and maximum CPU frequency allowed *\n"));
-	printf(_("  -d, --driver         Determines the used cpufreq kernel driver *\n"));
-	printf(_("  -p, --policy         Gets the currently used cpufreq policy *\n"));
-	printf(_("  -g, --governors      Determines available cpufreq governors *\n"));
-	printf(_("  -r, --related-cpus   Determines which CPUs run at the same hardware frequency *\n"));
-	printf(_("  -a, --affected-cpus  Determines which CPUs need to have their frequency\n"
-			"                       coordinated by software *\n"));
-	printf(_("  -s, --stats          Shows cpufreq statistics if available\n"));
-	printf(_("  -y, --latency        Determines the maximum latency on CPU frequency changes *\n"));
-	printf(_("  -b, --boost          Checks for turbo or boost modes  *\n"));
-	printf(_("  -o, --proc           Prints out information like provided by the /proc/cpufreq\n"
-	       "                       interface in 2.4. and early 2.6. kernels\n"));
-	printf(_("  -m, --human          human-readable output for the -f, -w, -s and -y parameters\n"));
-	printf(_("  -h, --help           Prints out this screen\n"));
-
-	printf("\n");
-	printf(_("If no argument is given, full output about\n"
-	       "cpufreq is printed which is useful e.g. for reporting bugs.\n\n"));
-	printf(_("By default info of CPU 0 is shown which can be overridden\n"
-		 "with the cpupower --cpu main command option.\n"));
-}
-
 static struct option info_opts[] = {
 	{ .name = "debug",	.has_arg = no_argument,		.flag = NULL,	.val = 'e'},
 	{ .name = "boost",	.has_arg = no_argument,		.flag = NULL,	.val = 'b'},
@@ -556,7 +525,6 @@ static struct option info_opts[] = {
 	{ .name = "latency",	.has_arg = no_argument,		.flag = NULL,	.val = 'y'},
 	{ .name = "proc",	.has_arg = no_argument,		.flag = NULL,	.val = 'o'},
 	{ .name = "human",	.has_arg = no_argument,		.flag = NULL,	.val = 'm'},
-	{ .name = "help",	.has_arg = no_argument,		.flag = NULL,	.val = 'h'},
 	{ },
 };
 
@@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv)
 	int output_param = 0;
 
 	do {
-		ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL);
+		ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL);
 		switch (ret) {
 		case '?':
 			output_param = '?';
 			cont = 0;
 			break;
-		case 'h':
-			output_param = 'h';
-			cont = 0;
-			break;
 		case -1:
 			cont = 0;
 			break;
@@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv)
 		return -EINVAL;
 	case '?':
 		printf(_("invalid or unknown argument\n"));
-		freq_info_help();
 		return -EINVAL;
-	case 'h':
-		freq_info_help();
-		return EXIT_SUCCESS;
 	case 'o':
 		proc_cpufreq_output();
 		return EXIT_SUCCESS;
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 5f78362..dd1539e 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -20,34 +20,11 @@
 
 #define NORM_FREQ_LEN 32
 
-void freq_set_help(void)
-{
-	printf(_("Usage: cpupower frequency-set [options]\n"));
-	printf(_("Options:\n"));
-	printf(_("  -d FREQ, --min FREQ      new minimum CPU frequency the governor may select\n"));
-	printf(_("  -u FREQ, --max FREQ      new maximum CPU frequency the governor may select\n"));
-	printf(_("  -g GOV, --governor GOV   new cpufreq governor\n"));
-	printf(_("  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
-	       "                           governor to be available and loaded\n"));
-	printf(_("  -r, --related            Switches all hardware-related CPUs\n"));
-	printf(_("  -h, --help               Prints out this screen\n"));
-	printf("\n");
-	printf(_("Notes:\n"
-	       "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"));
-	printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n"
-	       "   except the -c CPU, --cpu CPU parameter\n"
-	       "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
-	       "   by postfixing the value with the wanted unit name, without any space\n"
-	       "   (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"));
-
-}
-
 static struct option set_opts[] = {
 	{ .name = "min",	.has_arg = required_argument,	.flag = NULL,	.val = 'd'},
 	{ .name = "max",	.has_arg = required_argument,	.flag = NULL,	.val = 'u'},
 	{ .name = "governor",	.has_arg = required_argument,	.flag = NULL,	.val = 'g'},
 	{ .name = "freq",	.has_arg = required_argument,	.flag = NULL,	.val = 'f'},
-	{ .name = "help",	.has_arg = no_argument,		.flag = NULL,	.val = 'h'},
 	{ .name = "related",	.has_arg = no_argument,		.flag = NULL,	.val='r'},
 	{ },
 };
@@ -80,7 +57,6 @@ const struct freq_units def_units[] = {
 static void print_unknown_arg(void)
 {
 	printf(_("invalid or unknown argument\n"));
-	freq_set_help();
 }
 
 static unsigned long string_to_frequency(const char *str)
@@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv)
 
 	/* parameter parsing */
 	do {
-		ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL);
+		ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL);
 		switch (ret) {
 		case '?':
 			print_unknown_arg();
 			return -EINVAL;
-		case 'h':
-			freq_set_help();
-			return 0;
 		case -1:
 			cont = 0;
 			break;
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 70da357..b028267 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
 	}
 }
 
-/* --freq / -f */
-
-void idle_info_help(void)
-{
-	printf(_ ("Usage: cpupower idleinfo [options]\n"));
-	printf(_ ("Options:\n"));
-	printf(_ ("  -s, --silent         Only show general C-state information\n"));
-	printf(_ ("  -o, --proc           Prints out information like provided by the /proc/acpi/processor/*/power\n"
-	       "                       interface in older kernels\n"));
-	printf(_ ("  -h, --help           Prints out this screen\n"));
-
-	printf("\n");
-}
-
 static struct option info_opts[] = {
 	{ .name = "silent",	.has_arg = no_argument,	.flag = NULL,	.val = 's'},
 	{ .name = "proc",	.has_arg = no_argument,	.flag = NULL,	.val = 'o'},
-	{ .name = "help",	.has_arg = no_argument,	.flag = NULL,	.val = 'h'},
 	{ },
 };
 
 static inline void cpuidle_exit(int fail)
 {
-	idle_info_help();
 	exit(EXIT_FAILURE);
 }
 
@@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv)
 	unsigned int cpu = 0;
 
 	do {
-		ret = getopt_long(argc, argv, "hos", info_opts, NULL);
+		ret = getopt_long(argc, argv, "os", info_opts, NULL);
 		if (ret == -1)
 			break;
 		switch (ret) {
@@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv)
 			output_param = '?';
 			cont = 0;
 			break;
-		case 'h':
-			output_param = 'h';
-			cont = 0;
-			break;
 		case 's':
 			verbose = 0;
 			break;
@@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv)
 	case '?':
 		printf(_("invalid or unknown argument\n"));
 		cpuidle_exit(EXIT_FAILURE);
-	case 'h':
-		cpuidle_exit(EXIT_SUCCESS);
 	}
 
 	/* Default is: show output of CPU 0 only */
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index 85253cb..3f68632 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -16,31 +16,16 @@
 #include "helpers/helpers.h"
 #include "helpers/sysfs.h"
 
-void info_help(void)
-{
-	printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"));
-	printf(_("Options:\n"));
-	printf(_("  -b, --perf-bias    Gets CPU's power vs performance policy on some\n"
-	       "                           Intel models [0-15], see manpage for details\n"));
-	printf(_("  -m, --sched-mc     Gets the kernel's multi core scheduler policy.\n"));
-	printf(_("  -s, --sched-smt    Gets the kernel's thread sibling scheduler policy.\n"));
-	printf(_("  -h, --help               Prints out this screen\n"));
-	printf(_("\nPassing no option will show all info, by default only on core 0\n"));
-	printf("\n");
-}
-
 static struct option set_opts[] = {
 	{ .name = "perf-bias",	.has_arg = optional_argument,	.flag = NULL,	.val = 'b'},
 	{ .name = "sched-mc",	.has_arg = optional_argument,	.flag = NULL,	.val = 'm'},
 	{ .name = "sched-smt",	.has_arg = optional_argument,	.flag = NULL,	.val = 's'},
-	{ .name = "help",	.has_arg = no_argument,		.flag = NULL,	.val = 'h'},
 	{ },
 };
 
 static void print_wrong_arg_exit(void)
 {
 	printf(_("invalid or unknown argument\n"));
-	info_help();
 	exit(EXIT_FAILURE);
 }
 
@@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv)
 	textdomain(PACKAGE);
 
 	/* parameter parsing */
-	while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) {
+	while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) {
 		switch (ret) {
-		case 'h':
-			info_help();
-			return 0;
 		case 'b':
 			if (params.perf_bias)
 				print_wrong_arg_exit();
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index bc1b391..dc4de37 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -17,30 +17,16 @@
 #include "helpers/sysfs.h"
 #include "helpers/bitmask.h"
 
-void set_help(void)
-{
-	printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"));
-	printf(_("Options:\n"));
-	printf(_("  -b, --perf-bias [VAL]    Sets CPU's power vs performance policy on some\n"
-	       "                           Intel models [0-15], see manpage for details\n"));
-	printf(_("  -m, --sched-mc  [VAL]    Sets the kernel's multi core scheduler policy.\n"));
-	printf(_("  -s, --sched-smt [VAL]    Sets the kernel's thread sibling scheduler policy.\n"));
-	printf(_("  -h, --help               Prints out this screen\n"));
-	printf("\n");
-}
-
 static struct option set_opts[] = {
 	{ .name = "perf-bias",	.has_arg = optional_argument,	.flag = NULL,	.val = 'b'},
 	{ .name = "sched-mc",	.has_arg = optional_argument,	.flag = NULL,	.val = 'm'},
 	{ .name = "sched-smt",	.has_arg = optional_argument,	.flag = NULL,	.val = 's'},
-	{ .name = "help",	.has_arg = no_argument,		.flag = NULL,	.val = 'h'},
 	{ },
 };
 
 static void print_wrong_arg_exit(void)
 {
 	printf(_("invalid or unknown argument\n"));
-	set_help();
 	exit(EXIT_FAILURE);
 }
 
@@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv)
 
 	params.params = 0;
 	/* parameter parsing */
-	while ((ret = getopt_long(argc, argv, "m:s:b:h",
+	while ((ret = getopt_long(argc, argv, "m:s:b:",
 						set_opts, NULL)) != -1) {
 		switch (ret) {
-		case 'h':
-			set_help();
-			return 0;
 		case 'b':
 			if (params.perf_bias)
 				print_wrong_arg_exit();
@@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv)
 		}
 	};
 
-	if (!params.params) {
-		set_help();
-		return -EINVAL;
-	}
+	if (!params.params)
+		print_wrong_arg_exit();
 
 	if (params.sched_mc) {
 		ret = sysfs_set_sched("mc", sched_mc);
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
index 5844ae0..52bee59 100644
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -11,6 +11,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include "builtin.h"
 #include "helpers/helpers.h"
@@ -19,13 +20,12 @@
 struct cmd_struct {
 	const char *cmd;
 	int (*main)(int, const char **);
-	void (*usage)(void);
 	int needs_root;
 };
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
-int cmd_help(int argc, const char **argv);
+static int cmd_help(int argc, const char **argv);
 
 /* Global cpu_info object available for all binaries
  * Info only retrieved from CPU 0
@@ -44,55 +44,66 @@ int be_verbose;
 static void print_help(void);
 
 static struct cmd_struct commands[] = {
-	{ "frequency-info",	cmd_freq_info,	freq_info_help,	0	},
-	{ "frequency-set",	cmd_freq_set,	freq_set_help,	1	},
-	{ "idle-info",		cmd_idle_info,	idle_info_help,	0	},
-	{ "set",		cmd_set,	set_help,	1	},
-	{ "info",		cmd_info,	info_help,	0	},
-	{ "monitor",		cmd_monitor,	monitor_help,	0	},
-	{ "help",		cmd_help,	print_help,	0	},
-	/*	{ "bench",	cmd_bench,	NULL,		1	}, */
+	{ "frequency-info",	cmd_freq_info,	0	},
+	{ "frequency-set",	cmd_freq_set,	1	},
+	{ "idle-info",		cmd_idle_info,	0	},
+	{ "set",		cmd_set,	1	},
+	{ "info",		cmd_info,	0	},
+	{ "monitor",		cmd_monitor,	0	},
+	{ "help",		cmd_help,	0	},
+	/*	{ "bench",	cmd_bench,	1	}, */
 };
 
-int cmd_help(int argc, const char **argv)
-{
-	unsigned int i;
-
-	if (argc > 1) {
-		for (i = 0; i < ARRAY_SIZE(commands); i++) {
-			struct cmd_struct *p = commands + i;
-			if (strcmp(p->cmd, argv[1]))
-				continue;
-			if (p->usage) {
-				p->usage();
-				return EXIT_SUCCESS;
-			}
-		}
-	}
-	print_help();
-	if (argc == 1)
-		return EXIT_SUCCESS; /* cpupower help */
-	return EXIT_FAILURE;
-}
-
 static void print_help(void)
 {
 	unsigned int i;
 
 #ifdef DEBUG
-	printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n"));
-	printf(_("  -d, --debug      May increase output (stderr) on some subcommands\n"));
+	printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n"));
 #else
-	printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n"));
+	printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n"));
 #endif
-	printf(_("cpupower --version\n"));
-	printf(_("Supported subcommands are:\n"));
+	printf(_("Supported commands are:\n"));
 	for (i = 0; i < ARRAY_SIZE(commands); i++)
 		printf("\t%s\n", commands[i].cmd);
-	printf(_("\nSome subcommands can make use of the -c cpulist option.\n"));
-	printf(_("Look at the general cpupower manpage how to use it\n"));
-	printf(_("and read up the subcommand's manpage whether it is supported.\n"));
-	printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n"));
+	printf(_("\nNot all commands can make use of the -c cpulist option.\n"));
+	printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n"));
+}
+
+static int print_man_page(const char *subpage)
+{
+	int len;
+	char *page;
+
+	len = 10; /* enough for "cpupower-" */
+	if (subpage != NULL)
+		len += strlen(subpage);
+
+	page = malloc(len);
+	if (!page)
+		return -ENOMEM;
+
+	sprintf(page, "cpupower");
+	if ((subpage != NULL) && strcmp(subpage, "help")) {
+		strcat(page, "-");
+		strcat(page, subpage);
+	}
+
+	execlp("man", "man", page, NULL);
+
+	/* should not be reached */
+	return -EINVAL;
+}
+
+static int cmd_help(int argc, const char **argv)
+{
+	if (argc > 1) {
+		print_man_page(argv[1]); /* exits within execlp() */
+		return EXIT_FAILURE;
+	}
+
+	print_help();
+	return EXIT_SUCCESS;
 }
 
 static void print_version(void)
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 6cb8d9e..0d6571e 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top;
 /* ToDo: Document this in the manpage */
 static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
 
+static void print_wrong_arg_exit(void)
+{
+	printf(_("invalid or unknown argument\n"));
+	exit(EXIT_FAILURE);
+}
+
 long long timespec_diff_us(struct timespec start, struct timespec end)
 {
 	struct timespec temp;
@@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end)
 	return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
 }
 
-void monitor_help(void)
-{
-	printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n"));
-	printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n"));
-	printf(_("cpupower monitor: -l\n"));
-	printf(_("\t command: pass an arbitrary command to measure specific workload\n"));
-	printf(_("\t -i: time intervall to measure for in seconds (default 1)\n"));
-	printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n"));
-	printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n"));
-	printf(_("\t -h: print this help\n"));
-	printf("\n");
-	printf(_("only one of: -l, -m are allowed\nIf none of them is passed,"));
-	printf(_(" all supported monitors are shown\n"));
-}
-
 void print_n_spaces(int n)
 {
 	int x;
@@ -246,7 +237,6 @@ static void parse_monitor_param(char *param)
 	if (hits == 0) {
 		printf(_("No matching monitor found in %s, "
 			 "try -l option\n"), param);
-		monitor_help();
 		exit(EXIT_FAILURE);
 	}
 	/* Override detected/registerd monitors array with requested one */
@@ -343,37 +333,27 @@ static void cmdline(int argc, char *argv[])
 	int opt;
 	progname = basename(argv[0]);
 
-	while ((opt = getopt(argc, argv, "+hli:m:")) != -1) {
+	while ((opt = getopt(argc, argv, "+li:m:")) != -1) {
 		switch (opt) {
-		case 'h':
-			monitor_help();
-			exit(EXIT_SUCCESS);
 		case 'l':
-			if (mode) {
-				monitor_help();
-				exit(EXIT_FAILURE);
-			}
+			if (mode)
+				print_wrong_arg_exit();
 			mode = list;
 			break;
 		case 'i':
 			/* only allow -i with -m or no option */
-			if (mode && mode != show) {
-				monitor_help();
-				exit(EXIT_FAILURE);
-			}
+			if (mode && mode != show)
+				print_wrong_arg_exit();
 			interval = atoi(optarg);
 			break;
 		case 'm':
-			if (mode) {
-				monitor_help();
-				exit(EXIT_FAILURE);
-			}
+			if (mode)
+				print_wrong_arg_exit();
 			mode = show;
 			show_monitors_param = optarg;
 			break;
 		default:
-			monitor_help();
-			exit(EXIT_FAILURE);
+			print_wrong_arg_exit();
 		}
 	}
 	if (!mode)
-- 
cgit v1.1


From d555ab6bb321814853ca8a8d4e8e22d52e18a871 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Fri, 29 Jul 2011 21:11:43 -0700
Subject: max8998_charger: Needs module.h

power/max8998_charger.c uses interfaces from linux/module.h,
so it should include that file.  This fixes build errors.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8998_charger.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/power/max8998_charger.c b/drivers/power/max8998_charger.c
index cc21fa2..ef8efad 100644
--- a/drivers/power/max8998_charger.c
+++ b/drivers/power/max8998_charger.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
-- 
cgit v1.1


From 71aa79a8c2537eb07cd26b5e4dc43274a9c10692 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Mon, 1 Aug 2011 07:29:31 +0800
Subject: max8997_charger: Needs module.h

power/max8997_charger.c uses interfaces from linux/module.h,
so it should include that file.  This fixes build errors.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8997_charger.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/power/max8997_charger.c b/drivers/power/max8997_charger.c
index 7106b49..ffc5033 100644
--- a/drivers/power/max8997_charger.c
+++ b/drivers/power/max8997_charger.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
-- 
cgit v1.1


From 815efa1eab5b0c3e071e5d6df0cc2d7e0c7e6fd7 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Fri, 12 Aug 2011 17:55:18 +0300
Subject: s3c-adc-battery: Fix compilation error due to missing header
 (module.h)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add linux/module.h to fix this compilation error:

drivers/power/s3c_adc_battery.c:435:15: error: expected declaration specifiers or ‘...’ before string constant
drivers/power/s3c_adc_battery.c:435:1: warning: data definition has no type or storage class
drivers/power/s3c_adc_battery.c:435:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_AUTHOR’
drivers/power/s3c_adc_battery.c:435:15: warning: function declaration isn’t a prototype
drivers/power/s3c_adc_battery.c:436:20: error: expected declaration specifiers or ‘...’ before string constant
drivers/power/s3c_adc_battery.c:436:1: warning: data definition has no type or storage class
drivers/power/s3c_adc_battery.c:436:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_DESCRIPTION’
drivers/power/s3c_adc_battery.c:436:20: warning: function declaration isn’t a prototype
drivers/power/s3c_adc_battery.c:437:16: error: expected declaration specifiers or ‘...’ before string constant
drivers/power/s3c_adc_battery.c:437:1: warning: data definition has no type or storage class
drivers/power/s3c_adc_battery.c:437:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_LICENSE’
drivers/power/s3c_adc_battery.c:437:16: warning: function declaration isn’t a prototype
make[2]: *** [drivers/power/s3c_adc_battery.o] Error 1

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Ian Lartey <ian@opensource.wolfsonmicro.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/s3c_adc_battery.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c
index a675e31..d32d0d7 100644
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c
@@ -20,6 +20,7 @@
 #include <linux/s3c_adc_battery.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <plat/adc.h>
 
-- 
cgit v1.1


From b095cd0a0ccdbc00c9fd99d90b22f8563687971f Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Fri, 12 Aug 2011 15:28:32 -0700
Subject: drm/i915: set GFX_MODE to pre-Ivybridge default value even on
 Ivybridge

Prior to Ivybridge, the GFX_MODE would default to 0x800, meaning that
MI_FLUSH would flush the TLBs in addition to the rest of the caches
indicated in the MI_FLUSH command.  However starting with Ivybridge, the
register defaults to 0x2800 out of reset, meaning that to invalidate the
TLB we need to use PIPE_CONTROL.  Since we're not doing that yet, go
back to the old default so things work.

v2: don't forget to actually *clear* the new bit

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h         | 4 ++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5baaef4..542453f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -375,6 +375,7 @@
 # define MI_FLUSH_ENABLE				(1 << 11)
 
 #define GFX_MODE	0x02520
+#define GFX_MODE_GEN7	0x0229c
 #define   GFX_RUN_LIST_ENABLE		(1<<15)
 #define   GFX_TLB_INVALIDATE_ALWAYS	(1<<13)
 #define   GFX_SURFACE_FAULT_ENABLE	(1<<12)
@@ -382,6 +383,9 @@
 #define   GFX_PSMI_GRANULARITY		(1<<10)
 #define   GFX_PPGTT_ENABLE		(1<<9)
 
+#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit))
+#define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0))
+
 #define SCPD0		0x0209c /* 915+ only */
 #define IER		0x020a0
 #define IIR		0x020a4
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 47b9b27..c30626e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 		if (IS_GEN6(dev) || IS_GEN7(dev))
 			mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE;
 		I915_WRITE(MI_MODE, mode);
+		if (IS_GEN7(dev))
+			I915_WRITE(GFX_MODE_GEN7,
+				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
+				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-- 
cgit v1.1


From c956b753e706f24d18a026f8efa4df3b1919fcc9 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@ti.com>
Date: Fri, 19 Aug 2011 16:59:39 -0600
Subject: OMAP: powerdomains: Make all powerdomain target states as ON at init

Program all powerdomain target state as ON; this is to prevent domains
from hitting low power states (if bootloader has target states set to
something other than ON) and potentially even losing context while PM
is not fully initialized, which can cause the system to crash.  The PM
late init code can then program the desired target state for all the
power domains.

Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
[paul@pwsan.com: dropped comment typo hunk; fixed comment indent and moved
 to kerneldoc; moved code to pwrdm_init(); changed pwrdm_init() argument name
 to prevent clash; cleaned up patch description]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/powerdomain.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c
index 9af0847..ef71fdd 100644
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -195,28 +195,35 @@ static int _pwrdm_post_transition_cb(struct powerdomain *pwrdm, void *unused)
 
 /**
  * pwrdm_init - set up the powerdomain layer
- * @pwrdm_list: array of struct powerdomain pointers to register
+ * @pwrdms: array of struct powerdomain pointers to register
  * @custom_funcs: func pointers for arch specific implementations
  *
- * Loop through the array of powerdomains @pwrdm_list, registering all
- * that are available on the current CPU. If pwrdm_list is supplied
- * and not null, all of the referenced powerdomains will be
- * registered.  No return value.  XXX pwrdm_list is not really a
- * "list"; it is an array.  Rename appropriately.
+ * Loop through the array of powerdomains @pwrdms, registering all
+ * that are available on the current CPU.  Also, program all
+ * powerdomain target state as ON; this is to prevent domains from
+ * hitting low power states (if bootloader has target states set to
+ * something other than ON) and potentially even losing context while
+ * PM is not fully initialized.  The PM late init code can then program
+ * the desired target state for all the power domains.  No return
+ * value.
  */
-void pwrdm_init(struct powerdomain **pwrdm_list, struct pwrdm_ops *custom_funcs)
+void pwrdm_init(struct powerdomain **pwrdms, struct pwrdm_ops *custom_funcs)
 {
 	struct powerdomain **p = NULL;
+	struct powerdomain *temp_p;
 
 	if (!custom_funcs)
 		WARN(1, "powerdomain: No custom pwrdm functions registered\n");
 	else
 		arch_pwrdm = custom_funcs;
 
-	if (pwrdm_list) {
-		for (p = pwrdm_list; *p; p++)
+	if (pwrdms) {
+		for (p = pwrdms; *p; p++)
 			_pwrdm_register(*p);
 	}
+
+	list_for_each_entry(temp_p, &pwrdm_list, node)
+		pwrdm_set_next_pwrst(temp_p, PWRDM_POWER_ON);
 }
 
 /**
-- 
cgit v1.1


From b1cbdb00da2ac00eb67fe277e563ff1f5093b4ba Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Fri, 19 Aug 2011 16:59:39 -0600
Subject: OMAP: clockdomain: Wait for powerdomain to be ON when using
 clockdomain force wakeup

While using clockdomain force wakeup method, not waiting for powerdomain
to be effectively ON may end up locking the clockdomain FSM until a
next wakeup event occurs.

One such issue was seen on OMAP4430, where L4_PER was periodically
getting stuck in in-transition state when transitioning from from OSWR to ON.

This issue was reported and investigated by Patrick Titiano <p-titiano@ti.com>

Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Rajendra Nayak <rnayak@ti.com>
Reported-by: Patrick Titiano <p-titiano@ti.com>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Paul Walmsley <paul@pwsan.com>
[paul@pwsan.com: updated to apply; added transition wait on clkdm_deny_idle();
 remove two superfluous pwrdm_wait_transition() calls]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/clockdomain.c | 2 ++
 arch/arm/mach-omap2/pm.c          | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c
index ab7db08..8f08906 100644
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -747,6 +747,7 @@ int clkdm_wakeup(struct clockdomain *clkdm)
 	spin_lock_irqsave(&clkdm->lock, flags);
 	clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
 	ret = arch_clkdm->clkdm_wakeup(clkdm);
+	ret |= pwrdm_state_switch(clkdm->pwrdm.ptr);
 	spin_unlock_irqrestore(&clkdm->lock, flags);
 	return ret;
 }
@@ -818,6 +819,7 @@ void clkdm_deny_idle(struct clockdomain *clkdm)
 	spin_lock_irqsave(&clkdm->lock, flags);
 	clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
 	arch_clkdm->clkdm_deny_idle(clkdm);
+	pwrdm_state_switch(clkdm->pwrdm.ptr);
 	spin_unlock_irqrestore(&clkdm->lock, flags);
 }
 
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 3feb359..472bf22 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -130,7 +130,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
 		} else {
 			hwsup = clkdm_in_hwsup(pwrdm->pwrdm_clkdms[0]);
 			clkdm_wakeup(pwrdm->pwrdm_clkdms[0]);
-			pwrdm_wait_transition(pwrdm);
 			sleep_switch = FORCEWAKEUP_SWITCH;
 		}
 	}
@@ -156,7 +155,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
 		return ret;
 	}
 
-	pwrdm_wait_transition(pwrdm);
 	pwrdm_state_switch(pwrdm);
 err:
 	return ret;
-- 
cgit v1.1


From dccaf33fa37a1bc5d651baeb3bfeb6becb86597b Mon Sep 17 00:00:00 2001
From: Jiaying Zhang <jiayingz@google.com>
Date: Fri, 19 Aug 2011 19:13:32 -0400
Subject: ext4: flush any pending end_io requests before DIO reads
 w/dioread_nolock

There is a race between ext4 buffer write and direct_IO read with
dioread_nolock mount option enabled. The problem is that we clear
PageWriteback flag during end_io time but will do
uninitialized-to-initialized extent conversion later with dioread_nolock.
If an O_direct read request comes in during this period, ext4 will return
zero instead of the recently written data.

This patch checks whether there are any pending uninitialized-to-initialized
extent conversion requests before doing O_direct read to close the race.
Note that this is just a bandaid fix. The fundamental issue is that we
clear PageWriteback flag before we really complete an IO, which is
problem-prone. To fix the fundamental issue, we may need to implement an
extent tree cache that we can use to look up pending to-be-converted extents.

Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/indirect.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b8602cd..0962642 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	if (rw == READ && ext4_should_dioread_nolock(inode))
+	if (rw == READ && ext4_should_dioread_nolock(inode)) {
+		if (unlikely(!list_empty(&ei->i_completed_io_list))) {
+			mutex_lock(&inode->i_mutex);
+			ext4_flush_completed_IO(inode);
+			mutex_unlock(&inode->i_mutex);
+		}
 		ret = __blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL, NULL, 0);
-	else {
+	} else {
 		ret = blockdev_direct_IO(rw, iocb, inode, iov,
 				 offset, nr_segs, ext4_get_block);
 
-- 
cgit v1.1


From b6acf013bdc6f6ff9643030add85832d44034a28 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sat, 20 Aug 2011 09:14:45 +0200
Subject: ALSA: hda - Don't spew too many ELD errors

Currently HD-audio driver shows the all error ELD byte as an error
in the kernel message.  This is annoying when the video driver doesn't
set the correct ELD from the beginning. e.g. radeon sends a zero-byte
data, but we still check ELD with the fixed 128 byte as a workaround
for some broken devices, it spews 128-times errors.

For avoiding this, the driver aborts reading when the first byte is
invalid.  In such a case, the whole data is certainly invalid.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_eld.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
index 28ce17d..c34f730 100644
--- a/sound/pci/hda/hda_eld.c
+++ b/sound/pci/hda/hda_eld.c
@@ -144,25 +144,17 @@ static int cea_sampling_frequencies[8] = {
 	SNDRV_PCM_RATE_192000,	/* 7: 192000Hz */
 };
 
-static unsigned char hdmi_get_eld_byte(struct hda_codec *codec, hda_nid_t nid,
+static unsigned int hdmi_get_eld_data(struct hda_codec *codec, hda_nid_t nid,
 					int byte_index)
 {
 	unsigned int val;
 
 	val = snd_hda_codec_read(codec, nid, 0,
 					AC_VERB_GET_HDMI_ELDD, byte_index);
-
 #ifdef BE_PARANOID
 	printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val);
 #endif
-
-	if ((val & AC_ELDD_ELD_VALID) == 0) {
-		snd_printd(KERN_INFO "HDMI: invalid ELD data byte %d\n",
-								byte_index);
-		val = 0;
-	}
-
-	return val & AC_ELDD_ELD_DATA;
+	return val;
 }
 
 #define GRAB_BITS(buf, byte, lowbit, bits) 		\
@@ -344,11 +336,26 @@ int snd_hdmi_get_eld(struct hdmi_eld *eld,
 	if (!buf)
 		return -ENOMEM;
 
-	for (i = 0; i < size; i++)
-		buf[i] = hdmi_get_eld_byte(codec, nid, i);
+	for (i = 0; i < size; i++) {
+		unsigned int val = hdmi_get_eld_data(codec, nid, i);
+		if (!(val & AC_ELDD_ELD_VALID)) {
+			if (!i) {
+				snd_printd(KERN_INFO
+					   "HDMI: invalid ELD data\n");
+				ret = -EINVAL;
+				goto error;
+			}
+			snd_printd(KERN_INFO
+				  "HDMI: invalid ELD data byte %d\n", i);
+			val = 0;
+		} else
+			val &= AC_ELDD_ELD_DATA;
+		buf[i] = val;
+	}
 
 	ret = hdmi_update_eld(eld, buf, size);
 
+error:
 	kfree(buf);
 	return ret;
 }
-- 
cgit v1.1


From 1b004d03d8670bdd871e0f297ed20bc510e404de Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sat, 20 Aug 2011 09:19:59 +0200
Subject: ALSA: hda - Fix error check from snd_hda_get_conn_index() in
 patch_cirrus.c

snd_hda_get_conn_index() returns a negative value while the current code
stores it in an unsigned int.  It must be stored in a signed integer.

Reported-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_cirrus.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 47d6ffc..d6c93d9 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -375,7 +375,7 @@ static int is_ext_mic(struct hda_codec *codec, unsigned int idx)
 static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin,
 			 unsigned int *idxp)
 {
-	int i;
+	int i, idx;
 	hda_nid_t nid;
 
 	nid = codec->start_nid;
@@ -384,9 +384,11 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin,
 		type = get_wcaps_type(get_wcaps(codec, nid));
 		if (type != AC_WID_AUD_IN)
 			continue;
-		*idxp = snd_hda_get_conn_index(codec, nid, pin, false);
-		if (*idxp >= 0)
+		idx = snd_hda_get_conn_index(codec, nid, pin, false);
+		if (idx >= 0) {
+			*idxp = idx;
 			return nid;
+		}
 	}
 	return 0;
 }
-- 
cgit v1.1


From de75577c8c3ab733f808c65e1a9d55882efde68e Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 20 Aug 2011 08:12:41 +0200
Subject: ALSA: sound/aoa/fabrics/layout.c: remove unneeded kfree

The label outnodev is only used when kzalloc has not yet taken place or has
failed, so there is no need for the call for kfree under this label.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier x;
expression E1!=0,E2,E3,E4;
statement S;
iterator I;
@@

(
if (...) { ... when != kfree(x)
               when != x = E3
               when != E3 = x
*  return ...;
 }
... when != x = E2
    when != I(...,x,...) S
if (...) { ... when != x = E4
 kfree(x); ... return ...; }
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/aoa/fabrics/layout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c
index 3fd1a7e..552b97a 100644
--- a/sound/aoa/fabrics/layout.c
+++ b/sound/aoa/fabrics/layout.c
@@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev)
 	sdev->pcmid = -1;
 	list_del(&ldev->list);
 	layouts_list_items--;
+	kfree(ldev);
  outnodev:
  	of_node_put(sound);
  	layout_device = NULL;
- 	kfree(ldev);
 	return -ENODEV;
 }
 
-- 
cgit v1.1


From fbe5e29ec1886967255e76946aaf537b8cc9b81e Mon Sep 17 00:00:00 2001
From: Daniel Schwierzeck <daniel.schwierzeck@googlemail.com>
Date: Fri, 19 Aug 2011 12:04:20 +0000
Subject: atm: br2684: Fix oops due to skb->dev being NULL

This oops have been already fixed with commit

    27141666b69f535a4d63d7bc6d9e84ee5032f82a

    atm: [br2684] Fix oops due to skb->dev being NULL

    It happens that if a packet arrives in a VC between the call to open it on
    the hardware and the call to change the backend to br2684, br2684_regvcc
    processes the packet and oopses dereferencing skb->dev because it is
    NULL before the call to br2684_push().

but have been introduced again with commit

    b6211ae7f2e56837c6a4849316396d1535606e90

    atm: Use SKB queue and list helpers instead of doing it by-hand.

Signed-off-by: Daniel Schwierzeck <daniel.schwierzeck@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 52cfd0c..d07223c 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	skb_queue_walk_safe(&queue, skb, tmp) {
-		struct net_device *dev = skb->dev;
+		struct net_device *dev;
+
+		br2684_push(atmvcc, skb);
+		dev = skb->dev;
 
 		dev->stats.rx_bytes -= skb->len;
 		dev->stats.rx_packets--;
-
-		br2684_push(atmvcc, skb);
 	}
 
 	/* initialize netdev carrier state */
-- 
cgit v1.1


From d70d43d7d719ab709af7df109e706e804fe21834 Mon Sep 17 00:00:00 2001
From: Jiejing Zhang <jiejing.zhang@freescale.com>
Date: Sat, 20 Aug 2011 14:38:01 -0700
Subject: Input: max11801_ts - correct license statement

The original license statement was confusing since it was unclear if
the license was pure GPLv2 or GPLv2+ and did not match the license
of the driver max11801_ts was derived from. The license is GPLv2+.

Signed-off-by: Jiejing Zhang <jiejing.zhang@freescale.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/max11801_ts.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c
index 4f2713d..4627fe5 100644
--- a/drivers/input/touchscreen/max11801_ts.c
+++ b/drivers/input/touchscreen/max11801_ts.c
@@ -9,7 +9,8 @@
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
  */
 
 /*
-- 
cgit v1.1


From 5598473a5b40c47a8c5349dd2c2630797169cf1a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 20 Aug 2011 17:14:54 -0700
Subject: sparc: Allow handling signals when stack is corrupted.

If we can't push the pending register windows onto the user's stack,
we disallow signal delivery even if the signal would be delivered on a
valid seperate signal stack.

Add a register window save area in the signal frame, and store any
unsavable windows there.

On sigreturn, if any windows are still queued up in the signal frame,
try to push them back onto the stack and if that fails we kill the
process immediately.

This allows the debug/tst-longjmp_chk2 glibc test case to pass.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/sigcontext.h |  14 +++
 arch/sparc/kernel/Makefile          |   1 +
 arch/sparc/kernel/signal32.c        | 184 ++++++++++++++++++++----------------
 arch/sparc/kernel/signal_32.c       | 172 ++++++++++++++++-----------------
 arch/sparc/kernel/signal_64.c       | 108 +++++++++------------
 arch/sparc/kernel/sigutil.h         |   9 ++
 arch/sparc/kernel/sigutil_32.c      | 120 +++++++++++++++++++++++
 arch/sparc/kernel/sigutil_64.c      |  93 ++++++++++++++++++
 8 files changed, 468 insertions(+), 233 deletions(-)
 create mode 100644 arch/sparc/kernel/sigutil.h
 create mode 100644 arch/sparc/kernel/sigutil_32.c
 create mode 100644 arch/sparc/kernel/sigutil_64.c

diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
index a1607d18..69914d7 100644
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -45,6 +45,19 @@ typedef struct {
 	int			si_mask;
 } __siginfo32_t;
 
+#define __SIGC_MAXWIN	7
+
+typedef struct {
+	unsigned long locals[8];
+	unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+	int			wsaved;
+	__siginfo_reg_window	reg_window[__SIGC_MAXWIN];
+	unsigned long		rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
 #ifdef CONFIG_SPARC64
 typedef struct {
 	unsigned   int si_float_regs [64];
@@ -73,6 +86,7 @@ struct sigcontext {
 		unsigned long	ss_size;
 	}			sigc_stack;
 	unsigned long		sigc_mask;
+	__siginfo_rwin_t *	sigc_rwin_save;
 };
 
 #else
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index b90b4a1..cb85458 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4c_irq.o sun4d_irq.o
 
 obj-y                   += process_$(BITS).o
 obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
 obj-$(CONFIG_SPARC32)   += ioport.o
 obj-y                   += setup_$(BITS).o
 obj-y                   += idprom.o
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 75fad42..1ba95af 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -29,6 +29,8 @@
 #include <asm/visasm.h>
 #include <asm/compat_signal.h>
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 /* This magic should be in g_upper[0] for all upper parts
@@ -44,14 +46,14 @@ typedef struct {
 struct signal_frame32 {
 	struct sparc_stackf32	ss;
 	__siginfo32_t		info;
-	/* __siginfo_fpu32_t * */ u32 fpu_save;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
 	unsigned int		insns[2];
 	unsigned int		extramask[_COMPAT_NSIG_WORDS - 1];
 	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
 	/* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
 	siginfo_extra_v8plus_t	v8plus;
-	__siginfo_fpu_t		fpu_state;
-};
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 typedef struct compat_siginfo{
 	int si_signo;
@@ -110,18 +112,14 @@ struct rt_signal_frame32 {
 	compat_siginfo_t	info;
 	struct pt_regs32	regs;
 	compat_sigset_t		mask;
-	/* __siginfo_fpu32_t * */ u32 fpu_save;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
 	unsigned int		insns[2];
 	stack_t32		stack;
 	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
 	/* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
 	siginfo_extra_v8plus_t	v8plus;
-	__siginfo_fpu_t		fpu_state;
-};
-
-/* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
 {
@@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 	return 0;
 }
 
-static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err;
-	
-	err = __get_user(fprs, &fpu->si_fprs);
-	fprs_write(0);
-	regs->tstate &= ~TSTATE_PEF;
-	if (fprs & FPRS_DL)
-		err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32));
-	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	current_thread_info()->fpsaved[0] |= fprs;
-	return err;
-}
-
 void do_sigreturn32(struct pt_regs *regs)
 {
 	struct signal_frame32 __user *sf;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
 	unsigned int psr;
-	unsigned pc, npc, fpu_save;
+	unsigned pc, npc;
 	sigset_t set;
 	unsigned seta[_COMPAT_NSIG_WORDS];
 	int err, i;
@@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state32(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
 	err |= __get_user(seta[0], &sf->info.si_mask);
 	err |= copy_from_user(seta+1, &sf->extramask,
 			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
@@ -300,7 +286,9 @@ segv:
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
 	struct rt_signal_frame32 __user *sf;
-	unsigned int psr, pc, npc, fpu_save, u_ss_sp;
+	unsigned int psr, pc, npc, u_ss_sp;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
 	mm_segment_t old_fs;
 	sigset_t set;
 	compat_sigset_t seta;
@@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state32(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
 	err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t));
 	err |= __get_user(u_ss_sp, &sf->stack.ss_sp);
 	st.ss_sp = compat_ptr(u_ss_sp);
@@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 	do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf);
 	set_fs(old_fs);
 	
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
+
 	switch (_NSIG_WORDS) {
 		case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32);
 		case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32);
@@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
 	return (void __user *) sp;
 }
 
-static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err = 0;
-	
-	fprs = current_thread_info()->fpsaved[0];
-	if (fprs & FPRS_DL)
-		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-				    (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-				    (sizeof(unsigned int) * 32));
-	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	err |= __put_user(fprs, &fpu->si_fprs);
-
-	return err;
-}
-
 /* The I-cache flush instruction only works in the primary ASI, which
  * right now is the nucleus, aka. kernel space.
  *
@@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			 int signo, sigset_t *oldset)
 {
 	struct signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
 	int sigframe_size;
 	u32 psr;
-	int i, err;
 	unsigned int seta[_COMPAT_NSIG_WORDS];
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = SF_ALIGNEDSZ;
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct signal_frame32 __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
 
-	if (get_thread_wsaved() != 0)
-		goto sigill;
+	tail = (sf + 1);
 
 	/* 2. Save the current process state */
 	if (test_thread_flag(TIF_32BIT)) {
@@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			  &sf->v8plus.asi);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state32(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	switch (_NSIG_WORDS) {
 	case 4: seta[7] = (oldset->sig[3] >> 32);
@@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __copy_to_user(sf->extramask, seta + 1,
 			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
 
-	err |= copy_in_user((u32 __user *)sf,
-			    (u32 __user *)(regs->u_regs[UREG_FP]),
-			    sizeof(struct reg_window32));
-	
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}	
 	if (err)
 		goto sigsegv;
 
@@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 		err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
 		if (err)
 			goto sigsegv;
-
 		flush_signal_insns(address);
 	}
 	return 0;
@@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			    siginfo_t *info)
 {
 	struct rt_signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
 	int sigframe_size;
 	u32 psr;
-	int i, err;
 	compat_sigset_t seta;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = RT_ALIGNEDSZ;
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct rt_signal_frame32 __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
 
-	if (get_thread_wsaved() != 0)
-		goto sigill;
+	tail = (sf + 1);
 
 	/* 2. Save the current process state */
 	if (test_thread_flag(TIF_32BIT)) {
@@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 			  &sf->v8plus.asi);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state32(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	/* Update the siginfo structure.  */
 	err |= copy_siginfo_to_user32(&sf->info, info);
@@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	}
 	err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t));
 
-	err |= copy_in_user((u32 __user *)sf,
-			    (u32 __user *)(regs->u_regs[UREG_FP]),
-			    sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}
 	if (err)
 		goto sigsegv;
 	
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 5e5c5fd..04ede8f 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -26,6 +26,8 @@
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>	/* flush_sig_insns */
 
+#include "sigutil.h"
+
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
@@ -39,8 +41,8 @@ struct signal_frame {
 	unsigned long		insns[2] __attribute__ ((aligned (8)));
 	unsigned int		extramask[_NSIG_WORDS - 1];
 	unsigned int		extra_size; /* Should be 0 */
-	__siginfo_fpu_t		fpu_state;
-};
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
 
 struct rt_signal_frame {
 	struct sparc_stackf	ss;
@@ -51,8 +53,8 @@ struct rt_signal_frame {
 	unsigned int		insns[2];
 	stack_t			stack;
 	unsigned int		extra_size; /* Should be 0 */
-	__siginfo_fpu_t		fpu_state;
-};
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
 
 /* Align macros */
 #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
@@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set)
 	return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	int err;
-#ifdef CONFIG_SMP
-	if (test_tsk_thread_flag(current, TIF_USEDFPU))
-		regs->psr &= ~PSR_EF;
-#else
-	if (current == last_task_used_math) {
-		last_task_used_math = NULL;
-		regs->psr &= ~PSR_EF;
-	}
-#endif
-	set_used_math();
-	clear_tsk_thread_flag(current, TIF_USEDFPU);
-
-	if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
-		return -EFAULT;
-
-	err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
-			       (sizeof(unsigned long) * 32));
-	err |= __get_user(current->thread.fsr, &fpu->si_fsr);
-	err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-	if (current->thread.fpqdepth != 0)
-		err |= __copy_from_user(&current->thread.fpqueue[0],
-					&fpu->si_fpqueue[0],
-					((sizeof(unsigned long) +
-					(sizeof(unsigned long *)))*16));
-	return err;
-}
-
 asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
 	struct signal_frame __user *sf;
 	unsigned long up_psr, pc, npc;
 	sigset_t set;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	int err;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-
 	if (fpu_save)
 		err |= restore_fpu_state(regs, fpu_save);
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (rwin_save)
+		err |= restore_rwin_state(rwin_save);
 
 	/* This is pretty much atomic, no amount locking would prevent
 	 * the races which exist anyways.
@@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	struct rt_signal_frame __user *sf;
 	unsigned int psr, pc, npc;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	mm_segment_t old_fs;
 	sigset_t set;
 	stack_t st;
@@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-
-	if (fpu_save)
+	if (!err && fpu_save)
 		err |= restore_fpu_state(regs, fpu_save);
 	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
 	
@@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf);
 	set_fs(old_fs);
 
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
 	sigdelsetmask(&set, ~_BLOCKABLE);
 	spin_lock_irq(&current->sighand->siglock);
 	current->blocked = set;
@@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
 	return (void __user *) sp;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	int err = 0;
-#ifdef CONFIG_SMP
-	if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
-		put_psr(get_psr() | PSR_EF);
-		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
-		regs->psr &= ~(PSR_EF);
-		clear_tsk_thread_flag(current, TIF_USEDFPU);
-	}
-#else
-	if (current == last_task_used_math) {
-		put_psr(get_psr() | PSR_EF);
-		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
-		last_task_used_math = NULL;
-		regs->psr &= ~(PSR_EF);
-	}
-#endif
-	err |= __copy_to_user(&fpu->si_float_regs[0],
-			      &current->thread.float_regs[0],
-			      (sizeof(unsigned long) * 32));
-	err |= __put_user(current->thread.fsr, &fpu->si_fsr);
-	err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-	if (current->thread.fpqdepth != 0)
-		err |= __copy_to_user(&fpu->si_fpqueue[0],
-				      &current->thread.fpqueue[0],
-				      ((sizeof(unsigned long) +
-				      (sizeof(unsigned long *)))*16));
-	clear_used_math();
-	return err;
-}
-
 static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 		       int signo, sigset_t *oldset)
 {
 	struct signal_frame __user *sf;
-	int sigframe_size, err;
+	int sigframe_size, err, wsaved;
+	void __user *tail;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 
-	sigframe_size = SF_ALIGNEDSZ;
-	if (!used_math())
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = current_thread_info()->w_saved;
+
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 
 	sf = (struct signal_frame __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
@@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill_and_return;
 
-	if (current_thread_info()->w_saved != 0)
-		goto sigill_and_return;
+	tail = sf + 1;
 
 	/* 2. Save the current process state */
 	err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
@@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(0, &sf->extra_size);
 
 	if (used_math()) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 
 	err |= __put_user(oldset->sig[0], &sf->info.si_mask);
 	err |= __copy_to_user(sf->extramask, &oldset->sig[1],
 			      (_NSIG_WORDS - 1) * sizeof(unsigned int));
-	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
 	if (err)
 		goto sigsegv;
 	
@@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 			  int signo, sigset_t *oldset, siginfo_t *info)
 {
 	struct rt_signal_frame __user *sf;
-	int sigframe_size;
+	int sigframe_size, wsaved;
+	void __user *tail;
 	unsigned int psr;
 	int err;
 
 	synchronize_user_stack();
-	sigframe_size = RT_ALIGNEDSZ;
-	if (!used_math())
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = current_thread_info()->w_saved;
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
 	sf = (struct rt_signal_frame __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
 	if (invalid_frame_pointer(sf, sigframe_size))
 		goto sigill;
-	if (current_thread_info()->w_saved != 0)
-		goto sigill;
 
+	tail = sf + 1;
 	err  = __put_user(regs->pc, &sf->regs.pc);
 	err |= __put_user(regs->npc, &sf->regs.npc);
 	err |= __put_user(regs->y, &sf->regs.y);
@@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(0, &sf->extra_size);
 
 	if (psr & PSR_EF) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 	err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
 	
 	/* Setup sigaltstack */
@@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
 	
-	err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-			      sizeof(struct reg_window32));
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
 
 	err |= copy_siginfo_to_user(&sf->info, info);
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 006fe45..47509df 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -34,6 +34,7 @@
 
 #include "entry.h"
 #include "systbls.h"
+#include "sigutil.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
@@ -236,7 +237,7 @@ struct rt_signal_frame {
 	__siginfo_fpu_t __user	*fpu_save;
 	stack_t			stack;
 	sigset_t		mask;
-	__siginfo_fpu_t		fpu_state;
+	__siginfo_rwin_t	*rwin_save;
 };
 
 static long _sigpause_common(old_sigset_t set)
@@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set)
 	return _sigpause_common(set);
 }
 
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err;
-
-	err = __get_user(fprs, &fpu->si_fprs);
-	fprs_write(0);
-	regs->tstate &= ~TSTATE_PEF;
-	if (fprs & FPRS_DL)
-		err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
-		       	       (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
-		       	       (sizeof(unsigned int) * 32));
-	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	current_thread_info()->fpsaved[0] |= fprs;
-	return err;
-}
-
 void do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_signal_frame __user *sf;
 	unsigned long tpc, tnpc, tstate;
 	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
 	sigset_t set;
 	int err;
 
@@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
 
 	err |= __get_user(fpu_save, &sf->fpu_save);
-	if (fpu_save)
-		err |= restore_fpu_state(regs, &sf->fpu_state);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, fpu_save);
 
 	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
 	err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
@@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	if (err)
 		goto segv;
 
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
 	regs->tpc = tpc;
 	regs->tnpc = tnpc;
 
@@ -351,34 +337,13 @@ segv:
 }
 
 /* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
+static int invalid_frame_pointer(void __user *fp)
 {
 	if (((unsigned long) fp) & 15)
 		return 1;
 	return 0;
 }
 
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-	unsigned long *fpregs = current_thread_info()->fpregs;
-	unsigned long fprs;
-	int err = 0;
-	
-	fprs = current_thread_info()->fpsaved[0];
-	if (fprs & FPRS_DL)
-		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-				    (sizeof(unsigned int) * 32));
-	if (fprs & FPRS_DU)
-		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-				    (sizeof(unsigned int) * 32));
-	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-	err |= __put_user(fprs, &fpu->si_fprs);
-
-	return err;
-}
-
 static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
 {
 	unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
@@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	       int signo, sigset_t *oldset, siginfo_t *info)
 {
 	struct rt_signal_frame __user *sf;
-	int sigframe_size, err;
+	int wsaved, err, sf_size;
+	void __user *tail;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = sizeof(struct rt_signal_frame);
-	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-		sigframe_size -= sizeof(__siginfo_fpu_t);
+	wsaved = get_thread_wsaved();
 
+	sf_size = sizeof(struct rt_signal_frame);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sf_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sf_size += sizeof(__siginfo_rwin_t);
 	sf = (struct rt_signal_frame __user *)
-		get_sigframe(ka, regs, sigframe_size);
-	
-	if (invalid_frame_pointer (sf, sigframe_size))
-		goto sigill;
+		get_sigframe(ka, regs, sf_size);
 
-	if (get_thread_wsaved() != 0)
+	if (invalid_frame_pointer (sf))
 		goto sigill;
 
+	tail = (sf + 1);
+
 	/* 2. Save the current process state */
 	err = copy_to_user(&sf->regs, regs, sizeof (*regs));
 
 	if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
-		err |= save_fpu_state(regs, &sf->fpu_state);
-		err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+		__siginfo_fpu_t __user *fpu_save = tail;
+		tail += sizeof(__siginfo_fpu_t);
+		err |= save_fpu_state(regs, fpu_save);
+		err |= __put_user((u64)fpu_save, &sf->fpu_save);
 	} else {
 		err |= __put_user(0, &sf->fpu_save);
 	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwin_save = tail;
+		tail += sizeof(__siginfo_rwin_t);
+		err |= save_rwin_state(wsaved, rwin_save);
+		err |= __put_user((u64)rwin_save, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
 	
 	/* Setup sigaltstack */
 	err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
@@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 
 	err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
 
-	err |= copy_in_user((u64 __user *)sf,
-			    (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
-			    sizeof(struct reg_window));
+	if (!wsaved) {
+		err |= copy_in_user((u64 __user *)sf,
+				    (u64 __user *)(regs->u_regs[UREG_FP] +
+						   STACK_BIAS),
+				    sizeof(struct reg_window));
+	} else {
+		struct reg_window *rp;
 
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+	}
 	if (info)
 		err |= copy_siginfo_to_user(&sf->info, info);
 	else {
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h
new file mode 100644
index 0000000..d223aa4
--- /dev/null
+++ b/arch/sparc/kernel/sigutil.h
@@ -0,0 +1,9 @@
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
new file mode 100644
index 0000000..35c7897
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_32.c
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err = 0;
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		regs->psr &= ~(PSR_EF);
+		clear_tsk_thread_flag(current, TIF_USEDFPU);
+	}
+#else
+	if (current == last_task_used_math) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		last_task_used_math = NULL;
+		regs->psr &= ~(PSR_EF);
+	}
+#endif
+	err |= __copy_to_user(&fpu->si_float_regs[0],
+			      &current->thread.float_regs[0],
+			      (sizeof(unsigned long) * 32));
+	err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_to_user(&fpu->si_fpqueue[0],
+				      &current->thread.fpqueue[0],
+				      ((sizeof(unsigned long) +
+				      (sizeof(unsigned long *)))*16));
+	clear_used_math();
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err;
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU))
+		regs->psr &= ~PSR_EF;
+#else
+	if (current == last_task_used_math) {
+		last_task_used_math = NULL;
+		regs->psr &= ~PSR_EF;
+	}
+#endif
+	set_used_math();
+	clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+	if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+		return -EFAULT;
+
+	err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+			       (sizeof(unsigned long) * 32));
+	err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_from_user(&current->thread.fpqueue[0],
+					&fpu->si_fpqueue[0],
+					((sizeof(unsigned long) +
+					(sizeof(unsigned long *)))*16));
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window32 *rp;
+		unsigned long fp;
+
+		rp = &current_thread_info()->reg_window[i];
+		fp = current_thread_info()->rwbuf_stkptrs[i];
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window32));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	__get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window32));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	t->w_saved = wsaved;
+	synchronize_user_stack();
+	if (t->w_saved)
+		return -EFAULT;
+	return 0;
+
+}
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
new file mode 100644
index 0000000..e7dc508
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err = 0;
+	
+	fprs = current_thread_info()->fpsaved[0];
+	if (fprs & FPRS_DL)
+		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+				    (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+				    (sizeof(unsigned int) * 32));
+	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	err |= __put_user(fprs, &fpu->si_fprs);
+
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err;
+
+	err = __get_user(fprs, &fpu->si_fprs);
+	fprs_write(0);
+	regs->tstate &= ~TSTATE_PEF;
+	if (fprs & FPRS_DL)
+		err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+		       	       (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+		       	       (sizeof(unsigned int) * 32));
+	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	current_thread_info()->fpsaved[0] |= fprs;
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window *rp = &current_thread_info()->reg_window[i];
+		unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	__get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	set_thread_wsaved(wsaved);
+	synchronize_user_stack();
+	if (get_thread_wsaved())
+		return -EFAULT;
+	return 0;
+}
-- 
cgit v1.1


From 47c08f3107270e5a439bc0106a308f7c48c9621d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 20 Aug 2011 11:49:43 -0700
Subject: pci: fix new kernel-doc warning in pci.c

Fix new kernel-doc warning in pci.c:

  Warning(drivers/pci/pci.c:3259): No description found for parameter 'mps'
  Warning(drivers/pci/pci.c:3259): Excess function parameter 'rq' description in 'pcie_set_mps'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 466fad6..0ce6742 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3250,7 +3250,7 @@ int pcie_get_mps(struct pci_dev *dev)
 /**
  * pcie_set_mps - set PCI Express maximum payload size
  * @dev: PCI device to query
- * @rq: maximum payload size in bytes
+ * @mps: maximum payload size in bytes
  *    valid values are 128, 256, 512, 1024, 2048, 4096
  *
  * If possible sets maximum payload size
-- 
cgit v1.1


From 450a37d2eca6ddf6ea8186f57a7531318df6e796 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Sun, 21 Aug 2011 00:28:56 -0600
Subject: OMAP4: clock: fix compile warning

Fix the following compile warning:

arch/arm/mach-omap2/clock44xx_data.c: In function 'omap4xxx_clk_init':
arch/arm/mach-omap2/clock44xx_data.c:3371:6: warning: 'cpu_clkflg' may be used uninitialized in this function

The approach taken here is intended to work if omap4xxx_clk_init() is
converted into an initcall.

Thanks to Bjarne Steinsbo <bsteinsbo@gmail.com> for proposing another
approach.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Cc: Bjarne Steinsbo <bsteinsbo@gmail.com>
---
 arch/arm/mach-omap2/clock44xx_data.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index 2af0e3f0..4304768 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3376,6 +3376,8 @@ int __init omap4xxx_clk_init(void)
 	} else if (cpu_is_omap446x()) {
 		cpu_mask = RATE_IN_4460;
 		cpu_clkflg = CK_446X;
+	} else {
+		return 0;
 	}
 
 	clk_init(&omap2_clk_functions);
-- 
cgit v1.1


From 6719db6a23d4b7f1e5052eedae394135e3aef9c1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Sat, 20 Aug 2011 08:29:51 -0400
Subject: Btrfs: fix 64 bit divide problem

This fixes a regression introduced by commit cdcb725c05fe ("Btrfs: check
if there is enough space for balancing smarter").  We can't do 64-bit
divides on 32-bit architectures.

In cases where we need to divide/multiply by 2 we should just left/right
shift respectively, and in cases where theres N number of devices use
do_div.  Also make the counters u64 to match up with rw_devices.
Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Acked-and-tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/extent-tree.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 80d6148..f5be06a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6735,9 +6735,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 	struct btrfs_device *device;
 	u64 min_free;
+	u64 dev_min = 1;
+	u64 dev_nr = 0;
 	int index;
-	int dev_nr = 0;
-	int dev_min = 1;
 	int full = 0;
 	int ret = 0;
 
@@ -6796,14 +6796,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	index = get_block_group_index(block_group);
 	if (index == 0) {
 		dev_min = 4;
-		min_free /= 2;
+		/* Divide by 2 */
+		min_free >>= 1;
 	} else if (index == 1) {
 		dev_min = 2;
 	} else if (index == 2) {
-		min_free *= 2;
+		/* Multiply by 2 */
+		min_free <<= 1;
 	} else if (index == 3) {
 		dev_min = fs_devices->rw_devices;
-		min_free /= dev_min;
+		do_div(min_free, dev_min);
 	}
 
 	mutex_lock(&root->fs_info->chunk_mutex);
-- 
cgit v1.1


From 2782a35132339574b06ce30556eb9f97eb1d26cd Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Sun, 21 Aug 2011 12:48:04 -0700
Subject: Input: tnetv107x-ts - add missing include of linux/module.h

tnetv107x-ts.c uses interfaces from linux/module.h,
so it should include that file.  This patch fixes build errors.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/tnetv107x-ts.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c
index 089b0a0..0e8f63e 100644
--- a/drivers/input/touchscreen/tnetv107x-ts.c
+++ b/drivers/input/touchscreen/tnetv107x-ts.c
@@ -13,6 +13,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/errno.h>
-- 
cgit v1.1


From b9cc510b395543cb7dba89c76421d23ed9e85f95 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Sun, 21 Aug 2011 12:48:08 -0700
Subject: Input: ep93xx_keypad - add missing include of linux/module.h

ep93xx_keypad.c uses interfaces from linux/module.h,
so it should include that file.  This patch fixes build errors.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/ep93xx_keypad.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c
index c8242dd..aa17e02 100644
--- a/drivers/input/keyboard/ep93xx_keypad.c
+++ b/drivers/input/keyboard/ep93xx_keypad.c
@@ -20,6 +20,7 @@
  * flag.
  */
 
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
-- 
cgit v1.1


From ffb57c4b8612c31204b06713770f6df4b8a94e4f Mon Sep 17 00:00:00 2001
From: Jay Estabrook <jay.estabrook@gmail.com>
Date: Wed, 6 Jul 2011 23:57:13 +0000
Subject: drm/radeon/alpha: Add Alpha support to Radeon DRM code

Alpha needs to have available the system bus address for the Radeon's
local memory, so that it can be used in ttm_bo_vm_fault(), when building
the PTEs for accessing that VRAM.  So, we make bus.addr hold the ioremap()
return, and then we can modify bus.base appropriately for use during page
fault processing.

Signed-off-by: Jay Estabrook <jay.estabrook@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 60125dd..9b86fb0 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -450,6 +450,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 			return -EINVAL;
 		mem->bus.base = rdev->mc.aper_base;
 		mem->bus.is_iomem = true;
+#ifdef __alpha__
+		/*
+		 * Alpha: use bus.addr to hold the ioremap() return,
+		 * so we can modify bus.base below.
+		 */
+		if (mem->placement & TTM_PL_FLAG_WC)
+			mem->bus.addr =
+				ioremap_wc(mem->bus.base + mem->bus.offset,
+					   mem->bus.size);
+		else
+			mem->bus.addr =
+				ioremap_nocache(mem->bus.base + mem->bus.offset,
+						mem->bus.size);
+
+		/*
+		 * Alpha: Use just the bus offset plus
+		 * the hose/domain memory base for bus.base.
+		 * It then can be used to build PTEs for VRAM
+		 * access, as done in ttm_bo_vm_fault().
+		 */
+		mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
+			rdev->ddev->hose->dense_mem_base;
+#endif
 		break;
 	default:
 		return -EINVAL;
-- 
cgit v1.1


From 24cae9e7c9537fd6a16bc2f5ec398ee4bef5d007 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 19 Aug 2011 15:24:16 +0000
Subject: drm/radeon: Take IH ring into account for test size calculation.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_test.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index dee4a0c..1ebd0fe 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev)
 	size = 1024 * 1024;
 
 	/* Number of tests =
-	 * (Total GTT - IB pool - writeback page - ring buffer) / test size
+	 * (Total GTT - IB pool - writeback page - ring buffers) / test size
 	 */
-	n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE -
-	     rdev->cp.ring_size)) / size;
+	n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size;
+	if (rdev->wb.wb_obj)
+		n -= RADEON_GPU_PAGE_SIZE;
+	if (rdev->ih.ring_obj)
+		n -= rdev->ih.ring_size;
+	n /= size;
 
 	gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
 	if (!gtt_obj) {
-- 
cgit v1.1


From 4fb1a35c0185f8fa3e71b12de62b8752a9a9ed0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 19 Aug 2011 15:24:17 +0000
Subject: drm/radeon: Explicitly print GTT/VRAM offsets on test failure.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise these would need to be painstakingly calculated looking at the source
code.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_test.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 1ebd0fe..602fa35 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -136,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev)
 		     gtt_start++, vram_start++) {
 			if (*vram_start != gtt_start) {
 				DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
-					  "expected 0x%p (GTT map 0x%p-0x%p)\n",
-					  i, *vram_start, gtt_start, gtt_map,
-					  gtt_end);
+					  "expected 0x%p (GTT/VRAM offset "
+					  "0x%16llx/0x%16llx)\n",
+					  i, *vram_start, gtt_start,
+					  (unsigned long long)
+					  (gtt_addr - rdev->mc.gtt_start +
+					   (void*)gtt_start - gtt_map),
+					  (unsigned long long)
+					  (vram_addr - rdev->mc.vram_start +
+					   (void*)gtt_start - gtt_map));
 				radeon_bo_kunmap(vram_obj);
 				goto out_cleanup;
 			}
@@ -179,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev)
 		     gtt_start++, vram_start++) {
 			if (*gtt_start != vram_start) {
 				DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
-					  "expected 0x%p (VRAM map 0x%p-0x%p)\n",
-					  i, *gtt_start, vram_start, vram_map,
-					  vram_end);
+					  "expected 0x%p (VRAM/GTT offset "
+					  "0x%16llx/0x%16llx)\n",
+					  i, *gtt_start, vram_start,
+					  (unsigned long long)
+					  (vram_addr - rdev->mc.vram_start +
+					   (void*)vram_start - vram_map),
+					  (unsigned long long)
+					  (gtt_addr - rdev->mc.gtt_start +
+					   (void*)vram_start - vram_map));
 				radeon_bo_kunmap(gtt_obj[i]);
 				goto out_cleanup;
 			}
-- 
cgit v1.1


From ba95c45a78d57ac05bf45d81b92a6ec4d299695d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 19 Aug 2011 15:24:18 +0000
Subject: drm/radeon: Make vramlimit parameter actually work.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index a3b011b..b51e157 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -301,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64
 		mc->mc_vram_size = mc->aper_size;
 	}
 	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+	if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size)
+		mc->real_vram_size = radeon_vram_limit;
 	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
 			mc->mc_vram_size >> 20, mc->vram_start,
 			mc->vram_end, mc->real_vram_size >> 20);
-- 
cgit v1.1


From 4f41adfd8ce9ff84fa9e968e0fe2854e9bc6fcb0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 20 Aug 2011 10:23:38 +0100
Subject: ASoC: WM8996 record paths need AIFCLK

Make AIFCLK supply the record paths otherwise record will not work unless
there is a simultaneous playback.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/codecs/wm8996.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index ab8e9d1..85f43b6 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -1213,6 +1213,16 @@ static const struct snd_soc_dapm_route wm8996_dapm_routes[] = {
 	{ "AIF2RX0", NULL, "AIFCLK" },
 	{ "AIF2RX1", NULL, "AIFCLK" },
 
+	{ "AIF1TX0", NULL, "AIFCLK" },
+	{ "AIF1TX1", NULL, "AIFCLK" },
+	{ "AIF1TX2", NULL, "AIFCLK" },
+	{ "AIF1TX3", NULL, "AIFCLK" },
+	{ "AIF1TX4", NULL, "AIFCLK" },
+	{ "AIF1TX5", NULL, "AIFCLK" },
+
+	{ "AIF2TX0", NULL, "AIFCLK" },
+	{ "AIF2TX1", NULL, "AIFCLK" },
+
 	{ "DSP1RXL", NULL, "SYSDSPCLK" },
 	{ "DSP1RXR", NULL, "SYSDSPCLK" },
 	{ "DSP2RXL", NULL, "SYSDSPCLK" },
-- 
cgit v1.1


From 7691cd74c5d6b173e1483277fb70d7798e97d2fa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 20 Aug 2011 16:59:27 +0100
Subject: ASoC: Fix configuration of WM8996 input enables

There's no need for separate widgets for the enables (as the map already
shows).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/codecs/wm8996.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index 85f43b6..e76d4ed 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -988,15 +988,10 @@ SND_SOC_DAPM_MICBIAS("MICB1", WM8996_POWER_MANAGEMENT_1, 8, 0),
 SND_SOC_DAPM_PGA("IN1L PGA", WM8996_POWER_MANAGEMENT_2, 5, 0, NULL, 0),
 SND_SOC_DAPM_PGA("IN1R PGA", WM8996_POWER_MANAGEMENT_2, 4, 0, NULL, 0),
 
-SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &in1_mux),
-SND_SOC_DAPM_MUX("IN2L Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-SND_SOC_DAPM_MUX("IN2R Mux", SND_SOC_NOPM, 0, 0, &in2_mux),
-
-SND_SOC_DAPM_PGA("IN1L", WM8996_POWER_MANAGEMENT_7, 2, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN1R", WM8996_POWER_MANAGEMENT_7, 3, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2L", WM8996_POWER_MANAGEMENT_7, 6, 0, NULL, 0),
-SND_SOC_DAPM_PGA("IN2R", WM8996_POWER_MANAGEMENT_7, 7, 0, NULL, 0),
+SND_SOC_DAPM_MUX("IN1L Mux", WM8996_POWER_MANAGEMENT_7, 2, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN1R Mux", WM8996_POWER_MANAGEMENT_7, 3, 0, &in1_mux),
+SND_SOC_DAPM_MUX("IN2L Mux", WM8996_POWER_MANAGEMENT_7, 6, 0, &in2_mux),
+SND_SOC_DAPM_MUX("IN2R Mux", WM8996_POWER_MANAGEMENT_7, 7, 0, &in2_mux),
 
 SND_SOC_DAPM_SUPPLY("DMIC2", WM8996_POWER_MANAGEMENT_7, 9, 0, NULL, 0),
 SND_SOC_DAPM_SUPPLY("DMIC1", WM8996_POWER_MANAGEMENT_7, 8, 0, NULL, 0),
-- 
cgit v1.1


From f79e7ff85223e054c2967820d3be1c125a903bd4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 21 Aug 2011 12:20:00 +0100
Subject: ASoC: Ensure we only run Speyside WM8962 bias level callbacks once

We get called once per DAPM context but only need to run once. When DAPM
was serialized this was a series of noops but now it can run in parallel
we need to take proper care.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/samsung/speyside_wm8962.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/soc/samsung/speyside_wm8962.c b/sound/soc/samsung/speyside_wm8962.c
index 0b9eb5f..72535f2 100644
--- a/sound/soc/samsung/speyside_wm8962.c
+++ b/sound/soc/samsung/speyside_wm8962.c
@@ -23,6 +23,9 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card,
 	struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
 	int ret;
 
+	if (dapm->dev != codec_dai->dev)
+		return 0;
+
 	switch (level) {
 	case SND_SOC_BIAS_PREPARE:
 		if (dapm->bias_level == SND_SOC_BIAS_STANDBY) {
@@ -57,6 +60,9 @@ static int speyside_wm8962_set_bias_level_post(struct snd_soc_card *card,
 	struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai;
 	int ret;
 
+	if (dapm->dev != codec_dai->dev)
+		return 0;
+
 	switch (level) {
 	case SND_SOC_BIAS_STANDBY:
 		ret = snd_soc_dai_set_sysclk(codec_dai, WM8962_SYSCLK_MCLK,
-- 
cgit v1.1


From 4df0cb2fa977f99963b616487a22ebd021ea5463 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 21 Aug 2011 17:18:52 +0100
Subject: ASoC: Clear any outstanding WM8962 FLL lock completions before
 waiting

Ensure that we don't spuriously trigger early.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/codecs/wm8962.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 28650ed..1725550 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -2221,6 +2221,8 @@ static int sysclk_event(struct snd_soc_dapm_widget *w,
 	switch (event) {
 	case SND_SOC_DAPM_PRE_PMU:
 		if (fll) {
+			try_wait_for_completion(&wm8962->fll_lock);
+
 			snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
 					    WM8962_FLL_ENA, WM8962_FLL_ENA);
 			if (wm8962->irq) {
@@ -3284,6 +3286,8 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
 	snd_soc_write(codec, WM8962_FLL_CONTROL_7, fll_div.lambda);
 	snd_soc_write(codec, WM8962_FLL_CONTROL_8, fll_div.n);
 
+	try_wait_for_completion(&wm8962->fll_lock);
+
 	snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1,
 			    WM8962_FLL_FRAC | WM8962_FLL_REFCLK_SRC_MASK |
 			    WM8962_FLL_ENA, fll1);
-- 
cgit v1.1


From a41619455c0e28b6973471e87f1702c6129d3439 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 16 Aug 2011 16:57:58 +0900
Subject: ASoC: Clear completions from late WM8996 FLL lock IRQs

In case we have a pending completion, for example due to a problem with
the input clock which got corrected after we timed out.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/codecs/wm8996.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index e76d4ed..0936ae5 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -2111,6 +2111,9 @@ static int wm8996_set_fll(struct snd_soc_codec *codec, int fll_id, int source,
 
 	snd_soc_write(codec, WM8996_FLL_EFS_1, fll_div.lambda);
 
+	/* Clear any pending completions (eg, from failed startups) */
+	try_wait_for_completion(&wm8996->fll_lock);
+
 	snd_soc_update_bits(codec, WM8996_FLL_CONTROL_1,
 			    WM8996_FLL_ENA, WM8996_FLL_ENA);
 
-- 
cgit v1.1


From 3c05c4bed4ccce3f22f6d7899b308faae24ad198 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 17 Aug 2011 15:15:00 +0200
Subject: xen: Do not enable PV IPIs when vector callback not present

Fix regression for HVM case on older (<4.1.1) hypervisors caused by

  commit 99bbb3a84a99cd04ab16b998b20f01a72cfa9f4f
  Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  Date:   Thu Dec 2 17:55:10 2010 +0000

    xen: PV on HVM: support PV spinlocks and IPIs

This change replaced the SMP operations with event based handlers without
taking into account that this only works when the hypervisor supports
callback vectors. This causes unexplainable hangs early on boot for
HVM guests with more than one CPU.

BugLink: http://bugs.launchpad.net/bugs/791850

CC: stable@kernel.org
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Tested-and-Reported-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a8..e79dbb9 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -521,8 +521,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 	native_smp_prepare_cpus(max_cpus);
 	WARN_ON(xen_smp_intr_init(0));
 
-	if (!xen_have_vector_callback)
-		return;
 	xen_init_lock_cpu(0);
 	xen_init_spinlocks();
 }
@@ -546,6 +544,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
 
 void __init xen_hvm_smp_init(void)
 {
+	if (!xen_have_vector_callback)
+		return;
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
 	smp_ops.cpu_up = xen_hvm_cpu_up;
-- 
cgit v1.1


From 60c5f08e154fd235056645e050f2cd5671b19125 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 11 Aug 2011 13:17:20 -0700
Subject: xen/tracing: Fix tracing config option properly

Steven Rostedt says we should use CONFIG_EVENT_TRACING.

Cc:Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3326204..add2c2d 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
 			grant-table.o suspend.o platform-pci-unplug.o \
 			p2m.o
 
-obj-$(CONFIG_FTRACE) += trace.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
-- 
cgit v1.1


From 5b9063b19caaffe7135e1f9b8b22174ded0f586b Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Sun, 21 Aug 2011 21:04:12 -0700
Subject: Input: ad714xx-spi - force SPI bus into the default 8-bit mode

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/ad714x-spi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index 4120dd5..da83ac9 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -54,6 +54,12 @@ static int ad714x_spi_write(struct device *dev, unsigned short reg,
 static int __devinit ad714x_spi_probe(struct spi_device *spi)
 {
 	struct ad714x_chip *chip;
+	int err;
+
+	spi->bits_per_word = 8;
+	err = spi_setup(spi);
+	if (err < 0)
+		return err;
 
 	chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq,
 			    ad714x_spi_read, ad714x_spi_write);
-- 
cgit v1.1


From 6337de2204be3b7b40825a1d30de30e514e8947b Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Sun, 21 Aug 2011 21:04:12 -0700
Subject: Input: ad714x - fix endianness issues

Allow driver to be used on Big Endian boxes.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/ad714x-i2c.c | 34 ++++++++++------------------------
 drivers/input/misc/ad714x-spi.c | 24 +++++++++++++++---------
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
index e21deb1..00a6a22 100644
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -32,17 +32,12 @@ static int ad714x_i2c_write(struct device *dev, unsigned short reg,
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	int ret = 0;
-	u8 *_reg = (u8 *)&reg;
-	u8 *_data = (u8 *)&data;
-
-	u8 tx[4] = {
-		_reg[1],
-		_reg[0],
-		_data[1],
-		_data[0]
+	unsigned short tx[2] = {
+		cpu_to_be16(reg),
+		cpu_to_be16(data)
 	};
 
-	ret = i2c_master_send(client, tx, 4);
+	ret = i2c_master_send(client, (u8 *)tx, 4);
 	if (ret < 0)
 		dev_err(&client->dev, "I2C write error\n");
 
@@ -54,25 +49,16 @@ static int ad714x_i2c_read(struct device *dev, unsigned short reg,
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	int ret = 0;
-	u8 *_reg = (u8 *)&reg;
-	u8 *_data = (u8 *)data;
+	unsigned short tx = cpu_to_be16(reg);
 
-	u8 tx[2] = {
-		_reg[1],
-		_reg[0]
-	};
-	u8 rx[2];
-
-	ret = i2c_master_send(client, tx, 2);
+	ret = i2c_master_send(client, (u8 *)&tx, 2);
 	if (ret >= 0)
-		ret = i2c_master_recv(client, rx, 2);
+		ret = i2c_master_recv(client, (u8 *)data, 2);
 
-	if (unlikely(ret < 0)) {
+	if (unlikely(ret < 0))
 		dev_err(&client->dev, "I2C read error\n");
-	} else {
-		_data[0] = rx[1];
-		_data[1] = rx[0];
-	}
+	else
+		*data = be16_to_cpu(*data);
 
 	return ret;
 }
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index da83ac9..0c7f948 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -6,7 +6,7 @@
  * Licensed under the GPL-2 or later.
  */
 
-#include <linux/input.h>	/* BUS_I2C */
+#include <linux/input.h>	/* BUS_SPI */
 #include <linux/module.h>
 #include <linux/spi/spi.h>
 #include <linux/pm.h>
@@ -30,22 +30,28 @@ static int ad714x_spi_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
-static int ad714x_spi_read(struct device *dev, unsigned short reg,
-		unsigned short *data)
+static int ad714x_spi_read(struct device *dev,
+			   unsigned short reg, unsigned short *data)
 {
 	struct spi_device *spi = to_spi_device(dev);
-	unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg;
+	unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
+					AD714x_SPI_READ | reg);
+	int ret;
 
-	return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2);
+	ret = spi_write_then_read(spi, &tx, 2, data, 2);
+
+	*data = be16_to_cpup(data);
+
+	return ret;
 }
 
-static int ad714x_spi_write(struct device *dev, unsigned short reg,
-		unsigned short data)
+static int ad714x_spi_write(struct device *dev,
+			    unsigned short reg, unsigned short data)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	unsigned short tx[2] = {
-		AD714x_SPI_CMD_PREFIX | reg,
-		data
+		cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg),
+		cpu_to_be16(data)
 	};
 
 	return spi_write(spi, (u8 *)tx, 4);
-- 
cgit v1.1


From c0409feb86893f5ccf73964c7b2b47ca64bdb014 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 22 Aug 2011 09:45:39 -0700
Subject: Input: ad714x - use DMA-safe buffers for spi_write()

spi_write() requires use of DMA-safe (cacheline aligned) buffers.
Also use the same buffers when reading data since to avoid extra
locking and potential memory allocation in spi_write_then_read().

Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/ad714x-i2c.c | 60 ++++++++++++++-------------
 drivers/input/misc/ad714x-spi.c | 51 ++++++++++++++++-------
 drivers/input/misc/ad714x.c     | 90 ++++++++++++++---------------------------
 drivers/input/misc/ad714x.h     | 33 ++++++++++++++-
 4 files changed, 131 insertions(+), 103 deletions(-)

diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
index 00a6a22..6c61218 100644
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -27,40 +27,46 @@ static int ad714x_i2c_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume);
 
-static int ad714x_i2c_write(struct device *dev, unsigned short reg,
-				unsigned short data)
+static int ad714x_i2c_write(struct ad714x_chip *chip,
+			    unsigned short reg, unsigned short data)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	int ret = 0;
-	unsigned short tx[2] = {
-		cpu_to_be16(reg),
-		cpu_to_be16(data)
-	};
-
-	ret = i2c_master_send(client, (u8 *)tx, 4);
-	if (ret < 0)
-		dev_err(&client->dev, "I2C write error\n");
-
-	return ret;
+	struct i2c_client *client = to_i2c_client(chip->dev);
+	int error;
+
+	chip->xfer_buf[0] = cpu_to_be16(reg);
+	chip->xfer_buf[1] = cpu_to_be16(data);
+
+	error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+				2 * sizeof(*chip->xfer_buf));
+	if (unlikely(error < 0)) {
+		dev_err(&client->dev, "I2C write error: %d\n", error);
+		return error;
+	}
+
+	return 0;
 }
 
-static int ad714x_i2c_read(struct device *dev, unsigned short reg,
-				unsigned short *data)
+static int ad714x_i2c_read(struct ad714x_chip *chip,
+			   unsigned short reg, unsigned short *data)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	int ret = 0;
-	unsigned short tx = cpu_to_be16(reg);
+	struct i2c_client *client = to_i2c_client(chip->dev);
+	int error;
+
+	chip->xfer_buf[0] = cpu_to_be16(reg);
 
-	ret = i2c_master_send(client, (u8 *)&tx, 2);
-	if (ret >= 0)
-		ret = i2c_master_recv(client, (u8 *)data, 2);
+	error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+				sizeof(*chip->xfer_buf));
+	if (error >= 0)
+		error = i2c_master_recv(client, (u8 *)chip->xfer_buf,
+					sizeof(*chip->xfer_buf));
 
-	if (unlikely(ret < 0))
-		dev_err(&client->dev, "I2C read error\n");
-	else
-		*data = be16_to_cpu(*data);
+	if (unlikely(error < 0)) {
+		dev_err(&client->dev, "I2C read error: %d\n", error);
+		return error;
+	}
 
-	return ret;
+	*data = be16_to_cpup(chip->xfer_buf);
+	return 0;
 }
 
 static int __devinit ad714x_i2c_probe(struct i2c_client *client,
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index 0c7f948..306577d 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -30,31 +30,54 @@ static int ad714x_spi_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
-static int ad714x_spi_read(struct device *dev,
+static int ad714x_spi_read(struct ad714x_chip *chip,
 			   unsigned short reg, unsigned short *data)
 {
-	struct spi_device *spi = to_spi_device(dev);
-	unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
+	struct spi_device *spi = to_spi_device(chip->dev);
+	struct spi_message message;
+	struct spi_transfer xfer[2];
+	int error;
+
+	spi_message_init(&message);
+	memset(xfer, 0, sizeof(xfer));
+
+	chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
 					AD714x_SPI_READ | reg);
-	int ret;
+	xfer[0].tx_buf = &chip->xfer_buf[0];
+	xfer[0].len = sizeof(chip->xfer_buf[0]);
+	spi_message_add_tail(&xfer[0], &message);
 
-	ret = spi_write_then_read(spi, &tx, 2, data, 2);
+	xfer[1].rx_buf = &chip->xfer_buf[1];
+	xfer[1].len = sizeof(chip->xfer_buf[1]);
+	spi_message_add_tail(&xfer[1], &message);
 
-	*data = be16_to_cpup(data);
+	error = spi_sync(spi, &message);
+	if (unlikely(error)) {
+		dev_err(chip->dev, "SPI read error: %d\n", error);
+		return error;
+	}
 
-	return ret;
+	*data = be16_to_cpu(chip->xfer_buf[1]);
+	return 0;
 }
 
-static int ad714x_spi_write(struct device *dev,
+static int ad714x_spi_write(struct ad714x_chip *chip,
 			    unsigned short reg, unsigned short data)
 {
-	struct spi_device *spi = to_spi_device(dev);
-	unsigned short tx[2] = {
-		cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg),
-		cpu_to_be16(data)
-	};
+	struct spi_device *spi = to_spi_device(chip->dev);
+	int error;
 
-	return spi_write(spi, (u8 *)tx, 4);
+	chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg);
+	chip->xfer_buf[1] = cpu_to_be16(data);
+
+	error = spi_write(spi, (u8 *)chip->xfer_buf,
+			  2 * sizeof(*chip->xfer_buf));
+	if (unlikely(error)) {
+		dev_err(chip->dev, "SPI write error: %d\n", error);
+		return error;
+	}
+
+	return 0;
 }
 
 static int __devinit ad714x_spi_probe(struct spi_device *spi)
diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c
index c3a62c4..2be0366c 100644
--- a/drivers/input/misc/ad714x.c
+++ b/drivers/input/misc/ad714x.c
@@ -59,7 +59,6 @@
 #define STAGE11_AMBIENT		0x27D
 
 #define PER_STAGE_REG_NUM      36
-#define STAGE_NUM              12
 #define STAGE_CFGREG_NUM       8
 #define SYS_CFGREG_NUM         8
 
@@ -124,28 +123,6 @@ struct ad714x_driver_data {
  * information to integrate all things which will be private data
  * of spi/i2c device
  */
-struct ad714x_chip {
-	unsigned short h_state;
-	unsigned short l_state;
-	unsigned short c_state;
-	unsigned short adc_reg[STAGE_NUM];
-	unsigned short amb_reg[STAGE_NUM];
-	unsigned short sensor_val[STAGE_NUM];
-
-	struct ad714x_platform_data *hw;
-	struct ad714x_driver_data *sw;
-
-	int irq;
-	struct device *dev;
-	ad714x_read_t read;
-	ad714x_write_t write;
-
-	struct mutex mutex;
-
-	unsigned product;
-	unsigned version;
-};
-
 static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 				int start_stage, int end_stage)
 {
@@ -154,13 +131,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 
 	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-	ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data);
 	data |= 1 << end_stage;
-	ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+	ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-	ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data);
 	data &= ~mask;
-	ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+	ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
@@ -171,13 +148,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
 
 	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-	ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data);
 	data &= ~(1 << end_stage);
-	ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+	ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-	ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data);
 	data |= mask;
-	ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+	ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
 
 static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x,
@@ -274,10 +251,8 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	int i;
 
 	for (i = hw->start_stage; i <= hw->end_stage; i++) {
-		ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-			&ad714x->adc_reg[i]);
-		ad714x->read(ad714x->dev,
-				STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
+		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
 				&ad714x->amb_reg[i]);
 
 		ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
@@ -445,10 +420,8 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	int i;
 
 	for (i = hw->start_stage; i <= hw->end_stage; i++) {
-		ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-			&ad714x->adc_reg[i]);
-		ad714x->read(ad714x->dev,
-				STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
+		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
 				&ad714x->amb_reg[i]);
 		if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
 			ad714x->sensor_val[i] = ad714x->adc_reg[i] -
@@ -598,10 +571,8 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	int i;
 
 	for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) {
-		ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-				&ad714x->adc_reg[i]);
-		ad714x->read(ad714x->dev,
-				STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
+		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
 				&ad714x->amb_reg[i]);
 		if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
 			ad714x->sensor_val[i] = ad714x->adc_reg[i] -
@@ -891,7 +862,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x)
 {
 	unsigned short data;
 
-	ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data);
+	ad714x->read(ad714x, AD714X_PARTID_REG, &data);
 	switch (data & 0xFFF0) {
 	case AD7142_PARTID:
 		ad714x->product = 0x7142;
@@ -940,23 +911,22 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x)
 	for (i = 0; i < STAGE_NUM; i++) {
 		reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM;
 		for (j = 0; j < STAGE_CFGREG_NUM; j++)
-			ad714x->write(ad714x->dev, reg_base + j,
+			ad714x->write(ad714x, reg_base + j,
 					ad714x->hw->stage_cfg_reg[i][j]);
 	}
 
 	for (i = 0; i < SYS_CFGREG_NUM; i++)
-		ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i,
+		ad714x->write(ad714x, AD714X_SYSCFG_REG + i,
 			ad714x->hw->sys_cfg_reg[i]);
 	for (i = 0; i < SYS_CFGREG_NUM; i++)
-		ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i,
-			&data);
+		ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data);
 
-	ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF);
+	ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF);
 
 	/* clear all interrupts */
-	ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-	ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-	ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_STA_REG, &data);
 }
 
 static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
@@ -966,9 +936,9 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
 
 	mutex_lock(&ad714x->mutex);
 
-	ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state);
-	ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state);
-	ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state);
+	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state);
+	ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state);
 
 	for (i = 0; i < ad714x->hw->button_num; i++)
 		ad714x_button_state_machine(ad714x, i);
@@ -1245,7 +1215,7 @@ int ad714x_disable(struct ad714x_chip *ad714x)
 	mutex_lock(&ad714x->mutex);
 
 	data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3;
-	ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data);
+	ad714x->write(ad714x, AD714X_PWR_CTRL, data);
 
 	mutex_unlock(&ad714x->mutex);
 
@@ -1263,16 +1233,16 @@ int ad714x_enable(struct ad714x_chip *ad714x)
 
 	/* resume to non-shutdown mode */
 
-	ad714x->write(ad714x->dev, AD714X_PWR_CTRL,
+	ad714x->write(ad714x, AD714X_PWR_CTRL,
 			ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]);
 
 	/* make sure the interrupt output line is not low level after resume,
 	 * otherwise we will get no chance to enter falling-edge irq again
 	 */
 
-	ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-	ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-	ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_STA_REG, &data);
 
 	mutex_unlock(&ad714x->mutex);
 
diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h
index 45c54fb..d12d149 100644
--- a/drivers/input/misc/ad714x.h
+++ b/drivers/input/misc/ad714x.h
@@ -11,11 +11,40 @@
 
 #include <linux/types.h>
 
+#define STAGE_NUM              12
+
 struct device;
+struct ad714x_platform_data;
+struct ad714x_driver_data;
 struct ad714x_chip;
 
-typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *);
-typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short);
+typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *);
+typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short);
+
+struct ad714x_chip {
+	unsigned short h_state;
+	unsigned short l_state;
+	unsigned short c_state;
+	unsigned short adc_reg[STAGE_NUM];
+	unsigned short amb_reg[STAGE_NUM];
+	unsigned short sensor_val[STAGE_NUM];
+
+	struct ad714x_platform_data *hw;
+	struct ad714x_driver_data *sw;
+
+	int irq;
+	struct device *dev;
+	ad714x_read_t read;
+	ad714x_write_t write;
+
+	struct mutex mutex;
+
+	unsigned product;
+	unsigned version;
+
+	__be16 xfer_buf[16] ____cacheline_aligned;
+
+};
 
 int ad714x_disable(struct ad714x_chip *ad714x);
 int ad714x_enable(struct ad714x_chip *ad714x);
-- 
cgit v1.1


From 9eff794b777ac9ca034129a1b637204000c8fb29 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 22 Aug 2011 09:45:42 -0700
Subject: Input: ad714x - read the interrupt status registers in a row

The interrupt status registers should be read in row to avoid invalid data.

Alter "read" method for both bus options to allow reading several registers
in a row and make sure we read interrupt status registers properly.

Read sequence saves 50% of bus transactions compared to single register
reads. So use it also for the result registers, which are also located
in a row.

Also update copyright notice.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/ad714x-i2c.c | 11 +++++---
 drivers/input/misc/ad714x-spi.c | 11 +++++---
 drivers/input/misc/ad714x.c     | 62 +++++++++++++++++++++--------------------
 drivers/input/misc/ad714x.h     |  6 ++--
 4 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
index 6c61218..025417d 100644
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (I2C bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -47,9 +47,10 @@ static int ad714x_i2c_write(struct ad714x_chip *chip,
 }
 
 static int ad714x_i2c_read(struct ad714x_chip *chip,
-			   unsigned short reg, unsigned short *data)
+			   unsigned short reg, unsigned short *data, size_t len)
 {
 	struct i2c_client *client = to_i2c_client(chip->dev);
+	int i;
 	int error;
 
 	chip->xfer_buf[0] = cpu_to_be16(reg);
@@ -58,14 +59,16 @@ static int ad714x_i2c_read(struct ad714x_chip *chip,
 				sizeof(*chip->xfer_buf));
 	if (error >= 0)
 		error = i2c_master_recv(client, (u8 *)chip->xfer_buf,
-					sizeof(*chip->xfer_buf));
+					len * sizeof(*chip->xfer_buf));
 
 	if (unlikely(error < 0)) {
 		dev_err(&client->dev, "I2C read error: %d\n", error);
 		return error;
 	}
 
-	*data = be16_to_cpup(chip->xfer_buf);
+	for (i = 0; i < len; i++)
+		data[i] = be16_to_cpu(chip->xfer_buf[i]);
+
 	return 0;
 }
 
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
index 306577d..875b508 100644
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (SPI bus)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -31,11 +31,12 @@ static int ad714x_spi_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
 
 static int ad714x_spi_read(struct ad714x_chip *chip,
-			   unsigned short reg, unsigned short *data)
+			   unsigned short reg, unsigned short *data, size_t len)
 {
 	struct spi_device *spi = to_spi_device(chip->dev);
 	struct spi_message message;
 	struct spi_transfer xfer[2];
+	int i;
 	int error;
 
 	spi_message_init(&message);
@@ -48,7 +49,7 @@ static int ad714x_spi_read(struct ad714x_chip *chip,
 	spi_message_add_tail(&xfer[0], &message);
 
 	xfer[1].rx_buf = &chip->xfer_buf[1];
-	xfer[1].len = sizeof(chip->xfer_buf[1]);
+	xfer[1].len = sizeof(chip->xfer_buf[1]) * len;
 	spi_message_add_tail(&xfer[1], &message);
 
 	error = spi_sync(spi, &message);
@@ -57,7 +58,9 @@ static int ad714x_spi_read(struct ad714x_chip *chip,
 		return error;
 	}
 
-	*data = be16_to_cpu(chip->xfer_buf[1]);
+	for (i = 0; i < len; i++)
+		data[i] = be16_to_cpu(chip->xfer_buf[i + 1]);
+
 	return 0;
 }
 
diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c
index 2be0366c..ca42c7d 100644
--- a/drivers/input/misc/ad714x.c
+++ b/drivers/input/misc/ad714x.c
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -123,6 +123,7 @@ struct ad714x_driver_data {
  * information to integrate all things which will be private data
  * of spi/i2c device
  */
+
 static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 				int start_stage, int end_stage)
 {
@@ -131,11 +132,11 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x,
 
 	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
 	data |= 1 << end_stage;
 	ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
 	data &= ~mask;
 	ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
@@ -148,11 +149,11 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
 
 	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
 
-	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
 	data &= ~(1 << end_stage);
 	ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
 
-	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data);
+	ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
 	data |= mask;
 	ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
 }
@@ -250,13 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
 	int i;
 
+	ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+			&ad714x->adc_reg[hw->start_stage],
+			hw->end_stage - hw->start_stage + 1);
+
 	for (i = hw->start_stage; i <= hw->end_stage; i++) {
-		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
 		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-				&ad714x->amb_reg[i]);
+				&ad714x->amb_reg[i], 1);
 
-		ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
-				ad714x->amb_reg[i]);
+		ad714x->sensor_val[i] =
+			abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]);
 	}
 }
 
@@ -419,13 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
 	int i;
 
+	ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+			&ad714x->adc_reg[hw->start_stage],
+			hw->end_stage - hw->start_stage + 1);
+
 	for (i = hw->start_stage; i <= hw->end_stage; i++) {
-		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
 		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-				&ad714x->amb_reg[i]);
+				&ad714x->amb_reg[i], 1);
 		if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-			ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-				ad714x->amb_reg[i];
+			ad714x->sensor_val[i] =
+				ad714x->adc_reg[i] - ad714x->amb_reg[i];
 		else
 			ad714x->sensor_val[i] = 0;
 	}
@@ -570,13 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
 	struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
 	int i;
 
+	ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage,
+			&ad714x->adc_reg[hw->x_start_stage],
+			hw->x_end_stage - hw->x_start_stage + 1);
+
 	for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) {
-		ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]);
 		ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-				&ad714x->amb_reg[i]);
+				&ad714x->amb_reg[i], 1);
 		if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-			ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-				ad714x->amb_reg[i];
+			ad714x->sensor_val[i] =
+				ad714x->adc_reg[i] - ad714x->amb_reg[i];
 		else
 			ad714x->sensor_val[i] = 0;
 	}
@@ -862,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x)
 {
 	unsigned short data;
 
-	ad714x->read(ad714x, AD714X_PARTID_REG, &data);
+	ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1);
 	switch (data & 0xFFF0) {
 	case AD7142_PARTID:
 		ad714x->product = 0x7142;
@@ -919,14 +929,12 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x)
 		ad714x->write(ad714x, AD714X_SYSCFG_REG + i,
 			ad714x->hw->sys_cfg_reg[i]);
 	for (i = 0; i < SYS_CFGREG_NUM; i++)
-		ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data);
+		ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1);
 
 	ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF);
 
 	/* clear all interrupts */
-	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data);
-	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data);
-	ad714x->read(ad714x, STG_COM_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 }
 
 static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
@@ -936,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
 
 	mutex_lock(&ad714x->mutex);
 
-	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state);
-	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state);
-	ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
 	for (i = 0; i < ad714x->hw->button_num; i++)
 		ad714x_button_state_machine(ad714x, i);
@@ -1225,8 +1231,6 @@ EXPORT_SYMBOL(ad714x_disable);
 
 int ad714x_enable(struct ad714x_chip *ad714x)
 {
-	unsigned short data;
-
 	dev_dbg(ad714x->dev, "%s enter\n", __func__);
 
 	mutex_lock(&ad714x->mutex);
@@ -1240,9 +1244,7 @@ int ad714x_enable(struct ad714x_chip *ad714x)
 	 * otherwise we will get no chance to enter falling-edge irq again
 	 */
 
-	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data);
-	ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data);
-	ad714x->read(ad714x, STG_COM_INT_STA_REG, &data);
+	ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
 
 	mutex_unlock(&ad714x->mutex);
 
diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h
index d12d149..3c85455 100644
--- a/drivers/input/misc/ad714x.h
+++ b/drivers/input/misc/ad714x.h
@@ -1,7 +1,7 @@
 /*
  * AD714X CapTouch Programmable Controller driver (bus interfaces)
  *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -18,12 +18,12 @@ struct ad714x_platform_data;
 struct ad714x_driver_data;
 struct ad714x_chip;
 
-typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *);
+typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t);
 typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short);
 
 struct ad714x_chip {
-	unsigned short h_state;
 	unsigned short l_state;
+	unsigned short h_state;
 	unsigned short c_state;
 	unsigned short adc_reg[STAGE_NUM];
 	unsigned short amb_reg[STAGE_NUM];
-- 
cgit v1.1


From d5c073caf050bc713271a02e016b1672d9b7b935 Mon Sep 17 00:00:00 2001
From: Geoffrey Thomas <geofft@mit.edu>
Date: Mon, 22 Aug 2011 11:28:57 -0700
Subject: net: Documentation: RFC 2553bis is now RFC 3493

Signed-off-by: Geoffrey Thomas <geofft@mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index db2a406..8154699 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -992,7 +992,7 @@ bindv6only - BOOLEAN
 		TRUE: disable IPv4-mapped address feature
 		FALSE: enable IPv4-mapped address feature
 
-	Default: FALSE (as specified in RFC2553bis)
+	Default: FALSE (as specified in RFC3493)
 
 IPv6 Fragmentation:
 
-- 
cgit v1.1


From fcb8ce5cfe30ca9ca5c9a79cdfe26d1993e65e0c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 22 Aug 2011 11:42:53 -0700
Subject: Linux 3.1-rc3

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 3241d41..788511f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 3
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
-NAME = Wet Seal
+EXTRAVERSION = -rc3
+NAME = "Divemaster Edition"
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
-- 
cgit v1.1


From 052605c6caa3e1edf8eee8fe5fe6d53f5721f39a Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 26 Jul 2011 17:48:43 -0700
Subject: target: Make standard INQUIRY return 'not connected' for
 tpg_virt_lun0

This patch changes target_emulate_inquiry_std() to set the 'not connected'
(0x35) bit in standard INQUIRY response data when we are processing a
request to a virtual LUN=0 mapping from struct se_device *g_lun0_dev that
have been setup for us in transport_lookup_cmd_lun().

This addresses an issue where qla2xxx FC clients need to be able
to create demo-mode I_T FC Nexuses by default, but should not be
exposing the default set of TPG LUNs to all FC clients.  This includes
adding an new optional target_core_fabric_ops->tpg_check_demo_mode_login_only()
caller to allow demo_mode nexuses to skip the old default of bulding
a demo-mode MappedLUNs list via core_tpg_add_node_to_devs().

(roland: Add missing tpg_check_demo_mode_login_only check in core_dev_add_lun)

Reported-by: Roland Dreier <roland@purestorage.com>
Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: Nicholas Bellinger <nab@risingtidesystems.com>
---
 drivers/target/target_core_cdb.c        | 11 ++++++++---
 drivers/target/target_core_device.c     |  4 +++-
 drivers/target/target_core_tpg.c        | 12 ++++++++++--
 include/target/target_core_fabric_ops.h |  6 ++++++
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index 8ae09a1..d095408 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -67,6 +67,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 {
 	struct se_lun *lun = cmd->se_lun;
 	struct se_device *dev = cmd->se_dev;
+	struct se_portal_group *tpg = lun->lun_sep->sep_tpg;
 	unsigned char *buf;
 
 	/*
@@ -81,9 +82,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
 
 	buf = transport_kmap_first_data_page(cmd);
 
-	buf[0] = dev->transport->get_device_type(dev);
-	if (buf[0] == TYPE_TAPE)
-		buf[1] = 0x80;
+	if (dev == tpg->tpg_virt_lun0.lun_se_dev) {
+		buf[0] = 0x3f; /* Not connected */
+	} else {
+		buf[0] = dev->transport->get_device_type(dev);
+		if (buf[0] == TYPE_TAPE)
+			buf[1] = 0x80;
+	}
 	buf[2] = dev->transport->get_device_rev(dev);
 
 	/*
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index b38b6c9..ec3fbcd 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1346,7 +1346,9 @@ struct se_lun *core_dev_add_lun(
 		struct se_node_acl *acl;
 		spin_lock_bh(&tpg->acl_node_lock);
 		list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
-			if (acl->dynamic_node_acl) {
+			if (acl->dynamic_node_acl &&
+			    (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only ||
+			     !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) {
 				spin_unlock_bh(&tpg->acl_node_lock);
 				core_tpg_add_node_to_devs(acl, tpg);
 				spin_lock_bh(&tpg->acl_node_lock);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 4f1ba4c..718ccd1 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -298,8 +298,16 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
 		tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl);
 		return NULL;
 	}
-
-	core_tpg_add_node_to_devs(acl, tpg);
+	/*
+	 * Here we only create demo-mode MappedLUNs from the active
+	 * TPG LUNs if the fabric is not explictly asking for
+	 * tpg_check_demo_mode_login_only() == 1.
+	 */
+	if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) &&
+	    (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1))
+		do { ; } while (0);
+	else
+		core_tpg_add_node_to_devs(acl, tpg);
 
 	spin_lock_bh(&tpg->acl_node_lock);
 	list_add_tail(&acl->acl_list, &tpg->acl_node_list);
diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h
index 2de8fe9..126c675 100644
--- a/include/target/target_core_fabric_ops.h
+++ b/include/target/target_core_fabric_ops.h
@@ -27,6 +27,12 @@ struct target_core_fabric_ops {
 	int (*tpg_check_demo_mode_cache)(struct se_portal_group *);
 	int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *);
 	int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *);
+	/*
+	 * Optionally used by fabrics to allow demo-mode login, but not
+	 * expose any TPG LUNs, and return 'not connected' in standard
+	 * inquiry response
+	 */
+	int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
 	struct se_node_acl *(*tpg_alloc_fabric_acl)(
 					struct se_portal_group *);
 	void (*tpg_release_fabric_acl)(struct se_portal_group *,
-- 
cgit v1.1


From e1750ba20f0d850c38820190ccbf0f647723091a Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Mon, 1 Aug 2011 23:58:18 +0200
Subject: target: Use ERR_CAST inlined function

Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))

The semantic patch that makes this output is available
in scripts/coccinelle/api/err_cast.cocci.

More information about semantic patching is available at
http://coccinelle.lip6.fr/

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_configfs.c | 4 ++--
 drivers/target/target_core_fabric_configfs.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index f095e65..f1643db 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
 				ISCSI_TCP);
 	if (IS_ERR(tpg_np)) {
 		iscsit_put_tpg(tpg);
-		return ERR_PTR(PTR_ERR(tpg_np));
+		return ERR_CAST(tpg_np);
 	}
 	pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
 
@@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn(
 
 	tiqn = iscsit_add_tiqn((unsigned char *)name);
 	if (IS_ERR(tiqn))
-		return ERR_PTR(PTR_ERR(tiqn));
+		return ERR_CAST(tiqn);
 	/*
 	 * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
 	 */
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index f165469..55bbe08 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl(
 
 	se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name);
 	if (IS_ERR(se_nacl))
-		return ERR_PTR(PTR_ERR(se_nacl));
+		return ERR_CAST(se_nacl);
 
 	nacl_cg = &se_nacl->acl_group;
 	nacl_cg->default_groups = se_nacl->acl_default_groups;
-- 
cgit v1.1


From 9be08c5804ae4ad96ec22d0b1e71e630803a85ea Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Tue, 2 Aug 2011 10:26:36 +0200
Subject: iscsi-target: Fix leak on failure in iscsi_copy_param_list()

We leak memory if the allocations for 'new_param->name' or
'new_param->value' fail in iscsi_target_parameters.c::iscsi_copy_param_list()

We also do a lot of variable assignments that are completely pointless
if the allocations fail.

So, let's move the allocations before the assignments and also make
sure that we free whatever was allocated to one if the allocation fail.

There's also some small CodingStyle fixups in there (curly braces on
both branches of if statement, only one variable per line) since I was
in the area anyway. And finally, error messages in the function are
put on a single line for easy grep'abillity.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_parameters.c | 43 ++++++++++----------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 252e246..497b2e7 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -545,13 +545,13 @@ int iscsi_copy_param_list(
 	struct iscsi_param_list *src_param_list,
 	int leading)
 {
-	struct iscsi_param *new_param = NULL, *param = NULL;
+	struct iscsi_param *param = NULL;
+	struct iscsi_param *new_param = NULL;
 	struct iscsi_param_list *param_list = NULL;
 
 	param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
 	if (!param_list) {
-		pr_err("Unable to allocate memory for"
-				" struct iscsi_param_list.\n");
+		pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
 		goto err_out;
 	}
 	INIT_LIST_HEAD(&param_list->param_list);
@@ -567,8 +567,17 @@ int iscsi_copy_param_list(
 
 		new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
 		if (!new_param) {
-			pr_err("Unable to allocate memory for"
-				" struct iscsi_param.\n");
+			pr_err("Unable to allocate memory for struct iscsi_param.\n");
+			goto err_out;
+		}
+
+		new_param->name = kstrdup(param->name, GFP_KERNEL);
+		new_param->value = kstrdup(param->value, GFP_KERNEL);
+		if (!new_param->value || !new_param->name) {
+			kfree(new_param->value);
+			kfree(new_param->name);
+			kfree(new_param);
+			pr_err("Unable to allocate memory for parameter name/value.\n");
 			goto err_out;
 		}
 
@@ -580,32 +589,12 @@ int iscsi_copy_param_list(
 		new_param->use = param->use;
 		new_param->type_range = param->type_range;
 
-		new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
-		if (!new_param->name) {
-			pr_err("Unable to allocate memory for"
-				" parameter name.\n");
-			goto err_out;
-		}
-
-		new_param->value = kzalloc(strlen(param->value) + 1,
-				GFP_KERNEL);
-		if (!new_param->value) {
-			pr_err("Unable to allocate memory for"
-				" parameter value.\n");
-			goto err_out;
-		}
-
-		memcpy(new_param->name, param->name, strlen(param->name));
-		new_param->name[strlen(param->name)] = '\0';
-		memcpy(new_param->value, param->value, strlen(param->value));
-		new_param->value[strlen(param->value)] = '\0';
-
 		list_add_tail(&new_param->p_list, &param_list->param_list);
 	}
 
-	if (!list_empty(&param_list->param_list))
+	if (!list_empty(&param_list->param_list)) {
 		*dst_param_list = param_list;
-	else {
+	} else {
 		pr_err("No parameters allocated.\n");
 		goto err_out;
 	}
-- 
cgit v1.1


From 6fc6148865c9a17cee33f251723f6a056f022ecd Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 2 Aug 2011 12:35:02 +0200
Subject: target: Convert target_core_rd.c to use use BUG_ON

Use BUG_ON(x) rather than if(x) BUG();

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@ identifier x; @@
-if (x) BUG();
+BUG_ON(x);

@@ identifier x; @@
-if (!x) BUG();
+BUG_ON(!x);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_rd.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 3dd81d2..e567e12 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req)
 				length = req->rd_size;
 
 			dst = sg_virt(&sg_d[i++]) + dst_offset;
-			if (!dst)
-				BUG();
+			BUG_ON(!dst);
 
 			src = sg_virt(&sg_s[j]) + src_offset;
-			if (!src)
-				BUG();
+			BUG_ON(!src);
 
 			dst_offset = 0;
 			src_offset = length;
@@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
 				length = req->rd_size;
 
 			dst = sg_virt(&sg_d[i]) + dst_offset;
-			if (!dst)
-				BUG();
+			BUG_ON(!dst);
 
 			if (sg_d[i].length == length) {
 				i++;
@@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
 				dst_offset = length;
 
 			src = sg_virt(&sg_s[j++]) + src_offset;
-			if (!src)
-				BUG();
+			BUG_ON(!src);
 
 			src_offset = 0;
 			page_end = 1;
@@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req)
 				length = req->rd_size;
 
 			src = sg_virt(&sg_s[i++]) + src_offset;
-			if (!src)
-				BUG();
+			BUG_ON(!src);
 
 			dst = sg_virt(&sg_d[j]) + dst_offset;
-			if (!dst)
-				BUG();
+			BUG_ON(!dst);
 
 			src_offset = 0;
 			dst_offset = length;
@@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
 				length = req->rd_size;
 
 			src = sg_virt(&sg_s[i]) + src_offset;
-			if (!src)
-				BUG();
+			BUG_ON(!src);
 
 			if (sg_s[i].length == length) {
 				i++;
@@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
 				src_offset = length;
 
 			dst = sg_virt(&sg_d[j++]) + dst_offset;
-			if (!dst)
-				BUG();
+			BUG_ON(!dst);
 
 			dst_offset = 0;
 			page_end = 1;
-- 
cgit v1.1


From c2337c709102b343bd917ae00c79b266fb15b871 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 8 Aug 2011 14:02:27 -0700
Subject: iscsi-target: remove duplicate return

We returned on the line before already.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index c24fb10..6a4ea29 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -2243,7 +2243,6 @@ static int iscsit_handle_snack(
 	case 0:
 		return iscsit_handle_recovery_datain_or_r2t(conn, buf,
 			hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
-		return 0;
 	case ISCSI_FLAG_SNACK_TYPE_STATUS:
 		return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
 			hdr->begrun, hdr->runlength);
-- 
cgit v1.1


From 387e96c05299ca7a0ade874f343f91f0b01086a0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 8 Aug 2011 14:06:44 -0700
Subject: iscsi-target: forever loop bug in iscsit_attach_ooo_cmdsn()

This patch fixes a forever loop bug in iscsit_attach_ooo_cmdsn()
while walking sess->sess_ooo_cmdsn_list when the received
CmdSN is less than the tail of the list.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_erl1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index 9806507..c4c68da 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn(
 			 */
 			list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
 						ooo_list) {
-				while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+				if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
 					continue;
 
 				list_add(&ooo_cmdsn->ooo_list,
-- 
cgit v1.1


From 16ab8e60a0ebc22cfbe61d84e620457a15f3a0bc Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 8 Aug 2011 19:03:38 -0700
Subject: target: Fix write payload exception handling with ->new_cmd_map

This patch fixes a bug for fabrics using tfo->new_cmd_map() that
are expect transport_generic_request_failure() to be calling
transport_send_check_condition_and_sense() for both READ and WRITE,
instead of only for READ exceptions.

This was originally observed with a failed WRITE_SAME_16 w/ unmap=0
using tcm_loop.

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index cc5a339..fd7d451 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2053,8 +2053,14 @@ static void transport_generic_request_failure(
 		cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE;
 		break;
 	}
-
-	if (!sc)
+	/*
+	 * If a fabric does not define a cmd->se_tfo->new_cmd_map caller,
+	 * make the call to transport_send_check_condition_and_sense()
+	 * directly.  Otherwise expect the fabric to make the call to
+	 * transport_send_check_condition_and_sense() after handling
+	 * possible unsoliticied write data payloads.
+	 */
+	if (!sc && !cmd->se_tfo->new_cmd_map)
 		transport_new_cmd_failure(cmd);
 	else {
 		ret = transport_send_check_condition_and_sense(cmd,
-- 
cgit v1.1


From 706d5860969b3b24d65d9a57bd3bb5e4a1419c08 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 28 Jul 2011 00:07:03 -0700
Subject: target: Add WRITE_SAME (10) parsing and refactor passthrough checks

This patch adds initial WRITE_SAME (10) w/ UNMAP=1 support following updates in
sbcr26 to allow UNMAP=1 for the non 16 + 32 byte CDB case.  It also refactors
current pSCSI passthrough passthrough checks into target_check_write_same_discard()
ahead of UNMAP=0 w/ write payload support into target_core_iblock.c.

This includes the support for handling WRITE_SAME in transport_emulate_control_cdb(),
and converts target_emulate_write_same to accept num_blocks directly for
WRITE_SAME, WRITE_SAME_16 and WRITE_SAME_32.

Reported-by: Eric Seppanen <eric@purestorage.com>
Cc: Roland Dreier <roland@purestorage.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@risingtidesystems.com>
---
 drivers/target/target_core_cdb.c       |  28 +++++----
 drivers/target/target_core_transport.c | 102 ++++++++++++++++++---------------
 2 files changed, 73 insertions(+), 57 deletions(-)

diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index d095408..4c1d3a9 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -1090,24 +1090,17 @@ err:
  * Note this is not used for TCM/pSCSI passthrough
  */
 static int
-target_emulate_write_same(struct se_task *task, int write_same32)
+target_emulate_write_same(struct se_task *task, u32 num_blocks)
 {
 	struct se_cmd *cmd = task->task_se_cmd;
 	struct se_device *dev = cmd->se_dev;
 	sector_t range;
 	sector_t lba = cmd->t_task_lba;
-	unsigned int num_blocks;
 	int ret;
 	/*
-	 * Extract num_blocks from the WRITE_SAME_* CDB.  Then use the explict
-	 * range when non zero is supplied, otherwise calculate the remaining
-	 * range based on ->get_blocks() - starting LBA.
+	 * Use the explicit range when non zero is supplied, otherwise calculate
+	 * the remaining range based on ->get_blocks() - starting LBA.
 	 */
-	if (write_same32)
-		num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]);
-	else
-		num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]);
-
 	if (num_blocks != 0)
 		range = num_blocks;
 	else
@@ -1170,13 +1163,23 @@ transport_emulate_control_cdb(struct se_task *task)
 		}
 		ret = target_emulate_unmap(task);
 		break;
+	case WRITE_SAME:
+		if (!dev->transport->do_discard) {
+			pr_err("WRITE_SAME emulation not supported"
+					" for: %s\n", dev->transport->name);
+			return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
+		}
+		ret = target_emulate_write_same(task,
+				get_unaligned_be16(&cmd->t_task_cdb[7]));
+		break;
 	case WRITE_SAME_16:
 		if (!dev->transport->do_discard) {
 			pr_err("WRITE_SAME_16 emulation not supported"
 					" for: %s\n", dev->transport->name);
 			return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
 		}
-		ret = target_emulate_write_same(task, 0);
+		ret = target_emulate_write_same(task,
+				get_unaligned_be32(&cmd->t_task_cdb[10]));
 		break;
 	case VARIABLE_LENGTH_CMD:
 		service_action =
@@ -1189,7 +1192,8 @@ transport_emulate_control_cdb(struct se_task *task)
 					dev->transport->name);
 				return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
 			}
-			ret = target_emulate_write_same(task, 1);
+			ret = target_emulate_write_same(task,
+				get_unaligned_be32(&cmd->t_task_cdb[28]));
 			break;
 		default:
 			pr_err("Unsupported VARIABLE_LENGTH_CMD SA:"
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index fd7d451..eb8055a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2861,6 +2861,38 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd)
 	return sectors;
 }
 
+static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev)
+{
+	/*
+	 * Determine if the received WRITE_SAME is used to for direct
+	 * passthrough into Linux/SCSI with struct request via TCM/pSCSI
+	 * or we are signaling the use of internal WRITE_SAME + UNMAP=1
+	 * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK code.
+	 */
+	int passthrough = (dev->transport->transport_type ==
+				TRANSPORT_PLUGIN_PHBA_PDEV);
+
+	if (!passthrough) {
+		if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+			pr_err("WRITE_SAME PBDATA and LBDATA"
+				" bits not supported for Block Discard"
+				" Emulation\n");
+			return -ENOSYS;
+		}
+		/*
+		 * Currently for the emulated case we only accept
+		 * tpws with the UNMAP=1 bit set.
+		 */
+		if (!(flags[0] & 0x08)) {
+			pr_err("WRITE_SAME w/o UNMAP bit not"
+				" supported for Block Discard Emulation\n");
+			return -ENOSYS;
+		}
+	}
+
+	return 0;
+}
+
 /*	transport_generic_cmd_sequencer():
  *
  *	Generic Command Sequencer that should work for most DAS transport
@@ -3081,27 +3113,9 @@ static int transport_generic_cmd_sequencer(
 			cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
 			cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
 
-			/*
-			 * Skip the remaining assignments for TCM/PSCSI passthrough
-			 */
-			if (passthrough)
-				break;
-
-			if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) {
-				pr_err("WRITE_SAME PBDATA and LBDATA"
-					" bits not supported for Block Discard"
-					" Emulation\n");
+			if (target_check_write_same_discard(&cdb[10], dev) < 0)
 				goto out_invalid_cdb_field;
-			}
-			/*
-			 * Currently for the emulated case we only accept
-			 * tpws with the UNMAP=1 bit set.
-			 */
-			if (!(cdb[10] & 0x08)) {
-				pr_err("WRITE_SAME w/o UNMAP bit not"
-					" supported for Block Discard Emulation\n");
-				goto out_invalid_cdb_field;
-			}
+
 			break;
 		default:
 			pr_err("VARIABLE_LENGTH_CMD service action"
@@ -3358,33 +3372,31 @@ static int transport_generic_cmd_sequencer(
 		}
 
 		cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
-		passthrough = (dev->transport->transport_type ==
-				TRANSPORT_PLUGIN_PHBA_PDEV);
-		/*
-		 * Determine if the received WRITE_SAME_16 is used to for direct
-		 * passthrough into Linux/SCSI with struct request via TCM/pSCSI
-		 * or we are signaling the use of internal WRITE_SAME + UNMAP=1
-		 * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and
-		 * TCM/FILEIO subsystem plugin backstores.
-		 */
-		if (!passthrough) {
-			if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) {
-				pr_err("WRITE_SAME PBDATA and LBDATA"
-					" bits not supported for Block Discard"
-					" Emulation\n");
-				goto out_invalid_cdb_field;
-			}
-			/*
-			 * Currently for the emulated case we only accept
-			 * tpws with the UNMAP=1 bit set.
-			 */
-			if (!(cdb[1] & 0x08)) {
-				pr_err("WRITE_SAME w/o UNMAP bit not "
-					" supported for Block Discard Emulation\n");
-				goto out_invalid_cdb_field;
-			}
+		cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+
+		if (target_check_write_same_discard(&cdb[1], dev) < 0)
+			goto out_invalid_cdb_field;
+		break;
+	case WRITE_SAME:
+		sectors = transport_get_sectors_10(cdb, cmd, &sector_ret);
+		if (sector_ret)
+			goto out_unsupported_cdb;
+
+		if (sectors)
+			size = transport_get_size(sectors, cdb, cmd);
+		else {
+			pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
+			goto out_invalid_cdb_field;
 		}
+
+		cmd->t_task_lba = get_unaligned_be32(&cdb[2]);
 		cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+		/*
+		 * Follow sbcr26 with WRITE_SAME (10) and check for the existence
+		 * of byte 1 bit 3 UNMAP instead of original reserved field
+		 */
+		if (target_check_write_same_discard(&cdb[1], dev) < 0)
+			goto out_invalid_cdb_field;
 		break;
 	case ALLOW_MEDIUM_REMOVAL:
 	case GPCMD_CLOSE_TRACK:
-- 
cgit v1.1


From 12850626e2717f866a94e6ced724e3efe5a0aab8 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 8 Aug 2011 19:08:23 -0700
Subject: target: Fix WRITE_SAME usage with transport_get_size

For all flavours of WRITE_SAME, we only expect to handle a single block
of data-out buffer payload, regardless of the number of logical blocks
presented in the CDB.  This patch changes all flavours of WRITE_SAME in
transport_generic_cmd_sequencer() to pass '1' into transport_get_size()
instead of the extracted 'sectors' to properly handle the default usage
of sg_write_same without the --xferlen parameter.

Reported-by: Eric Seppanen <eric@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@risingtidesystems.com>
---
 drivers/target/target_core_transport.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index eb8055a..d35c2cc 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3103,7 +3103,7 @@ static int transport_generic_cmd_sequencer(
 				goto out_unsupported_cdb;
 
 			if (sectors)
-				size = transport_get_size(sectors, cdb, cmd);
+				size = transport_get_size(1, cdb, cmd);
 			else {
 				pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not"
 				       " supported\n");
@@ -3365,7 +3365,7 @@ static int transport_generic_cmd_sequencer(
 			goto out_unsupported_cdb;
 
 		if (sectors)
-			size = transport_get_size(sectors, cdb, cmd);
+			size = transport_get_size(1, cdb, cmd);
 		else {
 			pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
 			goto out_invalid_cdb_field;
@@ -3383,7 +3383,7 @@ static int transport_generic_cmd_sequencer(
 			goto out_unsupported_cdb;
 
 		if (sectors)
-			size = transport_get_size(sectors, cdb, cmd);
+			size = transport_get_size(1, cdb, cmd);
 		else {
 			pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
 			goto out_invalid_cdb_field;
-- 
cgit v1.1


From 72f4ba1e32a1e5da31dcf14ea4b8985ae88a8bdb Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 9 Aug 2011 22:53:02 -0700
Subject: target: Remove duplicate task completions in
 transport_emulate_control_cdb

This patch removes a duplicate set of transport_complete_task() calls in
target_emulate_unmap() and target_emulate_write_same() as the completion
call is already done within transport_emulate_control_cdb()

This patch also adds a check in transport_emulate_control_cdb() for the
existing SCF_EMULATE_CDB_ASYNC flag currently used by SYNCHRONIZE_CACHE
in order to handle IMMEDIATE processing.

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_cdb.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index 4c1d3a9..40ad142 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -1077,8 +1077,6 @@ target_emulate_unmap(struct se_task *task)
 		size -= 16;
 	}
 
-	task->task_scsi_status = GOOD;
-	transport_complete_task(task, 1);
 err:
 	transport_kunmap_first_data_page(cmd);
 
@@ -1115,8 +1113,6 @@ target_emulate_write_same(struct se_task *task, u32 num_blocks)
 		return ret;
 	}
 
-	task->task_scsi_status = GOOD;
-	transport_complete_task(task, 1);
 	return 0;
 }
 
@@ -1228,8 +1224,14 @@ transport_emulate_control_cdb(struct se_task *task)
 
 	if (ret < 0)
 		return ret;
-	task->task_scsi_status = GOOD;
-	transport_complete_task(task, 1);
+	/*
+	 * Handle the successful completion here unless a caller
+	 * has explictly requested an asychronous completion.
+	 */
+	if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) {
+		task->task_scsi_status = GOOD;
+		transport_complete_task(task, 1);
+	}
 
 	return PYX_TRANSPORT_SENT_TO_TRANSPORT;
 }
-- 
cgit v1.1


From 7abbe7f3e4243e28a9169ee1b8d76f10a6f5d37c Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 10 Aug 2011 18:41:14 -0700
Subject: target: Fix SYNCHRONIZE_CACHE zero LBA + range breakage

This patch fixes a SYNCHRONIZE_CACHE CDB handling bug with IBLOCK/FILEIO
backends where transport_cmd_get_valid_sectors() was incorrectly rejecting
a zero LBA + range CDB from being processed, and returning CHECK_CONDITION.

This includes changing transport_cmd_get_valid_sectors() to return '0' on
success and '-EINVAL' on failure (this makes more sense than sectors),
and to only check transport_cmd_get_valid_sectors() when a non zero LBA +
range SYNCHRONIZE_CACHE operation has been receieved for the non passthrough
case.

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index d35c2cc..d385c317 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2853,12 +2853,10 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd)
 			" transport_dev_end_lba(): %llu\n",
 			cmd->t_task_lba, sectors,
 			transport_dev_end_lba(dev));
-		pr_err("  We should return CHECK_CONDITION"
-		       " but we don't yet\n");
-		return 0;
+		return -EINVAL;
 	}
 
-	return sectors;
+	return 0;
 }
 
 static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev)
@@ -3350,10 +3348,12 @@ static int transport_generic_cmd_sequencer(
 		cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC;
 		/*
 		 * Check to ensure that LBA + Range does not exceed past end of
-		 * device.
+		 * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
 		 */
-		if (!transport_cmd_get_valid_sectors(cmd))
-			goto out_invalid_cdb_field;
+		if ((cmd->t_task_lba != 0) || (sectors != 0)) {
+			if (transport_cmd_get_valid_sectors(cmd) < 0)
+				goto out_invalid_cdb_field;
+		}
 		break;
 	case UNMAP:
 		size = get_unaligned_be16(&cdb[7]);
-- 
cgit v1.1


From 01cde4d54327884a0b61ce8666092f5703557d4b Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 10 Aug 2011 00:59:58 -0700
Subject: target: Add missing DATA_SG_IO transport_cmd_get_valid_sectors check

This patch adds the missing transport_cmd_get_valid_sectors() check for
SCF_SCSI_DATA_SG_IO_CDB type payloads to ensure that a received LBA + range
does not exeed past the end of associated backend struct se_device.

This patch also fixes a bug in the failure path of transport_new_cmd_obj()
where this check can fail, so change to use a signed 'rc' and return '-EINVAL'
to signal proper transport_generic_request_failure() handling.

Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index d385c317..ab61c55 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3891,9 +3891,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd);
 static int transport_new_cmd_obj(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
-	u32 task_cdbs;
-	u32 rc;
-	int set_counts = 1;
+	int set_counts = 1, rc, task_cdbs;
 
 	/*
 	 * Setup any BIDI READ tasks and memory from
@@ -3911,7 +3909,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
 			cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
 			cmd->scsi_sense_reason =
 				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-			return PYX_TRANSPORT_LU_COMM_FAILURE;
+			return -EINVAL;
 		}
 		atomic_inc(&cmd->t_fe_count);
 		atomic_inc(&cmd->t_se_count);
@@ -3930,7 +3928,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
 		cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
 		cmd->scsi_sense_reason =
 			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-		return PYX_TRANSPORT_LU_COMM_FAILURE;
+		return -EINVAL;
 	}
 
 	if (set_counts) {
@@ -4248,10 +4246,13 @@ static u32 transport_allocate_tasks(
 	struct scatterlist *sgl,
 	unsigned int sgl_nents)
 {
-	if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)
+	if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) {
+		if (transport_cmd_get_valid_sectors(cmd) < 0)
+			return -EINVAL;
+
 		return transport_allocate_data_tasks(cmd, lba, data_direction,
 						     sgl, sgl_nents);
-	else
+	} else
 		return transport_allocate_control_task(cmd);
 
 }
-- 
cgit v1.1


From 525a48a21da259d00d6ebc5b60563b5bcf022c26 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 13 Aug 2011 02:11:38 -0700
Subject: target: Fix task count > 1 handling breakage and use max_sector page
 alignment

This patch addresses recent breakage with multiple se_task (task_count > 1)
operation following backend dev->se_sub_dev->se_dev_attrib.max_sectors in new
transport_allocate_data_tasks() code.  The initial bug here was a bogus
task->task_sg_nents assignment in transport_allocate_data_tasks() based on
the passed parameter, which now uses DIV_ROUND_UP(task_size, PAGE_SIZE) to
determine the proper number of per task SGL entries for the (task_count > 1)
case.

This also means we now need to enforce a PAGE_SIZE aligned max_sector count
value for this to work as expected without bringing back the pre v3.1
transport_map_mem_to_sg() logic to handle SGL offsets across multiple tasks.
So this patch adds se_dev_align_max_sectors() to round down max_sectors as
necessary to ensure this alignment via se_dev_set_default_attribs() and
se_dev_align_max_sectors() and keeps it simple for (task_count > 1)
operation.

So far this bugfix has been tested with (task_count > 1) operation
using iscsi-target and iblock backends.

Reported-by: Chris Boot <bootc@bootc.net>
Cc: Kiran Patil <kiran.patil@intel.com>
Cc: Andy Grover <agrover@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c    | 28 ++++++++++++++++++++++++++++
 drivers/target/target_core_transport.c |  7 +++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index ec3fbcd..4b5237f 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev)
 	return ret;
 }
 
+u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size)
+{
+	u32 tmp, aligned_max_sectors;
+	/*
+	 * Limit max_sectors to a PAGE_SIZE aligned value for modern
+	 * transport_allocate_data_tasks() operation.
+	 */
+	tmp = rounddown((max_sectors * block_size), PAGE_SIZE);
+	aligned_max_sectors = (tmp / block_size);
+	if (max_sectors != aligned_max_sectors) {
+		printk(KERN_INFO "Rounding down aligned max_sectors from %u"
+				" to %u\n", max_sectors, aligned_max_sectors);
+		return aligned_max_sectors;
+	}
+
+	return max_sectors;
+}
+
 void se_dev_set_default_attribs(
 	struct se_device *dev,
 	struct se_dev_limits *dev_limits)
@@ -878,6 +896,11 @@ void se_dev_set_default_attribs(
 	 * max_sectors is based on subsystem plugin dependent requirements.
 	 */
 	dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors;
+	/*
+	 * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+	 */
+	limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors,
+						limits->logical_block_size);
 	dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors;
 	/*
 	 * Set optimal_sectors from max_sectors, which can be lowered via
@@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors)
 			return -EINVAL;
 		}
 	}
+	/*
+	 * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+	 */
+	max_sectors = se_dev_align_max_sectors(max_sectors,
+				dev->se_sub_dev->se_dev_attrib.block_size);
 
 	dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors;
 	pr_debug("dev[%p]: SE Device max_sectors changed to %u\n",
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ab61c55..efac6a9 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -4126,7 +4126,11 @@ static int transport_allocate_data_tasks(
 
 		/* Update new cdb with updated lba/sectors */
 		cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb);
-
+		/*
+		 * This now assumes that passed sg_ents are in PAGE_SIZE chunks
+		 * in order to calculate the number per task SGL entries
+		 */
+		task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE);
 		/*
 		 * Check if the fabric module driver is requesting that all
 		 * struct se_task->task_sg[] be chained together..  If so,
@@ -4136,7 +4140,6 @@ static int transport_allocate_data_tasks(
 		 * It's so much easier and only a waste when task_count > 1.
 		 * That is extremely rare.
 		 */
-		task->task_sg_nents = sgl_nents;
 		if (cmd->se_tfo->task_sg_chaining) {
 			task->task_sg_nents++;
 			task->task_padded_sg = 1;
-- 
cgit v1.1


From c3c74c7a33d837be391ab61aaae39bb21f16736a Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 13 Aug 2011 05:30:31 -0700
Subject: target: Fix task SGL chaining breakage with
 transport_allocate_data_tasks

This patch fixes two bugs associated with transport_do_task_sg_chain()
operation where transport_allocate_data_tasks() was incorrectly setting
task_padded_sg for all tasks, and causing bogus task->task_sg_nents
assignments + OOPsen with fabrics depending upon this code.  The first bit
here adds a task_sg_nents_padded check in transport_allocate_data_tasks()
to include an extra SGL vector when necessary for tasks that expect to
be linked using sg_chain().

The second change involves making transport_do_task_sg_chain() properly
account for the extra SGL vector when task->task_padded_sg is set for
the non trailing ->task_sg or single ->task_sg allocations.  Note this
patch also removes the BUG_ON(!task->task_padded_sg) check within
transport_do_task_sg_chain() as we expect this to happen normally
with the updated logic in transport_allocate_data_tasks(), along with
being bogus for CONTROL_SG_IO_CDB type payloads.

So far this bugfix has been tested with tcm_qla2xxx and iblock backends
in (task_count > 1)( and (task_count == 1) operation.

Reported-by: Kiran Patil <kiran.patil@intel.com>
Cc: Kiran Patil <kiran.patil@intel.com>
Cc: Andy Grover <agrover@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index efac6a9..9cc49d1 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -4044,8 +4044,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
 		if (!task->task_sg)
 			continue;
 
-		BUG_ON(!task->task_padded_sg);
-
 		if (!sg_first) {
 			sg_first = task->task_sg;
 			chained_nents = task->task_sg_nents;
@@ -4053,9 +4051,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
 			sg_chain(sg_prev, sg_prev_nents, task->task_sg);
 			chained_nents += task->task_sg_nents;
 		}
+		/*
+		 * For the padded tasks, use the extra SGL vector allocated
+		 * in transport_allocate_data_tasks() for the sg_prev_nents
+		 * offset into sg_chain() above..  The last task of a
+		 * multi-task list, or a single task will not have
+		 * task->task_sg_padded set..
+		 */
+		if (task->task_padded_sg)
+			sg_prev_nents = (task->task_sg_nents + 1);
+		else
+			sg_prev_nents = task->task_sg_nents;
 
 		sg_prev = task->task_sg;
-		sg_prev_nents = task->task_sg_nents;
 	}
 	/*
 	 * Setup the starting pointer and total t_tasks_sg_linked_no including
@@ -4107,7 +4115,7 @@ static int transport_allocate_data_tasks(
 	
 	cmd_sg = sgl;
 	for (i = 0; i < task_count; i++) {
-		unsigned int task_size;
+		unsigned int task_size, task_sg_nents_padded;
 		int count;
 
 		task = transport_generic_get_task(cmd, data_direction);
@@ -4135,24 +4143,24 @@ static int transport_allocate_data_tasks(
 		 * Check if the fabric module driver is requesting that all
 		 * struct se_task->task_sg[] be chained together..  If so,
 		 * then allocate an extra padding SG entry for linking and
-		 * marking the end of the chained SGL.
-		 * Possibly over-allocate task sgl size by using cmd sgl size.
-		 * It's so much easier and only a waste when task_count > 1.
-		 * That is extremely rare.
+		 * marking the end of the chained SGL for every task except
+		 * the last one for (task_count > 1) operation, or skipping
+		 * the extra padding for the (task_count == 1) case.
 		 */
-		if (cmd->se_tfo->task_sg_chaining) {
-			task->task_sg_nents++;
+		if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) {
+			task_sg_nents_padded = (task->task_sg_nents + 1);
 			task->task_padded_sg = 1;
-		}
+		} else
+			task_sg_nents_padded = task->task_sg_nents;
 
 		task->task_sg = kmalloc(sizeof(struct scatterlist) *
-					task->task_sg_nents, GFP_KERNEL);
+					task_sg_nents_padded, GFP_KERNEL);
 		if (!task->task_sg) {
 			cmd->se_dev->transport->free_task(task);
 			return -ENOMEM;
 		}
 
-		sg_init_table(task->task_sg, task->task_sg_nents);
+		sg_init_table(task->task_sg, task_sg_nents_padded);
 
 		task_size = task->task_size;
 
-- 
cgit v1.1


From 6626a0572657a0945a7b9ccf4a6d6ad1750f9adc Mon Sep 17 00:00:00 2001
From: Chris Boot <bootc@bootc.net>
Date: Sat, 13 Aug 2011 22:10:46 -0700
Subject: iscsi-target: Implement iSCSI target IPv6 address printing.

The iSCSI target configfs code to print out an initiator's IPv6 address
is not fully implemented. This patch uses snprintf() with the "%pI6c"
format string to format the IPv6 address for display purposes.

Signed-off-by: Chris Boot <bootc@bootc.net>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_login.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index bcaf82f..daad362 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 					ISCSI_LOGIN_STATUS_TARGET_ERROR);
 			goto new_sess_out;
 		}
-#if 0
-		if (!iscsi_ntop6((const unsigned char *)
-				&sock_in6.sin6_addr.in6_u,
-				(char *)&conn->ipv6_login_ip[0],
-				IPV6_ADDRESS_SPACE)) {
-			pr_err("iscsi_ntop6() failed\n");
-			iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
-					ISCSI_LOGIN_STATUS_TARGET_ERROR);
-			goto new_sess_out;
-		}
-#else
-		pr_debug("Skipping iscsi_ntop6()\n");
-#endif
+		snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
+				&sock_in6.sin6_addr.in6_u);
+		conn->login_port = ntohs(sock_in6.sin6_port);
 	} else {
 		memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
-- 
cgit v1.1


From ba7736696341ad4253125055c0c85aa9f42959a0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 13 Aug 2011 22:35:00 -0700
Subject: iscsi-target: Fix iscsit_allocate_se_cmd_for_tmr failure path bugs

This patch fixes two bugs in allocation failure handling in
iscsit_allocate_se_cmd_for_tmr():

This first reported by DanC is a free-after call to transport_free_se_cmd(), this
patch drops the transport_free_se_cmd() call all together, as iscsit_release_cmd()
will release existing allocations as expected.

The second is a bug where iscsi_cmd_t was being leaked on a cmd->tmr_req allocation
failure, so make this jump to iscsit_release_cmd() as well.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_util.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index a1acb01..a0d23bc 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
 	if (!cmd->tmr_req) {
 		pr_err("Unable to allocate memory for"
 			" Task Management command!\n");
-		return NULL;
+		goto out;
 	}
 	/*
 	 * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
 	return cmd;
 out:
 	iscsit_release_cmd(cmd);
-	if (se_cmd)
-		transport_free_se_cmd(se_cmd);
 	return NULL;
 }
 
-- 
cgit v1.1


From f15ea5780d08e4c96930c0d607d05e480ec588c8 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Fri, 12 Aug 2011 10:01:24 -0700
Subject: target: Print subpage too for unhandled MODE SENSE pages

Make a log message more useful by printing both the page and subpage
that an initiator is requesting.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_cdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index 40ad142..89ae923 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -920,8 +920,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten)
 		length += target_modesense_control(dev, &buf[offset+length]);
 		break;
 	default:
-		pr_err("Got Unknown Mode Page: 0x%02x\n",
-				cdb[2] & 0x3f);
+		pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n",
+		       cdb[2] & 0x3f, cdb[3]);
 		return PYX_TRANSPORT_UNKNOWN_MODE_PAGE;
 	}
 	offset += length;
-- 
cgit v1.1


From 4e0f05297ff615a9a4e269da301ff77f660a3ab0 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Fri, 12 Aug 2011 10:16:52 -0700
Subject: tcm_fc: init/exit functions should not be protected by "#ifdef
 MODULE"

There's no need for the #ifdef protection when building into the kernel,
and in fact we need the module_init() for the initialization function to
be called.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/tcm_fc/tfc_conf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index 8781d1e..520a8ba 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -655,9 +655,7 @@ static void __exit ft_exit(void)
 	synchronize_rcu();
 }
 
-#ifdef MODULE
 MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION);
 MODULE_LICENSE("GPL");
 module_init(ft_init);
 module_exit(ft_exit);
-#endif /* MODULE */
-- 
cgit v1.1


From e63a8e1933a2218cf801e46dd01bd8cca4a555ec Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Fri, 12 Aug 2011 16:01:02 -0700
Subject: target: Make locking in transport_deregister_session() IRQ safe

At least the tcm_qla2xxx fabric driver calls into transport_deregister_session()
while holding an IRQ-disabled spinlock, so the inner locking needs to
use spin_lock_irqsave() instead of spin_lock_bh().

This fixes warnings seen with tcm_qla2xxx like:

    WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x98/0xb0()
    Call Trace:
     [<ffffffff8104e65f>] warn_slowpath_common+0x7f/0xc0
     [<ffffffff8104e6ba>] warn_slowpath_null+0x1a/0x20
     [<ffffffff81055368>] local_bh_enable_ip+0x98/0xb0
     [<ffffffff814d5284>] _raw_spin_unlock_bh+0x14/0x20
     [<ffffffffa027b7f6>] transport_deregister_session+0x96/0x180 [target_core_mod]
     [<ffffffffa00f7731>] tcm_qla2xxx_free_session+0xd1/0x170 [tcm_qla2xxx]
     [<ffffffffa01b9173>] qla_tgt_sess_put+0xc3/0x140 [qla2xxx]
     [<ffffffffa01bf40f>] qla_tgt_stop_phase1+0x8f/0x2c0 [qla2xxx]
     [<ffffffffa00f735e>] tcm_qla2xxx_tpg_store_enable+0x6e/0xd0 [tcm_qla2xxx]
     [<ffffffffa026ca29>] target_fabric_tpg_attr_store+0x39/0x40 [target_core_mod]
     [<ffffffffa00a575d>] configfs_write_file+0xbd/0x120 [configfs]
     [<ffffffff811464a6>] vfs_write+0xc6/0x180
     [<ffffffff811467c1>] sys_write+0x51/0x90
     [<ffffffff814dd382>] system_call_fastpath+0x16/0x1b

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9cc49d1..8d0c58e 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess)
 {
 	struct se_portal_group *se_tpg = se_sess->se_tpg;
 	struct se_node_acl *se_nacl;
+	unsigned long flags;
 
 	if (!se_tpg) {
 		transport_free_session(se_sess);
 		return;
 	}
 
-	spin_lock_bh(&se_tpg->session_lock);
+	spin_lock_irqsave(&se_tpg->session_lock, flags);
 	list_del(&se_sess->sess_list);
 	se_sess->se_tpg = NULL;
 	se_sess->fabric_sess_ptr = NULL;
-	spin_unlock_bh(&se_tpg->session_lock);
+	spin_unlock_irqrestore(&se_tpg->session_lock, flags);
 
 	/*
 	 * Determine if we need to do extra work for this initiator node's
@@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess)
 	 */
 	se_nacl = se_sess->se_node_acl;
 	if (se_nacl) {
-		spin_lock_bh(&se_tpg->acl_node_lock);
+		spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
 		if (se_nacl->dynamic_node_acl) {
 			if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache(
 					se_tpg)) {
 				list_del(&se_nacl->acl_list);
 				se_tpg->num_node_acls--;
-				spin_unlock_bh(&se_tpg->acl_node_lock);
+				spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 
 				core_tpg_wait_for_nacl_pr_ref(se_nacl);
 				core_free_device_list_for_node(se_nacl, se_tpg);
 				se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg,
 						se_nacl);
-				spin_lock_bh(&se_tpg->acl_node_lock);
+				spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
 			}
 		}
-		spin_unlock_bh(&se_tpg->acl_node_lock);
+		spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 	}
 
 	transport_free_session(se_sess);
-- 
cgit v1.1


From 28638887f351d11867562322b7abaa014dd5528a Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Tue, 16 Aug 2011 09:40:01 -0700
Subject: target: Convert acl_node_lock to be IRQ-disabling

With qla2xxx, acl_node_lock is taken inside qla2xxx's hardware_lock,
which is taken in hardirq context.  This means acl_node_lock must become
an IRQ-disabling lock; in particular this fixes lockdep warnings along
the lines of

    ======================================================
    [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ]

     (&(&se_tpg->acl_node_lock)->rlock){+.....}, at: [<ffffffffa026f872>] transport_deregister_session+0x92/0x140 [target_core_mod]

    and this task is already holding:
     (&(&ha->hardware_lock)->rlock){-.-...}, at: [<ffffffffa017c5e7>] qla_tgt_stop_phase1+0x57/0x2c0 [qla2xxx]
    which would create a new lock dependency:
     (&(&ha->hardware_lock)->rlock){-.-...} -> (&(&se_tpg->acl_node_lock)->rlock){+.....}

    but this new dependency connects a HARDIRQ-irq-safe lock:
     (&(&ha->hardware_lock)->rlock){-.-...}

    to a HARDIRQ-irq-unsafe lock:
     (&(&se_tpg->acl_node_lock)->rlock){+.....}

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 16 ++++++------
 drivers/target/target_core_pr.c     |  8 +++---
 drivers/target/target_core_tpg.c    | 52 ++++++++++++++++++-------------------
 drivers/target/tcm_fc/tfc_conf.c    |  4 +--
 4 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 4b5237f..ca6e4a4 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
 	struct se_dev_entry *deve;
 	u32 i;
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
-		spin_unlock_bh(&tpg->acl_node_lock);
+		spin_unlock_irq(&tpg->acl_node_lock);
 
 		spin_lock_irq(&nacl->device_list_lock);
 		for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
@@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
 		}
 		spin_unlock_irq(&nacl->device_list_lock);
 
-		spin_lock_bh(&tpg->acl_node_lock);
+		spin_lock_irq(&tpg->acl_node_lock);
 	}
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 }
 
 static struct se_port *core_alloc_port(struct se_device *dev)
@@ -1372,17 +1372,17 @@ struct se_lun *core_dev_add_lun(
 	 */
 	if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) {
 		struct se_node_acl *acl;
-		spin_lock_bh(&tpg->acl_node_lock);
+		spin_lock_irq(&tpg->acl_node_lock);
 		list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
 			if (acl->dynamic_node_acl &&
 			    (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only ||
 			     !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) {
-				spin_unlock_bh(&tpg->acl_node_lock);
+				spin_unlock_irq(&tpg->acl_node_lock);
 				core_tpg_add_node_to_devs(acl, tpg);
-				spin_lock_bh(&tpg->acl_node_lock);
+				spin_lock_irq(&tpg->acl_node_lock);
 			}
 		}
-		spin_unlock_bh(&tpg->acl_node_lock);
+		spin_unlock_irq(&tpg->acl_node_lock);
 	}
 
 	return lun_p;
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 1c1b849..7fd3a16 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port(
 			 * from the decoded fabric module specific TransportID
 			 * at *i_str.
 			 */
-			spin_lock_bh(&tmp_tpg->acl_node_lock);
+			spin_lock_irq(&tmp_tpg->acl_node_lock);
 			dest_node_acl = __core_tpg_get_initiator_node_acl(
 						tmp_tpg, i_str);
 			if (dest_node_acl) {
 				atomic_inc(&dest_node_acl->acl_pr_ref_count);
 				smp_mb__after_atomic_inc();
 			}
-			spin_unlock_bh(&tmp_tpg->acl_node_lock);
+			spin_unlock_irq(&tmp_tpg->acl_node_lock);
 
 			if (!dest_node_acl) {
 				core_scsi3_tpg_undepend_item(tmp_tpg);
@@ -3496,14 +3496,14 @@ after_iport_check:
 	/*
 	 * Locate the destination struct se_node_acl from the received Transport ID
 	 */
-	spin_lock_bh(&dest_se_tpg->acl_node_lock);
+	spin_lock_irq(&dest_se_tpg->acl_node_lock);
 	dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
 				initiator_str);
 	if (dest_node_acl) {
 		atomic_inc(&dest_node_acl->acl_pr_ref_count);
 		smp_mb__after_atomic_inc();
 	}
-	spin_unlock_bh(&dest_se_tpg->acl_node_lock);
+	spin_unlock_irq(&dest_se_tpg->acl_node_lock);
 
 	if (!dest_node_acl) {
 		pr_err("Unable to locate %s dest_node_acl for"
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 718ccd1..162b736 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(
 {
 	struct se_node_acl *acl;
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
 		if (!strcmp(acl->initiatorname, initiatorname) &&
 		    !acl->dynamic_node_acl) {
-			spin_unlock_bh(&tpg->acl_node_lock);
+			spin_unlock_irq(&tpg->acl_node_lock);
 			return acl;
 		}
 	}
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	return NULL;
 }
@@ -309,10 +309,10 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
 	else
 		core_tpg_add_node_to_devs(acl, tpg);
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	list_add_tail(&acl->acl_list, &tpg->acl_node_list);
 	tpg->num_node_acls++;
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s"
 		" Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
@@ -362,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 {
 	struct se_node_acl *acl = NULL;
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
 	if (acl) {
 		if (acl->dynamic_node_acl) {
@@ -370,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 			pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
 				" for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
 				tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
-			spin_unlock_bh(&tpg->acl_node_lock);
+			spin_unlock_irq(&tpg->acl_node_lock);
 			/*
 			 * Release the locally allocated struct se_node_acl
 			 * because * core_tpg_add_initiator_node_acl() returned
@@ -386,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 			" Node %s already exists for TPG %u, ignoring"
 			" request.\n",  tpg->se_tpg_tfo->get_fabric_name(),
 			initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-		spin_unlock_bh(&tpg->acl_node_lock);
+		spin_unlock_irq(&tpg->acl_node_lock);
 		return ERR_PTR(-EEXIST);
 	}
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	if (!se_nacl) {
 		pr_err("struct se_node_acl pointer is NULL\n");
@@ -426,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
 		return ERR_PTR(-EINVAL);
 	}
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	list_add_tail(&acl->acl_list, &tpg->acl_node_list);
 	tpg->num_node_acls++;
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 done:
 	pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s"
@@ -453,14 +453,14 @@ int core_tpg_del_initiator_node_acl(
 	struct se_session *sess, *sess_tmp;
 	int dynamic_acl = 0;
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	if (acl->dynamic_node_acl) {
 		acl->dynamic_node_acl = 0;
 		dynamic_acl = 1;
 	}
 	list_del(&acl->acl_list);
 	tpg->num_node_acls--;
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	spin_lock_bh(&tpg->session_lock);
 	list_for_each_entry_safe(sess, sess_tmp,
@@ -511,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth(
 	struct se_node_acl *acl;
 	int dynamic_acl = 0;
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
 	if (!acl) {
 		pr_err("Access Control List entry for %s Initiator"
 			" Node %s does not exists for TPG %hu, ignoring"
 			" request.\n", tpg->se_tpg_tfo->get_fabric_name(),
 			initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-		spin_unlock_bh(&tpg->acl_node_lock);
+		spin_unlock_irq(&tpg->acl_node_lock);
 		return -ENODEV;
 	}
 	if (acl->dynamic_node_acl) {
 		acl->dynamic_node_acl = 0;
 		dynamic_acl = 1;
 	}
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	spin_lock_bh(&tpg->session_lock);
 	list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) {
@@ -541,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth(
 				tpg->se_tpg_tfo->get_fabric_name(), initiatorname);
 			spin_unlock_bh(&tpg->session_lock);
 
-			spin_lock_bh(&tpg->acl_node_lock);
+			spin_lock_irq(&tpg->acl_node_lock);
 			if (dynamic_acl)
 				acl->dynamic_node_acl = 1;
-			spin_unlock_bh(&tpg->acl_node_lock);
+			spin_unlock_irq(&tpg->acl_node_lock);
 			return -EEXIST;
 		}
 		/*
@@ -579,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth(
 		if (init_sess)
 			tpg->se_tpg_tfo->close_session(init_sess);
 
-		spin_lock_bh(&tpg->acl_node_lock);
+		spin_lock_irq(&tpg->acl_node_lock);
 		if (dynamic_acl)
 			acl->dynamic_node_acl = 1;
-		spin_unlock_bh(&tpg->acl_node_lock);
+		spin_unlock_irq(&tpg->acl_node_lock);
 		return -EINVAL;
 	}
 	spin_unlock_bh(&tpg->session_lock);
@@ -598,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth(
 		initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
 		tpg->se_tpg_tfo->tpg_get_tag(tpg));
 
-	spin_lock_bh(&tpg->acl_node_lock);
+	spin_lock_irq(&tpg->acl_node_lock);
 	if (dynamic_acl)
 		acl->dynamic_node_acl = 1;
-	spin_unlock_bh(&tpg->acl_node_lock);
+	spin_unlock_irq(&tpg->acl_node_lock);
 
 	return 0;
 }
@@ -725,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
 	 * not been released because of TFO->tpg_check_demo_mode_cache() == 1
 	 * in transport_deregister_session().
 	 */
-	spin_lock_bh(&se_tpg->acl_node_lock);
+	spin_lock_irq(&se_tpg->acl_node_lock);
 	list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
 			acl_list) {
 		list_del(&nacl->acl_list);
 		se_tpg->num_node_acls--;
-		spin_unlock_bh(&se_tpg->acl_node_lock);
+		spin_unlock_irq(&se_tpg->acl_node_lock);
 
 		core_tpg_wait_for_nacl_pr_ref(nacl);
 		core_free_device_list_for_node(nacl, se_tpg);
 		se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl);
 
-		spin_lock_bh(&se_tpg->acl_node_lock);
+		spin_lock_irq(&se_tpg->acl_node_lock);
 	}
-	spin_unlock_bh(&se_tpg->acl_node_lock);
+	spin_unlock_irq(&se_tpg->acl_node_lock);
 
 	if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
 		core_tpg_release_virtual_lun0(se_tpg);
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index 520a8ba..b15879d 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
 	struct se_portal_group *se_tpg = &tpg->se_tpg;
 	struct se_node_acl *se_acl;
 
-	spin_lock_bh(&se_tpg->acl_node_lock);
+	spin_lock_irq(&se_tpg->acl_node_lock);
 	list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) {
 		acl = container_of(se_acl, struct ft_node_acl, se_node_acl);
 		pr_debug("acl %p port_name %llx\n",
@@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
 			break;
 		}
 	}
-	spin_unlock_bh(&se_tpg->acl_node_lock);
+	spin_unlock_irq(&se_tpg->acl_node_lock);
 	return found;
 }
 
-- 
cgit v1.1


From 0e69d75ccb2f091757b38d4d6a2ed739e06b615e Mon Sep 17 00:00:00 2001
From: Andrew Bird <ajb@spheresystems.co.uk>
Date: Tue, 16 Aug 2011 13:57:14 -0600
Subject: USB option driver add PID of Huawei Vodafone K3806

This patch adds the product ID of Huawei's Vodafone K3806 mobile broadband
modem to option.c. This is necessary so that the driver gets loaded on
demand without the intervention of usb_modeswitch. This has the benefit of
it becoming available faster and also ensures that the option driver is not
bound to a network interface that should be claimed by cdc_ether.

Signed-off-by: Andrew Bird <ajb@spheresystems.co.uk>
Signed-off-by: Alex Chiang <achiang@canonical.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 8156561..6e04222 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -148,6 +148,7 @@ static void option_instat_callback(struct urb *urb);
 #define HUAWEI_PRODUCT_K4505			0x1464
 #define HUAWEI_PRODUCT_K3765			0x1465
 #define HUAWEI_PRODUCT_E14AC			0x14AC
+#define HUAWEI_PRODUCT_K3806			0x14AE
 #define HUAWEI_PRODUCT_K3770			0x14C9
 #define HUAWEI_PRODUCT_K3771			0x14CA
 #define HUAWEI_PRODUCT_K4510			0x14CB
@@ -551,6 +552,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) },
-- 
cgit v1.1


From 7e1805844da18a37e6d251d286f93c94b52d791e Mon Sep 17 00:00:00 2001
From: Andrew Bird <ajb@spheresystems.co.uk>
Date: Tue, 16 Aug 2011 13:58:21 -0600
Subject: USB option driver add PID of Huawei Vodafone K4605

This patch adds the product ID of Huawei's Vodafone K4605 mobile broadband
modem to option.c. This is necessary so that the driver gets loaded on
demand without the intervention of usb_modeswitch. This has the benefit of
it becoming available faster and also ensures that the option driver is not
bound to a network interface that should be claimed by suitable network
driver.

Signed-off-by: Andrew Bird <ajb@spheresystems.co.uk>
Signed-off-by: Alex Chiang <achiang@canonical.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/option.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 6e04222..ab86e0d 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -149,6 +149,7 @@ static void option_instat_callback(struct urb *urb);
 #define HUAWEI_PRODUCT_K3765			0x1465
 #define HUAWEI_PRODUCT_E14AC			0x14AC
 #define HUAWEI_PRODUCT_K3806			0x14AE
+#define HUAWEI_PRODUCT_K4605			0x14C6
 #define HUAWEI_PRODUCT_K3770			0x14C9
 #define HUAWEI_PRODUCT_K3771			0x14CA
 #define HUAWEI_PRODUCT_K4510			0x14CB
@@ -553,6 +554,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) },
@@ -1136,10 +1138,11 @@ static int option_probe(struct usb_serial *serial,
 		serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff)
 		return -ENODEV;
 
-	/* Don't bind network interfaces on Huawei K3765 & K4505 */
+	/* Don't bind network interfaces on Huawei K3765, K4505 & K4605 */
 	if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID &&
 		(serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 ||
-			serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) &&
+			serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 ||
+			serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) &&
 		serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
 		return -ENODEV;
 
-- 
cgit v1.1


From d0f2fb2500b1c5fe4967eb45d8c9bc758d7aef80 Mon Sep 17 00:00:00 2001
From: Wang Zhi <zhi.wang@windriver.com>
Date: Wed, 17 Aug 2011 10:39:31 +0800
Subject: USB: EHCI: Do not rely on PORT_SUSPEND to stop USB resuming in
 ehci_bus_resume().

From EHCI Spec p.28 HC should clear PORT_SUSPEND when SW clears
PORT_RESUME. In Intel Oaktrail platform, MPH (Multi-Port Host
Controller) core clears PORT_SUSPEND directly when SW sets PORT_RESUME
bit. If we rely on PORT_SUSPEND bit to stop USB resume, we will miss
the action of clearing PORT_RESUME. This will cause unexpected long
resume signal on USB bus.

Signed-off-by: Wang Zhi <zhi.wang@windriver.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/ehci-hub.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index e051b30..4c32cb1 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -343,7 +343,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
 	u32			temp;
 	u32			power_okay;
 	int			i;
-	u8			resume_needed = 0;
+	unsigned long		resume_needed = 0;
 
 	if (time_before (jiffies, ehci->next_statechange))
 		msleep(5);
@@ -416,7 +416,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
 		if (test_bit(i, &ehci->bus_suspended) &&
 				(temp & PORT_SUSPEND)) {
 			temp |= PORT_RESUME;
-			resume_needed = 1;
+			set_bit(i, &resume_needed);
 		}
 		ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
 	}
@@ -431,8 +431,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
 	i = HCS_N_PORTS (ehci->hcs_params);
 	while (i--) {
 		temp = ehci_readl(ehci, &ehci->regs->port_status [i]);
-		if (test_bit(i, &ehci->bus_suspended) &&
-				(temp & PORT_SUSPEND)) {
+		if (test_bit(i, &resume_needed)) {
 			temp &= ~(PORT_RWC_BITS | PORT_RESUME);
 			ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
 			ehci_vdbg (ehci, "resumed port %d\n", i + 1);
-- 
cgit v1.1


From e5d3d4463fb30998385f9e78ab3c7f63b5813000 Mon Sep 17 00:00:00 2001
From: Yulgon Kim <yulgon.kim@samsung.com>
Date: Thu, 18 Aug 2011 14:02:45 +0900
Subject: usb: s5p-ehci: fix a NULL pointer deference

This patch fixes a NULL pointer deference. A NULL pointer
dereference happens since s5p_ehci->hcd field is not initialized
yet in probe function.

[jg1.han@samsung.com: edit commit message]
Signed-off-by: Yulgon Kim <yulgon.kim@samsung.com>
Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/ehci-s5p.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c
index b3958b3..9e77f1c 100644
--- a/drivers/usb/host/ehci-s5p.c
+++ b/drivers/usb/host/ehci-s5p.c
@@ -86,6 +86,7 @@ static int __devinit s5p_ehci_probe(struct platform_device *pdev)
 		goto fail_hcd;
 	}
 
+	s5p_ehci->hcd = hcd;
 	s5p_ehci->clk = clk_get(&pdev->dev, "usbhost");
 
 	if (IS_ERR(s5p_ehci->clk)) {
-- 
cgit v1.1


From c6eb2d75ffcdfafa37ff010bf467de20d468ef79 Mon Sep 17 00:00:00 2001
From: "Gavin.zhu" <gavin.kx@qq.com>
Date: Mon, 22 Aug 2011 13:51:53 -0700
Subject: USB: option: add YUGA device id to driver

Signed-off-by: Gavin.zhu <gavin.kx@qq.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/option.c | 92 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index ab86e0d..78452ba 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -418,6 +418,56 @@ static void option_instat_callback(struct urb *urb);
 #define SAMSUNG_VENDOR_ID                       0x04e8
 #define SAMSUNG_PRODUCT_GT_B3730                0x6889
 
+/* YUGA products  www.yuga-info.com*/
+#define YUGA_VENDOR_ID				0x257A
+#define YUGA_PRODUCT_CEM600			0x1601
+#define YUGA_PRODUCT_CEM610			0x1602
+#define YUGA_PRODUCT_CEM500			0x1603
+#define YUGA_PRODUCT_CEM510			0x1604
+#define YUGA_PRODUCT_CEM800			0x1605
+#define YUGA_PRODUCT_CEM900			0x1606
+
+#define YUGA_PRODUCT_CEU818			0x1607
+#define YUGA_PRODUCT_CEU816			0x1608
+#define YUGA_PRODUCT_CEU828			0x1609
+#define YUGA_PRODUCT_CEU826			0x160A
+#define YUGA_PRODUCT_CEU518			0x160B
+#define YUGA_PRODUCT_CEU516			0x160C
+#define YUGA_PRODUCT_CEU528			0x160D
+#define YUGA_PRODUCT_CEU526			0x160F
+
+#define YUGA_PRODUCT_CWM600			0x2601
+#define YUGA_PRODUCT_CWM610			0x2602
+#define YUGA_PRODUCT_CWM500			0x2603
+#define YUGA_PRODUCT_CWM510			0x2604
+#define YUGA_PRODUCT_CWM800			0x2605
+#define YUGA_PRODUCT_CWM900			0x2606
+
+#define YUGA_PRODUCT_CWU718			0x2607
+#define YUGA_PRODUCT_CWU716			0x2608
+#define YUGA_PRODUCT_CWU728			0x2609
+#define YUGA_PRODUCT_CWU726			0x260A
+#define YUGA_PRODUCT_CWU518			0x260B
+#define YUGA_PRODUCT_CWU516			0x260C
+#define YUGA_PRODUCT_CWU528			0x260D
+#define YUGA_PRODUCT_CWU526			0x260F
+
+#define YUGA_PRODUCT_CLM600			0x2601
+#define YUGA_PRODUCT_CLM610			0x2602
+#define YUGA_PRODUCT_CLM500			0x2603
+#define YUGA_PRODUCT_CLM510			0x2604
+#define YUGA_PRODUCT_CLM800			0x2605
+#define YUGA_PRODUCT_CLM900			0x2606
+
+#define YUGA_PRODUCT_CLU718			0x2607
+#define YUGA_PRODUCT_CLU716			0x2608
+#define YUGA_PRODUCT_CLU728			0x2609
+#define YUGA_PRODUCT_CLU726			0x260A
+#define YUGA_PRODUCT_CLU518			0x260B
+#define YUGA_PRODUCT_CLU516			0x260C
+#define YUGA_PRODUCT_CLU528			0x260D
+#define YUGA_PRODUCT_CLU526			0x260F
+
 /* some devices interfaces need special handling due to a number of reasons */
 enum option_blacklist_reason {
 		OPTION_BLACKLIST_NONE = 0,
@@ -1009,6 +1059,48 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
 	{ USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
 	{ USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) },
+	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) },
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
-- 
cgit v1.1


From 6118514e8749105334f46ccec6faf9a439be6cf9 Mon Sep 17 00:00:00 2001
From: Andrew Bird <ajb@spheresystems.co.uk>
Date: Wed, 17 Aug 2011 00:20:03 +0100
Subject: USB option driver K3765/K4505 avoid CDC_DATA interface

Currently the Option driver avoids binding interface 1 on Huawei K3765
and K4505 broadband modems as it should be handled by the cdc_ether
driver instead. This patch ensures we don't bind the interface 2
on those devices as that is CDC_DATA.

Signed-off-by: Andrew Bird <ajb@spheresystems.co.uk>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/option.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 78452ba..fe22e90 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1235,7 +1235,8 @@ static int option_probe(struct usb_serial *serial,
 		(serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 ||
 			serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 ||
 			serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) &&
-		serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
+		(serial->interface->cur_altsetting->desc.bInterfaceNumber == 1 ||
+			serial->interface->cur_altsetting->desc.bInterfaceNumber == 2))
 		return -ENODEV;
 
 	/* Don't bind network interface on Samsung GT-B3730, it is handled by a separate module */
-- 
cgit v1.1


From 858a914324c7786f483661e3a89bc8fbe50f1b9d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <guenter.roeck@ericsson.com>
Date: Tue, 16 Aug 2011 08:15:26 -0700
Subject: hwmon: (ntc_thermistor) Simplify if sequence

Replace unnecessary if with else statement.

This fixes the following (false) compile warning reported with some combinations
of C compiler version and configuration.

drivers/hwmon/ntc_thermistor.c: In function 'ntc_show_temp':
drivers/hwmon/ntc_thermistor.c:225: warning: 'low' may be used uninitialized in
this function
drivers/hwmon/ntc_thermistor.c:225: note: 'low' was declared here
drivers/hwmon/ntc_thermistor.c:225: warning: 'high' may be used uninitialized in
this function
drivers/hwmon/ntc_thermistor.c:225: note: 'high' was declared here
drivers/hwmon/ntc_thermistor.c:294: warning: 'temp' may be used uninitialized in
this function

Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/hwmon/ntc_thermistor.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index d7926f4..eab1161 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data,
 	if (data->comp[mid].ohm <= ohm) {
 		*i_low = mid;
 		*i_high = mid - 1;
-	}
-	if (data->comp[mid].ohm > ohm) {
+	} else {
 		*i_low = mid + 1;
 		*i_high = mid;
 	}
-- 
cgit v1.1


From 8ea95e08711f504d83281e762ca65a849a89593e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 2 Aug 2011 10:08:34 +0200
Subject: pti: add missing CONFIG_PCI dependency

allmodconfig compile fails on s390 because of the new PTI driver:

drivers/misc/pti.c:407:3: error: implicit declaration of function 'pci_iounmap'
drivers/misc/pti.c:410:3: error: implicit declaration of function 'pci_release_region'

Add a 'depends on PCI' statement so it doesn't get compiled.

Cc: J Freyensee <james_p_freyensee@linux.intel.com>

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 0a4d86c..2d6423c 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -146,6 +146,7 @@ config PHANTOM
 
 config INTEL_MID_PTI
 	tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+	depends on PCI
 	default n
 	help
 	  The PTI (Parallel Trace Interface) driver directs
-- 
cgit v1.1


From 86ec67fd0a28c7f2b765e33aaf5b002d28c5f1fa Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Fri, 12 Aug 2011 14:58:31 -0700
Subject: base/devres.c: quiet sparse noise about context imbalance

devres_release_all and devres_release_group both aquire the lock
&dev->devres_lock but the release of that lock is done in release_nodes.
This results in sparse noise about context imbalance.

Add a lock annotation to release_nodes to quiet this noise.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/devres.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index cf7a0c7..65cd748 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -397,6 +397,7 @@ static int remove_nodes(struct device *dev,
 
 static int release_nodes(struct device *dev, struct list_head *first,
 			 struct list_head *end, unsigned long flags)
+	__releases(&dev->devres_lock)
 {
 	LIST_HEAD(todo);
 	int cnt;
-- 
cgit v1.1


From 5926cef26c72cd121266b000b8975e6373cbf2b3 Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:30 -0500
Subject: drivers:misc: ti-st: avoid a misleading dbg msg

Previously the private data of each protocol registered to use ST was
used to determine whether the protocol was registered to use shared
transport or otherwise.
However, now a flag is_registered is maintained to identify whether a
protocol intends to use ST.
Upon closing of the UART the error message relevant to this lack of
un-registration was misleading and this patch fixes that.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 54c91ff..c8e335d 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -717,7 +717,7 @@ static void st_tty_close(struct tty_struct *tty)
 	 */
 	spin_lock_irqsave(&st_gdata->lock, flags);
 	for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
-		if (st_gdata->list[i] != NULL)
+		if (st_gdata->is_registered[i] == true)
 			pr_err("%d not un-registered", i);
 		st_gdata->list[i] = NULL;
 	}
-- 
cgit v1.1


From 0d7c5f2572ccfa7bf83292b1506926663f2d164a Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:31 -0500
Subject: drivers:misc:ti-st: platform hooks for chip states

Certain platform specific or Host-WiLink Interface specific actions would be
required to be taken when the chip is being enabled and after the chip is
disabled such as configuration of the mux modes for the GPIO of host connected
to the nshutdown of the chip or relinquishing UART after the chip is disabled.

Similar actions can also be taken when the chip is in deep sleep or when the
chip is awake. Performance enhancements such as configuring the host to run
faster when chip is awake and slower when chip is asleep can also be made
here.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_kim.c  | 12 ++++++++++++
 drivers/misc/ti-st/st_ll.c   | 19 +++++++++++++++++++
 include/linux/ti_wilink_st.h | 27 ++++++++++++++++++++++++++-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 38fd2f0..6884dd1 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -434,11 +434,17 @@ long st_kim_start(void *kim_data)
 {
 	long err = 0;
 	long retry = POR_RETRY_COUNT;
+	struct ti_st_plat_data	*pdata;
 	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
 
 	pr_info(" %s", __func__);
+	pdata = kim_gdata->kim_pdev->dev.platform_data;
 
 	do {
+		/* platform specific enabling code here */
+		if (pdata->chip_enable)
+			pdata->chip_enable(kim_gdata);
+
 		/* Configure BT nShutdown to HIGH state */
 		gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
 		mdelay(5);	/* FIXME: a proper toggle */
@@ -489,6 +495,8 @@ long st_kim_stop(void *kim_data)
 {
 	long err = 0;
 	struct kim_data_s	*kim_gdata = (struct kim_data_s *)kim_data;
+	struct ti_st_plat_data	*pdata =
+		kim_gdata->kim_pdev->dev.platform_data;
 
 	INIT_COMPLETION(kim_gdata->ldisc_installed);
 
@@ -515,6 +523,10 @@ long st_kim_stop(void *kim_data)
 	gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH);
 	mdelay(1);
 	gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+
+	/* platform specific disable */
+	if (pdata->chip_disable)
+		pdata->chip_disable(kim_gdata);
 	return err;
 }
 
diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c
index 3f24951..1ff460a 100644
--- a/drivers/misc/ti-st/st_ll.c
+++ b/drivers/misc/ti-st/st_ll.c
@@ -22,6 +22,7 @@
 #define pr_fmt(fmt) "(stll) :" fmt
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/ti_wilink_st.h>
 
 /**********************************************************************/
@@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data,
 
 static void ll_device_want_to_sleep(struct st_data_s *st_data)
 {
+	struct kim_data_s	*kim_data;
+	struct ti_st_plat_data	*pdata;
+
 	pr_debug("%s", __func__);
 	/* sanity check */
 	if (st_data->ll_state != ST_LL_AWAKE)
@@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data)
 	send_ll_cmd(st_data, LL_SLEEP_ACK);
 	/* update state */
 	st_data->ll_state = ST_LL_ASLEEP;
+
+	/* communicate to platform about chip asleep */
+	kim_data = st_data->kim_data;
+	pdata = kim_data->kim_pdev->dev.platform_data;
+	if (pdata->chip_asleep)
+		pdata->chip_asleep(NULL);
 }
 
 static void ll_device_want_to_wakeup(struct st_data_s *st_data)
 {
+	struct kim_data_s	*kim_data;
+	struct ti_st_plat_data	*pdata;
+
 	/* diff actions in diff states */
 	switch (st_data->ll_state) {
 	case ST_LL_ASLEEP:
@@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data)
 	}
 	/* update state */
 	st_data->ll_state = ST_LL_AWAKE;
+
+	/* communicate to platform about chip wakeup */
+	kim_data = st_data->kim_data;
+	pdata = kim_data->kim_pdev->dev.platform_data;
+	if (pdata->chip_asleep)
+		pdata->chip_awake(NULL);
 }
 
 /**********************************************************************/
diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h
index b004e55..2ef4385 100644
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -410,7 +410,28 @@ struct gps_event_hdr {
 	u16 plen;
 } __attribute__ ((packed));
 
-/* platform data */
+/**
+ * struct ti_st_plat_data - platform data shared between ST driver and
+ *	platform specific board file which adds the ST device.
+ * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected.
+ * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1)
+ * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM
+ *	purposes.
+ * @baud_rate: The baud rate supported by the Host UART controller, this will
+ *	be shared across with the chip via a HCI VS command from User-Space Init
+ *	Mgr application.
+ * @suspend:
+ * @resume: legacy PM routines hooked to platform specific board file, so as
+ *	to take chip-host interface specific action.
+ * @chip_enable:
+ * @chip_disable: Platform/Interface specific mux mode setting, GPIO
+ *	configuring, Host side PM disabling etc.. can be done here.
+ * @chip_asleep:
+ * @chip_awake: Chip specific deep sleep states is communicated to Host
+ *	specific board-xx.c to take actions such as cut UART clocks when chip
+ *	asleep or run host faster when chip awake etc..
+ *
+ */
 struct ti_st_plat_data {
 	long nshutdown_gpio;
 	unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */
@@ -418,6 +439,10 @@ struct ti_st_plat_data {
 	unsigned long baud_rate;
 	int (*suspend)(struct platform_device *, pm_message_t);
 	int (*resume)(struct platform_device *);
+	int (*chip_enable) (struct kim_data_s *);
+	int (*chip_disable) (struct kim_data_s *);
+	int (*chip_asleep) (struct kim_data_s *);
+	int (*chip_awake) (struct kim_data_s *);
 };
 
 #endif /* TI_WILINK_ST_H */
-- 
cgit v1.1


From 74a4fcf19eed6550651f455db5741fd216b4f004 Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:32 -0500
Subject: drivers:misc: ti-st: reinit completion on ver read

After the version information has been read, the completion which assists in
wait_for_completion during the firmware send/wait sequence is being re-used
and hence this needs to be re-initialised for fool proof firmware download
retries.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_kim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 6884dd1..e5639ca 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -210,6 +210,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
 		pr_err(" waiting for ver info- timed out ");
 		return -ETIMEDOUT;
 	}
+	INIT_COMPLETION(kim_gdata->kim_rcvd);
 
 	version =
 		MAKEWORD(kim_gdata->resp_buffer[13],
-- 
cgit v1.1


From 78bb9697e2c4b62c426f1a2571c293a2e4463adf Mon Sep 17 00:00:00 2001
From: Vijay Badawadagi <bvijay@ti.com>
Date: Wed, 10 Aug 2011 10:18:33 -0500
Subject: drivers:misc: ti-st: fail-safe on wrong pkt type

Texas Instrument's shared transport driver interpret incoming data from the
UART based on the various protocol drivers registered to the driver such as
btwilink driver or FM or GPS driver which provide logical channel IDs.

In case of bad-behavior from chip such as HCI Event response for a GPS command
or a HCI Event (h/w error event) for a FM response & In case of bad-behavior
from UART driver such as dropping data bytes a fail-safe is required to avoid
kernel panic.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Vijay Badawadagi <bvijay@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index c8e335d..1f973ce 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -338,6 +338,12 @@ void st_int_recv(void *disc_data,
 			/* Unknow packet? */
 		default:
 			type = *ptr;
+			if (st_gdata->list[type] == NULL) {
+				pr_err("chip/interface misbehavior dropping"
+					" frame starting with 0x%02x", type);
+				goto done;
+
+			}
 			st_gdata->rx_skb = alloc_skb(
 					st_gdata->list[type]->max_frame_size,
 					GFP_ATOMIC);
@@ -354,6 +360,7 @@ void st_int_recv(void *disc_data,
 		ptr++;
 		count--;
 	}
+done:
 	spin_unlock_irqrestore(&st_gdata->lock, flags);
 	pr_debug("done %s", __func__);
 	return;
-- 
cgit v1.1


From 2f81a02ce0693863019dc3fcc532533af6dc0dcd Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:34 -0500
Subject: drivers:misc: ti-st: reinit completion before send

download firmware behaves differently at different times, when logs are
enabled and the system is loaded, the wait_for_completion is able to wait for
every send, However during other times the wait does not happen.

So, for reliability reinitializing the completion before every send, makes
sure the wait happens for every send.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_kim.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index e5639ca..1748a93 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -299,6 +299,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
 
 		switch (((struct bts_action *)ptr)->type) {
 		case ACTION_SEND_COMMAND:	/* action send */
+			pr_debug("S");
 			action_ptr = &(((struct bts_action *)ptr)->data[0]);
 			if (unlikely
 			    (((struct hci_command *)action_ptr)->opcode ==
@@ -336,6 +337,10 @@ static long download_firmware(struct kim_data_s *kim_gdata)
 				release_firmware(kim_gdata->fw_entry);
 				return -ETIMEDOUT;
 			}
+			/* reinit completion before sending for the
+			 * relevant wait
+			 */
+			INIT_COMPLETION(kim_gdata->kim_rcvd);
 
 			/*
 			 * Free space found in uart buffer, call st_int_write
@@ -362,6 +367,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
 			}
 			break;
 		case ACTION_WAIT_EVENT:  /* wait */
+			pr_debug("W");
 			if (!wait_for_completion_timeout
 					(&kim_gdata->kim_rcvd,
 					 msecs_to_jiffies(CMD_RESP_TIME))) {
-- 
cgit v1.1


From d0344ef670d686628f369e649c86f71c90ebe222 Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:35 -0500
Subject: drivers:misc: ti-st: wait for completion at fail

When the line discipline install fails for reasons such as missing user-space
UIM or broken communication between UIM and ST driver, then the ST
attempts/retries to request for ldisc installation again.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_kim.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index 1748a93..d8ca406 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -473,6 +473,12 @@ long st_kim_start(void *kim_data)
 			pr_info("ldisc_install = 0");
 			sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
 					NULL, "install");
+			/* the following wait is never going to be completed,
+			 * since the ldisc was never installed, hence serving
+			 * as a mdelay of LDISC_TIME msecs */
+			err = wait_for_completion_timeout
+				(&kim_gdata->ldisc_installed,
+				 msecs_to_jiffies(LDISC_TIME));
 			err = -ETIMEDOUT;
 			continue;
 		} else {
@@ -485,6 +491,13 @@ long st_kim_start(void *kim_data)
 				pr_info("ldisc_install = 0");
 				sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
 						NULL, "install");
+				/* this wait might be completed, though in the
+				 * tty_close() since the ldisc is already
+				 * installed */
+				err = wait_for_completion_timeout
+					(&kim_gdata->ldisc_installed,
+					 msecs_to_jiffies(LDISC_TIME));
+				err = -EINVAL;
 				continue;
 			} else {	/* on success don't retry */
 				break;
-- 
cgit v1.1


From 76ff0e64d42fac59fb756536342a3d3f3e4e8833 Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:36 -0500
Subject: drivers:misc: ti-st: free skb on firmware download

If during validation of the firmware download the data doesn't match what is
expected out of the chip, this calls for a firmware download failure and a
retry.
Free the SKB which collects response during such scenarios.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_kim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index d8ca406..3a35805 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -68,6 +68,7 @@ void validate_firmware_response(struct kim_data_s *kim_gdata)
 	if (unlikely(skb->data[5] != 0)) {
 		pr_err("no proper response during fw download");
 		pr_err("data6 %x", skb->data[5]);
+		kfree_skb(skb);
 		return;		/* keep waiting for the proper response */
 	}
 	/* becos of all the script being downloaded */
-- 
cgit v1.1


From 651d62a8b0378b911f083a1712d9d228894f46d8 Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Wed, 10 Aug 2011 10:18:37 -0500
Subject: drivers:misc: ti-st: fix unexpected UART close

If suppose the UIM were to die and hence UART were to close when the
Bluetooth/FM or GPS is turned on, prep the ST for a state where-in if
the UIM comes back up, Bluetooth/FM/GPS can be turned on.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/misc/ti-st/st_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 1f973ce..ba168a7 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -727,6 +727,7 @@ static void st_tty_close(struct tty_struct *tty)
 		if (st_gdata->is_registered[i] == true)
 			pr_err("%d not un-registered", i);
 		st_gdata->list[i] = NULL;
+		st_gdata->is_registered[i] = false;
 	}
 	st_gdata->protos_registered = 0;
 	spin_unlock_irqrestore(&st_gdata->lock, flags);
-- 
cgit v1.1


From 6c4b47d243112e98811ce0da7bbb32cc3857dd1a Mon Sep 17 00:00:00 2001
From: Tomoya MORINAGA <tomoya-linux@dsn.okisemi.com>
Date: Wed, 20 Jul 2011 20:17:49 +0900
Subject: pch_uart: Set PCIe bus number using probe parameter

Currently, PCIe bus number is set as fixed value "2".
However, PCIe bus number is not always "2".
This patch sets bus number using probe() parameter.

Signed-off-by: Tomoya MORINAGA <tomoya-linux@dsn.okisemi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/pch_uart.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 846dfcd..b46218d 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -598,7 +598,8 @@ static void pch_request_dma(struct uart_port *port)
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	dma_dev = pci_get_bus_and_slot(2, PCI_DEVFN(0xa, 0)); /* Get DMA's dev
+	dma_dev = pci_get_bus_and_slot(priv->pdev->bus->number,
+				       PCI_DEVFN(0xa, 0)); /* Get DMA's dev
 								information */
 	/* Set Tx DMA */
 	param = &priv->param_tx;
-- 
cgit v1.1


From 181d5762bd8eaa2881b7df27bad260bf4abda1bc Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Thu, 4 Aug 2011 03:13:10 -0500
Subject: drivers/serial/ucc_uart.c: Fix compiler warning

drivers/tty/serial/ucc_uart.c: In function 'qe2cpu_addr':
drivers/tty/serial/ucc_uart.c:238:2: warning: format '%x' expects type 'unsigned int', but argument 3 has type 'dma_addr_t'

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/ucc_uart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index c327218..9af9f08 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -235,7 +235,7 @@ static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port)
 		return qe_port->bd_virt + (addr - qe_port->bd_dma_addr);
 
 	/* something nasty happened */
-	printk(KERN_ERR "%s: addr=%x\n", __func__, addr);
+	printk(KERN_ERR "%s: addr=%llx\n", __func__, (u64)addr);
 	BUG();
 	return NULL;
 }
-- 
cgit v1.1


From ab8ba3a2d2cba6a658ef596cd5b2e0905b6c8a9f Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 16 Aug 2011 12:02:28 -0600
Subject: serial: 8250_pnp: add Intermec CV60 touchscreen device

It would have been nice if Intermec had supplied a PNP0501 _CID for the
COM3 device, but they didn't, so we have to recognize it explicitly.

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=40612
CC: Jeff Chua <jeff.chua.linux@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable <stable@kernel.org>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250_pnp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/8250_pnp.c b/drivers/tty/serial/8250_pnp.c
index fc301f6..a2f2365 100644
--- a/drivers/tty/serial/8250_pnp.c
+++ b/drivers/tty/serial/8250_pnp.c
@@ -109,6 +109,9 @@ static const struct pnp_device_id pnp_dev_table[] = {
 	/* IBM */
 	/* IBM Thinkpad 701 Internal Modem Voice */
 	{	"IBM0033",		0	},
+	/* Intermec */
+	/* Intermec CV60 touchscreen port */
+	{	"PNP4972",		0	},
 	/* Intertex */
 	/* Intertex 28k8 33k6 Voice EXT PnP */
 	{	"IXDC801",		0	},
-- 
cgit v1.1


From 0d0a3cc183c50956fe1d9e37cca520debea93ad5 Mon Sep 17 00:00:00 2001
From: "Voss, Nikolaus" <N.Voss@weinmann.de>
Date: Wed, 10 Aug 2011 14:02:29 +0200
Subject: atmel_serial: fix atmel_default_console_device

reflect new static uart platform ids introduced by patch
http://article.gmane.org/gmane.linux.kernel/1126105

Signed-off-by: Nikolaus Voss <n.voss@weinmann.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/atmel_serial.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index af9b781..b922f5d 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1609,9 +1609,11 @@ static struct console atmel_console = {
 static int __init atmel_console_init(void)
 {
 	if (atmel_default_console_device) {
-		add_preferred_console(ATMEL_DEVICENAME,
-				      atmel_default_console_device->id, NULL);
-		atmel_init_port(&atmel_ports[atmel_default_console_device->id],
+		struct atmel_uart_data *pdata =
+			atmel_default_console_device->dev.platform_data;
+
+		add_preferred_console(ATMEL_DEVICENAME, pdata->num, NULL);
+		atmel_init_port(&atmel_ports[pdata->num],
 				atmel_default_console_device);
 		register_console(&atmel_console);
 	}
-- 
cgit v1.1


From b6bede3b4cdfbd188557ab50fceec2e91d295edf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 14 Aug 2011 17:13:00 +0000
Subject: xfs: fix tracing builds inside the source tree

The code really requires the current source directory to be in the
header search path.  We already do this if building with an object
tree separate from the source, but it needs to be added manually
if building inside the source.  The cflags addition for it accidentally
got removed when collapsing the xfs directory structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ffce328..427a4e8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,6 +16,8 @@
 # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #
 
+ccflags-y += -I$(src)			# needed for trace events
+
 ccflags-$(CONFIG_XFS_DEBUG) += -g
 
 obj-$(CONFIG_XFS_FS)		+= xfs.o
-- 
cgit v1.1


From a2cc797d2d1a116b607de353de0ae1c2cab80b74 Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Mon, 22 Aug 2011 12:39:10 -0700
Subject: i915: do not setup intel_backlight twice

The commit "Not all systems expose a firmware or platform mechanism for
changing the backlight intensity on i915, so add native driver support"
adds calls to  intel_panel_setup_backlight() from intel_{lvds,dp}_init
so do not call it again from intel_setup_outputs().

BugLink: http://bugs.launchpad.net/bugs/831542

Signed-off-by: Kamal Mostafa <kamal@canonical.com>
ACKed-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_display.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ee1d701..5a1ae9f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7238,8 +7238,6 @@ static void intel_setup_outputs(struct drm_device *dev)
 			intel_encoder_clones(dev, encoder->clone_mask);
 	}
 
-	intel_panel_setup_backlight(dev);
-
 	/* disable all the possible outputs/crtcs before entering KMS mode */
 	drm_helper_disable_unused_functions(dev);
 }
-- 
cgit v1.1


From e21757a05730f03f18fbfc528a919e0205aa6a61 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Mon, 22 Aug 2011 16:13:00 -0600
Subject: OMAP3: clock: indicate that gpt12_fck and wdt1_fck are in the WKUP
 clockdomain

The oscillator that supplies GPT12_FCLK and WDT1_FCLK exists in the
WKUP powerdomain[1].  This resolves at least one boot-time warning:

omap_hwmod: gpt12_fck: missing clockdomain for gpt12_fck.

1. _OMAP34xx Multimedia High Security (HS) Device Silicon Revision 3.1.x
   Security Addendum Version K (SWPU119K)_  Figure 3-29.  August 2010.
---
 arch/arm/mach-omap2/clock3xxx_data.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c
index ffd55b1..b9b8446 100644
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3078,6 +3078,7 @@ static struct clk gpt12_fck = {
 	.name		= "gpt12_fck",
 	.ops		= &clkops_null,
 	.parent		= &secure_32k_fck,
+	.clkdm_name	= "wkup_clkdm",
 	.recalc		= &followparent_recalc,
 };
 
@@ -3085,6 +3086,7 @@ static struct clk wdt1_fck = {
 	.name		= "wdt1_fck",
 	.ops		= &clkops_null,
 	.parent		= &secure_32k_fck,
+	.clkdm_name	= "wkup_clkdm",
 	.recalc		= &followparent_recalc,
 };
 
-- 
cgit v1.1


From 81a081fff7f3c144a0da9ee726906e533f66dd89 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Mon, 22 Aug 2011 09:22:41 -0500
Subject: sound/soc/fsl/fsl_dma.c: add missing of_node_put

of_parse_phandle increments the reference count of np, so this should be
decremented before trying the next possibility.

Since we don't actually use np, we can decrement the reference count
immediately.

Reported-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Timur Tabi <timur@freescale.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/fsl/fsl_dma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c
index 732208c..cb50598 100644
--- a/sound/soc/fsl/fsl_dma.c
+++ b/sound/soc/fsl/fsl_dma.c
@@ -879,10 +879,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np)
 		 * assume that device_node pointers are a valid comparison.
 		 */
 		np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0);
+		of_node_put(np);
 		if (np == dma_channel_np)
 			return ssi_np;
 
 		np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0);
+		of_node_put(np);
 		if (np == dma_channel_np)
 			return ssi_np;
 	}
-- 
cgit v1.1


From 57cf9d4512c86a4a1b58857ca22b18bffbfd6812 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Sat, 20 Aug 2011 11:03:44 +0800
Subject: ASoC: soc-core: use GFP_KERNEL flag for kmalloc in snd_soc_cnew

GFP_ATOMIC is not needed here, use GFP_KERNEL instead.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/soc-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 83ad8ca..b085d8e 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1913,7 +1913,7 @@ struct snd_kcontrol *snd_soc_cnew(const struct snd_kcontrol_new *_template,
 
 	if (prefix) {
 		name_len = strlen(long_name) + strlen(prefix) + 2;
-		name = kmalloc(name_len, GFP_ATOMIC);
+		name = kmalloc(name_len, GFP_KERNEL);
 		if (!name)
 			return NULL;
 
-- 
cgit v1.1


From 96101bd0bf7974686f6875c065a7a9a83cd2107a Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 20 Aug 2011 08:12:38 +0200
Subject: sound/soc/kirkwood/kirkwood-i2s.c: add missing kfree

Adjust the goto to jump to the error handling code that includes kfree.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier x;
expression E1!=0,E2,E3,E4;
statement S;
iterator I;
@@

(
if (...) { ... when != kfree(x)
               when != x = E3
               when != E3 = x
*  return ...;
 }
... when != x = E2
    when != I(...,x,...) S
if (...) { ... when != x = E4
 kfree(x); ... return ...; }
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/kirkwood/kirkwood-i2s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c
index a33fc51..8f16cd3 100644
--- a/sound/soc/kirkwood/kirkwood-i2s.c
+++ b/sound/soc/kirkwood/kirkwood-i2s.c
@@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev)
 	if (!priv->mem) {
 		dev_err(&pdev->dev, "request_mem_region failed\n");
 		err = -EBUSY;
-		goto error;
+		goto error_alloc;
 	}
 
 	priv->io = ioremap(priv->mem->start, SZ_16K);
-- 
cgit v1.1


From 5006b313283c56cfda498513b7091692bcd74433 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 20 Aug 2011 08:12:39 +0200
Subject: sound/soc/ep93xx/ep93xx-i2s.c: add missing kfree

Introduce a new label that includes kfree and jump to that one.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier x;
expression E1!=0,E2,E3,E4;
statement S;
iterator I;
@@

(
if (...) { ... when != kfree(x)
               when != x = E3
               when != E3 = x
*  return ...;
 }
... when != x = E2
    when != I(...,x,...) S
if (...) { ... when != x = E4
 kfree(x); ... return ...; }
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Alexander Sverdlin <subaparts@yandex.ru>
Reviewed-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/ep93xx/ep93xx-i2s.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c
index 56efa0c..099614e 100644
--- a/sound/soc/ep93xx/ep93xx-i2s.c
+++ b/sound/soc/ep93xx/ep93xx-i2s.c
@@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		err = -ENODEV;
-		goto fail;
+		goto fail_free_info;
 	}
 
 	info->mem = request_mem_region(res->start, resource_size(res),
 				       pdev->name);
 	if (!info->mem) {
 		err = -EBUSY;
-		goto fail;
+		goto fail_free_info;
 	}
 
 	info->regs = ioremap(info->mem->start, resource_size(info->mem));
@@ -435,6 +435,7 @@ fail_unmap_mem:
 	iounmap(info->regs);
 fail_release_mem:
 	release_mem_region(info->mem->start, resource_size(info->mem));
+fail_free_info:
 	kfree(info);
 fail:
 	return err;
-- 
cgit v1.1


From 178b279b645a14ca8ea01e4ea818c88681a31b07 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 20 Aug 2011 09:02:00 +0200
Subject: sound/soc/fsl/p1022_ds.c: add missing of_node_put

dma_channel_np has been accessed at this point, so decrease its reference
count before leaving the function.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier x;
expression E1!=0,E2,E3,E4;
statement S;
iterator I;
@@

(
if (...) { ... when != of_node_put(x)
               when != x = E3
               when != E3 = x
*  return ...;
 }
... when != x = E2
    when != I(...,x,...) S
if (...) { ... when != x = E4
 of_node_put(x); ... return ...; }
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/fsl/p1022_ds.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c
index 8fa4d5f..fcb862e 100644
--- a/sound/soc/fsl/p1022_ds.c
+++ b/sound/soc/fsl/p1022_ds.c
@@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np,
 	 * dai->platform name should already point to an allocated buffer.
 	 */
 	ret = of_address_to_resource(dma_channel_np, 0, &res);
-	if (ret)
+	if (ret) {
+		of_node_put(dma_channel_np);
 		return ret;
+	}
 	snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s",
 		 (unsigned long long) res.start, dma_channel_np->name);
 
-- 
cgit v1.1


From c09f5ca7bdc9a82c5f721bc28c46d65452240cfa Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 20 Aug 2011 09:02:01 +0200
Subject: sound/soc/fsl/mpc8610_hpcd.c: add missing of_node_put

The first change is to add an of_node_put, since codec_np has previously
been allocated.  The rest of the patch reorganizes the error handling code
so the only code executed is that which is needed.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier x;
expression E1!=0,E2,E3,E4;
statement S;
iterator I;
@@

(
if (...) { ... when != of_node_put(x)
               when != x = E3
               when != E3 = x
*  return ...;
 }
... when != x = E2
    when != I(...,x,...) S
if (...) { ... when != x = E4
 of_node_put(x); ... return ...; }
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Timur Tabi <timur@freescale.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/fsl/mpc8610_hpcd.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c
index a192979..358f0ba 100644
--- a/sound/soc/fsl/mpc8610_hpcd.c
+++ b/sound/soc/fsl/mpc8610_hpcd.c
@@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
 	}
 
 	machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL);
-	if (!machine_data)
-		return -ENOMEM;
+	if (!machine_data) {
+		ret = -ENOMEM;
+		goto error_alloc;
+	}
 
 	machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev);
 	machine_data->dai[0].ops = &mpc8610_hpcd_ops;
@@ -494,7 +496,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
 	ret = platform_device_add(sound_device);
 	if (ret) {
 		dev_err(&pdev->dev, "platform device add failed\n");
-		goto error;
+		goto error_sound;
 	}
 	dev_set_drvdata(&pdev->dev, sound_device);
 
@@ -502,14 +504,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev)
 
 	return 0;
 
+error_sound:
+	platform_device_unregister(sound_device);
 error:
-	of_node_put(codec_np);
-
-	if (sound_device)
-		platform_device_unregister(sound_device);
-
 	kfree(machine_data);
-
+error_alloc:
+	of_node_put(codec_np);
 	return ret;
 }
 
-- 
cgit v1.1


From 0278ccd9d53e07c4e699432b2fed9de6c56f506c Mon Sep 17 00:00:00 2001
From: Chris Boot <bootc@bootc.net>
Date: Mon, 22 Aug 2011 21:38:38 +0100
Subject: firewire: sbp2: fix panic after rmmod with slow targets

If firewire-sbp2 starts a login to a target that doesn't complete ORBs
in a timely manner (and has to retry the login), and the module is
removed before the operation times out, you end up with a null-pointer
dereference and a kernel panic.

[SR:  This happens because sbp2_target_get/put() do not maintain
module references.  scsi_device_get/put() do, but at occasions like
Chris describes one, nobody holds a reference to an SBP-2 sdev.]

This patch cancels pending work for each unit in sbp2_remove(), which
hopefully means there are no extra references around that prevent us
from unloading. This fixes my crash.

Signed-off-by: Chris Boot <bootc@bootc.net>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/sbp2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 41841a3..17cef86 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1198,6 +1198,10 @@ static int sbp2_remove(struct device *dev)
 {
 	struct fw_unit *unit = fw_unit(dev);
 	struct sbp2_target *tgt = dev_get_drvdata(&unit->device);
+	struct sbp2_logical_unit *lu;
+
+	list_for_each_entry(lu, &tgt->lu_list, link)
+		cancel_delayed_work_sync(&lu->work);
 
 	sbp2_target_put(tgt);
 	return 0;
-- 
cgit v1.1


From 11f3a6bdc2528d1ce2af50202dbf7138fdee1b34 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 22 Aug 2011 06:05:59 +0000
Subject: bridge: fix a possible net_device leak

Jan Beulich reported a possible net_device leak in bridge code after
commit bb900b27a2f4 (bridge: allow creating bridge devices with netlink)

Reported-by: Jan Beulich <JBeulich@novell.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 2cdf007..e738154 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
 int br_add_bridge(struct net *net, const char *name)
 {
 	struct net_device *dev;
+	int res;
 
 	dev = alloc_netdev(sizeof(struct net_bridge), name,
 			   br_dev_setup);
@@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name)
 
 	dev_net_set(dev, net);
 
-	return register_netdev(dev);
+	res = register_netdev(dev);
+	if (res)
+		free_netdev(dev);
+	return res;
 }
 
 int br_del_bridge(struct net *net, const char *name)
-- 
cgit v1.1


From f5e4282586dc0c9dab8c7d32e6c43aa07f68586b Mon Sep 17 00:00:00 2001
From: Jeremiah Matthey <sprg86@gmail.com>
Date: Tue, 23 Aug 2011 09:44:30 +0200
Subject: HID: usbhid: Add support for SiGma Micro chip

Patch to add SiGma Micro-based keyboards (1c4f:0002) to hid-quirks.

These keyboards dont seem to allow the records to be initialized, and hence a
timeout occurs when the usbhid driver attempts to initialize them. The patch
just adds the signature for these keyboards to the hid-quirks list with the
setting HID_QUIRK_NO_INIT_REPORTS. This removes the 5-10 second wait for the
timeout to occur.

Signed-off-by: Jeremiah Matthey <sprg86@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 61c8809..7d27d2b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -579,6 +579,9 @@
 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE	0x0001
 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE	0x0600
 
+#define USB_VENDOR_ID_SIGMA_MICRO	0x1c4f
+#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD	0x0002
+
 #define USB_VENDOR_ID_SKYCABLE			0x1223
 #define	USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER	0x3F07
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 621959d..4bdb5d4 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -89,6 +89,7 @@ static const struct hid_blacklist {
 
 	{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT },
+	{ USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS },
 	{ 0, 0 }
 };
 
-- 
cgit v1.1


From 7c4c3960dff109bc5db4c35da481c212dadb5eb5 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Mon, 22 Aug 2011 21:17:57 +0000
Subject: drm/ttm: fix ttm_bo_add_ttm(user) failure path

ttm_tt_destroy kfrees passed object, so we need to nullify
a reference to it.

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: stable@kernel.org
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 56619f6..384116a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 
 		ret = ttm_tt_set_user(bo->ttm, current,
 				      bo->buffer_start, bo->num_pages);
-		if (unlikely(ret != 0))
+		if (unlikely(ret != 0)) {
 			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
 		break;
 	default:
 		printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
-- 
cgit v1.1


From eac2095398668f989a3dd8d00be1b87850d78c01 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 22 Aug 2011 03:15:04 +0000
Subject: drm/ttm: unbind ttm before destroying node in accel move cleanup

Nouveau makes the assumption that if a TTM is bound there will be a mm_node
around for it and the backwards ordering here resulted in a use-after-free
on some eviction paths.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 77dbf40..ae3c6f5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		ttm_bo_free_old_node(bo);
 		if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 		    (bo->ttm != NULL)) {
 			ttm_tt_unbind(bo->ttm);
 			ttm_tt_destroy(bo->ttm);
 			bo->ttm = NULL;
 		}
+		ttm_bo_free_old_node(bo);
 	} else {
 		/**
 		 * This should help pipeline ordinary buffer moves.
-- 
cgit v1.1


From 8d3bb23609d4ae22803a15d232289fc09a7b61c4 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 22 Aug 2011 03:15:05 +0000
Subject: drm/ttm: ensure ttm for new node is bound before calling
 move_notify()

This was true for new TTM_PL_SYSTEM and new TTM_PL_TT cases, but wasn't
the case on TTM_PL_SYSTEM<->TTM_PL_TT moves, which causes trouble on some
paths as nouveau's move_notify() hook requires that the dma addresses be
valid at this point.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 384116a..a4d38d8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -392,10 +392,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 	 * Create and bind a ttm if required.
 	 */
 
-	if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
-		ret = ttm_bo_add_ttm(bo, false);
-		if (ret)
-			goto out_err;
+	if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+		if (bo->ttm == NULL) {
+			ret = ttm_bo_add_ttm(bo, false);
+			if (ret)
+				goto out_err;
+		}
 
 		ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
 		if (ret)
-- 
cgit v1.1


From 3989ef6cfb80825af2f7933415797f052817ac3e Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Wed, 17 Aug 2011 11:43:20 +0200
Subject: HID: wiimote: Simplify synchronization

The new locking scheme in HID core allows us to remove a bit of synchronization.
Since the HID layer acts synchronously we simply register input core last and
there are no synchonization issues anymore.
Also register sysfs files after that to simplify the code.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wiimote.c | 78 ++++++++++++++++-------------------------------
 1 file changed, 27 insertions(+), 51 deletions(-)

diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c
index a594383..8a68bf5 100644
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -10,7 +10,6 @@
  * any later version.
  */
 
-#include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/input.h>
@@ -33,7 +32,6 @@ struct wiimote_state {
 };
 
 struct wiimote_data {
-	atomic_t ready;
 	struct hid_device *hdev;
 	struct input_dev *input;
 
@@ -200,9 +198,6 @@ static ssize_t wiifs_led_show_##num(struct device *dev,			\
 	unsigned long flags;						\
 	int state;							\
 									\
-	if (!atomic_read(&wdata->ready))				\
-		return -EBUSY;						\
-									\
 	spin_lock_irqsave(&wdata->state.lock, flags);			\
 	state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num);	\
 	spin_unlock_irqrestore(&wdata->state.lock, flags);		\
@@ -217,9 +212,6 @@ static ssize_t wiifs_led_set_##num(struct device *dev,			\
 	unsigned long flags;						\
 	__u8 state;							\
 									\
-	if (!atomic_read(&wdata->ready))				\
-		return -EBUSY;						\
-									\
 	spin_lock_irqsave(&wdata->state.lock, flags);			\
 									\
 	state = wdata->state.flags;					\
@@ -244,13 +236,6 @@ wiifs_led_show_set(4);
 static int wiimote_input_event(struct input_dev *dev, unsigned int type,
 						unsigned int code, int value)
 {
-	struct wiimote_data *wdata = input_get_drvdata(dev);
-
-	if (!atomic_read(&wdata->ready))
-		return -EBUSY;
-	/* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-	smp_rmb();
-
 	return 0;
 }
 
@@ -300,11 +285,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
 	int i;
 	unsigned long flags;
 
-	if (!atomic_read(&wdata->ready))
-		return -EBUSY;
-	/* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-	smp_rmb();
-
 	if (size < 1)
 		return -EINVAL;
 
@@ -362,6 +342,15 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
 static void wiimote_destroy(struct wiimote_data *wdata)
 {
+	device_remove_file(&wdata->hdev->dev, &dev_attr_led1);
+	device_remove_file(&wdata->hdev->dev, &dev_attr_led2);
+	device_remove_file(&wdata->hdev->dev, &dev_attr_led3);
+	device_remove_file(&wdata->hdev->dev, &dev_attr_led4);
+
+	input_unregister_device(wdata->input);
+	cancel_work_sync(&wdata->worker);
+	hid_hw_stop(wdata->hdev);
+
 	kfree(wdata);
 }
 
@@ -377,19 +366,6 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 		return -ENOMEM;
 	}
 
-	ret = device_create_file(&hdev->dev, &dev_attr_led1);
-	if (ret)
-		goto err;
-	ret = device_create_file(&hdev->dev, &dev_attr_led2);
-	if (ret)
-		goto err;
-	ret = device_create_file(&hdev->dev, &dev_attr_led3);
-	if (ret)
-		goto err;
-	ret = device_create_file(&hdev->dev, &dev_attr_led4);
-	if (ret)
-		goto err;
-
 	ret = hid_parse(hdev);
 	if (ret) {
 		hid_err(hdev, "HID parse failed\n");
@@ -408,9 +384,19 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 		goto err_stop;
 	}
 
-	/* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */
-	smp_wmb();
-	atomic_set(&wdata->ready, 1);
+	ret = device_create_file(&hdev->dev, &dev_attr_led1);
+	if (ret)
+		goto err_free;
+	ret = device_create_file(&hdev->dev, &dev_attr_led2);
+	if (ret)
+		goto err_free;
+	ret = device_create_file(&hdev->dev, &dev_attr_led3);
+	if (ret)
+		goto err_free;
+	ret = device_create_file(&hdev->dev, &dev_attr_led4);
+	if (ret)
+		goto err_free;
+
 	hid_info(hdev, "New device registered\n");
 
 	/* by default set led1 after device initialization */
@@ -420,15 +406,15 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 
 	return 0;
 
+err_free:
+	wiimote_destroy(wdata);
+	return ret;
+
 err_stop:
 	hid_hw_stop(hdev);
 err:
 	input_free_device(wdata->input);
-	device_remove_file(&hdev->dev, &dev_attr_led1);
-	device_remove_file(&hdev->dev, &dev_attr_led2);
-	device_remove_file(&hdev->dev, &dev_attr_led3);
-	device_remove_file(&hdev->dev, &dev_attr_led4);
-	wiimote_destroy(wdata);
+	kfree(wdata);
 	return ret;
 }
 
@@ -437,16 +423,6 @@ static void wiimote_hid_remove(struct hid_device *hdev)
 	struct wiimote_data *wdata = hid_get_drvdata(hdev);
 
 	hid_info(hdev, "Device removed\n");
-
-	device_remove_file(&hdev->dev, &dev_attr_led1);
-	device_remove_file(&hdev->dev, &dev_attr_led2);
-	device_remove_file(&hdev->dev, &dev_attr_led3);
-	device_remove_file(&hdev->dev, &dev_attr_led4);
-
-	hid_hw_stop(hdev);
-	input_unregister_device(wdata->input);
-
-	cancel_work_sync(&wdata->worker);
 	wiimote_destroy(wdata);
 }
 
-- 
cgit v1.1


From 26af17484a737aaa991a7ce578cb15809a582fbc Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Wed, 17 Aug 2011 11:43:21 +0200
Subject: HID: wiimote: Correctly call HID open/close callbacks

Even though the bluetooth hid backend does not react on open/close callbacks, we
should call them to be consistent with other hid drivers.

Also the new input open/close handlers will be used in future to prepare the
wiimote device for IR/extension input.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wiimote.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c
index 8a68bf5..d49f67c 100644
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -239,6 +239,20 @@ static int wiimote_input_event(struct input_dev *dev, unsigned int type,
 	return 0;
 }
 
+static int wiimote_input_open(struct input_dev *dev)
+{
+	struct wiimote_data *wdata = input_get_drvdata(dev);
+
+	return hid_hw_open(wdata->hdev);
+}
+
+static void wiimote_input_close(struct input_dev *dev)
+{
+	struct wiimote_data *wdata = input_get_drvdata(dev);
+
+	hid_hw_close(wdata->hdev);
+}
+
 static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
 {
 	input_report_key(wdata->input, wiiproto_keymap[WIIPROTO_KEY_LEFT],
@@ -321,6 +335,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
 	input_set_drvdata(wdata->input, wdata);
 	wdata->input->event = wiimote_input_event;
+	wdata->input->open = wiimote_input_open;
+	wdata->input->close = wiimote_input_close;
 	wdata->input->dev.parent = &wdata->hdev->dev;
 	wdata->input->id.bustype = wdata->hdev->bus;
 	wdata->input->id.vendor = wdata->hdev->vendor;
-- 
cgit v1.1


From 23a5a4a39eddbe515a832767a371cc54e82cc25e Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Wed, 17 Aug 2011 11:43:22 +0200
Subject: HID: wiimote: Register led class devices

This registers 4 led devices to allow controlling the wiimote leds via standard
LED sysfs API. It removes the four sysfs attributes so we don't have two APIs
for one device.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig       |   1 +
 drivers/hid/hid-wiimote.c | 165 +++++++++++++++++++++++++++++-----------------
 2 files changed, 107 insertions(+), 59 deletions(-)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 306b15f..1130a89 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY
 config HID_WIIMOTE
 	tristate "Nintendo Wii Remote support"
 	depends on BT_HIDP
+	depends on LEDS_CLASS
 	---help---
 	Support for the Nintendo Wii Remote bluetooth device.
 
diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c
index d49f67c..29edd55 100644
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -13,6 +13,7 @@
 #include <linux/device.h>
 #include <linux/hid.h>
 #include <linux/input.h>
+#include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include "hid-ids.h"
@@ -34,6 +35,7 @@ struct wiimote_state {
 struct wiimote_data {
 	struct hid_device *hdev;
 	struct input_dev *input;
+	struct led_classdev *leds[4];
 
 	spinlock_t qlock;
 	__u8 head;
@@ -51,6 +53,9 @@ struct wiimote_data {
 #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \
 					WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4)
 
+/* return flag for led \num */
+#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1))
+
 enum wiiproto_reqs {
 	WIIPROTO_REQ_LED = 0x11,
 	WIIPROTO_REQ_DRM_K = 0x30,
@@ -85,9 +90,6 @@ static __u16 wiiproto_keymap[] = {
 	BTN_MODE,	/* WIIPROTO_KEY_HOME */
 };
 
-#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \
-									dev))
-
 static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer,
 								size_t count)
 {
@@ -190,48 +192,53 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds)
 	wiimote_queue(wdata, cmd, sizeof(cmd));
 }
 
-#define wiifs_led_show_set(num)						\
-static ssize_t wiifs_led_show_##num(struct device *dev,			\
-			struct device_attribute *attr, char *buf)	\
-{									\
-	struct wiimote_data *wdata = dev_to_wii(dev);			\
-	unsigned long flags;						\
-	int state;							\
-									\
-	spin_lock_irqsave(&wdata->state.lock, flags);			\
-	state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num);	\
-	spin_unlock_irqrestore(&wdata->state.lock, flags);		\
-									\
-	return sprintf(buf, "%d\n", state);				\
-}									\
-static ssize_t wiifs_led_set_##num(struct device *dev,			\
-	struct device_attribute *attr, const char *buf, size_t count)	\
-{									\
-	struct wiimote_data *wdata = dev_to_wii(dev);			\
-	int tmp = simple_strtoul(buf, NULL, 10);			\
-	unsigned long flags;						\
-	__u8 state;							\
-									\
-	spin_lock_irqsave(&wdata->state.lock, flags);			\
-									\
-	state = wdata->state.flags;					\
-									\
-	if (tmp)							\
-		wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\
-	else								\
-		wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\
-									\
-	spin_unlock_irqrestore(&wdata->state.lock, flags);		\
-									\
-	return count;							\
-}									\
-static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num,	\
-						wiifs_led_set_##num)
-
-wiifs_led_show_set(1);
-wiifs_led_show_set(2);
-wiifs_led_show_set(3);
-wiifs_led_show_set(4);
+static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev)
+{
+	struct wiimote_data *wdata;
+	struct device *dev = led_dev->dev->parent;
+	int i;
+	unsigned long flags;
+	bool value = false;
+
+	wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+	for (i = 0; i < 4; ++i) {
+		if (wdata->leds[i] == led_dev) {
+			spin_lock_irqsave(&wdata->state.lock, flags);
+			value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1);
+			spin_unlock_irqrestore(&wdata->state.lock, flags);
+			break;
+		}
+	}
+
+	return value ? LED_FULL : LED_OFF;
+}
+
+static void wiimote_leds_set(struct led_classdev *led_dev,
+						enum led_brightness value)
+{
+	struct wiimote_data *wdata;
+	struct device *dev = led_dev->dev->parent;
+	int i;
+	unsigned long flags;
+	__u8 state, flag;
+
+	wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+	for (i = 0; i < 4; ++i) {
+		if (wdata->leds[i] == led_dev) {
+			flag = WIIPROTO_FLAG_LED(i + 1);
+			spin_lock_irqsave(&wdata->state.lock, flags);
+			state = wdata->state.flags;
+			if (value == LED_OFF)
+				wiiproto_req_leds(wdata, state & ~flag);
+			else
+				wiiproto_req_leds(wdata, state | flag);
+			spin_unlock_irqrestore(&wdata->state.lock, flags);
+			break;
+		}
+	}
+}
 
 static int wiimote_input_event(struct input_dev *dev, unsigned int type,
 						unsigned int code, int value)
@@ -315,6 +322,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
 	return 0;
 }
 
+static void wiimote_leds_destroy(struct wiimote_data *wdata)
+{
+	int i;
+	struct led_classdev *led;
+
+	for (i = 0; i < 4; ++i) {
+		if (wdata->leds[i]) {
+			led = wdata->leds[i];
+			wdata->leds[i] = NULL;
+			led_classdev_unregister(led);
+			kfree(led);
+		}
+	}
+}
+
+static int wiimote_leds_create(struct wiimote_data *wdata)
+{
+	int i, ret;
+	struct device *dev = &wdata->hdev->dev;
+	size_t namesz = strlen(dev_name(dev)) + 9;
+	struct led_classdev *led;
+	char *name;
+
+	for (i = 0; i < 4; ++i) {
+		led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL);
+		if (!led) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		name = (void*)&led[1];
+		snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i);
+		led->name = name;
+		led->brightness = 0;
+		led->max_brightness = 1;
+		led->brightness_get = wiimote_leds_get;
+		led->brightness_set = wiimote_leds_set;
+
+		ret = led_classdev_register(dev, led);
+		if (ret) {
+			kfree(led);
+			goto err;
+		}
+		wdata->leds[i] = led;
+	}
+
+	return 0;
+
+err:
+	wiimote_leds_destroy(wdata);
+	return ret;
+}
+
 static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 {
 	struct wiimote_data *wdata;
@@ -358,10 +417,7 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
 
 static void wiimote_destroy(struct wiimote_data *wdata)
 {
-	device_remove_file(&wdata->hdev->dev, &dev_attr_led1);
-	device_remove_file(&wdata->hdev->dev, &dev_attr_led2);
-	device_remove_file(&wdata->hdev->dev, &dev_attr_led3);
-	device_remove_file(&wdata->hdev->dev, &dev_attr_led4);
+	wiimote_leds_destroy(wdata);
 
 	input_unregister_device(wdata->input);
 	cancel_work_sync(&wdata->worker);
@@ -400,16 +456,7 @@ static int wiimote_hid_probe(struct hid_device *hdev,
 		goto err_stop;
 	}
 
-	ret = device_create_file(&hdev->dev, &dev_attr_led1);
-	if (ret)
-		goto err_free;
-	ret = device_create_file(&hdev->dev, &dev_attr_led2);
-	if (ret)
-		goto err_free;
-	ret = device_create_file(&hdev->dev, &dev_attr_led3);
-	if (ret)
-		goto err_free;
-	ret = device_create_file(&hdev->dev, &dev_attr_led4);
+	ret = wiimote_leds_create(wdata);
 	if (ret)
 		goto err_free;
 
-- 
cgit v1.1


From 2cb5e4bc530471e9596cd32390bf70c8ada13d9a Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Wed, 17 Aug 2011 11:43:23 +0200
Subject: HID: wiimote: Add drm request

The wiimote reports data in several data reporting modes (DRM). The DRM
request makes the wiimote send data in the requested drm.

The DRM mode can be set explicitely or can be chosen by the driver. To let
the driver choose the DRM mode, pass WIIPROTO_REQ_NULL placeholder to it. This
is no valid request and is replaced with an appropriate DRM.

Currently, the driver always sets the basic DRM_K mode, but this will be
extended when further peripherals like accelerometer and IR are supported.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wiimote.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c
index 29edd55..84c9eb9 100644
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -57,7 +57,9 @@ struct wiimote_data {
 #define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1))
 
 enum wiiproto_reqs {
+	WIIPROTO_REQ_NULL = 0x0,
 	WIIPROTO_REQ_LED = 0x11,
+	WIIPROTO_REQ_DRM = 0x12,
 	WIIPROTO_REQ_DRM_K = 0x30,
 };
 
@@ -192,6 +194,30 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds)
 	wiimote_queue(wdata, cmd, sizeof(cmd));
 }
 
+/*
+ * Check what peripherals of the wiimote are currently
+ * active and select a proper DRM that supports all of
+ * the requested data inputs.
+ */
+static __u8 select_drm(struct wiimote_data *wdata)
+{
+	return WIIPROTO_REQ_DRM_K;
+}
+
+static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm)
+{
+	__u8 cmd[3];
+
+	if (drm == WIIPROTO_REQ_NULL)
+		drm = select_drm(wdata);
+
+	cmd[0] = WIIPROTO_REQ_DRM;
+	cmd[1] = 0;
+	cmd[2] = drm;
+
+	wiimote_queue(wdata, cmd, sizeof(cmd));
+}
+
 static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev)
 {
 	struct wiimote_data *wdata;
-- 
cgit v1.1


From c87019e41d61f3f972bd2f6a2380fc9896e4ab74 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Wed, 17 Aug 2011 11:43:24 +0200
Subject: HID: wiimote: Add status and return request handlers

The wiimote resets the current drm when an extension is plugged in.
Fortunately, it also sends a status report in this situation so we just
reset the drm on every status report to keep the drm consistent.

Also handle return reports from the wiimote which indicate success and
failure of requests that we've sent.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-wiimote.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c
index 84c9eb9..85a02e5 100644
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -60,6 +60,8 @@ enum wiiproto_reqs {
 	WIIPROTO_REQ_NULL = 0x0,
 	WIIPROTO_REQ_LED = 0x11,
 	WIIPROTO_REQ_DRM = 0x12,
+	WIIPROTO_REQ_STATUS = 0x20,
+	WIIPROTO_REQ_RETURN = 0x22,
 	WIIPROTO_REQ_DRM_K = 0x30,
 };
 
@@ -313,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
 	input_sync(wdata->input);
 }
 
+static void handler_status(struct wiimote_data *wdata, const __u8 *payload)
+{
+	handler_keys(wdata, payload);
+
+	/* on status reports the drm is reset so we need to resend the drm */
+	wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL);
+}
+
+static void handler_return(struct wiimote_data *wdata, const __u8 *payload)
+{
+	__u8 err = payload[3];
+	__u8 cmd = payload[2];
+
+	handler_keys(wdata, payload);
+
+	if (err)
+		hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err,
+									cmd);
+}
+
 struct wiiproto_handler {
 	__u8 id;
 	size_t size;
@@ -320,6 +342,8 @@ struct wiiproto_handler {
 };
 
 static struct wiiproto_handler handlers[] = {
+	{ .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status },
+	{ .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return },
 	{ .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys },
 	{ .id = 0 }
 };
-- 
cgit v1.1


From 675c1aa3c4a7290e537e854d0af7cdf9692bd396 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 23 Aug 2011 12:36:28 +0200
Subject: ALSA: hda - Fix output-path initialization for Realtek auto-parser

When the headphone or speaker output has no own DAC, initialize the path
using the primary DAC.  Otherwise the path won't be set properly and
can result in the silence.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index fcb11af..0fefc10 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3083,16 +3083,22 @@ static void alc_auto_init_multi_out(struct hda_codec *codec)
 static void alc_auto_init_extra_out(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
-	hda_nid_t pin;
+	hda_nid_t pin, dac;
 
 	pin = spec->autocfg.hp_pins[0];
-	if (pin)
-		alc_auto_set_output_and_unmute(codec, pin, PIN_HP,
-						  spec->multiout.hp_nid);
+	if (pin) {
+		dac = spec->multiout.hp_nid;
+		if (!dac)
+			dac = spec->multiout.dac_nids[0];
+		alc_auto_set_output_and_unmute(codec, pin, PIN_HP, dac);
+	}
 	pin = spec->autocfg.speaker_pins[0];
-	if (pin)
-		alc_auto_set_output_and_unmute(codec, pin, PIN_OUT,
-					spec->multiout.extra_out_nid[0]);
+	if (pin) {
+		dac = spec->multiout.extra_out_nid[0];
+		if (!dac)
+			dac = spec->multiout.dac_nids[0];
+		alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, dac);
+	}
 }
 
 /*
-- 
cgit v1.1


From 3c715a98844f72cec0fa3ef2b68232b8f751468b Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 23 Aug 2011 12:41:09 +0200
Subject: ALSA: hda - Update jack-sense info even when no automute is set

The internal states, jack_present and line_jack_present should be
updated upon unsolicited events even if no automute is set.
Otherwise the wrong state is referred when the automute behavior is
changed by the mixer control.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0fefc10..7cabd73 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -565,11 +565,11 @@ static void alc_hp_automute(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 
-	if (!spec->automute)
-		return;
 	spec->jack_present =
 		detect_jacks(codec, ARRAY_SIZE(spec->autocfg.hp_pins),
 			     spec->autocfg.hp_pins);
+	if (!spec->automute)
+		return;
 	update_speakers(codec);
 }
 
@@ -578,11 +578,11 @@ static void alc_line_automute(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
 
-	if (!spec->automute || !spec->detect_line)
-		return;
 	spec->line_jack_present =
 		detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins),
 			     spec->autocfg.line_out_pins);
+	if (!spec->automute || !spec->detect_line)
+		return;
 	update_speakers(codec);
 }
 
-- 
cgit v1.1


From f2b60717e692550bf753a5d64a5b69ea430fc832 Mon Sep 17 00:00:00 2001
From: Thomas Reim <reimth@googlemail.com>
Date: Wed, 17 Aug 2011 09:03:32 +0000
Subject: drm/radeon: Extended DDC Probing for Toshiba L300D Radeon Mobility
 X1100 HDMI-A Connector

Toshiba Satellite L300D with ATI Mobility Radeon X1100 sends data
   to i2c bus for a HDMI connector that is not implemented/existent
   on the notebook's board.

   Fix by applying extented DDC probing for this connector.

   Requires [PATCH] drm/radeon: Extended DDC Probing for Connectors
   with Improperly Wired DDC Lines

   Tested for kernel 2.6.38 on Toshiba Satellite L300D notebook

   BugLink: http://bugs.launchpad.net/bugs/826677

Signed-off-by: Thomas Reim <reimth@gmail.com>
Acked-by: Chris Routh <routhy@gmail.com>
Cc: <stable@kernel.org>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 7f65940..4f0c1ec 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -466,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev,
 		    (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
 			return true;
 	}
+	/* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100
+	 * (RS690M) sends data to i2c bus for a HDMI connector that
+	 * is not implemented */
+	if ((dev->pdev->device == 0x791f) &&
+	    (dev->pdev->subsystem_vendor == 0x1179) &&
+	    (dev->pdev->subsystem_device == 0xff68)) {
+		if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+		    (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
+			return true;
+	}
 
 	/* Default: no EDID header probe required for DDC probing */
 	return false;
-- 
cgit v1.1


From 1f015f5fdc4003f3f2a7c66efdb1acf7a2d230bf Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 23 Aug 2011 14:57:08 +0200
Subject: ALSA: hda - Fix double-headphone/speaker paths for Cxt auto-parser

When multiple headphones or speakers are assigned but no individual
DACs are available, the driver should take the first HP/SPK DAC instead
of another primary output.  The patch adds a bit-flag to dac field of
struct pin_dac_pair indicating that it's a slave DAC.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 502fc94..4c462c3 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3348,6 +3348,8 @@ static hda_nid_t get_unassigned_dac(struct hda_codec *codec, hda_nid_t pin,
 
 #define MAX_AUTO_DACS	5
 
+#define DAC_SLAVE_FLAG	0x8000	/* filled dac is a slave */
+
 /* fill analog DAC list from the widget tree */
 static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 {
@@ -3379,6 +3381,8 @@ static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins,
 		filled[nums].pin = pins[i];
 		filled[nums].type = type;
 		filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest);
+		if (!filled[nums].dac && i > 0 && filled[0].dac)
+			filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG;
 		nums++;
 	}
 	return nums;
@@ -3407,7 +3411,7 @@ static void cx_auto_parse_output(struct hda_codec *codec)
 	/* fill multiout struct */
 	for (i = 0; i < nums; i++) {
 		hda_nid_t dac = spec->dac_info[i].dac;
-		if (!dac)
+		if (!dac || (dac & DAC_SLAVE_FLAG))
 			continue;
 		switch (spec->dac_info[i].type) {
 		case AUTO_PIN_LINE_OUT:
@@ -4035,6 +4039,8 @@ static void cx_auto_init_output(struct hda_codec *codec)
 		nid = spec->dac_info[i].dac;
 		if (!nid)
 			nid = spec->multiout.dac_nids[0];
+		else if (nid & DAC_SLAVE_FLAG)
+			nid &= ~DAC_SLAVE_FLAG;
 		select_connection(codec, spec->dac_info[i].pin, nid);
 	}
 	if (spec->auto_mute) {
@@ -4191,7 +4197,8 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
 	for (i = 0; i < spec->dac_info_filled; i++) {
 		const char *label;
 		int idx, type;
-		if (!spec->dac_info[i].dac)
+		hda_nid_t dac = spec->dac_info[i].dac;
+		if (!dac || (dac & DAC_SLAVE_FLAG))
 			continue;
 		type = spec->dac_info[i].type;
 		if (type == AUTO_PIN_LINE_OUT)
@@ -4211,7 +4218,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
 			idx = num_spk++;
 			break;
 		}
-		err = try_add_pb_volume(codec, spec->dac_info[i].dac,
+		err = try_add_pb_volume(codec, dac,
 					spec->dac_info[i].pin,
 					label, idx);
 		if (err < 0)
-- 
cgit v1.1


From 8c4074cd2254606aeb788d518ccc27c9f97129e1 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Mon, 1 Aug 2011 21:20:10 +0800
Subject: tty: Add "spi:" prefix for spi modalias

Since commit e0626e38 (spi: prefix modalias with "spi:"),
the spi modalias is prefixed with "spi:".

This patch adds "spi:" prefix and removes "-spi" suffix in the modalias.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/max3107-aava.c | 2 +-
 drivers/tty/serial/max3107.c      | 2 +-
 drivers/tty/serial/mrst_max3110.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/max3107-aava.c b/drivers/tty/serial/max3107-aava.c
index a1fe304..d73aadd 100644
--- a/drivers/tty/serial/max3107-aava.c
+++ b/drivers/tty/serial/max3107-aava.c
@@ -340,5 +340,5 @@ module_exit(max3107_exit);
 
 MODULE_DESCRIPTION("MAX3107 driver");
 MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("aava-max3107-spi");
+MODULE_ALIAS("spi:aava-max3107");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c
index 750b4f6..a816460 100644
--- a/drivers/tty/serial/max3107.c
+++ b/drivers/tty/serial/max3107.c
@@ -1209,5 +1209,5 @@ module_exit(max3107_exit);
 
 MODULE_DESCRIPTION("MAX3107 driver");
 MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("max3107-spi");
+MODULE_ALIAS("spi:max3107");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c
index a764bf9..23bc743 100644
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -917,4 +917,4 @@ module_init(serial_m3110_init);
 module_exit(serial_m3110_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("max3110-uart");
+MODULE_ALIAS("spi:max3110-uart");
-- 
cgit v1.1


From 44178176ecc55ad370b837dd2c4b4b8bed1e3823 Mon Sep 17 00:00:00 2001
From: Eric Smith <eric@brouhaha.com>
Date: Mon, 11 Jul 2011 22:53:13 -0600
Subject: 8250_pci: add support for Rosewill RC-305 4x serial port card

This patch adds support for the Rosewill RC-305 four-port PCI serial
card, and probably any other four-port serial cards based on the
Moschip MCS9865 chip, assuming that the EEPROM on the card was
programmed in accordance with Table 6 of the MCS9865 EEPROM
Application Note version 0.3 dated 16-May-2008, available from the
Moschip web site (registration required).

This patch is based on an earlier patch [1] for the SYBA 6x serial
port card by Ira W. Snyder.

[1]: http://www.gossamer-threads.com/lists/linux/kernel/1162435

Signed-off-by: Eric Smith <eric@brouhaha.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250_pci.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c
index 6b887d9..f652a7b 100644
--- a/drivers/tty/serial/8250_pci.c
+++ b/drivers/tty/serial/8250_pci.c
@@ -4021,7 +4021,7 @@ static struct pci_device_id serial_pci_tbl[] = {
 		0, 0, pbn_NETMOS9900_2s_115200 },
 
 	/*
-	 * Best Connectivity PCI Multi I/O cards
+	 * Best Connectivity and Rosewill PCI Multi I/O cards
 	 */
 
 	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
@@ -4029,6 +4029,10 @@ static struct pci_device_id serial_pci_tbl[] = {
 		0, 0, pbn_b0_1_115200 },
 
 	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+		0xA000, 0x3002,
+		0, 0, pbn_b0_bt_2_115200 },
+
+	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
 		0xA000, 0x3004,
 		0, 0, pbn_b0_bt_4_115200 },
 	/* Intel CE4100 */
-- 
cgit v1.1


From dacacc3e794c4c5bab05d97afc19e372e1877943 Mon Sep 17 00:00:00 2001
From: Tomoya MORINAGA <tomoya-linux@dsn.okisemi.com>
Date: Tue, 12 Jul 2011 16:08:49 +0900
Subject: serial/8250_pci: delete duplicate data definition

Data definiton "VendorID=10DB, device_id=800D" is already defined.
This patch deletes the duplicate definition.

Signed-off-by: Tomoya MORINAGA <tomoya-linux@dsn.okisemi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250_pci.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c
index f652a7b..3abeca2 100644
--- a/drivers/tty/serial/8250_pci.c
+++ b/drivers/tty/serial/8250_pci.c
@@ -1599,11 +1599,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.device         = 0x800D,
 		.init		= pci_eg20t_init,
 	},
-	{
-		.vendor         = 0x10DB,
-		.device         = 0x800D,
-		.init		= pci_eg20t_init,
-	},
 	/*
 	 * Cronyx Omega PCI (PLX-chip based)
 	 */
-- 
cgit v1.1


From dbb3b1ca5609d1f3848cd387d06cc60aaacf7f98 Mon Sep 17 00:00:00 2001
From: Al Cooper <alcooperx@gmail.com>
Date: Mon, 25 Jul 2011 16:19:52 -0400
Subject: 8250: Fix race condition in serial8250_backup_timeout().

This is to fix an issue where output will suddenly become very slow.
The problem occurs on 8250 UARTS with the hardware bug UART_BUG_THRE.

BACKGROUND
For normal UARTs (without UART_BUG_THRE): When the serial core layer
gets new transmit data and the transmitter is idle, it buffers the
data and calls the 8250s' serial8250_start_tx() routine which will
simply enable the TX interrupt in the IER register and return. This
should immediately fire a THRE interrupt and begin transmitting the
data.
For buggy UARTs (with UART_BUG_THRE): merely enabling the TX interrupt
in IER does not necessarily generate a new THRE interrupt.
Therefore, a background timer periodically checks to see if there is
pending data, and starts transmission if that is the case.

The bug happens on SMP systems when the system has nothing to transmit,
the transmit interrupt is disabled and the following sequence occurs:
- CPU0: The background timer routine serial8250_backup_timeout()
  starts and saves the state of the interrupt enable register (IER)
  and then disables all interrupts in IER. NOTE: The transmit interrupt
  (TI) bit is saved as disabled.
- CPU1: The serial core gets data to transmit, grabs the port lock and
  calls serial8250_start_tx() which enables the TI in IER.
- CPU0: serial8250_backup_timeout() waits for the port lock.
- CPU1: finishes (with TI enabled) and releases the port lock.
- CPU0: serial8250_backup_timeout() calls the interrupt routine which
  will transmit the next fifo's worth of data and then restores the
  IER from the previously saved value (TI disabled).
At this point, as long as the serial core has more transmit data
buffered, it will not call serial8250_start_tx() again and the
background timer routine will slowly transmit the data.

The fix is to have serial8250_start_tx() get the port lock before
it saves the IER state and release it after restoring IER. This will
prevent serial8250_start_tx() from running in parallel.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/8250.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c
index f2dfec8..7f50999 100644
--- a/drivers/tty/serial/8250.c
+++ b/drivers/tty/serial/8250.c
@@ -1819,6 +1819,8 @@ static void serial8250_backup_timeout(unsigned long data)
 	unsigned int iir, ier = 0, lsr;
 	unsigned long flags;
 
+	spin_lock_irqsave(&up->port.lock, flags);
+
 	/*
 	 * Must disable interrupts or else we risk racing with the interrupt
 	 * based handler.
@@ -1836,10 +1838,8 @@ static void serial8250_backup_timeout(unsigned long data)
 	 * the "Diva" UART used on the management processor on many HP
 	 * ia64 and parisc boxes.
 	 */
-	spin_lock_irqsave(&up->port.lock, flags);
 	lsr = serial_in(up, UART_LSR);
 	up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
-	spin_unlock_irqrestore(&up->port.lock, flags);
 	if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) &&
 	    (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) &&
 	    (lsr & UART_LSR_THRE)) {
@@ -1848,11 +1848,13 @@ static void serial8250_backup_timeout(unsigned long data)
 	}
 
 	if (!(iir & UART_IIR_NO_INT))
-		serial8250_handle_port(up);
+		transmit_chars(up);
 
 	if (is_real_interrupt(up->port.irq))
 		serial_out(up, UART_IER, ier);
 
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
 	/* Standard timer interval plus 0.2s to keep the port running */
 	mod_timer(&up->timer,
 		jiffies + uart_poll_timeout(&up->port) + HZ / 5);
-- 
cgit v1.1


From 24d406a6bf736f7aebdc8fa0f0ec86e0890c6d24 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 10 Aug 2011 14:59:28 +0200
Subject: TTY: pty, fix pty counting

tty_operations->remove is normally called like:
queue_release_one_tty
 ->tty_shutdown
   ->tty_driver_remove_tty
     ->tty_operations->remove

However tty_shutdown() is called from queue_release_one_tty() only if
tty_operations->shutdown is NULL. But for pty, it is not.
pty_unix98_shutdown() is used there as ->shutdown.

So tty_operations->remove of pty (i.e. pty_unix98_remove()) is never
called. This results in invalid pty_count. I.e. what can be seen in
/proc/sys/kernel/pty/nr.

I see this was already reported at:
  https://lkml.org/lkml/2009/11/5/370
But it was not fixed since then.

This patch is kind of a hackish way. The problem lies in ->install. We
allocate there another tty (so-called tty->link). So ->install is
called once, but ->remove twice, for both tty and tty->link. The fix
here is to count both tty and tty->link and divide the count by 2 for
user.

And to have ->remove called, let's make tty_driver_remove_tty() global
and call that from pty_unix98_shutdown() (tty_operations->shutdown).

While at it, let's document that when ->shutdown is defined,
tty_shutdown() is not called.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/pty.c          | 17 +++++++++++++++--
 drivers/tty/tty_io.c       |  3 +--
 include/linux/tty.h        |  2 ++
 include/linux/tty_driver.h |  3 +++
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 98b6e3b..e809e9d 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { }
 int pty_limit = NR_UNIX98_PTY_DEFAULT;
 static int pty_limit_min;
 static int pty_limit_max = NR_UNIX98_PTY_MAX;
+static int tty_count;
 static int pty_count;
 
+static inline void pty_inc_count(void)
+{
+	pty_count = (++tty_count) / 2;
+}
+
+static inline void pty_dec_count(void)
+{
+	pty_count = (--tty_count) / 2;
+}
+
 static struct cdev ptmx_cdev;
 
 static struct ctl_table pty_table[] = {
@@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver,
 
 static void pty_unix98_shutdown(struct tty_struct *tty)
 {
+	tty_driver_remove_tty(tty->driver, tty);
 	/* We have our own method as we don't use the tty index */
 	kfree(tty->termios);
 }
@@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
 	 */
 	tty_driver_kref_get(driver);
 	tty->count++;
-	pty_count++;
+	pty_inc_count(); /* tty */
+	pty_inc_count(); /* tty->link */
 	return 0;
 err_free_mem:
 	deinitialize_tty_struct(o_tty);
@@ -602,7 +615,7 @@ err_free_tty:
 
 static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 {
-	pty_count--;
+	pty_dec_count();
 }
 
 static const struct tty_operations ptm_unix98_ops = {
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 150e4f7..4f1fc81 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver,
  *
  *	Locking: tty_mutex for now
  */
-static void tty_driver_remove_tty(struct tty_driver *driver,
-						struct tty_struct *tty)
+void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty)
 {
 	if (driver->ops->remove)
 		driver->ops->remove(driver, tty);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 44bc0c5..5f2ede8 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty);
 extern void tty_throttle(struct tty_struct *tty);
 extern void tty_unthrottle(struct tty_struct *tty);
 extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
+extern void tty_driver_remove_tty(struct tty_driver *driver,
+				  struct tty_struct *tty);
 extern void tty_shutdown(struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 9deeac8..ecdaeb9 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -47,6 +47,9 @@
  *
  * 	This routine is called synchronously when a particular tty device
  *	is closed for the last time freeing up the resources.
+ *	Note that tty_shutdown() is not called if ops->shutdown is defined.
+ *	This means one is responsible to take care of calling ops->remove (e.g.
+ *	via tty_driver_remove_tty) and releasing tty->termios.
  *
  *
  * void (*cleanup)(struct tty_struct * tty);
-- 
cgit v1.1


From 0055197e984e5fbe6f48f37fc50dd30254915493 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 17 Aug 2011 13:48:15 +0200
Subject: TTY: serial, document ignoring of uart->ops->startup error

When a user has SYS_ADMIN capabilities and uart->ops->startup returns
an error in uart_startup, we silently drop the error. We then return 0
and behave as if it didn't fail. (Not quite, since we set TTY_IO_ERROR
bit and leave ASYNC_INITIALIZED bit cleared.)

This all is to allow setserial to work with improperly configured or
unconfigured ports. User can thus set port properties and reconfigure
properly.

This patch only documents this behavior.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Russel King <linux@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/serial_core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index db7912c..a3efbea 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -200,6 +200,11 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, int in
 		clear_bit(TTY_IO_ERROR, &tty->flags);
 	}
 
+	/*
+	 * This is to allow setserial on this port. People may want to set
+	 * port/irq/type and then reconfigure the port properly if it failed
+	 * now.
+	 */
 	if (retval && capable(CAP_SYS_ADMIN))
 		retval = 0;
 
-- 
cgit v1.1


From 69dd3d8e29e294caaf63eb5e8a72d250279f9e5f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 23 Aug 2011 10:36:51 -0700
Subject: Revert "irq: Always set IRQF_ONESHOT if no primary handler is
 specified"

This reverts commit f3637a5f2e2eb391ff5757bc83fb5de8f9726464.

It turns out that this breaks several drivers, one example being OMAP
boards which use the on-board OMAP UARTs and the omap-serial driver that
will not boot to userspace after the commit.

Paul Walmsley reports that enabling CONFIG_DEBUG_SHIRQ reveals 'IRQ
handler type mismatch' errors:

  IRQ handler type mismatch for IRQ 74
  current handler: serial idle
  ...

and the reason is that setting IRQF_ONESHOT will now result in those
interrupt handlers having different IRQF flags, and thus being
unsharable.  So the commit log in the reverted commit:

                            "Since it is required for those users and
    there is no difference for others it makes sense to add this flag
    unconditionally."

is simply not true: there may not be any difference from a "actions at
irq time", but there is a *big* difference wrt this flag testing irq
management (see __setup_irq() in kernel/irq/manage.c).

One solution may be to stop verifying IRQF_ONESHOT in __setup_irq(), but
right now the safe course of action is to revert the change.  Let's
revisit this in a later merge window.

Reported-by: Paul Walmsley <paul@pwsan.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Requested-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2e94258..9b956fa 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 		if (!thread_fn)
 			return -EINVAL;
 		handler = irq_default_primary_handler;
-		irqflags |= IRQF_ONESHOT;
 	}
 
 	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
-- 
cgit v1.1


From b280a97d1caf6fe1d38b51ebb31219391f5ad1a0 Mon Sep 17 00:00:00 2001
From: Nick Pelly <npelly@google.com>
Date: Fri, 15 Jul 2011 13:53:08 -0700
Subject: omap-serial: Allow IXON and IXOFF to be disabled.

Fixes logic bug that software flow control cannot be disabled, because
serial_omap_configure_xonxoff() is not called if both IXON and IXOFF bits
are cleared.

Signed-off-by: Nick Pelly <npelly@google.com>
Acked-by: Govindraj.R <govindraj.raja@ti.com>
Tested-by: Govindraj.R <govindraj.raja@ti.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/omap-serial.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index c37df8d..5e713d3 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -806,8 +806,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	serial_omap_set_mctrl(&up->port, up->port.mctrl);
 	/* Software Flow Control Configuration */
-	if (termios->c_iflag & (IXON | IXOFF))
-		serial_omap_configure_xonxoff(up, termios);
+	serial_omap_configure_xonxoff(up, termios);
 
 	spin_unlock_irqrestore(&up->port.lock, flags);
 	dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->pdev->id);
-- 
cgit v1.1


From 4b723a471050a8b80f7fa86e76f01f4c711b3443 Mon Sep 17 00:00:00 2001
From: srinidhi kasagar <srinidhi.kasagar@stericsson.com>
Date: Tue, 9 Aug 2011 20:17:22 +0200
Subject: i2c-nomadik: Do not use _interruptible_ variant call

If there is a signal pending and wait_for_completion_interruptible_timeout
exited because of the -ERESTARTSYS error we are unable to send any more
i2c messages.

So, deprecate this _interruptible_ variant call.

Signed-off-by: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-nomadik.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index 0c731ca..f9b8854 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -417,12 +417,12 @@ static int read_i2c(struct nmk_i2c_dev *dev)
 	writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
 			dev->virtbase + I2C_IMSCR);
 
-	timeout = wait_for_completion_interruptible_timeout(
+	timeout = wait_for_completion_timeout(
 		&dev->xfer_complete, dev->adap.timeout);
 
 	if (timeout < 0) {
 		dev_err(&dev->pdev->dev,
-			"wait_for_completion_interruptible_timeout"
+			"wait_for_completion_timeout"
 			"returned %d waiting for event\n", timeout);
 		status = timeout;
 	}
@@ -504,12 +504,12 @@ static int write_i2c(struct nmk_i2c_dev *dev)
 	writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
 			dev->virtbase + I2C_IMSCR);
 
-	timeout = wait_for_completion_interruptible_timeout(
+	timeout = wait_for_completion_timeout(
 		&dev->xfer_complete, dev->adap.timeout);
 
 	if (timeout < 0) {
 		dev_err(&dev->pdev->dev,
-			"wait_for_completion_interruptible_timeout"
+			"wait_for_completion_timeout"
 			"returned %d waiting for event\n", timeout);
 		status = timeout;
 	}
-- 
cgit v1.1


From 584b408d37af4e0b38ad5b60f236381bcdf396bc Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@ti.com>
Date: Thu, 4 Aug 2011 07:53:02 -0700
Subject: Revert "i2c-omap: fix static suspend vs. runtime suspend"

This reverts commit adf6e07922255937c8bfeea777d19502b4c9a2be.

Remove system PM methods which can race with runtime PM methods.

Also, as of v3.1, the PM domain level code for OMAP handles device
power state transistions automatically for devices, so drivers no
longer need to specifically call the bus/pm_domain methods themselves.

Signed-off-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-omap.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 1a766cf..2dfb631 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_SUSPEND
-static int omap_i2c_suspend(struct device *dev)
-{
-	if (!pm_runtime_suspended(dev))
-		if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
-			dev->bus->pm->runtime_suspend(dev);
-
-	return 0;
-}
-
-static int omap_i2c_resume(struct device *dev)
-{
-	if (!pm_runtime_suspended(dev))
-		if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
-			dev->bus->pm->runtime_resume(dev);
-
-	return 0;
-}
-
-static struct dev_pm_ops omap_i2c_pm_ops = {
-	.suspend = omap_i2c_suspend,
-	.resume = omap_i2c_resume,
-};
-#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
-#else
-#define OMAP_I2C_PM_OPS NULL
-#endif
-
 static struct platform_driver omap_i2c_driver = {
 	.probe		= omap_i2c_probe,
 	.remove		= omap_i2c_remove,
 	.driver		= {
 		.name	= "omap_i2c",
 		.owner	= THIS_MODULE,
-		.pm	= OMAP_I2C_PM_OPS,
 	},
 };
 
-- 
cgit v1.1


From 9818a4775a3ab18b84a689537088b3d72a742130 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend@LB-BUN-53.bun.broadcom.com>
Date: Mon, 8 Aug 2011 15:57:45 +0200
Subject: staging: brcm80211: fix compile error on non-x86 archs since 3.0
 kernel

Since the arrival of kernel version 3.0 in the staging tree it
turns out compile error occurs for sparc64, powerpc, and arm
platforms. This patch fixes that issue.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Henry Ptasinski <henryp@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/brcm80211/brcmsmac/types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/brcm80211/brcmsmac/types.h b/drivers/staging/brcm80211/brcmsmac/types.h
index bbf2189..823b5e4 100644
--- a/drivers/staging/brcm80211/brcmsmac/types.h
+++ b/drivers/staging/brcm80211/brcmsmac/types.h
@@ -18,6 +18,7 @@
 #define _BRCM_TYPES_H_
 
 #include <linux/types.h>
+#include <linux/io.h>
 
 /* Bus types */
 #define	SI_BUS			0	/* SOC Interconnect */
-- 
cgit v1.1


From 20cc7995fe66ce6417678bb0db6b3d4955fb1ff6 Mon Sep 17 00:00:00 2001
From: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Date: Mon, 8 Aug 2011 15:59:03 +0200
Subject: staging: brcm80211: SPARC build error fix

Due to missing memset function declaration.

Reviewed-by: Roland Vossen <rvossen@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/brcm80211/brcmsmac/otp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/brcm80211/brcmsmac/otp.c b/drivers/staging/brcm80211/brcmsmac/otp.c
index 34253cf..4a70180 100644
--- a/drivers/staging/brcm80211/brcmsmac/otp.c
+++ b/drivers/staging/brcm80211/brcmsmac/otp.c
@@ -16,6 +16,7 @@
 
 #include <linux/io.h>
 #include <linux/errno.h>
+#include <linux/string.h>
 
 #include <brcm_hw_ids.h>
 #include <chipcommon.h>
-- 
cgit v1.1


From ba8f318471f66d5d5b79da68112525cf432b2b18 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Thu, 18 Aug 2011 09:37:02 +0100
Subject: m68k: fix __page_to_pfn for a const struct page argument

Fixes fallout due to the removal of the cast in commit aa462abe8aaf
("mm: fix __page_to_pfn for a const struct page argument")

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/include/asm/page_mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 31d5570..89f2014 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 	pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);		\
 })
 #define page_to_pfn(_page) ({						\
-	struct page *__p = (_page);					\
+	const struct page *__p = (_page);				\
 	struct pglist_data *pgdat;					\
 	pgdat = &pg_data_map[page_to_nid(__p)];				\
 	((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;		\
-- 
cgit v1.1


From c5f5c4db393837ebb2ae47bf061d70e498f48f8c Mon Sep 17 00:00:00 2001
From: Seth Jennings <sjenning@linux.vnet.ibm.com>
Date: Wed, 10 Aug 2011 12:56:49 -0500
Subject: staging: zcache: fix crash on high memory swap

zcache_put_page() was modified to pass page_address(page) instead of the
actual page structure. In combination with the function signature changes
to tmem_put() and zcache_pampd_create(), zcache_pampd_create() tries to
(re)derive the page structure from the virtual address.  However, if the
original page is a high memory page (or any unmapped page), this
virt_to_page() fails because the page_address() in zcache_put_page()
returned NULL.

This patch changes zcache_put_page() and zcache_get_page() to pass
the page structure instead of the page's virtual address, which
may or may not exist.

Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/zcache/zcache-main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 855a5bb..a3f5162 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1158,7 +1158,7 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
 	size_t clen;
 	int ret;
 	unsigned long count;
-	struct page *page = virt_to_page(data);
+	struct page *page = (struct page *)(data);
 	struct zcache_client *cli = pool->client;
 	uint16_t client_id = get_client_id_from_client(cli);
 	unsigned long zv_mean_zsize;
@@ -1227,7 +1227,7 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
 	int ret = 0;
 
 	BUG_ON(is_ephemeral(pool));
-	zv_decompress(virt_to_page(data), pampd);
+	zv_decompress((struct page *)(data), pampd);
 	return ret;
 }
 
@@ -1539,7 +1539,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
 		goto out;
 	if (!zcache_freeze && zcache_do_preload(pool) == 0) {
 		/* preload does preempt_disable on success */
-		ret = tmem_put(pool, oidp, index, page_address(page),
+		ret = tmem_put(pool, oidp, index, (char *)(page),
 				PAGE_SIZE, 0, is_ephemeral(pool));
 		if (ret < 0) {
 			if (is_ephemeral(pool))
@@ -1572,7 +1572,7 @@ static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
 	pool = zcache_get_pool_by_id(cli_id, pool_id);
 	if (likely(pool != NULL)) {
 		if (atomic_read(&pool->obj_count) > 0)
-			ret = tmem_get(pool, oidp, index, page_address(page),
+			ret = tmem_get(pool, oidp, index, (char *)(page),
 					&size, 0, is_ephemeral(pool));
 		zcache_put_pool(pool);
 	}
-- 
cgit v1.1


From 1dcab0875b113a148b6601d87b4e0e3444440339 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 9 Aug 2011 21:01:33 +0300
Subject: Staging: zcache: signedness bug in tmem_get()

"ret" needs to be signed for the error handling to work properly.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/zcache/tmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c
index 975e34b..1ca66ea 100644
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@@ -604,7 +604,7 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
 	struct tmem_obj *obj;
 	void *pampd;
 	bool ephemeral = is_ephemeral(pool);
-	uint32_t ret = -1;
+	int ret = -1;
 	struct tmem_hashbucket *hb;
 	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
 	bool lock_held = false;
-- 
cgit v1.1


From 048316be72893455f69ad728fa94c26e2e582ba2 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 16 Aug 2011 10:10:56 -0700
Subject: staging: octeon-ethernet: Add missing #includes.

I looks like something used to implicitly include linux/interrupt.h,
and no longer does.  Fix the resulting build error by explicitly
including it.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/octeon/ethernet-rgmii.c | 1 +
 drivers/staging/octeon/ethernet-spi.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c
index 9c0d293..c3d73f8 100644
--- a/drivers/staging/octeon/ethernet-rgmii.c
+++ b/drivers/staging/octeon/ethernet-rgmii.c
@@ -26,6 +26,7 @@
 **********************************************************************/
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/interrupt.h>
 #include <linux/phy.h>
 #include <linux/ratelimit.h>
 #include <net/dst.h>
diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c
index 9708254..d0e2d51 100644
--- a/drivers/staging/octeon/ethernet-spi.c
+++ b/drivers/staging/octeon/ethernet-spi.c
@@ -26,6 +26,7 @@
 **********************************************************************/
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/interrupt.h>
 #include <net/dst.h>
 
 #include <asm/octeon/octeon.h>
-- 
cgit v1.1


From 7ca0758cdb7c241cb4e0490a8d95f0eb5b861daf Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 22 Aug 2011 13:27:06 -0700
Subject: x86-32, vdso: On system call restart after SYSENTER, use int $0x80

When we enter a 32-bit system call via SYSENTER or SYSCALL, we shuffle
the arguments to match the int $0x80 calling convention.  This was
probably a design mistake, but it's what it is now.  This causes
errors if the system call as to be restarted.

For SYSENTER, we have to invoke the instruction from the vdso as the
return address is hardcoded.  Accordingly, we can simply replace the
jump in the vdso with an int $0x80 instruction and use the slower
entry point for a post-restart.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/CA%2B55aFztZ=r5wa0x26KJQxvZOaQq8s2v3u50wCyJcA-Sc4g8gQ@mail.gmail.com
Cc: <stable@kernel.org>
---
 arch/x86/vdso/vdso32/sysenter.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index e2800af..e354bce 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
 	.space 7,0x90
 
 	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-	jmp .Lenter_kernel
+	int $0x80
 	/* 16: System call normal return point is here! */
 VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
 	pop %ebp
-- 
cgit v1.1


From b4cb0d4da745bc1d806b9b4a27cc4ce1f7adbf99 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 23 Aug 2011 21:04:28 -0700
Subject: hwmon: (i5k_amb) Drop i5k_channel_pci_id

Function i5k_channel_pci_id looks like it can fail, while a better
code design would make it more obvious that it can't. We can even get
rid of the function.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Guenter Roeck <guenter.roeck@ericsson.com>
---
 drivers/hwmon/i5k_amb.c | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c
index c4c40be..d22f241 100644
--- a/drivers/hwmon/i5k_amb.c
+++ b/drivers/hwmon/i5k_amb.c
@@ -114,7 +114,6 @@ struct i5k_amb_data {
 	void __iomem *amb_mmio;
 	struct i5k_device_attribute *attrs;
 	unsigned int num_attrs;
-	unsigned long chipset_id;
 };
 
 static ssize_t show_name(struct device *dev, struct device_attribute *devattr,
@@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data,
 		goto out;
 	}
 
-	data->chipset_id = devid;
-
 	res = 0;
 out:
 	pci_dev_put(pcidev);
@@ -478,23 +475,13 @@ out:
 	return res;
 }
 
-static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data,
-					unsigned long channel)
-{
-	switch (data->chipset_id) {
-	case PCI_DEVICE_ID_INTEL_5000_ERR:
-		return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel;
-	case PCI_DEVICE_ID_INTEL_5400_ERR:
-		return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel;
-	default:
-		BUG();
-	}
-}
-
-static unsigned long chipset_ids[] = {
-	PCI_DEVICE_ID_INTEL_5000_ERR,
-	PCI_DEVICE_ID_INTEL_5400_ERR,
-	0
+static struct {
+	unsigned long err;
+	unsigned long fbd0;
+} chipset_ids[] __devinitdata  = {
+	{ PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 },
+	{ PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 },
+	{ 0, 0 }
 };
 
 #ifdef MODULE
@@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
 {
 	struct i5k_amb_data *data;
 	struct resource *reso;
-	int i;
-	int res = -ENODEV;
+	int i, res;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
 	/* Figure out where the AMB registers live */
 	i = 0;
 	do {
-		res = i5k_find_amb_registers(data, chipset_ids[i]);
+		res = i5k_find_amb_registers(data, chipset_ids[i].err);
+		if (res == 0)
+			break;
 		i++;
-	} while (res && chipset_ids[i]);
+	} while (chipset_ids[i].err);
 
 	if (res)
 		goto err;
 
 	/* Copy the DIMM presence map for the first two channels */
-	res = i5k_channel_probe(&data->amb_present[0],
-				i5k_channel_pci_id(data, 0));
+	res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0);
 	if (res)
 		goto err;
 
 	/* Copy the DIMM presence map for the optional second two channels */
-	i5k_channel_probe(&data->amb_present[2],
-			  i5k_channel_pci_id(data, 1));
+	i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1);
 
 	/* Set up resource regions */
 	reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME);
-- 
cgit v1.1


From c2183d1e9b3f313dd8ba2b1b0197c8d9fb86a7ae Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 24 Aug 2011 10:20:17 +0200
Subject: fuse: check size of FUSE_NOTIFY_INVAL_ENTRY message

FUSE_NOTIFY_INVAL_ENTRY didn't check the length of the write so the
message processing could overrun and result in a "kernel BUG at
fs/fuse/dev.c:629!"

Reported-by: Han-Wen Nienhuys <hanwenn@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
---
 fs/fuse/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 640fc22..168a80f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1358,6 +1358,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
 	if (outarg.namelen > FUSE_NAME_MAX)
 		goto err;
 
+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
 	name.name = buf;
 	name.len = outarg.namelen;
 	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
-- 
cgit v1.1


From 7675535958175b85b8117bcee245d9ecbc4d3d74 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 24 Aug 2011 10:53:10 +0200
Subject: ALSA: hda/conexant - Enable ADC-switching for auto-mic mode, too

The ADC-switching can work also in the auto-mic mode, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 4c462c3..5616444 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3866,7 +3866,7 @@ static void cx_auto_parse_input(struct hda_codec *codec)
 	}
 	if (imux->num_items >= 2 && cfg->num_inputs == imux->num_items)
 		cx_auto_check_auto_mic(codec);
-	if (imux->num_items > 1 && !spec->auto_mic) {
+	if (imux->num_items > 1) {
 		for (i = 1; i < imux->num_items; i++) {
 			if (spec->imux_info[i].adc != spec->imux_info[0].adc) {
 				spec->adc_switching = 1;
-- 
cgit v1.1


From 52c49e0156e167fa65bbc3dd87a3a2f651af03fb Mon Sep 17 00:00:00 2001
From: Joseph Pentland <jp@opensource.wolfsonmicro.com>
Date: Tue, 23 Aug 2011 10:41:50 +0100
Subject: ASoC: Add Springbank I/O card to Speyside Kconfig

Signed-off-by: Joseph Pentland <jp@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/samsung/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig
index b99091f..65f980e 100644
--- a/sound/soc/samsung/Kconfig
+++ b/sound/soc/samsung/Kconfig
@@ -185,6 +185,7 @@ config SND_SOC_SPEYSIDE
 	select SND_SAMSUNG_I2S
 	select SND_SOC_WM8996
 	select SND_SOC_WM9081
+	select SND_SOC_WM1250_EV1
 
 config SND_SOC_SPEYSIDE_WM8962
 	tristate "Audio support for Wolfson Speyside with WM8962"
-- 
cgit v1.1


From 250b68512dd7e7d31a8c85a740a4b085bade4ba0 Mon Sep 17 00:00:00 2001
From: Sangbeom Kim <sbkim73@samsung.com>
Date: Tue, 23 Aug 2011 19:36:59 +0900
Subject: ASoC: Add samsung maintainer

Signed-off-by: Sangbeom Kim <sbkim73@samsung.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 46e3e6b..4f555d8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5532,6 +5532,7 @@ F:	include/media/*7146*
 
 SAMSUNG AUDIO (ASoC) DRIVERS
 M:	Jassi Brar <jassisinghbrar@gmail.com>
+M:	Sangbeom Kim <sbkim73@samsung.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/samsung
-- 
cgit v1.1


From ee1a4d4b7fcfce31dade9f2ad333b34159cee799 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Tue, 23 Aug 2011 11:16:28 -0600
Subject: ASoC: Tegra: wm8903 machine driver: Drop Ventana support

Board file support for Ventana is not yet mainlined, and probably won't
ever be given the move to Device-Tree. Consequently, the Ventana entry
is being removed from arch/arm/tools/mach-types in the next merge window,
since it was registered over a year ago.

This will also remove function machine_is_ventana(), which is used by
the ASoC Tegra WM8903 machine driver. This will cause compilation
failures. Drop Ventana support to resolve this.

Hopefully, in the not-too-distant future, tegra_wm8903.c will be able to
configure itself from Device-Tree, and hence we'll be able to re-instate
Ventana support just by creating a .dts file for the board.

Also note that Aebl support is in a similar boat. However, that board
isn't scheduled for deprecation for at least another 5 months, and
perhaps we will have completely removed non-Device-Tree support from
tegra_wm8903.c by then and/or adjusted mach-types policy.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 sound/soc/tegra/tegra_wm8903.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c
index 661373c..be27f1d 100644
--- a/sound/soc/tegra/tegra_wm8903.c
+++ b/sound/soc/tegra/tegra_wm8903.c
@@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd)
 	snd_soc_dapm_force_enable_pin(dapm, "Mic Bias");
 
 	/* FIXME: Calculate automatically based on DAPM routes? */
-	if (!machine_is_harmony() && !machine_is_ventana())
+	if (!machine_is_harmony())
 		snd_soc_dapm_nc_pin(dapm, "IN1L");
 	if (!machine_is_seaboard() && !machine_is_aebl())
 		snd_soc_dapm_nc_pin(dapm, "IN1R");
@@ -395,7 +395,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, card);
 	snd_soc_card_set_drvdata(card, machine);
 
-	if (machine_is_harmony() || machine_is_ventana()) {
+	if (machine_is_harmony()) {
 		card->dapm_routes = harmony_audio_map;
 		card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map);
 	} else if (machine_is_seaboard()) {
-- 
cgit v1.1


From 27e7318c3e47e4fac71fcb472623434063ccc7a5 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 24 Aug 2011 17:15:09 +0200
Subject: [S390] nss,initrd: kernel image and initrd must be in different
 segments

When IPL'ing from a block device and an NSS should be created we must
make sure that the kernel image and the initrd are in different 1MB
segments. Otherwise creating the NSS will fail.
So we make sure the initrd is 4MB behind the end of the kernel image
like we do already when IPL via the VM reader is performed.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/early.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 068f846..f297456 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void)
 static __init void rescue_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
 	/*
-	 * Move the initrd right behind the bss section in case it starts
-	 * within the bss section. So we don't overwrite it when the bss
-	 * section gets cleared.
+	 * Just like in case of IPL from VM reader we make sure there is a
+	 * gap of 4MB between end of kernel and start of initrd.
+	 * That way we can also be sure that saving an NSS will succeed,
+	 * which however only requires different segments.
 	 */
 	if (!INITRD_START || !INITRD_SIZE)
 		return;
-	if (INITRD_START >= (unsigned long) __bss_stop)
+	if (INITRD_START >= min_initrd_addr)
 		return;
-	memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
-	INITRD_START = (unsigned long) __bss_stop;
+	memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+	INITRD_START = min_initrd_addr;
 #endif
 }
 
-- 
cgit v1.1


From ba465d830ed1703713251917f154688ec537580f Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 24 Aug 2011 17:15:10 +0200
Subject: [S390] drivers/s390/block/dasd_ioctl.c: add missing kfree

Data is only used to temporarily hold information to be copied to the user
level, so it should be freed before leaving the function.

A simplified version of the semantic match that finds this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@exists@
local idexpression x;
statement S,S1;
expression E;
identifier fl;
expression *ptr != NULL;
@@

x = \(kmalloc\|kzalloc\|kcalloc\)(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...kfree(x)...+> }
     when any
     when != true x == NULL
x->fl
...>
(
if (x == NULL) S1
|
if (...) { ... when != x
               when forall
(
 return \(0\|<+...x...+>\|ptr\);
|
* return ...;
)
}
)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd_ioctl.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index eb4e034..f1a2016 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block)
 static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
 {
 	struct dasd_profile_info_t *data;
+	int rc = 0;
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
 		spin_unlock_bh(&block->profile.lock);
 	} else {
 		spin_unlock_bh(&block->profile.lock);
-		return -EIO;
+		rc = -EIO;
+		goto out;
 	}
 	if (copy_to_user(argp, data, sizeof(*data)))
-		return -EFAULT;
-	return 0;
+		rc = -EFAULT;
+out:
+	kfree(data);
+	return rc;
 }
 #else
 static int dasd_ioctl_reset_profile(struct dasd_block *block)
-- 
cgit v1.1


From 798620fb1dd510d163f1c875c8422dc605f446da Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 24 Aug 2011 17:15:11 +0200
Subject: [S390] arch/s390/kernel/ipl.c: correct error detection check

reipl_fcp_kset was just initialized, so it appears that it should be tested
instead of reipl_kset.

Signed-off-by: Julia Lawall <julia@diku.dk>
Reported-by: Suman Saha <sumsaha@gmail.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ipl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 04361d5..ee28e06 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void)
 	/* sysfs: create fcp kset for mixing attr group and bin attrs */
 	reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
 					     &reipl_kset->kobj);
-	if (!reipl_kset) {
+	if (!reipl_fcp_kset) {
 		free_page((unsigned long) reipl_block_fcp);
 		return -ENOMEM;
 	}
-- 
cgit v1.1


From e1202edadbf846f0a4de70c8c0b9fe5a6c88b1cb Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Wed, 24 Aug 2011 17:15:12 +0200
Subject: [S390] Change default action from reipl to stop for on_restart

The main purpose for PSW restart will be kdump. Therefore customers will
issue "system restart" for creating a dump. If kdump is not enabled,
currently "PSW restart" will reboot the system and then no dump can
be created any more. In order to still allow a manual stand-alone dump in
the case a user issues "PSW restart" on a system that has not enabled
kdump we now stop the system.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ipl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index ee28e06..48c7102 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
 
 static void stop_run(struct shutdown_trigger *trigger)
 {
-	if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+	if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+	    strcmp(trigger->name, ON_RESTART_STR) == 0)
 		disabled_wait((unsigned long) __builtin_return_address(0));
 	while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
 		cpu_relax();
@@ -1717,7 +1718,7 @@ static void do_panic(void)
 /* on restart */
 
 static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-	&reipl_action};
+	&stop_action};
 
 static ssize_t on_restart_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *page)
-- 
cgit v1.1


From 8adb4ca344b48bbbf87ca66fd07a2dd503619714 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 24 Aug 2011 17:15:13 +0200
Subject: [S390] memory hotplug: only unassign assigned increments

Make sure that only assigned storage increments are unassigned when
attaching a storage element.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/char/sclp_cmd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index be55fb2..837e010 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id)
 	switch (sccb->header.response_code) {
 	case 0x0020:
 		set_bit(id, sclp_storage_ids);
-		for (i = 0; i < sccb->assigned; i++)
-			sclp_unassign_storage(sccb->entries[i] >> 16);
+		for (i = 0; i < sccb->assigned; i++) {
+			if (sccb->entries[i])
+				sclp_unassign_storage(sccb->entries[i] >> 16);
+		}
 		break;
 	default:
 		rc = -EIO;
-- 
cgit v1.1


From 18036b5866b5e407a28f444a80de186a5d7df767 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 24 Aug 2011 16:35:32 +0100
Subject: ASoC: Correct element count for WM8996 sidetone HPF

I can count. Honest.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@ti.com>
---
 sound/soc/codecs/wm8996.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c
index 0936ae5..0cdb9d1 100644
--- a/sound/soc/codecs/wm8996.c
+++ b/sound/soc/codecs/wm8996.c
@@ -420,7 +420,7 @@ static const char *sidetone_hpf_text[] = {
 };
 
 static const struct soc_enum sidetone_hpf =
-	SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 6, sidetone_hpf_text);
+	SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 7, sidetone_hpf_text);
 
 static const char *hpf_mode_text[] = {
 	"HiFi", "Custom", "Voice"
-- 
cgit v1.1


From b7ab83edba2d50583bc9520431618489379718b2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 24 Aug 2011 21:40:56 +0200
Subject: PM: Use spinlock instead of mutex in clock management functions

The lock member of struct pm_clk_data is of type struct mutex,
which is a problem, because the suspend and resume routines
defined in drivers/base/power/clock_ops.c cannot be executed
with interrupts disabled for this reason.  Modify
struct pm_clk_data so that its lock member is a spinlock.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 drivers/base/power/clock_ops.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index a846b2f..2c18d58 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -19,7 +19,7 @@
 
 struct pm_clk_data {
 	struct list_head clock_list;
-	struct mutex lock;
+	spinlock_t lock;
 };
 
 enum pce_status {
@@ -73,9 +73,9 @@ int pm_clk_add(struct device *dev, const char *con_id)
 		}
 	}
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irq(&pcd->lock);
 	list_add_tail(&ce->node, &pcd->clock_list);
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irq(&pcd->lock);
 	return 0;
 }
 
@@ -83,8 +83,8 @@ int pm_clk_add(struct device *dev, const char *con_id)
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
  *
- * This routine must be called under the mutex protecting the PM list of clocks
- * corresponding the the @ce's device.
+ * This routine must be called under the spinlock protecting the PM list of
+ * clocks corresponding the the @ce's device.
  */
 static void __pm_clk_remove(struct pm_clock_entry *ce)
 {
@@ -123,7 +123,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
 	if (!pcd)
 		return;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irq(&pcd->lock);
 
 	list_for_each_entry(ce, &pcd->clock_list, node) {
 		if (!con_id && !ce->con_id) {
@@ -137,7 +137,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
 		}
 	}
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irq(&pcd->lock);
 }
 
 /**
@@ -158,7 +158,7 @@ int pm_clk_init(struct device *dev)
 	}
 
 	INIT_LIST_HEAD(&pcd->clock_list);
-	mutex_init(&pcd->lock);
+	spin_lock_init(&pcd->lock);
 	dev->power.subsys_data = pcd;
 	return 0;
 }
@@ -181,12 +181,12 @@ void pm_clk_destroy(struct device *dev)
 
 	dev->power.subsys_data = NULL;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irq(&pcd->lock);
 
 	list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node)
 		__pm_clk_remove(ce);
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irq(&pcd->lock);
 
 	kfree(pcd);
 }
@@ -220,13 +220,14 @@ int pm_clk_suspend(struct device *dev)
 {
 	struct pm_clk_data *pcd = __to_pcd(dev);
 	struct pm_clock_entry *ce;
+	unsigned long flags;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
 	if (!pcd)
 		return 0;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry_reverse(ce, &pcd->clock_list, node) {
 		if (ce->status == PCE_STATUS_NONE)
@@ -238,7 +239,7 @@ int pm_clk_suspend(struct device *dev)
 		}
 	}
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irqrestore(&pcd->lock, flags);
 
 	return 0;
 }
@@ -251,13 +252,14 @@ int pm_clk_resume(struct device *dev)
 {
 	struct pm_clk_data *pcd = __to_pcd(dev);
 	struct pm_clock_entry *ce;
+	unsigned long flags;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
 	if (!pcd)
 		return 0;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry(ce, &pcd->clock_list, node) {
 		if (ce->status == PCE_STATUS_NONE)
@@ -269,7 +271,7 @@ int pm_clk_resume(struct device *dev)
 		}
 	}
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irqrestore(&pcd->lock, flags);
 
 	return 0;
 }
@@ -344,6 +346,7 @@ int pm_clk_suspend(struct device *dev)
 {
 	struct pm_clk_data *pcd = __to_pcd(dev);
 	struct pm_clock_entry *ce;
+	unsigned long flags;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -351,12 +354,12 @@ int pm_clk_suspend(struct device *dev)
 	if (!pcd || !dev->driver)
 		return 0;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry_reverse(ce, &pcd->clock_list, node)
 		clk_disable(ce->clk);
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irqrestore(&pcd->lock, flags);
 
 	return 0;
 }
@@ -369,6 +372,7 @@ int pm_clk_resume(struct device *dev)
 {
 	struct pm_clk_data *pcd = __to_pcd(dev);
 	struct pm_clock_entry *ce;
+	unsigned long flags;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -376,12 +380,12 @@ int pm_clk_resume(struct device *dev)
 	if (!pcd || !dev->driver)
 		return 0;
 
-	mutex_lock(&pcd->lock);
+	spin_lock_irqsave(&pcd->lock, flags);
 
 	list_for_each_entry(ce, &pcd->clock_list, node)
 		clk_enable(ce->clk);
 
-	mutex_unlock(&pcd->lock);
+	spin_unlock_irqrestore(&pcd->lock, flags);
 
 	return 0;
 }
-- 
cgit v1.1


From 5a50a01bf00c8191073fdf518e1af1e950ac3af5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 24 Aug 2011 21:41:08 +0200
Subject: sh-sci / PM: Use power.irq_safe

Since sci_port_enable() and sci_port_disable() may be run with
interrupts off and they execute pm_runtime_get_sync() and
pm_runtime_put_sync(), respectively, the SCI device's
power.irq_safe flag has to be set to indicate that it is safe
to execute runtime PM callbacks for this device with interrupts off.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Magnus Damm <damm@opensource.se>
---
 drivers/tty/serial/sh-sci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 2ec57b2..a9414fa 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1913,6 +1913,7 @@ static int __devinit sci_init_single(struct platform_device *dev,
 
 		port->dev = &dev->dev;
 
+		pm_runtime_irq_safe(&dev->dev);
 		pm_runtime_enable(&dev->dev);
 	}
 
-- 
cgit v1.1


From 5c3f96b20954fd6932bcfb1a860fa1d8b5b22ab0 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 24 Aug 2011 22:38:43 +0200
Subject: ARM: mach-shmobile: sh7372 LCDC1 suspend fix

Associate the HDMI clock together with LCDC1 on sh7372.

Without this patch Suspend-to-RAM hangs on the boards
AP4EVB and Mackerel. The code hangs in the LCDC driver
where the software is waiting forever for the hardware to
power down. By explicitly associating the HDMI clock with
LCDC1 we can make sure the HDMI clock is enabled using
Runtime PM whenever the driver is accessing the hardware.

This HDMI and LCDC1 dependency is documented in the sh7372
data sheet. Older kernels did work as expected but the
recently merged (3.1-rc)

 794d78f drivers: sh: late disabling of clocks V2

introduced code to turn off clocks lacking software reference
which happens to include the HDMI clock that is needed by
LCDC1 to operate as expected.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-shmobile/board-ap4evb.c   | 1 +
 arch/arm/mach-shmobile/board-mackerel.c | 1 +
 arch/arm/mach-shmobile/clock-sh7372.c   | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 9e0856b..fadbe5b 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void)
 	fsi_init_pm_clock();
 	sh7372_pm_init();
 	pm_clk_add(&fsi_device.dev, "spu2");
+	pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
 }
 
 static void __init ap4evb_timer_init(void)
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index d41c01f..0ea71f8 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -1588,6 +1588,7 @@ static void __init mackerel_init(void)
 	hdmi_init_pm_clock();
 	sh7372_pm_init();
 	pm_clk_add(&fsi_device.dev, "spu2");
+	pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
 }
 
 static void __init mackerel_timer_init(void)
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 6b1619a..e6e11e4 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -655,6 +655,8 @@ static struct clk_lookup lookups[] = {
 	CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */
 	CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */
 
+	CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1",
+		      &div6_reparent_clks[DIV6_HDMI]),
 	CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]),
 	CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]),
 	CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]),
-- 
cgit v1.1


From 1b965f1891eac2d8583b5248ef0bcbc91c201e27 Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna <omar.ramirez@ti.com>
Date: Wed, 24 Aug 2011 15:07:04 -0500
Subject: staging: tidspbridge: fix compilation on dsp clock functions

Seen on v3.1-rc3, patch:

omap: mcbsp: Drop in-driver transfer support
bafe2721a0fbd1cc1af04384133684f660f3658e

Removed code that now cause tidspbridge to break while compiling.

Signed-off-by: Omar Ramirez Luna <omar.ramirez@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/tidspbridge/core/dsp-clock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c
index 589a055..3d1279c 100644
--- a/drivers/staging/tidspbridge/core/dsp-clock.c
+++ b/drivers/staging/tidspbridge/core/dsp-clock.c
@@ -209,7 +209,6 @@ int dsp_clk_enable(enum dsp_clk_id clk_id)
 		break;
 #ifdef CONFIG_OMAP_MCBSP
 	case MCBSP_CLK:
-		omap_mcbsp_set_io_type(MCBSP_ID(clk_id), OMAP_MCBSP_POLL_IO);
 		omap_mcbsp_request(MCBSP_ID(clk_id));
 		omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC);
 		break;
-- 
cgit v1.1


From c8d47631a48f254d062db8084776d1fb24785e7b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 9 Aug 2011 20:17:29 +0200
Subject: i2c-nomadik: fix kerneldoc warning

There was a missing struct item in the kerneldoc, add it and fix
another pretty-printing formatting issue with a missing space.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 drivers/i2c/busses/i2c-nomadik.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index f9b8854..b228e09 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -146,6 +146,7 @@ struct i2c_nmk_client {
  * @stop: stop condition
  * @xfer_complete: acknowledge completion for a I2C message
  * @result: controller propogated result
+ * @regulator: pointer to i2c regulator
  * @busy: Busy doing transfer
  */
 struct nmk_i2c_dev {
@@ -509,7 +510,7 @@ static int write_i2c(struct nmk_i2c_dev *dev)
 
 	if (timeout < 0) {
 		dev_err(&dev->pdev->dev,
-			"wait_for_completion_timeout"
+			"wait_for_completion_timeout "
 			"returned %d waiting for event\n", timeout);
 		status = timeout;
 	}
-- 
cgit v1.1


From caca9510ff4e5d842c0589110243d60927836222 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 24 Aug 2011 15:55:30 -0700
Subject: firmware loader: allow builtin firmware load even if usermodehelper
 is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit a144c6a6c924 ("PM: Print a warning if firmware is requested
when tasks are frozen") we not only printed a warning if somebody tried
to load the firmware when tasks are frozen - we also failed the load.

But that check was done before the check for built-in firmware, and then
when we disallowed usermode helpers during bootup (commit 288d5abec831:
"Boot up with usermodehelper disabled"), that actually means that
built-in modules can no longer load their firmware even if the firmware
is built in too.  Which used to work, and some people depended on it for
the R100 driver.

So move the test for usermodehelper_is_disabled() down, to after
checking the built-in firmware.

This should fix:

	https://bugzilla.kernel.org/show_bug.cgi?id=40952

Reported-by: James Cloos <cloos@hjcloos.com>
Bisected-by: Elimar Riesebieter <riesebie@lxtec.de>
Cc: Michel Dänzer <michel@daenzer.net>
Cc: Rafael Wysocki <rjw@sisk.pl>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/firmware_class.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index bbb03e6..06ed6b4 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p,
 	if (!firmware_p)
 		return -EINVAL;
 
-	if (WARN_ON(usermodehelper_is_disabled())) {
-		dev_err(device, "firmware: %s will not be loaded\n", name);
-		return -EBUSY;
-	}
-
 	*firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
 	if (!firmware) {
 		dev_err(device, "%s: kmalloc(struct firmware) failed\n",
@@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p,
 		return 0;
 	}
 
+	if (WARN_ON(usermodehelper_is_disabled())) {
+		dev_err(device, "firmware: %s will not be loaded\n", name);
+		retval = -EBUSY;
+		goto out;
+	}
+
 	if (uevent)
 		dev_dbg(device, "firmware: requesting %s\n", name);
 
-- 
cgit v1.1


From 468c5458856236cde6df1b0654d32bf6625349a5 Mon Sep 17 00:00:00 2001
From: David Henningsson <david.henningsson@canonical.com>
Date: Thu, 25 Aug 2011 13:16:02 +0200
Subject: ALSA: hda: Conexant: Allow different output types to share DAC

Headphones has stopped working for the original reported (a regression
compared to 2.6.38). This is because Speaker and Headphones share the
same DAC, in which case no Headphones volume control was created.
This patch fixes so that both Speaker and Headphones volume
controls are created in such scenario.

BugLink: http://bugs.launchpad.net/bugs/817943
Signed-off-by: David Henningsson <david.henningsson@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 46 +++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 5616444..7696d05 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3372,18 +3372,26 @@ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs)
 /* fill pin_dac_pair list from the pin and dac list */
 static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins,
 			      int num_pins, hda_nid_t *dacs, int *rest,
-			      struct pin_dac_pair *filled, int type)
+			      struct pin_dac_pair *filled, int nums, 
+			      int type)
 {
-	int i, nums;
+	int i, start = nums;
 
-	nums = 0;
-	for (i = 0; i < num_pins; i++) {
+	for (i = 0; i < num_pins; i++, nums++) {
 		filled[nums].pin = pins[i];
 		filled[nums].type = type;
 		filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest);
-		if (!filled[nums].dac && i > 0 && filled[0].dac)
+		if (filled[nums].dac) 
+			continue;
+		if (filled[start].dac && get_connection_index(codec, pins[i], filled[start].dac) >= 0) {
+			filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG;
+			continue;
+		}
+		if (filled[0].dac && get_connection_index(codec, pins[i], filled[0].dac) >= 0) {
 			filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG;
-		nums++;
+			continue;
+		}
+		snd_printdd("Failed to find a DAC for pin 0x%x", pins[i]);
 	}
 	return nums;
 }
@@ -3399,14 +3407,14 @@ static void cx_auto_parse_output(struct hda_codec *codec)
 	rest = fill_cx_auto_dacs(codec, dacs);
 	/* parse all analog output pins */
 	nums = fill_dacs_for_pins(codec, cfg->line_out_pins, cfg->line_outs,
-				  dacs, &rest, spec->dac_info,
-				  AUTO_PIN_LINE_OUT);
-	nums += fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
-				  dacs, &rest, spec->dac_info + nums,
-				  AUTO_PIN_HP_OUT);
-	nums += fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
-				  dacs, &rest, spec->dac_info + nums,
-				  AUTO_PIN_SPEAKER_OUT);
+			  dacs, &rest, spec->dac_info, 0,
+			  AUTO_PIN_LINE_OUT);
+	nums = fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs,
+			  dacs, &rest, spec->dac_info, nums,
+			  AUTO_PIN_HP_OUT);
+	nums = fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs,
+			  dacs, &rest, spec->dac_info, nums,
+			  AUTO_PIN_SPEAKER_OUT);
 	spec->dac_info_filled = nums;
 	/* fill multiout struct */
 	for (i = 0; i < nums; i++) {
@@ -4173,9 +4181,11 @@ static int try_add_pb_volume(struct hda_codec *codec, hda_nid_t dac,
 			     hda_nid_t pin, const char *name, int idx)
 {
 	unsigned int caps;
-	caps = query_amp_caps(codec, dac, HDA_OUTPUT);
-	if (caps & AC_AMPCAP_NUM_STEPS)
-		return cx_auto_add_pb_volume(codec, dac, name, idx);
+	if (dac && !(dac & DAC_SLAVE_FLAG)) {
+		caps = query_amp_caps(codec, dac, HDA_OUTPUT);
+		if (caps & AC_AMPCAP_NUM_STEPS)
+			return cx_auto_add_pb_volume(codec, dac, name, idx);
+	}
 	caps = query_amp_caps(codec, pin, HDA_OUTPUT);
 	if (caps & AC_AMPCAP_NUM_STEPS)
 		return cx_auto_add_pb_volume(codec, pin, name, idx);
@@ -4198,8 +4208,6 @@ static int cx_auto_build_output_controls(struct hda_codec *codec)
 		const char *label;
 		int idx, type;
 		hda_nid_t dac = spec->dac_info[i].dac;
-		if (!dac || (dac & DAC_SLAVE_FLAG))
-			continue;
 		type = spec->dac_info[i].type;
 		if (type == AUTO_PIN_LINE_OUT)
 			type = spec->autocfg.line_out_type;
-- 
cgit v1.1


From 64584eb9cde5f3c5a07f24b2e7cd38f1157be181 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 25 Aug 2011 15:31:05 +0200
Subject: PM / Runtime: Correct documentation of pm_runtime_irq_safe()

The description of pm_runtime_irq_safe() has to be updated to follow
the code after commit 02b2677 (PM / Runtime: Allow _put_sync() from
interrupts-disabled context).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Kevin Hilman <khilman@ti.com>
---
 Documentation/power/runtime_pm.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 4ce5450..6066e3a 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -431,8 +431,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 
   void pm_runtime_irq_safe(struct device *dev);
     - set the power.irq_safe flag for the device, causing the runtime-PM
-      suspend and resume callbacks (but not the idle callback) to be invoked
-      with interrupts disabled
+      callbacks to be invoked with interrupts off
 
   void pm_runtime_mark_last_busy(struct device *dev);
     - set the power.last_busy field to the current time
-- 
cgit v1.1


From ff35336d3efd1ec4015b56f690191ed69730cbb0 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@ti.com>
Date: Thu, 25 Aug 2011 15:31:14 +0200
Subject: OMAP: omap_device: only override _noirq methods, not normal
 suspend/resume

Commit c03f007a8bf0e092caeb6856a5c8a850df10b974 (OMAP: PM:
omap_device: add system PM methods for PM domain handling) mistakenly
used SET_SYSTEM_SLEEP_PM_OPS() when trying to configure custom methods
for the PM domains noirq methods.  Fix that by setting only the
suspend_noirq and resume_noirq methods with custom versions.

Note that all other PM domain methods (including the "normal"
suspend/resume methods) are populated using USE_PLATFORM_PM_SLEEP_OPS,
which configures them all to the default subsystem (platform_bus)
methods.

Reported-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/plat-omap/omap_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index b6b4097..9a6a538 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -622,7 +622,8 @@ static struct dev_pm_domain omap_device_pm_domain = {
 		SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
 				   _od_runtime_idle)
 		USE_PLATFORM_PM_SLEEP_OPS
-		SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq)
+		.suspend_noirq = _od_suspend_noirq,
+		.resume_noirq = _od_resume_noirq,
 	}
 };
 
-- 
cgit v1.1


From c96fbdd0ab97235f930ebf24b38fa42a2e3458cf Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Thu, 25 Aug 2011 11:46:58 +0200
Subject: USB: ftdi_sio: add Calao reference board support

Calao use on there dev kits a FT2232 where the port 0 is used for the JTAG and
port 1 for the UART

They use the same VID and PID as FTDI Chip but they program the manufacturer
name in the eeprom

So use this information to detect it

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Gregory Hermant <gregory.hermant@calao-systems.com>
Cc: Alan Cox <alan@linux.intel.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/ftdi_sio.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 78a2cf9..5fc13e717 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -101,6 +101,7 @@ static int   ftdi_jtag_probe(struct usb_serial *serial);
 static int   ftdi_mtxorb_hack_setup(struct usb_serial *serial);
 static int   ftdi_NDI_device_setup(struct usb_serial *serial);
 static int   ftdi_stmclite_probe(struct usb_serial *serial);
+static int   ftdi_8u2232c_probe(struct usb_serial *serial);
 static void  ftdi_USB_UIRT_setup(struct ftdi_private *priv);
 static void  ftdi_HE_TIRA1_setup(struct ftdi_private *priv);
 
@@ -128,6 +129,10 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = {
 	.probe	= ftdi_stmclite_probe,
 };
 
+static struct ftdi_sio_quirk ftdi_8u2232c_quirk = {
+	.probe	= ftdi_8u2232c_probe,
+};
+
 /*
  * The 8U232AM has the same API as the sio except for:
  * - it can support MUCH higher baudrates; up to:
@@ -178,7 +183,8 @@ static struct usb_device_id id_table_combined [] = {
 	{ USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_232RL_PID) },
-	{ USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) ,
+		.driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk },
 	{ USB_DEVICE(FTDI_VID, FTDI_4232H_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_232H_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) },
@@ -1737,6 +1743,18 @@ static int ftdi_jtag_probe(struct usb_serial *serial)
 	return 0;
 }
 
+static int ftdi_8u2232c_probe(struct usb_serial *serial)
+{
+	struct usb_device *udev = serial->dev;
+
+	dbg("%s", __func__);
+
+	if (strcmp(udev->manufacturer, "CALAO Systems") == 0)
+		return ftdi_jtag_probe(serial);
+
+	return 0;
+}
+
 /*
  * First and second port on STMCLiteadaptors is reserved for JTAG interface
  * and the forth port for pio
-- 
cgit v1.1


From be27425dcc516fd08245b047ea57f83b8f6f0903 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 19 Aug 2011 16:15:10 -0700
Subject: Add a personality to report 2.6.x version numbers

I ran into a couple of programs which broke with the new Linux 3.0
version.  Some of those were binary only.  I tried to use LD_PRELOAD to
work around it, but it was quite difficult and in one case impossible
because of a mix of 32bit and 64bit executables.

For example, all kind of management software from HP doesnt work, unless
we pretend to run a 2.6 kernel.

  $ uname -a
  Linux svivoipvnx001 3.0.0-08107-g97cd98f #1062 SMP Fri Aug 12 18:11:45 CEST 2011 i686 i686 i386 GNU/Linux

  $ hpacucli ctrl all show

  Error: No controllers detected.

  $ rpm -qf /usr/sbin/hpacucli
  hpacucli-8.75-12.0

Another notable case is that Python now reports "linux3" from
sys.platform(); which in turn can break things that were checking
sys.platform() == "linux2":

  https://bugzilla.mozilla.org/show_bug.cgi?id=664564

It seems pretty clear to me though it's a bug in the apps that are using
'==' instead of .startswith(), but this allows us to unbreak broken
programs.

This patch adds a UNAME26 personality that makes the kernel report a
2.6.40+x version number instead.  The x is the x in 3.x.

I know this is somewhat ugly, but I didn't find a better workaround, and
compatibility to existing programs is important.

Some programs also read /proc/sys/kernel/osrelease.  This can be worked
around in user space with mount --bind (and a mount namespace)

To use:

  wget ftp://ftp.kernel.org/pub/linux/kernel/people/ak/uname26/uname26.c
  gcc -o uname26 uname26.c
  ./uname26 program

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/personality.h |  1 +
 kernel/sys.c                | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/include/linux/personality.h b/include/linux/personality.h
index eec3bae..8fc7dd1a 100644
--- a/include/linux/personality.h
+++ b/include/linux/personality.h
@@ -22,6 +22,7 @@ extern int		__set_personality(unsigned int);
  * These occupy the top three bytes.
  */
 enum {
+	UNAME26	=               0x0020000,
 	ADDR_NO_RANDOMIZE = 	0x0040000,	/* disable randomization of VA space */
 	FDPIC_FUNCPTRS =	0x0080000,	/* userspace function ptrs point to descriptors
 						 * (signal handling)
diff --git a/kernel/sys.c b/kernel/sys.c
index dd948a1..18ee1d2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,8 @@
 #include <linux/fs_struct.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -44,6 +46,8 @@
 #include <linux/user_namespace.h>
 
 #include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1161,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
 #define override_architecture(name)	0
 #endif
 
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+	int ret = 0;
+	char buf[len];
+
+	if (current->personality & UNAME26) {
+		char *rest = UTS_RELEASE;
+		int ndots = 0;
+		unsigned v;
+
+		while (*rest) {
+			if (*rest == '.' && ++ndots >= 3)
+				break;
+			if (!isdigit(*rest) && *rest != '.')
+				break;
+			rest++;
+		}
+		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+		snprintf(buf, len, "2.6.%u%s", v, rest);
+		ret = copy_to_user(release, buf, len);
+	}
+	return ret;
+}
+
 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 {
 	int errno = 0;
@@ -1170,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 		errno = -EFAULT;
 	up_read(&uts_sem);
 
+	if (!errno && override_release(name->release, sizeof(name->release)))
+		errno = -EFAULT;
 	if (!errno && override_architecture(name))
 		errno = -EFAULT;
 	return errno;
@@ -1191,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
 		error = -EFAULT;
 	up_read(&uts_sem);
 
+	if (!error && override_release(name->release, sizeof(name->release)))
+		error = -EFAULT;
 	if (!error && override_architecture(name))
 		error = -EFAULT;
 	return error;
@@ -1225,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 
 	if (!error && override_architecture(name))
 		error = -EFAULT;
+	if (!error && override_release(name->release, sizeof(name->release)))
+		error = -EFAULT;
 	return error ? -EFAULT : 0;
 }
 #endif
-- 
cgit v1.1


From e096d0c7e2e4e5893792db865dd065ac73cf1f00 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Thu, 25 Aug 2011 07:48:12 -0400
Subject: lockdep: Add helper function for dir vs file i_mutex annotation

Purely in-memory filesystems do not use the inode hash as the dcache
tells us if an entry already exists.  As a result, they do not call
unlock_new_inode, and thus directory inodes do not get put into a
different lockdep class for i_sem.

We need the different lockdep classes, because the locking order for
i_mutex is different for directory inodes and regular inodes.  Directory
inodes can do "readdir()", which takes i_mutex *before* possibly taking
mm->mmap_sem (due to a page fault while copying the directory entry to
user space).

In contrast, regular inodes can be mmap'ed, which takes mm->mmap_sem
before accessing i_mutex.

The two cases can never happen for the same inode, so no real deadlock
can occur, but without the different lockdep classes, lockdep cannot
understand that.  As a result, if CONFIG_DEBUG_LOCK_ALLOC is set, this
can lead to false positives from lockdep like below:

    find/645 is trying to acquire lock:
     (&mm->mmap_sem){++++++}, at: [<ffffffff81109514>] might_fault+0x5c/0xac

    but task is already holding lock:
     (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [<ffffffff81149f34>]
    vfs_readdir+0x5b/0xb4

    which lock already depends on the new lock.

    the existing dependency chain (in reverse order) is:

    -> #1 (&sb->s_type->i_mutex_key#15){+.+.+.}:
          [<ffffffff8108ac26>] lock_acquire+0xbf/0x103
          [<ffffffff814db822>] __mutex_lock_common+0x4c/0x361
          [<ffffffff814dbc46>] mutex_lock_nested+0x40/0x45
          [<ffffffff811daa87>] hugetlbfs_file_mmap+0x82/0x110
          [<ffffffff81111557>] mmap_region+0x258/0x432
          [<ffffffff811119dd>] do_mmap_pgoff+0x2ac/0x306
          [<ffffffff81111b4f>] sys_mmap_pgoff+0x118/0x16a
          [<ffffffff8100c858>] sys_mmap+0x22/0x24
          [<ffffffff814e3ec2>] system_call_fastpath+0x16/0x1b

    -> #0 (&mm->mmap_sem){++++++}:
          [<ffffffff8108a4bc>] __lock_acquire+0xa1a/0xcf7
          [<ffffffff8108ac26>] lock_acquire+0xbf/0x103
          [<ffffffff81109541>] might_fault+0x89/0xac
          [<ffffffff81149cff>] filldir+0x6f/0xc7
          [<ffffffff811586ea>] dcache_readdir+0x67/0x205
          [<ffffffff81149f54>] vfs_readdir+0x7b/0xb4
          [<ffffffff8114a073>] sys_getdents+0x7e/0xd1
          [<ffffffff814e3ec2>] system_call_fastpath+0x16/0x1b

This patch moves the directory vs file lockdep annotation into a helper
function that can be called by in-memory filesystems and has hugetlbfs
call it.

Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c |  1 +
 fs/inode.c           | 24 +++++++++++++++---------
 include/linux/fs.h   |  5 +++++
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87b6e04..ec88953 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 			inode->i_op = &page_symlink_inode_operations;
 			break;
 		}
+		lockdep_annotate_inode_mutex_key(inode);
 	}
 	return inode;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 73920d5..ec79246 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb)
 }
 EXPORT_SYMBOL(new_inode);
 
-/**
- * unlock_new_inode - clear the I_NEW state and wake up any waiters
- * @inode:	new inode to unlock
- *
- * Called when the inode is fully initialised to clear the new state of the
- * inode and wake up anyone waiting for the inode to finish initialisation.
- */
-void unlock_new_inode(struct inode *inode)
-{
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
 	if (S_ISDIR(inode->i_mode)) {
 		struct file_system_type *type = inode->i_sb->s_type;
 
@@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode)
 					  &type->i_mutex_dir_key);
 		}
 	}
+}
+EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
 #endif
+
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:	new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
+void unlock_new_inode(struct inode *inode)
+{
+	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 178cdb4..c2bd68f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te
 extern struct inode * iget_locked(struct super_block *, unsigned long);
 extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+#else
+static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
+#endif
 extern void unlock_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 
-- 
cgit v1.1


From cbbfa38fcb95930babc5233cf6927ec430f38abc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 25 Aug 2011 19:46:56 +0200
Subject: mtrr: fix UP breakage caused during switch to stop_machine

While removing custom rendezvous code and switching to stop_machine,
commit 192d8857427d ("x86, mtrr: use stop_machine APIs for doing MTRR
rendezvous") completely dropped mtrr setting code on !CONFIG_SMP
breaking MTRR settting on UP.

Fix it by removing the incorrect CONFIG_SMP.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Anders Eriksson <aeriksson@fastmail.fm>
Tested-and-acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/mtrr/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a3..6b96110 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
  */
 static int mtrr_rendezvous_handler(void *info)
 {
-#ifdef CONFIG_SMP
 	struct set_mtrr_data *data = info;
 
 	/*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
 	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
 		mtrr_if->set_all();
 	}
-#endif
 	return 0;
 }
 
-- 
cgit v1.1


From 0de3b4856907d9937c6c23cfbfdec1e8e3596ea7 Mon Sep 17 00:00:00 2001
From: Thomas Jarosch <thomas.jarosch@intra2net.com>
Date: Thu, 25 Aug 2011 15:37:45 +0200
Subject: drm/i915: Fix wrong initializer for "locked" variable in
 assert_panel_unlocked

Otherwise it just contains random memory.

Issue detected by cppcheck.

Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5a1ae9f..56a8554 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -878,7 +878,7 @@ static void assert_panel_unlocked(struct drm_i915_private *dev_priv,
 	int pp_reg, lvds_reg;
 	u32 val;
 	enum pipe panel_pipe = PIPE_A;
-	bool locked = locked;
+	bool locked = true;
 
 	if (HAS_PCH_SPLIT(dev_priv->dev)) {
 		pp_reg = PCH_PP_CONTROL;
-- 
cgit v1.1


From b4ca46e4e82a0a5976fe5eab85be585d75f8202f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@mit.edu>
Date: Thu, 25 Aug 2011 16:10:33 -0400
Subject: x86-32: Fix boot with CONFIG_X86_INVD_BUG

entry_32.S contained a hardcoded alternative instruction entry, and the
format changed in commit 59e97e4d6fbc ("x86: Make alternative
instruction pointers relative").

Replace the hardcoded entry with the altinstruction_entry macro.  This
fixes the 32-bit boot with CONFIG_X86_INVD_BUG=y.

Reported-and-tested-by: Arnaud Lacombe <lacombar@gmail.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/entry_32.S | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a9197..f3f6f53 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
 #include <asm/ftrace.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
 661:	pushl_cfi $do_general_protection
 662:
 .section .altinstructions,"a"
-	.balign 4
-	.long 661b
-	.long 663f
-	.word X86_FEATURE_XMM
-	.byte 662b-661b
-	.byte 664f-663f
+	altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
 .previous
 .section .altinstr_replacement,"ax"
 663:	pushl $do_simd_coprocessor_error
-- 
cgit v1.1


From 06ed4625fdfffee1251708cd30de276186d5fdcf Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@gentoo.org>
Date: Thu, 25 Aug 2011 15:59:01 -0700
Subject: drivers/misc/pti.c: add missing includes

Found on allmodconfig build (ARCH=alpha)

    drivers/misc/pti.c: In function 'get_id':
    drivers/misc/pti.c:249: error: implicit declaration of function 'kmalloc'
    drivers/misc/pti.c: In function 'pti_char_write':
    drivers/misc/pti.c:658: error: implicit declaration of function 'copy_from_user'

Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: J Freyensee <james_p_freyensee@linux.intel.com>
Cc: Jeremy Rocher <rocher.jeremy@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/pti.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
index 8653bd0..06df187 100644
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -33,6 +33,8 @@
 #include <linux/mutex.h>
 #include <linux/miscdevice.h>
 #include <linux/pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #define DRIVERNAME		"pti"
 #define PCINAME			"pciPTI"
-- 
cgit v1.1


From 2df7a7d1cd07626dd235ca102830ebfc6c01a09e Mon Sep 17 00:00:00 2001
From: Sergei Trofimovich <slyfox@gentoo.org>
Date: Thu, 25 Aug 2011 15:59:02 -0700
Subject: alpha: unbreak osf_setsysinfo(SSI_NVPAIRS, [SSIN_UACPROC,
 UAC_SIGBUS])

The bug was accidentally found by the following program:

    #include <asm/sysinfo.h>
    #include <asm/unistd.h>
    #include <sys/syscall.h>
    static int setsysinfo(unsigned long op, void *buffer, unsigned long size,
                          int *start, void *arg, unsigned long flag) {
        return syscall(__NR_osf_setsysinfo, op, buffer, size, start, arg, flag);
    }

    int main(int argc, char **argv) {
        short x[10];
        unsigned int buf[2] = { SSIN_UACPROC, UAC_SIGBUS, };
        setsysinfo(SSI_NVPAIRS, buf, 1, 0, 0, 0);

        int  *y = (int*) (x+1);
        *y = 0;
        return 0;
    }

The program shoud fail on SIGBUS, but didn't.

The patch is a second part of userspace flag fix (commit 745dd2405e28
"Alpha: Rearrange thread info flags fixing two regressions").

Deleted outdated out-of-sync 'UAC_SHIFT' (the cause of bug) in favour of
'ALPHA_UAC_SHIFT'.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
Acked-by: Michael Cree <mcree@orcon.net.nz>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/sysinfo.h     |  9 ---------
 arch/alpha/include/asm/thread_info.h |  8 ++++----
 arch/alpha/kernel/osf_sys.c          | 12 +++++++-----
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/arch/alpha/include/asm/sysinfo.h b/arch/alpha/include/asm/sysinfo.h
index 086aba2..e77d77c 100644
--- a/arch/alpha/include/asm/sysinfo.h
+++ b/arch/alpha/include/asm/sysinfo.h
@@ -27,13 +27,4 @@
 #define UAC_NOFIX			2
 #define UAC_SIGBUS			4
 
-
-#ifdef __KERNEL__
-
-/* This is the shift that is applied to the UAC bits as stored in the
-   per-thread flags.  See thread_info.h.  */
-#define UAC_SHIFT			6
-
-#endif
-
 #endif /* __ASM_ALPHA_SYSINFO_H */
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 6f32f9c..ff73db0 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,9 +74,9 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_POLLING_NRFLAG	8	/* poll_idle is polling NEED_RESCHED */
 #define TIF_DIE_IF_KERNEL	9	/* dik recursion lock */
-#define TIF_UAC_NOPRINT		10	/* see sysinfo.h */
-#define TIF_UAC_NOFIX		11
-#define TIF_UAC_SIGBUS		12
+#define TIF_UAC_NOPRINT		10	/* ! Preserve sequence of following */
+#define TIF_UAC_NOFIX		11	/* ! flags as they match            */
+#define TIF_UAC_SIGBUS		12	/* ! userspace part of 'osf_sysinfo' */
 #define TIF_MEMDIE		13	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	14	/* restore signal mask in do_signal */
 #define TIF_FREEZE		16	/* is freezing for suspend */
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK		\
 				 | _TIF_SYSCALL_TRACE)
 
-#define ALPHA_UAC_SHIFT		10
+#define ALPHA_UAC_SHIFT		TIF_UAC_NOPRINT
 #define ALPHA_UAC_MASK		(1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
 				 1 << TIF_UAC_SIGBUS)
 
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 326f0a2..01e8715 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -42,6 +42,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/sysinfo.h>
+#include <asm/thread_info.h>
 #include <asm/hwrpb.h>
 #include <asm/processor.h>
 
@@ -633,9 +634,10 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
  	case GSI_UACPROC:
 		if (nbytes < sizeof(unsigned int))
 			return -EINVAL;
- 		w = (current_thread_info()->flags >> UAC_SHIFT) & UAC_BITMASK;
- 		if (put_user(w, (unsigned int __user *)buffer))
- 			return -EFAULT;
+		w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) &
+			UAC_BITMASK;
+		if (put_user(w, (unsigned int __user *)buffer))
+			return -EFAULT;
  		return 1;
 
 	case GSI_PROC_TYPE:
@@ -756,8 +758,8 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
  			case SSIN_UACPROC:
 			again:
 				old = current_thread_info()->flags;
-				new = old & ~(UAC_BITMASK << UAC_SHIFT);
-				new = new | (w & UAC_BITMASK) << UAC_SHIFT;
+				new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT);
+				new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT;
 				if (cmpxchg(&current_thread_info()->flags,
 					    old, new) != old)
 					goto again;
-- 
cgit v1.1


From 58299449257566613f58dcfb757f0ba4a377987a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 25 Aug 2011 15:59:04 -0700
Subject: w1: fix for loop in w1_f29_remove_slave()

The for loop was looking for i <= 0 instead of i >= 0 so this function
never did anything.  Also we started with i = NB_SYSFS_BIN_FILES instead
of "NB_SYSFS_BIN_FILES - 1" which is an off by one bug.

Reported-by: Bojan Prtvar <prtvar.b@gmail.com>
Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Jean-Franois Dagenais <dagenaisj@sonatest.com>
Cc: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/w1/slaves/w1_ds2408.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/w1/slaves/w1_ds2408.c b/drivers/w1/slaves/w1_ds2408.c
index c377818..7c8cdb8 100644
--- a/drivers/w1/slaves/w1_ds2408.c
+++ b/drivers/w1/slaves/w1_ds2408.c
@@ -373,7 +373,7 @@ static int w1_f29_add_slave(struct w1_slave *sl)
 static void w1_f29_remove_slave(struct w1_slave *sl)
 {
 	int i;
-	for (i = NB_SYSFS_BIN_FILES; i <= 0; --i)
+	for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i)
 		sysfs_remove_bin_file(&sl->dev.kobj,
 			&(w1_f29_sysfs_bin_files[i]));
 }
-- 
cgit v1.1


From a801876638c5ce650223476c4eb8f37cea32dc1c Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Thu, 25 Aug 2011 15:59:06 -0700
Subject: MAINTAINERS: Evgeniy has moved

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS                    | 2 +-
 drivers/w1/masters/ds2490.c    | 4 ++--
 drivers/w1/masters/matrox_w1.c | 4 ++--
 drivers/w1/slaves/w1_smem.c    | 4 ++--
 drivers/w1/slaves/w1_therm.c   | 4 ++--
 drivers/w1/w1.c                | 4 ++--
 drivers/w1/w1.h                | 2 +-
 drivers/w1/w1_family.c         | 2 +-
 drivers/w1/w1_family.h         | 2 +-
 drivers/w1/w1_int.c            | 2 +-
 drivers/w1/w1_int.h            | 2 +-
 drivers/w1/w1_io.c             | 2 +-
 drivers/w1/w1_log.h            | 2 +-
 drivers/w1/w1_netlink.c        | 2 +-
 drivers/w1/w1_netlink.h        | 2 +-
 include/linux/connector.h      | 2 +-
 16 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 069ee3b..ee0ac2c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7087,7 +7087,7 @@ S:	Supported
 F:	drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
-M:	Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+M:	Evgeniy Polyakov <zbr@ioremap.net>
 S:	Maintained
 F:	Documentation/w1/
 F:	drivers/w1/
diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c
index 02bf7bf..b5abaae 100644
--- a/drivers/w1/masters/ds2490.c
+++ b/drivers/w1/masters/ds2490.c
@@ -1,7 +1,7 @@
 /*
  *	dscore.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -1024,5 +1024,5 @@ module_init(ds_init);
 module_exit(ds_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("DS2490 USB <-> W1 bus master driver (DS9490*)");
diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c
index 334d1cc..f667c26 100644
--- a/drivers/w1/masters/matrox_w1.c
+++ b/drivers/w1/masters/matrox_w1.c
@@ -1,7 +1,7 @@
 /*
  *	matrox_w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@
 #include "../w1_log.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for transport(Dallas 1-wire prtocol) over VGA DDC(matrox gpio).");
 
 static struct pci_device_id matrox_w1_tbl[] = {
diff --git a/drivers/w1/slaves/w1_smem.c b/drivers/w1/slaves/w1_smem.c
index cc8c02e..8465562 100644
--- a/drivers/w1/slaves/w1_smem.c
+++ b/drivers/w1/slaves/w1_smem.c
@@ -1,7 +1,7 @@
 /*
  *	w1_smem.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, 64bit memory family.");
 
 static struct w1_family w1_smem_family_01 = {
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 402928b..a1ef9b5 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -1,7 +1,7 @@
 /*
  *	w1_therm.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@
 #include "../w1_family.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, temperature family.");
 
 /* Allow the strong pullup to be disabled, but default to enabled.
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 6c136c1..c374978 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -1,7 +1,7 @@
 /*
  *	w1.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@
 #include "w1_netlink.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol.");
 
 static int w1_timeout = 10;
diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h
index 1ce23fc..4d012ca 100644
--- a/drivers/w1/w1.h
+++ b/drivers/w1/w1.h
@@ -1,7 +1,7 @@
 /*
  *	w1.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c
index 4a09904..6335979 100644
--- a/drivers/w1/w1_family.c
+++ b/drivers/w1/w1_family.c
@@ -1,7 +1,7 @@
 /*
  *	w1_family.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h
index 98a1ac0..490cda2 100644
--- a/drivers/w1/w1_family.h
+++ b/drivers/w1/w1_family.h
@@ -1,7 +1,7 @@
 /*
  *	w1_family.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index b50be3f..d220bce 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -1,7 +1,7 @@
 /*
  *	w1_int.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_int.h b/drivers/w1/w1_int.h
index 4274082..2ad7d44 100644
--- a/drivers/w1/w1_int.h
+++ b/drivers/w1/w1_int.h
@@ -1,7 +1,7 @@
 /*
  *	w1_int.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 8e8b64c..765b37b 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -1,7 +1,7 @@
 /*
  *	w1_io.c
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_log.h b/drivers/w1/w1_log.h
index e6ab7cf..9c7bd62 100644
--- a/drivers/w1/w1_log.h
+++ b/drivers/w1/w1_log.h
@@ -1,7 +1,7 @@
 /*
  *	w1_log.h
  *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 55aabd9..40788c9 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.c
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h
index 27e950f..b0922dc 100644
--- a/drivers/w1/w1_netlink.h
+++ b/drivers/w1/w1_netlink.h
@@ -1,7 +1,7 @@
 /*
  * w1_netlink.h
  *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 0c69ad8..3c9c54f 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -1,7 +1,7 @@
 /*
  * 	connector.h
  * 
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004-2005 Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
  * All rights reserved.
  * 
  * This program is free software; you can redistribute it and/or modify
-- 
cgit v1.1


From 5af12d0efdbd9967cc71a0a10c4025c4255a6254 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 25 Aug 2011 15:59:07 -0700
Subject: memcg: pin execution to current cpu while draining stock

Commit d1a05b6973c7 ("memcg do not try to drain per-cpu caches without
pages") added a drain_local_stock() call to a preemptible section.

The draining task looks up the cpu-local stock twice to set the
draining-flag, then to drain the stock and clear the flag again.  If the
task is migrated to a different CPU in between, noone will clear the
flag on the first stock and it will be forever undrainable.  Its charge
can not be recovered and the cgroup can not be deleted anymore.

Properly pin the task to the executing CPU while draining stocks.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 930de94..0e40f02 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2169,13 +2169,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 
 	/* Notify other cpus that system-wide "drain" is running */
 	get_online_cpus();
-	/*
-	 * Get a hint for avoiding draining charges on the current cpu,
-	 * which must be exhausted by our charging.  It is not required that
-	 * this be a precise check, so we use raw_smp_processor_id() instead of
-	 * getcpu()/putcpu().
-	 */
-	curcpu = raw_smp_processor_id();
+	curcpu = get_cpu();
 	for_each_online_cpu(cpu) {
 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
 		struct mem_cgroup *mem;
@@ -2192,6 +2186,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 				schedule_work_on(cpu, &stock->work);
 		}
 	}
+	put_cpu();
 
 	if (!sync)
 		goto out;
-- 
cgit v1.1


From 3d1c2f72a9464c9880054194af0c041d7beb9124 Mon Sep 17 00:00:00 2001
From: Ralf Thielow <ralf.thielow@googlemail.com>
Date: Thu, 25 Aug 2011 15:59:07 -0700
Subject: scripts/get_maintainer.pl: update Linus's git repository

Change to new git tree -
(git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git).

Signed-off-by: Ralf Thielow <ralf.thielow@googlemail.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/get_maintainer.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index eb2f1e6..4594f33 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1389,7 +1389,7 @@ sub vcs_exists {
 	warn("$P: No supported VCS found.  Add --nogit to options?\n");
 	warn("Using a git repository produces better results.\n");
 	warn("Try Linus Torvalds' latest git repository using:\n");
-	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git\n");
+	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
 	$printed_novcs = 1;
     }
     return 0;
-- 
cgit v1.1


From 30ecad51849ae132dc6ef6ddb62d499c7257515b Mon Sep 17 00:00:00 2001
From: Hui Zhu <teawater@gmail.com>
Date: Thu, 25 Aug 2011 15:59:08 -0700
Subject: checkpatch: add missing WARN argument for min_t and max_t tests

The test for bad usage of min_t() and max_t() is missing the --ignore
type.  Add it.

Signed-off-by: Hui Zhu <teawater@gmail.com>
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 9d761c9..3dfc471 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2574,7 +2574,8 @@ sub process {
 				} else {
 					$cast = $cast2;
 				}
-				WARN("$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
+				WARN("MINMAX",
+				     "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
 			}
 		}
 
-- 
cgit v1.1


From 7e8aa048989bf7e0604996a3e2068fb1a81f81bd Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 25 Aug 2011 15:59:09 -0700
Subject: drivers/char/msm_smd_pkt.c: don't use IS_ERR()

The various basic memory allocation function return NULL, not an
ERR_PTR.

The semantic patch that makes this change is available in
scripts/coccinelle/null/eno.cocci.

More information about semantic patching is available at
http://coccinelle.lip6.fr/

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Cc: Niranjana Vishwanathapura <nvishwan@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/msm_smd_pkt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/char/msm_smd_pkt.c b/drivers/char/msm_smd_pkt.c
index b6f8a65..8eca55d 100644
--- a/drivers/char/msm_smd_pkt.c
+++ b/drivers/char/msm_smd_pkt.c
@@ -379,9 +379,8 @@ static int __init smd_pkt_init(void)
 	for (i = 0; i < NUM_SMD_PKT_PORTS; ++i) {
 		smd_pkt_devp[i] = kzalloc(sizeof(struct smd_pkt_dev),
 					  GFP_KERNEL);
-		if (IS_ERR(smd_pkt_devp[i])) {
-			r = PTR_ERR(smd_pkt_devp[i]);
-			pr_err("kmalloc() failed %d\n", r);
+		if (!smd_pkt_devp[i]) {
+			pr_err("kmalloc() failed\n");
 			goto clean_cdevs;
 		}
 
-- 
cgit v1.1


From f51bdd2e97098a5cbb3cba7c3a56fa0e9ac3c444 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 25 Aug 2011 15:59:10 -0700
Subject: mm: fix a vmscan warning

I get the below warning:

  BUG: using smp_processor_id() in preemptible [00000000] code: bash/746
  caller is native_sched_clock+0x37/0x6e
  Pid: 746, comm: bash Tainted: G        W   3.0.0+ #254
  Call Trace:
   [<ffffffff813435c6>] debug_smp_processor_id+0xc2/0xdc
   [<ffffffff8104158d>] native_sched_clock+0x37/0x6e
   [<ffffffff81116219>] try_to_free_mem_cgroup_pages+0x7d/0x270
   [<ffffffff8114f1f8>] mem_cgroup_force_empty+0x24b/0x27a
   [<ffffffff8114ff21>] ? sys_close+0x38/0x138
   [<ffffffff8114ff21>] ? sys_close+0x38/0x138
   [<ffffffff8114f257>] mem_cgroup_force_empty_write+0x17/0x19
   [<ffffffff810c72fb>] cgroup_file_write+0xa8/0xba
   [<ffffffff811522d2>] vfs_write+0xb3/0x138
   [<ffffffff8115241a>] sys_write+0x4a/0x71
   [<ffffffff8114ffd9>] ? sys_close+0xf0/0x138
   [<ffffffff8176deab>] system_call_fastpath+0x16/0x1b

sched_clock() can't be used with preempt enabled.  And we don't need
fast approach to get clock here, so let's use ktime API.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ef6912..22631e0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2283,7 +2283,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.mem_cgroup = mem,
 		.memcg_record = rec,
 	};
-	unsigned long start, end;
+	ktime_t start, end;
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2292,7 +2292,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						      sc.may_writepage,
 						      sc.gfp_mask);
 
-	start = sched_clock();
+	start = ktime_get();
 	/*
 	 * NOTE: Although we can get the priority field, using it
 	 * here is not a good idea, since it limits the pages we can scan.
@@ -2301,10 +2301,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * the priority and make it zero.
 	 */
 	shrink_zone(0, zone, &sc);
-	end = sched_clock();
+	end = ktime_get();
 
 	if (rec)
-		rec->elapsed += end - start;
+		rec->elapsed += ktime_to_ns(ktime_sub(end, start));
 	*scanned = sc.nr_scanned;
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
@@ -2319,7 +2319,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 {
 	struct zonelist *zonelist;
 	unsigned long nr_reclaimed;
-	unsigned long start, end;
+	ktime_t start, end;
 	int nid;
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
@@ -2337,7 +2337,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.gfp_mask = sc.gfp_mask,
 	};
 
-	start = sched_clock();
+	start = ktime_get();
 	/*
 	 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
 	 * take care of from where we get pages. So the node where we start the
@@ -2352,9 +2352,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 					    sc.gfp_mask);
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
-	end = sched_clock();
+	end = ktime_get();
 	if (rec)
-		rec->elapsed += end - start;
+		rec->elapsed += ktime_to_ns(ktime_sub(end, start));
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
-- 
cgit v1.1


From 47331231bb997a8adb79774fc4cf4bb48fe4e00a Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Thu, 25 Aug 2011 15:59:10 -0700
Subject: MAINTAINERS: Paul Menage has moved

Paul said: I left Google at the end of last week - if it's not bouncing
already, menage@google.com isn't going to work for much longer.

Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Acked-by: Paul Menage <paul@paulmenage.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ee0ac2c..d942920 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,7 +1883,7 @@ S:	Maintained
 F:	drivers/connector/
 
 CONTROL GROUPS (CGROUPS)
-M:	Paul Menage <menage@google.com>
+M:	Paul Menage <paul@paulmenage.org>
 M:	Li Zefan <lizf@cn.fujitsu.com>
 L:	containers@lists.linux-foundation.org
 S:	Maintained
@@ -1932,7 +1932,7 @@ S:	Maintained
 F:	tools/power/cpupower
 
 CPUSETS
-M:	Paul Menage <menage@google.com>
+M:	Paul Menage <paul@paulmenage.org>
 W:	http://www.bullopensource.org/cpuset/
 W:	http://oss.sgi.com/projects/cpusets/
 S:	Supported
-- 
cgit v1.1


From 4c30c6f566c0989ddaee3407da44751e340a63ed Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Thu, 25 Aug 2011 15:59:11 -0700
Subject: kernel/printk: do not turn off bootconsole in printk_late_init() if
 keep_bootcon

It seems that 7bf693951a8e ("console: allow to retain boot console via
boot option keep_bootcon") doesn't always achieve what it aims, as when
printk_late_init() runs it unconditionally turns off all boot consoles.
With this patch, I am able to see more messages on the boot console in
KVM guests than I can without, when keep_bootcon is specified.

I think it is appropriate for the relevant -stable trees.  However, it's
more of an annoyance than a serious bug (ideally you don't need to keep
the boot console around as console handover should be working -- I was
encountering a situation where the console handover wasn't working and
not having the boot console available meant I couldn't see why).

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg KH <gregkh@suse.de>
Acked-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Cc: <stable@kernel.org>		[2.6.39.x, 3.0.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 836a2ae..28a40d8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1604,7 +1604,7 @@ static int __init printk_late_init(void)
 	struct console *con;
 
 	for_each_console(con) {
-		if (con->flags & CON_BOOT) {
+		if (!keep_bootcon && con->flags & CON_BOOT) {
 			printk(KERN_INFO "turn off boot console %s%d\n",
 				con->name, con->index);
 			unregister_console(con);
-- 
cgit v1.1


From 439423f6894aa0dec22187526827456f5004baed Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 25 Aug 2011 15:59:12 -0700
Subject: vmscan: clear ZONE_CONGESTED for zone with good watermark

ZONE_CONGESTED is only cleared in kswapd, but pages can be freed in any
task.  It's possible ZONE_CONGESTED isn't cleared in some cases:

 1. the zone is already balanced just entering balance_pgdat() for
    order-0 because concurrent tasks free memory.  In this case, later
    check will skip the zone as it's balanced so the flag isn't cleared.

 2. high order balance fallbacks to order-0.  quote from Mel: At the
    end of balance_pgdat(), kswapd uses the following logic;

	If reclaiming at high order {
		for each zone {
			if all_unreclaimable
				skip
			if watermark is not met
				order = 0
				loop again

			/* watermark is met */
			clear congested
		}
	}

    i.e. it clears ZONE_CONGESTED if it the zone is balanced.  if not,
    it restarts balancing at order-0.  However, if the higher zones are
    balanced for order-0, kswapd will miss clearing ZONE_CONGESTED as
    that only happens after a zone is shrunk.  This can mean that
    wait_iff_congested() stalls unnecessarily.

This patch makes kswapd clear ZONE_CONGESTED during its initial
highmem->dma scan for zones that are already balanced.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 22631e0..b7719ec 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2529,6 +2529,9 @@ loop_again:
 					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
+			} else {
+				/* If balanced, clear the congested flag */
+				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 		}
 		if (i < 0)
-- 
cgit v1.1


From 284fb68d00c56e971ed01e0b4bac5ddd4d1b74ab Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Thu, 25 Aug 2011 15:59:13 -0700
Subject: rapidio: fix use of non-compatible registers

Replace/remove use of RIO v.1.2 registers/bits that are not
forward-compatible with newer versions of RapidIO specification.

RapidIO specification v.1.3 removed Write Port CSR, Doorbell CSR,
Mailbox CSR and Mailbox and Doorbell bits of the PEF CAR.

Use of removed (since RIO v.1.3) register bits affects users of
currently available 1.3 and 2.x compliant devices who may use not so
recent kernel versions.

Removing checks for unsupported bits makes corresponding routines
compatible with all versions of RapidIO specification.  Therefore,
backporting makes stable kernel versions compliant with RIO v.1.3 and
later as well.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Thomas Moll <thomas.moll@sysgo.com>
Cc: Chul Kim <chul.kim@idt.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/rionet.c       | 23 ++++++++---------------
 drivers/rapidio/rio-scan.c |  3 +--
 include/linux/rio_regs.h   | 18 +++++++++---------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 86ac38c..3bb1311 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -80,13 +80,13 @@ static int rionet_capable = 1;
  */
 static struct rio_dev **rionet_active;
 
-#define is_rionet_capable(pef, src_ops, dst_ops)		\
-			((pef & RIO_PEF_INB_MBOX) &&		\
-			 (pef & RIO_PEF_INB_DOORBELL) &&	\
+#define is_rionet_capable(src_ops, dst_ops)			\
+			((src_ops & RIO_SRC_OPS_DATA_MSG) &&	\
+			 (dst_ops & RIO_DST_OPS_DATA_MSG) &&	\
 			 (src_ops & RIO_SRC_OPS_DOORBELL) &&	\
 			 (dst_ops & RIO_DST_OPS_DOORBELL))
 #define dev_rionet_capable(dev) \
-	is_rionet_capable(dev->pef, dev->src_ops, dev->dst_ops)
+	is_rionet_capable(dev->src_ops, dev->dst_ops)
 
 #define RIONET_MAC_MATCH(x)	(*(u32 *)x == 0x00010001)
 #define RIONET_GET_DESTID(x)	(*(u16 *)(x + 4))
@@ -282,7 +282,6 @@ static int rionet_open(struct net_device *ndev)
 {
 	int i, rc = 0;
 	struct rionet_peer *peer, *tmp;
-	u32 pwdcsr;
 	struct rionet_private *rnet = netdev_priv(ndev);
 
 	if (netif_msg_ifup(rnet))
@@ -332,13 +331,8 @@ static int rionet_open(struct net_device *ndev)
 			continue;
 		}
 
-		/*
-		 * If device has initialized inbound doorbells,
-		 * send a join message
-		 */
-		rio_read_config_32(peer->rdev, RIO_WRITE_PORT_CSR, &pwdcsr);
-		if (pwdcsr & RIO_DOORBELL_AVAIL)
-			rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
+		/* Send a join message */
+		rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
 	}
 
       out:
@@ -492,7 +486,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
 {
 	int rc = -ENODEV;
-	u32 lpef, lsrc_ops, ldst_ops;
+	u32 lsrc_ops, ldst_ops;
 	struct rionet_peer *peer;
 	struct net_device *ndev = NULL;
 
@@ -515,12 +509,11 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
 	 * on later probes
 	 */
 	if (!rionet_check) {
-		rio_local_read_config_32(rdev->net->hport, RIO_PEF_CAR, &lpef);
 		rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
 					 &lsrc_ops);
 		rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
 					 &ldst_ops);
-		if (!is_rionet_capable(lpef, lsrc_ops, ldst_ops)) {
+		if (!is_rionet_capable(lsrc_ops, ldst_ops)) {
 			printk(KERN_ERR
 			       "%s: local device is not network capable\n",
 			       DRV_NAME);
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index ee89358..ebe77dd 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -505,8 +505,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
 	rdev->dev.dma_mask = &rdev->dma_mask;
 	rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 
-	if ((rdev->pef & RIO_PEF_INB_DOORBELL) &&
-	    (rdev->dst_ops & RIO_DST_OPS_DOORBELL))
+	if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
 		rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
 				   0, 0xffff);
 
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index 9026b30..218168a 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -36,12 +36,12 @@
 #define  RIO_PEF_PROCESSOR		0x20000000	/* [I] Processor */
 #define  RIO_PEF_SWITCH			0x10000000	/* [I] Switch */
 #define  RIO_PEF_MULTIPORT		0x08000000	/* [VI, 2.1] Multiport */
-#define  RIO_PEF_INB_MBOX		0x00f00000	/* [II] Mailboxes */
-#define  RIO_PEF_INB_MBOX0		0x00800000	/* [II] Mailbox 0 */
-#define  RIO_PEF_INB_MBOX1		0x00400000	/* [II] Mailbox 1 */
-#define  RIO_PEF_INB_MBOX2		0x00200000	/* [II] Mailbox 2 */
-#define  RIO_PEF_INB_MBOX3		0x00100000	/* [II] Mailbox 3 */
-#define  RIO_PEF_INB_DOORBELL		0x00080000	/* [II] Doorbells */
+#define  RIO_PEF_INB_MBOX		0x00f00000	/* [II, <= 1.2] Mailboxes */
+#define  RIO_PEF_INB_MBOX0		0x00800000	/* [II, <= 1.2] Mailbox 0 */
+#define  RIO_PEF_INB_MBOX1		0x00400000	/* [II, <= 1.2] Mailbox 1 */
+#define  RIO_PEF_INB_MBOX2		0x00200000	/* [II, <= 1.2] Mailbox 2 */
+#define  RIO_PEF_INB_MBOX3		0x00100000	/* [II, <= 1.2] Mailbox 3 */
+#define  RIO_PEF_INB_DOORBELL		0x00080000	/* [II, <= 1.2] Doorbells */
 #define  RIO_PEF_EXT_RT			0x00000200	/* [III, 1.3] Extended route table support */
 #define  RIO_PEF_STD_RT			0x00000100	/* [III, 1.3] Standard route table support */
 #define  RIO_PEF_CTLS			0x00000010	/* [III] CTLS */
@@ -102,7 +102,7 @@
 #define	RIO_SWITCH_RT_LIMIT	0x34	/* [III, 1.3] Switch Route Table Destination ID Limit CAR */
 #define	 RIO_RT_MAX_DESTID		0x0000ffff
 
-#define RIO_MBOX_CSR		0x40	/* [II] Mailbox CSR */
+#define RIO_MBOX_CSR		0x40	/* [II, <= 1.2] Mailbox CSR */
 #define  RIO_MBOX0_AVAIL		0x80000000	/* [II] Mbox 0 avail */
 #define  RIO_MBOX0_FULL			0x40000000	/* [II] Mbox 0 full */
 #define  RIO_MBOX0_EMPTY		0x20000000	/* [II] Mbox 0 empty */
@@ -128,8 +128,8 @@
 #define  RIO_MBOX3_FAIL			0x00000008	/* [II] Mbox 3 fail */
 #define  RIO_MBOX3_ERROR		0x00000004	/* [II] Mbox 3 error */
 
-#define RIO_WRITE_PORT_CSR	0x44	/* [I] Write Port CSR */
-#define RIO_DOORBELL_CSR	0x44	/* [II] Doorbell CSR */
+#define RIO_WRITE_PORT_CSR	0x44	/* [I, <= 1.2] Write Port CSR */
+#define RIO_DOORBELL_CSR	0x44	/* [II, <= 1.2] Doorbell CSR */
 #define  RIO_DOORBELL_AVAIL		0x80000000	/* [II] Doorbell avail */
 #define  RIO_DOORBELL_FULL		0x40000000	/* [II] Doorbell full */
 #define  RIO_DOORBELL_EMPTY		0x20000000	/* [II] Doorbell empty */
-- 
cgit v1.1


From 15b1a8f2b990c0c1dacfad0e5ccaf05c32c52147 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:14 -0700
Subject: drivers/video/backlight/ep93xx_bl.c: add missing include of
 linux/module.h

ep93xx_bl.c uses interfaces from linux/module.h, so it should include
that file.  This patch fixes build errors:

    CC [M]  drivers/video/backlight/ep93xx_bl.o
  drivers/video/backlight/ep93xx_bl.c:138: error: 'THIS_MODULE' undeclared here (not in a function)
  drivers/video/backlight/ep93xx_bl.c:158: error: expected declaration specifiers or '...' before string constant
  drivers/video/backlight/ep93xx_bl.c:158: warning: data definition has no type or storage class
  ...

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/ep93xx_bl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/backlight/ep93xx_bl.c b/drivers/video/backlight/ep93xx_bl.c
index 9f1e389..b058291 100644
--- a/drivers/video/backlight/ep93xx_bl.c
+++ b/drivers/video/backlight/ep93xx_bl.c
@@ -11,7 +11,7 @@
  * BRIGHT, on the Cirrus EP9307, EP9312, and EP9315 processors.
  */
 
-
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/fb.h>
-- 
cgit v1.1


From 86383b55791bd97e88ef493e33ef521ee244f3d9 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:15 -0700
Subject: leds: add missing include of linux/module.h

Add missing include of linux/module.h for drivers that use interfaces from
linux/module.h.  This patch fixes build errors.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Cc: Jonathan McDowell <noodles@earth.li>
Acked-by: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Cc: Magnus Damm <damm@opensource.se>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/leds-ams-delta.c | 1 +
 drivers/leds/leds-hp6xx.c     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/leds/leds-ams-delta.c b/drivers/leds/leds-ams-delta.c
index b982603..8c00937 100644
--- a/drivers/leds/leds-ams-delta.c
+++ b/drivers/leds/leds-ams-delta.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
diff --git a/drivers/leds/leds-hp6xx.c b/drivers/leds/leds-hp6xx.c
index e4ce1fd..bcfbd3a 100644
--- a/drivers/leds/leds-hp6xx.c
+++ b/drivers/leds/leds-hp6xx.c
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-- 
cgit v1.1


From 23751be0094012eb6b4756fa80ca54b3eb83069f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 25 Aug 2011 15:59:16 -0700
Subject: memcg: fix hierarchical oom locking

Commit 79dfdaccd1d5 ("memcg: make oom_lock 0 and 1 based rather than
counter") tried to oom lock the hierarchy and roll back upon
encountering an already locked memcg.

The code is confused when it comes to detecting a locked memcg, though,
so it would fail and rollback after locking one memcg and encountering
an unlocked second one.

The result is that oom-locking hierarchies fails unconditionally and
that every oom killer invocation simply goes to sleep on the oom
waitqueue forever.  The tasks practically hang forever without anyone
intervening, possibly holding locks that trip up unrelated tasks, too.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0e40f02..ebd1e86 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1841,29 +1841,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-	int lock_count = -1;
 	struct mem_cgroup *iter, *failed = NULL;
 	bool cond = true;
 
 	for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-		bool locked = iter->oom_lock;
-
-		iter->oom_lock = true;
-		if (lock_count == -1)
-			lock_count = iter->oom_lock;
-		else if (lock_count != locked) {
+		if (iter->oom_lock) {
 			/*
 			 * this subtree of our hierarchy is already locked
 			 * so we cannot give a lock.
 			 */
-			lock_count = 0;
 			failed = iter;
 			cond = false;
-		}
+		} else
+			iter->oom_lock = true;
 	}
 
 	if (!failed)
-		goto done;
+		return true;
 
 	/*
 	 * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1871,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 		}
 		iter->oom_lock = false;
 	}
-done:
-	return lock_count;
+	return false;
 }
 
 /*
-- 
cgit v1.1


From cc7993f6439b49909a8792660c4d0741fec9d584 Mon Sep 17 00:00:00 2001
From: Dilan Lee <dilee@nvidia.com>
Date: Thu, 25 Aug 2011 15:59:17 -0700
Subject: backlight: add a callback 'notify_after' for backlight control

We need a callback to do some things after pwm_enable, pwm_disable
and pwm_config.

Signed-off-by: Dilan Lee <dilee@nvidia.com>
Reviewed-by: Robert Morell <rmorell@nvidia.com>
Reviewed-by: Arun Murthy <arun.murthy@stericsson.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/pwm_bl.c | 9 +++++++++
 include/linux/pwm_backlight.h    | 1 +
 2 files changed, 10 insertions(+)

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index b8f38ec..8b5b2a4 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -28,6 +28,8 @@ struct pwm_bl_data {
 	unsigned int		lth_brightness;
 	int			(*notify)(struct device *,
 					  int brightness);
+	void			(*notify_after)(struct device *,
+					int brightness);
 	int			(*check_fb)(struct device *, struct fb_info *);
 };
 
@@ -55,6 +57,10 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
 		pwm_config(pb->pwm, brightness, pb->period);
 		pwm_enable(pb->pwm);
 	}
+
+	if (pb->notify_after)
+		pb->notify_after(pb->dev, brightness);
+
 	return 0;
 }
 
@@ -105,6 +111,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
 	pb->period = data->pwm_period_ns;
 	pb->notify = data->notify;
+	pb->notify_after = data->notify_after;
 	pb->check_fb = data->check_fb;
 	pb->lth_brightness = data->lth_brightness *
 		(data->pwm_period_ns / data->max_brightness);
@@ -172,6 +179,8 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
 		pb->notify(pb->dev, 0);
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
+	if (pb->notify_after)
+		pb->notify_after(pb->dev, 0);
 	return 0;
 }
 
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 5e3e25a..63d2df4 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -14,6 +14,7 @@ struct platform_pwm_backlight_data {
 	unsigned int pwm_period_ns;
 	int (*init)(struct device *dev);
 	int (*notify)(struct device *dev, int brightness);
+	void (*notify_after)(struct device *dev, int brightness);
 	void (*exit)(struct device *dev);
 	int (*check_fb)(struct device *dev, struct fb_info *info);
 };
-- 
cgit v1.1


From c53252b780e26c73c6a4e40bc14179447504cccd Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:18 -0700
Subject: backlight: fix module alias prefix for adp8870_bl

This is an i2c driver, not a platform driver, thus use "i2c" prefix for
the module alias.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/adp8870_bl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c
index 05a8832..d06886a 100644
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -1009,4 +1009,4 @@ module_exit(adp8870_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("ADP8870 Backlight driver");
-MODULE_ALIAS("platform:adp8870-backlight");
+MODULE_ALIAS("i2c:adp8870-backlight");
-- 
cgit v1.1


From b89d5f17d4b02ae9f3a691c2cb260e1929c6261b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:19 -0700
Subject: drivers/misc/fsa9480.c: fix a leak of the IRQ during init failure

Make sure we are passing the same cookie in all calls to
request_threaded_irq() and free_irq().

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Cc: Donggeun Kim <dg77.kim@samsung.com>
Cc: Minkyu Kang <mk7.kang@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/fsa9480.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
index 5325a7e..27dc0d2 100644
--- a/drivers/misc/fsa9480.c
+++ b/drivers/misc/fsa9480.c
@@ -455,7 +455,7 @@ static int __devinit fsa9480_probe(struct i2c_client *client,
 
 fail2:
 	if (client->irq)
-		free_irq(client->irq, NULL);
+		free_irq(client->irq, usbsw);
 fail1:
 	i2c_set_clientdata(client, NULL);
 	kfree(usbsw);
@@ -466,7 +466,7 @@ static int __devexit fsa9480_remove(struct i2c_client *client)
 {
 	struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
 	if (client->irq)
-		free_irq(client->irq, NULL);
+		free_irq(client->irq, usbsw);
 	i2c_set_clientdata(client, NULL);
 
 	sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
-- 
cgit v1.1


From 37b7bf67c36d3a2b426c0cb2787d948949574103 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:20 -0700
Subject: drivers/misc/ab8500-pwm.c: fix modalias

Since 43cc71eed12 ("platform: prefix MODALIAS with "platform:""), the
platform modalias is prefixed with "platform:".

This patch changes the MODULE_ALIAS to "platform:ab8500-pwm".

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Arun Murthy <arun.murthy@stericsson.com>
Cc: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/ab8500-pwm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c
index 54e3d05..3590315 100644
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c
@@ -164,5 +164,5 @@ subsys_initcall(ab8500_pwm_init);
 module_exit(ab8500_pwm_exit);
 MODULE_AUTHOR("Arun MURTHY <arun.murthy@stericsson.com>");
 MODULE_DESCRIPTION("AB8500 Pulse Width Modulation Driver");
-MODULE_ALIAS("AB8500 PWM driver");
+MODULE_ALIAS("platform:ab8500-pwm");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.1


From 02016bc0381c4af9153b4e4008edfadae2bd6abc Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Aug 2011 15:59:20 -0700
Subject: cris: add arch/cris/include/asm/serial.h

Fix the following build errors:

  drivers/tty/serial/8250_early.c:160: error: 'BASE_BAUD' undeclared (first use in this function): 1 errors in 1 logs
  drivers/tty/serial/8250_early.c:37:24: error: asm/serial.h: No such file or directory: 1 errors in 1 logs

I am not sure if (1843200 / 16) is suitable for cris, but most other
arch's define it as this value.

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/cris/include/asm/serial.h | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 arch/cris/include/asm/serial.h

diff --git a/arch/cris/include/asm/serial.h b/arch/cris/include/asm/serial.h
new file mode 100644
index 0000000..af7535a
--- /dev/null
+++ b/arch/cris/include/asm/serial.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_SERIAL_H
+#define _ASM_SERIAL_H
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ */
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* _ASM_SERIAL_H */
-- 
cgit v1.1


From 1424e21f66f4c51c31ba6ac188df46b43f51556b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Thu, 25 Aug 2011 15:59:21 -0700
Subject: drivers/leds/leds-bd2802.c: bd2802_unregister_led_classdev() should
 unregister all registered leds

bd2802_unregister_led_classdev() should unregister all registered
instances of led_classdev class that had registered by
bd2802_register_led_classdev().

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Kim Kyuwon <q1.kim@samsung.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/leds-bd2802.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c
index 3ebe382..ea21855 100644
--- a/drivers/leds/leds-bd2802.c
+++ b/drivers/leds/leds-bd2802.c
@@ -662,6 +662,11 @@ failed_unregister_led1_R:
 static void bd2802_unregister_led_classdev(struct bd2802_led *led)
 {
 	cancel_work_sync(&led->work);
+	led_classdev_unregister(&led->cdev_led2b);
+	led_classdev_unregister(&led->cdev_led2g);
+	led_classdev_unregister(&led->cdev_led2r);
+	led_classdev_unregister(&led->cdev_led1b);
+	led_classdev_unregister(&led->cdev_led1g);
 	led_classdev_unregister(&led->cdev_led1r);
 }
 
-- 
cgit v1.1


From 4e8896cde182b4eab6f2d0af9b6eef87720fae0d Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Thu, 25 Aug 2011 15:59:22 -0700
Subject: drivers/rtc/rtc-s3c.c: correct debug messages

RTC-S3C used to print out debug messages incorrectly.  This patch
corrects incorrect outputs.  (undecoded bcd numbers, incorrectly decoded
register values)

This patch affects the pr-debug messages only.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Changhwan Youn <chaos.youn@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s3c.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 9329dbb..067207a 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -152,10 +152,6 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 		goto retry_get_time;
 	}
 
-	pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
-		 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
-
 	rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
 	rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
 	rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
@@ -164,6 +160,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
 
 	rtc_tm->tm_year += 100;
+
+	pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
+		 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+
 	rtc_tm->tm_mon -= 1;
 
 	clk_disable(rtc_clk);
@@ -269,10 +270,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	clk_enable(rtc_clk);
 	pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
 		 alrm->enabled,
-		 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+		 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
 		 tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-
 	alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
 	writeb(0x00, base + S3C2410_RTCALM);
 
-- 
cgit v1.1


From 62d1760180c84cba68cc83696fa0bde0593007bd Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Thu, 25 Aug 2011 15:59:24 -0700
Subject: drivers/rtc/rtc-s3c.c: allow multiple open / allow no-ioctl-open'ed
 rtc to have irq.

The previous rtc-s3c had two issues related with its IRQ.

1. Users cannot open rtc multiple times because an open operation
   calls request_irq on the same IRQ.  (e.g., two user processes wants to
   open and read RTC time from rtc-s3c at the same time)

2. If alarm is set and no one has the rtc opened with filesystem
   (either the alarm is set by kernel/boot-loader or user set an alarm and
   closed rtc dev file), the pending bit is not cleared and no further
   interrupt is invoked.  When the alarm is used by the system itself such
   as a resume from suspend-to-RAM or other Low-power modes/idle, this is
   a critical issue.

This patch mitigates these issues by calling request_irq at probe and
free_irq at remove.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Changhwan Youn <chaos.youn@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s3c.c | 67 +++++++++++++++++++--------------------------------
 1 file changed, 25 insertions(+), 42 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index 067207a..4e7c04e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -319,49 +319,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
 	return 0;
 }
 
-static int s3c_rtc_open(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
-			  IRQF_DISABLED,  "s3c2410-rtc alarm", rtc_dev);
-
-	if (ret) {
-		dev_err(dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
-		return ret;
-	}
-
-	ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
-			  IRQF_DISABLED,  "s3c2410-rtc tick", rtc_dev);
-
-	if (ret) {
-		dev_err(dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
-		goto tick_err;
-	}
-
-	return ret;
-
- tick_err:
-	free_irq(s3c_rtc_alarmno, rtc_dev);
-	return ret;
-}
-
-static void s3c_rtc_release(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-
-	/* do not clear AIE here, it may be needed for wake */
-
-	free_irq(s3c_rtc_alarmno, rtc_dev);
-	free_irq(s3c_rtc_tickno, rtc_dev);
-}
-
 static const struct rtc_class_ops s3c_rtcops = {
-	.open		= s3c_rtc_open,
-	.release	= s3c_rtc_release,
 	.read_time	= s3c_rtc_gettime,
 	.set_time	= s3c_rtc_settime,
 	.read_alarm	= s3c_rtc_getalarm,
@@ -425,6 +383,9 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
 {
 	struct rtc_device *rtc = platform_get_drvdata(dev);
 
+	free_irq(s3c_rtc_alarmno, rtc);
+	free_irq(s3c_rtc_tickno, rtc);
+
 	platform_set_drvdata(dev, NULL);
 	rtc_device_unregister(rtc);
 
@@ -548,10 +509,32 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev)
 
 	s3c_rtc_setfreq(&pdev->dev, 1);
 
+	ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
+			  IRQF_DISABLED,  "s3c2410-rtc alarm", rtc);
+	if (ret) {
+		dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
+		goto err_alarm_irq;
+	}
+
+	ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
+			  IRQF_DISABLED,  "s3c2410-rtc tick", rtc);
+	if (ret) {
+		dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
+		free_irq(s3c_rtc_alarmno, rtc);
+		goto err_tick_irq;
+	}
+
 	clk_disable(rtc_clk);
 
 	return 0;
 
+ err_tick_irq:
+	free_irq(s3c_rtc_alarmno, rtc);
+
+ err_alarm_irq:
+	platform_set_drvdata(pdev, NULL);
+	rtc_device_unregister(rtc);
+
  err_nortc:
 	s3c_rtc_enable(pdev, 0);
 	clk_disable(rtc_clk);
-- 
cgit v1.1


From 671ee7f0ce62e4b991b47fcf1c161c3f710dabbc Mon Sep 17 00:00:00 2001
From: Liu Gang-B34182 <B34182@freescale.com>
Date: Thu, 25 Aug 2011 15:59:25 -0700
Subject: arch/powerpc/sysdev/fsl_rio.c: correct IECSR register clear value

This bug causes the IECSR register clear failure.  In this case, the RETE
(retry error threshold exceeded) interrupt will be generated and cannot be
cleared.  So the related ISR may be called persistently.

The RETE bit in IECSR is cleared by writing a 1 to it.

Signed-off-by: Liu Gang <Gang.Liu@freescale.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/sysdev/fsl_rio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 2de8551..c65f75a 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -54,6 +54,7 @@
 #define ODSR_CLEAR		0x1c00
 #define LTLEECSR_ENABLE_ALL	0xFFC000FC
 #define ESCSR_CLEAR		0x07120204
+#define IECSR_CLEAR		0x80000000
 
 #define RIO_PORT1_EDCSR		0x0640
 #define RIO_PORT2_EDCSR		0x0680
@@ -1089,11 +1090,11 @@ static void port_error_handler(struct rio_mport *port, int offset)
 
 	if (offset == 0) {
 		out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
-		out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
 		out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
 	} else {
 		out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
-		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), 0);
+		out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
 		out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
 	}
 }
-- 
cgit v1.1


From a94cc4e6c0a26a7c8f79a432ab2c89534aa674d5 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 26 Aug 2011 12:20:59 +0100
Subject: sfi: table irq 0xFF means 'no interrupt'

According to the SFI specification irq number 0xFF means device has no
interrupt or interrupt attached via GPIO.

Currently, we don't handle this special case and set irq field in
*_board_info structs to 255.  It leads to confusion in some drivers.
Accelerometer driver tries to register interrupt 255, fails and prints
"Cannot get IRQ" to dmesg.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/platform/mrst/mrst.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 7000e74b..58425ad 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 			irq_attr.trigger = 1;
 			irq_attr.polarity = 1;
 			io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
-		}
+		} else
+			pentry->irq = 0; /* No irq */
+
 		switch (pentry->type) {
 		case SFI_DEV_TYPE_IPC:
 			/* ID as IRQ is a hack that will go away */
-- 
cgit v1.1


From f5b940997397229975ea073679b03967932a541b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 26 Aug 2011 18:03:11 -0400
Subject: All Arch: remove linkage for sys_nfsservctl system call

The nfsservctl system call is now gone, so we should remove all
linkage for it.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/systbls.S            | 2 +-
 arch/arm/kernel/calls.S                | 2 +-
 arch/avr32/kernel/syscall_table.S      | 2 +-
 arch/blackfin/mach-common/entry.S      | 2 +-
 arch/cris/arch-v10/kernel/entry.S      | 2 +-
 arch/cris/arch-v32/kernel/entry.S      | 2 +-
 arch/frv/kernel/entry.S                | 2 +-
 arch/h8300/kernel/syscalls.S           | 2 +-
 arch/ia64/kernel/entry.S               | 2 +-
 arch/m32r/kernel/syscall_table.S       | 2 +-
 arch/m68k/kernel/syscalltable.S        | 2 +-
 arch/microblaze/kernel/syscall_table.S | 2 +-
 arch/mips/kernel/scall32-o32.S         | 2 +-
 arch/mips/kernel/scall64-64.S          | 2 +-
 arch/mips/kernel/scall64-n32.S         | 2 +-
 arch/mips/kernel/scall64-o32.S         | 2 +-
 arch/mn10300/kernel/entry.S            | 2 +-
 arch/s390/kernel/compat_wrapper.S      | 6 ------
 arch/s390/kernel/syscalls.S            | 2 +-
 arch/sh/kernel/syscalls_32.S           | 2 +-
 arch/sh/kernel/syscalls_64.S           | 2 +-
 arch/sparc/kernel/sys32.S              | 1 -
 arch/sparc/kernel/systbls_32.S         | 2 +-
 arch/sparc/kernel/systbls_64.S         | 2 +-
 arch/x86/ia32/ia32entry.S              | 2 +-
 arch/x86/include/asm/unistd_64.h       | 2 +-
 arch/x86/kernel/syscall_table_32.S     | 2 +-
 arch/xtensa/include/asm/unistd.h       | 2 +-
 fs/compat.c                            | 5 -----
 include/asm-generic/unistd.h           | 2 +-
 include/linux/compat.h                 | 1 -
 include/linux/syscalls.h               | 3 ---
 kernel/sys_ni.c                        | 1 -
 33 files changed, 27 insertions(+), 44 deletions(-)

diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index b9c28f3..6acea1f 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -360,7 +360,7 @@ sys_call_table:
 	.quad sys_newuname
 	.quad sys_nanosleep			/* 340 */
 	.quad sys_mremap
-	.quad sys_nfsservctl
+	.quad sys_ni_syscall			/* old nfsservctl */
 	.quad sys_setresuid
 	.quad sys_getresuid
 	.quad sys_pciconfig_read		/* 345 */
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 80f7896..9943e9e 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -178,7 +178,7 @@
 		CALL(sys_ni_syscall)		/* vm86 */
 		CALL(sys_ni_syscall)		/* was sys_query_module */
 		CALL(sys_poll)
-		CALL(sys_nfsservctl)
+		CALL(sys_ni_syscall)		/* was nfsservctl */
 /* 170 */	CALL(sys_setresgid16)
 		CALL(sys_getresgid16)
 		CALL(sys_prctl)
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index c7fd394..6eba535 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -158,7 +158,7 @@ sys_call_table:
 	.long	sys_sched_rr_get_interval
 	.long	sys_nanosleep
 	.long	sys_poll
-	.long	sys_nfsservctl		/* 145 */
+	.long	sys_ni_syscall		/* 145 was nfsservctl */
 	.long	sys_setresgid
 	.long	sys_getresgid
 	.long	sys_prctl
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 225d311..e413729 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1543,7 +1543,7 @@ ENTRY(_sys_call_table)
 	.long _sys_ni_syscall	/* for vm86 */
 	.long _sys_ni_syscall	/* old "query_module" */
 	.long _sys_ni_syscall	/* sys_poll */
-	.long _sys_nfsservctl
+	.long _sys_ni_syscall   /* old nfsservctl */
 	.long _sys_setresgid	/* setresgid16 */	/* 170 */
 	.long _sys_getresgid	/* getresgid16 */
 	.long _sys_prctl
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 1161883..592fbe9 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -771,7 +771,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* sys_vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall    /* old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 84fed7e..c3ea4694 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -714,7 +714,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* sys_vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 017d6d7..5ba23f7 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1358,7 +1358,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* for vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index f4b2e67..4be2ea2 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -183,7 +183,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_ni_syscall)	/* for vm86 */
 	.long SYMBOL_NAME(sys_ni_syscall)	/* sys_query_module */
 	.long SYMBOL_NAME(sys_poll)
-	.long SYMBOL_NAME(sys_nfsservctl)
+	.long SYMBOL_NAME(sys_ni_syscall)	/* old nfsservctl */
 	.long SYMBOL_NAME(sys_setresgid16)	/* 170 */
 	.long SYMBOL_NAME(sys_getresgid16)
 	.long SYMBOL_NAME(sys_prctl)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 97dd2ab..198c753 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1614,7 +1614,7 @@ sys_call_table:
 	data8 sys_sched_get_priority_min
 	data8 sys_sched_rr_get_interval
 	data8 sys_nanosleep
-	data8 sys_nfsservctl
+	data8 sys_ni_syscall			// old nfsservctl
 	data8 sys_prctl				// 1170
 	data8 sys_getpagesize
 	data8 sys_mmap2
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S
index 528f2e6..f365c19 100644
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
 	.long sys_tas			/* vm86 syscall holder */
 	.long sys_ni_syscall		/* query_module syscall holder */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* was nfsservctl */
 	.long sys_setresgid		/* 170 */
 	.long sys_getresgid
 	.long sys_prctl
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 00d1452..c468f2e 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -189,7 +189,7 @@ ENTRY(sys_call_table)
 	.long sys_getpagesize
 	.long sys_ni_syscall		/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* old nfsservctl */
 	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index d915a12..8789daa 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -173,7 +173,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall		/* sys_vm86 */
 	.long sys_ni_syscall		/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall		/* old nfsservctl */
 	.long sys_setresgid		/* 170 */
 	.long sys_getresgid
 	.long sys_prctl
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index e521420..865bc7a 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -424,7 +424,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_getresuid		3
 	sys	sys_ni_syscall		0	/* was sys_query_module */
 	sys	sys_poll		3
-	sys	sys_nfsservctl		3
+	sys	sys_ni_syscall		0	/* was nfsservctl */
 	sys	sys_setresgid		3	/* 4190 */
 	sys	sys_getresgid		3
 	sys	sys_prctl		5
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 85874d6..fb7334b 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -299,7 +299,7 @@ sys_call_table:
 	PTR	sys_ni_syscall			/* 5170, was get_kernel_syms */
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_quotactl
-	PTR	sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_ni_syscall			/* res. for getpmsg */
 	PTR	sys_ni_syscall			/* 5175  for putpmsg */
 	PTR	sys_ni_syscall			/* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b85842f..f9296e8 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -294,7 +294,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ni_syscall			/* 6170, was get_kernel_syms */
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_quotactl
-	PTR	compat_sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_ni_syscall			/* res. for getpmsg */
 	PTR	sys_ni_syscall			/* 6175  for putpmsg */
 	PTR	sys_ni_syscall			/* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 46c4763..4d7c982 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -392,7 +392,7 @@ sys_call_table:
 	PTR	sys_getresuid
 	PTR	sys_ni_syscall			/* was query_module */
 	PTR	sys_poll
-	PTR	compat_sys_nfsservctl
+	PTR	sys_ni_syscall			/* was nfsservctl */
 	PTR	sys_setresgid			/* 4190 */
 	PTR	sys_getresgid
 	PTR	sys_prctl
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index ae435e1..3e3620d 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -589,7 +589,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 08ab9aa..7526db6 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -665,12 +665,6 @@ ENTRY(sys32_poll_wrapper)
 	lgfr	%r4,%r4			# long
 	jg	sys_poll		# branch to system call
 
-ENTRY(compat_sys_nfsservctl_wrapper)
-	lgfr	%r2,%r2			# int
-	llgtr	%r3,%r3			# struct compat_nfsctl_arg*
-	llgtr	%r4,%r4			# union compat_nfsctl_res*
-	jg	compat_sys_nfsservctl	# branch to system call
-
 ENTRY(sys32_setresgid16_wrapper)
 	llgfr	%r2,%r2			# __kernel_old_gid_emu31_t
 	llgfr	%r3,%r3			# __kernel_old_gid_emu31_t
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 6ee39ef..73eb08c 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper)	/* 165 old get
 NI_SYSCALL							/* for vm86 */
 NI_SYSCALL							/* old sys_query_module */
 SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper)
+NI_SYSCALL							/* old nfsservctl */
 SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper)	/* 170 old setresgid16 syscall */
 SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper)	/* old getresgid16 syscall */
 SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S
index 39b051d..293e39c 100644
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -185,7 +185,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 089c4d8..ceb34b9 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -189,7 +189,7 @@ sys_call_table:
 	.long sys_ni_syscall	/* vm86 */
 	.long sys_ni_syscall	/* old "query_module" */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* was nfsservctl */
 	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 44e5faf..d97f3eb 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -81,7 +81,6 @@ SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
 SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
 SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1)
 SIGN1(sys32_mlockall, sys_mlockall, %o0)
-SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0)
 SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
 SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
 SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 6e492d5..09d8ec4 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -67,7 +67,7 @@ sys_call_table:
 /*235*/	.long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/	.long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 /*245*/	.long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/	.long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/	.long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
 /*255*/	.long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/	.long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
 /*265*/	.long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f566518..c9296ab 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -145,7 +145,7 @@ sys_call_table:
 	.word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/	.word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 	.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/	.word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/	.word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
 	.word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
 /*260*/	.word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
 	.word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a0e866d..54edb207 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -672,7 +672,7 @@ ia32_sys_call_table:
 	.quad sys32_vm86_warning	/* vm86 */ 
 	.quad quiet_ni_syscall	/* query_module */
 	.quad sys_poll
-	.quad compat_sys_nfsservctl
+	.quad quiet_ni_syscall /* old nfsservctl */
 	.quad sys_setresgid16	/* 170 */
 	.quad sys_getresgid16
 	.quad sys_prctl
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d92641c..2010405 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
 __SYSCALL(__NR_quotactl, sys_quotactl)
 
 #define __NR_nfsservctl				180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
 
 /* reserved for LiS/STREAMS */
 #define __NR_getpmsg				181
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a04..bc19be3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
 	.long ptregs_vm86
 	.long sys_ni_syscall	/* Old sys_query_module */
 	.long sys_poll
-	.long sys_nfsservctl
+	.long sys_ni_syscall	/* Old nfsservctl */
 	.long sys_setresgid16	/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index a6f934f..798ee6d 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -455,7 +455,7 @@ __SYSCALL(203, sys_reboot, 3)
 #define __NR_quotactl 				204
 __SYSCALL(204, sys_quotactl, 4)
 #define __NR_nfsservctl 			205
-__SYSCALL(205, sys_nfsservctl, 3)
+__SYSCALL(205, sys_ni_syscall, 0)
 #define __NR__sysctl 				206
 __SYSCALL(206, sys_sysctl, 1)
 #define __NR_bdflush 				207
diff --git a/fs/compat.c b/fs/compat.c
index 0b48d01..58b1da4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1675,11 +1675,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 }
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
-long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
-{
-	return sys_ni_syscall();
-}
-
 #ifdef CONFIG_EPOLL
 
 #ifdef HAVE_SET_RESTORE_SIGMASK
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 4f769593..f4c38d8c 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -143,7 +143,7 @@ __SYSCALL(__NR_pivot_root, sys_pivot_root)
 
 /* fs/nfsctl.c */
 #define __NR_nfsservctl 42
-__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
 
 /* fs/open.c */
 #define __NR3264_statfs 43
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8779405..c6e7523 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -438,7 +438,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 				 struct compat_timespec __user *tsp,
 				 const compat_sigset_t __user *sigmask,
 				 compat_size_t sigsetsize);
-asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
 asmlinkage long compat_sys_signalfd4(int ufd,
 				     const compat_sigset_t __user *sigmask,
 				     compat_size_t sigsetsize, int flags);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8c03b98..1ff0ec2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,9 +702,6 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
 asmlinkage long sys_sysinfo(struct sysinfo __user *info);
 asmlinkage long sys_sysfs(int option,
 				unsigned long arg1, unsigned long arg2);
-asmlinkage long sys_nfsservctl(int cmd,
-				struct nfsctl_arg __user *arg,
-				void __user *res);
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
 asmlinkage long sys_uselib(const char __user *library);
 asmlinkage long sys_ni_syscall(void);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 62cbc88..a9a5de0 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
 	return -ENOSYS;
 }
 
-cond_syscall(sys_nfsservctl);
 cond_syscall(sys_quotactl);
 cond_syscall(sys32_quotactl);
 cond_syscall(sys_acct);
-- 
cgit v1.1


From d0168fdc7a1301c8139eb63a465038f63e342b39 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Sat, 27 Aug 2011 14:21:00 +0200
Subject: ARM: mach-shmobile: sh7372 LCDC1 suspend fix V2 (incremental)

This patch updates the recently submitted
"Associate the HDMI clock together with LCDC1 on sh7372"
to V2 with the following change:
 - Use lcdc1_device on AP4EVB to build properly.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-shmobile/board-ap4evb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index fadbe5b..523f608 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1412,7 +1412,7 @@ static void __init ap4evb_init(void)
 	fsi_init_pm_clock();
 	sh7372_pm_init();
 	pm_clk_add(&fsi_device.dev, "spu2");
-	pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
+	pm_clk_add(&lcdc1_device.dev, "hdmi");
 }
 
 static void __init ap4evb_timer_init(void)
-- 
cgit v1.1


From c6a389f123b9f68d605bb7e0f9b32ec1e3e14132 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 28 Aug 2011 21:16:01 -0700
Subject: Linux 3.1-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 788511f..c3e90c5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = "Divemaster Edition"
 
 # *DOCUMENTATION*
-- 
cgit v1.1


From 10167873a415ba642aa2eee0c310ebd5a4633573 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Sun, 4 Sep 2011 20:20:53 -0600
Subject: OMAP2430: hwmod: musb: add missing terminator to
 omap2430_usbhsotg_addrs[]

Add a missing array terminator to omap2430_usbhsotg_addrs[].  Without
this terminator, the omap_hwmod resource building code runs off the
end of the array, resulting in at least this error -- if not worse
behavior:

[    0.578002] musb-omap2430: failed to claim resource 4
[    0.583465] omap_device: musb-omap2430: build failed (-16)
[    0.589294] Could not build omap_device for musb-omap2430 usb_otg_hs

This should have been part of commit
78183f3fdf76f422431a81852468be01b36db325 ("omap_hwmod: use a null
structure record to terminate omap_hwmod_addr_space arrays") but was
evidently missed.

Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index 16743c7..408193d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -192,6 +192,7 @@ static struct omap_hwmod_addr_space omap2430_usbhsotg_addrs[] = {
 		.pa_end		= OMAP243X_HS_BASE + SZ_4K - 1,
 		.flags		= ADDR_TYPE_RT
 	},
+	{ }
 };
 
 /*  l4_core ->usbhsotg  interface */
-- 
cgit v1.1