summaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6/xfs_buf.h
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-03-26 09:16:45 +1100
committerDave Chinner <david@fromorbit.com>2011-03-26 09:16:45 +1100
commit0e6e847ffe37436e331c132639f9f872febce82e (patch)
treeeb440ef910af695eafef787e12badc64fac0f8fe /fs/xfs/linux-2.6/xfs_buf.h
parent704b2907c2d47ceb187c0e25a6bbc2174b198f2f (diff)
downloadop-kernel-dev-0e6e847ffe37436e331c132639f9f872febce82e.zip
op-kernel-dev-0e6e847ffe37436e331c132639f9f872febce82e.tar.gz
xfs: stop using the page cache to back the buffer cache
Now that the buffer cache has it's own LRU, we do not need to use the page cache to provide persistent caching and reclaim infrastructure. Convert the buffer cache to use alloc_pages() instead of the page cache. This will remove all the overhead of page cache management from setup and teardown of the buffers, as well as needing to mark pages accessed as we find buffers in the buffer cache. By avoiding the page cache, we also remove the need to keep state in the page_private(page) field for persistant storage across buffer free/buffer rebuild and so all that code can be removed. This also fixes the long-standing problem of not having enough bits in the page_private field to track all the state needed for a 512 sector/64k page setup. It also removes the need for page locking during reads as the pages are unique to the buffer and nobody else will be attempting to access them. Finally, it removes the buftarg address space lock as a point of global contention on workloads that allocate and free buffers quickly such as when creating or removing large numbers of inodes in parallel. This remove the 16TB limit on filesystem size on 32 bit machines as the page index (32 bit) is no longer used for lookups of metadata buffers - the buffer cache is now solely indexed by disk address which is stored in a 64 bit field in the buffer. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.h')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h40
1 files changed, 4 insertions, 36 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index cbe6595..a9a1c45 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -61,30 +61,11 @@ typedef enum {
#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
/* flags used only internally */
-#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define _XBF_KMEM (1 << 20)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
-/*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
-#define _XBF_PAGE_LOCKED (1 << 22)
-
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_LOCK, "LOCK" }, /* should never be set */\
{ XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
{ XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
- { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
- { _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
-
+ { _XBF_KMEM, "KMEM" }, \
+ { _XBF_DELWRI_Q, "DELWRI_Q" }
typedef enum {
XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
- struct address_space *bt_mapping;
+ struct backing_dev_info *bt_bdi;
struct xfs_mount *bt_mount;
unsigned int bt_bsize;
unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
unsigned int bt_lru_nr;
} xfs_buftarg_t;
-/*
- * xfs_buf_t: Buffer structure for pagecache-based buffers
- *
- * This buffer structure is used by the pagecache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.
- *
- * The buffer structure is used on a temporary basis only, and discarded when
- * released. The real data storage is recorded in the pagecache. Buffers are
- * hashed to the block device on which the file system resides.
- */
-
struct xfs_buf;
typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
OpenPOWER on IntegriCloud