summaryrefslogtreecommitdiffstats
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c82
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c79
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h64
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c68
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c449
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c130
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c81
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h62
12 files changed, 645 insertions, 415 deletions
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 39f4f80..44ce516 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -114,6 +114,8 @@ xfs_get_acl(struct inode *inode, int type)
if (acl != ACL_NOT_CACHED)
return acl;
+ trace_xfs_get_acl(ip);
+
switch (type) {
case ACL_TYPE_ACCESS:
ea_name = SGI_ACL_FILE;
@@ -218,40 +220,6 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
}
-int
-xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
- struct xfs_inode *ip;
- struct posix_acl *acl;
- int error = -EAGAIN;
-
- ip = XFS_I(inode);
- trace_xfs_check_acl(ip);
-
- /*
- * If there is no attribute fork no ACL exists on this inode and
- * we can skip the whole exercise.
- */
- if (!XFS_IFORK_Q(ip))
- return -EAGAIN;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- return -ECHILD;
- return -EAGAIN;
- }
-
- acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- }
-
- return error;
-}
-
static int
xfs_set_mode(struct inode *inode, mode_t mode)
{
@@ -264,7 +232,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
iattr.ia_mode = mode;
iattr.ia_ctime = current_fs_time(inode->i_sb);
- error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
return error;
@@ -297,29 +265,23 @@ posix_acl_default_exists(struct inode *inode)
* No need for i_mutex because the inode is not yet exposed to the VFS.
*/
int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
{
- struct posix_acl *clone;
- mode_t mode;
+ mode_t mode = inode->i_mode;
int error = 0, inherit = 0;
if (S_ISDIR(inode->i_mode)) {
- error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+ error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
if (error)
- return error;
+ goto out;
}
- clone = posix_acl_clone(default_acl, GFP_KERNEL);
- if (!clone)
- return -ENOMEM;
-
- mode = inode->i_mode;
- error = posix_acl_create_masq(clone, &mode);
+ error = posix_acl_create(&acl, GFP_KERNEL, &mode);
if (error < 0)
- goto out_release_clone;
+ return error;
/*
- * If posix_acl_create_masq returns a positive value we need to
+ * If posix_acl_create returns a positive value we need to
* inherit a permission that can't be represented using the Unix
* mode bits and we actually need to set an ACL.
*/
@@ -328,20 +290,20 @@ xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
error = xfs_set_mode(inode, mode);
if (error)
- goto out_release_clone;
+ goto out;
if (inherit)
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
- out_release_clone:
- posix_acl_release(clone);
+out:
+ posix_acl_release(acl);
return error;
}
int
xfs_acl_chmod(struct inode *inode)
{
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
int error;
if (S_ISLNK(inode->i_mode))
@@ -351,16 +313,12 @@ xfs_acl_chmod(struct inode *inode)
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
- posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
-
- error = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!error)
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (error)
+ return error;
- posix_acl_release(clone);
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+ posix_acl_release(acl);
return error;
}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7559861..63e971e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
isize = xfs_ioend_new_eof(ioend);
if (isize) {
+ trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
ip->i_d.di_size = isize;
xfs_mark_inode_dirty(ip);
}
@@ -894,11 +895,6 @@ out_invalidate:
* For unwritten space on the page we need to start the conversion to
* regular allocated space.
* For any other dirty buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush the page, we
- * have to check the process flags first, if we are already in a transaction
- * or disk I/O during allocations is off, we need to fail the writepage and
- * redirty the page.
*/
STATIC int
xfs_vm_writepage(
@@ -906,7 +902,6 @@ xfs_vm_writepage(
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
- int delalloc, unwritten;
struct buffer_head *bh, *head;
struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
@@ -938,15 +933,10 @@ xfs_vm_writepage(
goto redirty;
/*
- * We need a transaction if there are delalloc or unwritten buffers
- * on the page.
- *
- * If we need a transaction and the process flags say we are already
- * in a transaction, or no IO is allowed then mark the page dirty
- * again and leave the page as is.
+ * Given that we do not allow direct reclaim to call us, we should
+ * never be called while in a filesystem transaction.
*/
- xfs_count_page_state(page, &delalloc, &unwritten);
- if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
+ if (WARN_ON(current->flags & PF_FSTRANS))
goto redirty;
/* Is this page beyond the end of the file? */
@@ -1339,6 +1329,9 @@ xfs_end_io_direct_write(
} else {
xfs_finish_ioend_sync(ioend);
}
+
+ /* XXX: probably should move into the real I/O completion handler */
+ inode_dio_done(ioend->io_inode);
}
STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5e68099..b2b4119 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -499,16 +499,14 @@ found:
spin_unlock(&pag->pag_buf_lock);
xfs_perag_put(pag);
- if (xfs_buf_cond_lock(bp)) {
- /* failed, so wait for the lock if requested. */
- if (!(flags & XBF_TRYLOCK)) {
- xfs_buf_lock(bp);
- XFS_STATS_INC(xb_get_locked_waited);
- } else {
+ if (!xfs_buf_trylock(bp)) {
+ if (flags & XBF_TRYLOCK) {
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
+ xfs_buf_lock(bp);
+ XFS_STATS_INC(xb_get_locked_waited);
}
/*
@@ -594,10 +592,8 @@ _xfs_buf_read(
ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
- bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+ bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
status = xfs_buf_iorequest(bp);
if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
@@ -681,7 +677,6 @@ xfs_buf_read_uncached(
return NULL;
/* set up the buffer for a read IO */
- xfs_buf_lock(bp);
XFS_BUF_SET_ADDR(bp, daddr);
XFS_BUF_READ(bp);
XFS_BUF_BUSY(bp);
@@ -816,8 +811,6 @@ xfs_buf_get_uncached(
goto fail_free_mem;
}
- xfs_buf_unlock(bp);
-
trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
@@ -896,8 +889,8 @@ xfs_buf_rele(
* to push on stale inode buffers.
*/
int
-xfs_buf_cond_lock(
- xfs_buf_t *bp)
+xfs_buf_trylock(
+ struct xfs_buf *bp)
{
int locked;
@@ -907,15 +900,8 @@ xfs_buf_cond_lock(
else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
xfs_log_force(bp->b_target->bt_mount, 0);
- trace_xfs_buf_cond_lock(bp, _RET_IP_);
- return locked ? 0 : -EBUSY;
-}
-
-int
-xfs_buf_lock_value(
- xfs_buf_t *bp)
-{
- return bp->b_sema.count;
+ trace_xfs_buf_trylock(bp, _RET_IP_);
+ return locked;
}
/*
@@ -929,7 +915,7 @@ xfs_buf_lock_value(
*/
void
xfs_buf_lock(
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
trace_xfs_buf_lock(bp, _RET_IP_);
@@ -950,7 +936,7 @@ xfs_buf_lock(
*/
void
xfs_buf_unlock(
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
atomic_inc(&bp->b_hold);
@@ -1121,7 +1107,7 @@ xfs_bioerror_relse(
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_iodone = NULL;
if (!(fl & XBF_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
@@ -1223,23 +1209,21 @@ _xfs_buf_ioapply(
total_nr_pages = bp->b_page_count;
map_i = 0;
- if (bp->b_flags & XBF_ORDERED) {
- ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_FLUSH_FUA;
- } else if (bp->b_flags & XBF_LOG_BUFFER) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
- } else if (bp->b_flags & _XBF_RUN_QUEUES) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
+ if (bp->b_flags & XBF_WRITE) {
+ if (bp->b_flags & XBF_SYNCIO)
+ rw = WRITE_SYNC;
+ else
+ rw = WRITE;
+ if (bp->b_flags & XBF_FUA)
+ rw |= REQ_FUA;
+ if (bp->b_flags & XBF_FLUSH)
+ rw |= REQ_FLUSH;
+ } else if (bp->b_flags & XBF_READ_AHEAD) {
+ rw = READA;
} else {
- rw = (bp->b_flags & XBF_WRITE) ? WRITE :
- (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
+ rw = READ;
}
-
next_chunk:
atomic_inc(&bp->b_io_remaining);
nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1694,15 +1678,14 @@ xfs_buf_delwri_split(
list_for_each_entry_safe(bp, n, dwq, b_list) {
ASSERT(bp->b_flags & XBF_DELWRI);
- if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
if (!force &&
time_before(jiffies, bp->b_queuetime + age)) {
xfs_buf_unlock(bp);
break;
}
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
- _XBF_RUN_QUEUES);
+ bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, list);
trace_xfs_buf_delwri_split(bp, _RET_IP_);
@@ -1738,14 +1721,6 @@ xfs_buf_cmp(
return 0;
}
-void
-xfs_buf_delwri_sort(
- xfs_buftarg_t *target,
- struct list_head *list)
-{
- list_sort(NULL, list, xfs_buf_cmp);
-}
-
STATIC int
xfsbufd(
void *data)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5f..6a83b46 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -46,43 +46,46 @@ typedef enum {
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
-#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_ORDERED (1 << 11)/* use ordered writes */
-#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
-#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
/* flags used only as arguments to access routines */
-#define XBF_LOCK (1 << 14)/* lock requested */
-#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
+#define XBF_LOCK (1 << 15)/* lock requested */
+#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
/* flags used only internally */
-#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
-#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
-#define _XBF_KMEM (1 << 20)/* backed by heap memory */
-#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
+#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
{ XBF_READ, "READ" }, \
{ XBF_WRITE, "WRITE" }, \
+ { XBF_READ_AHEAD, "READ_AHEAD" }, \
{ XBF_MAPPED, "MAPPED" }, \
{ XBF_ASYNC, "ASYNC" }, \
{ XBF_DONE, "DONE" }, \
{ XBF_DELWRI, "DELWRI" }, \
{ XBF_STALE, "STALE" }, \
- { XBF_ORDERED, "ORDERED" }, \
- { XBF_READ_AHEAD, "READ_AHEAD" }, \
+ { XBF_SYNCIO, "SYNCIO" }, \
+ { XBF_FUA, "FUA" }, \
+ { XBF_FLUSH, "FLUSH" }, \
{ XBF_LOCK, "LOCK" }, /* should never be set */\
{ XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
{ XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
- { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }
@@ -91,11 +94,6 @@ typedef enum {
XBT_FORCE_FLUSH = 1,
} xfs_buftarg_flags_t;
-typedef struct xfs_bufhash {
- struct list_head bh_list;
- spinlock_t bh_lock;
-} xfs_bufhash_t;
-
typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
@@ -151,7 +149,7 @@ typedef struct xfs_buf {
xfs_buf_iodone_t b_iodone; /* I/O completion function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
- void *b_fspriv2;
+ struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
unsigned long b_queuetime; /* time buffer was queued */
@@ -192,10 +190,11 @@ extern void xfs_buf_free(xfs_buf_t *);
extern void xfs_buf_rele(xfs_buf_t *);
/* Locking and Unlocking Buffers */
-extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int xfs_buf_lock_value(xfs_buf_t *);
+extern int xfs_buf_trylock(xfs_buf_t *);
extern void xfs_buf_lock(xfs_buf_t *);
extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+ ((bp)->b_sema.count <= 0)
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
@@ -234,8 +233,9 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
- ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
+#define XFS_BUF_ZEROFLAGS(bp) \
+ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+ XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
@@ -267,10 +267,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
-#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
-#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
-
#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -280,14 +276,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
-#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
-#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-
-#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
-#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
#define XFS_BUF_SET_START(bp) do { } while (0)
#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
@@ -313,10 +301,6 @@ xfs_buf_set_ref(
#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
-#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878f..75e5d32 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -151,14 +151,14 @@ xfs_nfs_get_inode(
* We don't use ESTALE directly down the chain to not
* confuse applications using bulkstat that expect EINVAL.
*/
- if (error == EINVAL)
+ if (error == EINVAL || error == ENOENT)
error = ESTALE;
return ERR_PTR(-error);
}
if (ip->i_d.di_gen != generation) {
IRELE(ip);
- return ERR_PTR(-ENOENT);
+ return ERR_PTR(-ESTALE);
}
return VFS_I(ip);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba..825390e 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -127,23 +127,44 @@ xfs_iozero(
STATIC int
xfs_file_fsync(
struct file *file,
+ loff_t start,
+ loff_t end,
int datasync)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
trace_xfs_file_fsync(ip);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (error)
+ return error;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ioend_wait(ip);
+ if (mp->m_flags & XFS_MOUNT_BARRIER) {
+ /*
+ * If we have an RT and/or log subvolume we need to make sure
+ * to flush the write cache the device used for file data
+ * first. This is to ensure newly written file data make
+ * it to disk before logging the new inode size in case of
+ * an extending write.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+ else if (mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
+ }
+
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
@@ -175,9 +196,9 @@ xfs_file_fsync(
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+ XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return -error;
@@ -209,28 +230,25 @@ xfs_file_fsync(
* force the log.
*/
if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(ip->i_mount,
+ error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
- if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If the log write didn't issue an ordered tag we need
- * to flush the disk cache for the data device now.
- */
- if (!log_flushed)
- xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
-
- /*
- * If this inode is on the RT dev we need to flush that
- * cache as well.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
- }
+ /*
+ * If we only have a single device, and the log force about was
+ * a no-op we might have to flush the data device cache here.
+ * This can only happen for fdatasync/O_DSYNC if we were overwriting
+ * an already allocated file and thus do not have any metadata to
+ * commit.
+ */
+ if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+ mp->m_logdev_targp == mp->m_ddev_targp &&
+ !XFS_IS_REALTIME_INODE(ip) &&
+ !log_flushed)
+ xfs_blkdev_issue_flush(mp->m_ddev_targp);
return -error;
}
@@ -863,18 +881,14 @@ xfs_file_aio_write(
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
- int error, error2;
+ int error;
xfs_rw_iunlock(ip, iolock);
- error = filemap_write_and_wait_range(mapping, pos, end);
+ error = xfs_file_fsync(file, pos, end,
+ (file->f_flags & __O_SYNC) ? 0 : 1);
xfs_rw_ilock(ip, iolock);
-
- error2 = -xfs_file_fsync(file,
- (file->f_flags & __O_SYNC) ? 0 : 1);
if (error)
ret = error;
- else if (error2)
- ret = error2;
}
out_unlock:
@@ -932,7 +946,7 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+ error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
}
out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784..6544c32 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -39,6 +39,7 @@
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
#include "xfs_trace.h"
#include <linux/capability.h>
@@ -182,7 +183,7 @@ xfs_vn_mknod(
if (IS_POSIXACL(dir)) {
default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(default_acl))
- return -PTR_ERR(default_acl);
+ return PTR_ERR(default_acl);
if (!default_acl)
mode &= ~current_umask();
@@ -201,9 +202,9 @@ xfs_vn_mknod(
if (default_acl) {
error = -xfs_inherit_acl(inode, default_acl);
+ default_acl = NULL;
if (unlikely(error))
goto out_cleanup_inode;
- posix_acl_release(default_acl);
}
@@ -497,12 +498,442 @@ xfs_vn_getattr(
return 0;
}
+int
+xfs_setattr_nonsize(
+ struct xfs_inode *ip,
+ struct iattr *iattr,
+ int flags)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ int mask = iattr->ia_valid;
+ xfs_trans_t *tp;
+ int error;
+ uid_t uid = 0, iuid = 0;
+ gid_t gid = 0, igid = 0;
+ struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
+
+ trace_xfs_setattr(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ error = -inode_change_ok(inode, iattr);
+ if (error)
+ return XFS_ERROR(error);
+
+ ASSERT((mask & ATTR_SIZE) == 0);
+
+ /*
+ * If disk quotas is on, we make sure that the dquots do exist on disk,
+ * before we start any other transactions. Trying to do this later
+ * is messy. We don't care to take a readlock to look at the ids
+ * in inode here, because we can't hold it across the trans_reserve.
+ * If the IDs do change before we take the ilock, we're covered
+ * because the i_*dquot fields will get updated anyway.
+ */
+ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+ uint qflags = 0;
+
+ if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+ uid = iattr->ia_uid;
+ qflags |= XFS_QMOPT_UQUOTA;
+ } else {
+ uid = ip->i_d.di_uid;
+ }
+ if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+ gid = iattr->ia_gid;
+ qflags |= XFS_QMOPT_GQUOTA;
+ } else {
+ gid = ip->i_d.di_gid;
+ }
+
+ /*
+ * We take a reference when we initialize udqp and gdqp,
+ * so it is important that we never blindly double trip on
+ * the same variable. See xfs_create() for an example.
+ */
+ ASSERT(udqp == NULL);
+ ASSERT(gdqp == NULL);
+ error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+ qflags, &udqp, &gdqp);
+ if (error)
+ return error;
+ }
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+ if (error)
+ goto out_dqrele;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ */
+ if (mask & (ATTR_UID|ATTR_GID)) {
+ /*
+ * These IDs could have changed since we last looked at them.
+ * But, we're assured that if the ownership did change
+ * while we didn't have the inode locked, inode's dquot(s)
+ * would have changed also.
+ */
+ iuid = ip->i_d.di_uid;
+ igid = ip->i_d.di_gid;
+ gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+ uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+ /*
+ * Do a quota reservation only if uid/gid is actually
+ * going to change.
+ */
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+ ASSERT(tp);
+ error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+ capable(CAP_FOWNER) ?
+ XFS_QMOPT_FORCE_RES : 0);
+ if (error) /* out of quota */
+ goto out_trans_cancel;
+ }
+ }
+
+ xfs_trans_ijoin(tp, ip);
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ */
+ if (mask & (ATTR_UID|ATTR_GID)) {
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The set-user-ID and set-group-ID bits of a file will be
+ * cleared upon successful return from chown()
+ */
+ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ !capable(CAP_FSETID))
+ ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+ */
+ if (iuid != uid) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(mask & ATTR_UID);
+ ASSERT(udqp);
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_udquot, udqp);
+ }
+ ip->i_d.di_uid = uid;
+ inode->i_uid = uid;
+ }
+ if (igid != gid) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(!XFS_IS_PQUOTA_ON(mp));
+ ASSERT(mask & ATTR_GID);
+ ASSERT(gdqp);
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+ ip->i_d.di_gid = gid;
+ inode->i_gid = gid;
+ }
+ }
+
+ /*
+ * Change file access modes.
+ */
+ if (mask & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+
+ ip->i_d.di_mode &= S_IFMT;
+ ip->i_d.di_mode |= mode & ~S_IFMT;
+
+ inode->i_mode &= S_IFMT;
+ inode->i_mode |= mode & ~S_IFMT;
+ }
+
+ /*
+ * Change file access or modified times.
+ */
+ if (mask & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_CTIME) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ /*
+ * Release any dquot(s) the inode had kept before chown.
+ */
+ xfs_qm_dqrele(olddquot1);
+ xfs_qm_dqrele(olddquot2);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+
+ if (error)
+ return XFS_ERROR(error);
+
+ /*
+ * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+ * update. We could avoid this with linked transactions
+ * and passing down the transaction pointer all the way
+ * to attr_set. No previous user of the generic
+ * Posix ACL code seems to care about this issue either.
+ */
+ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+ error = -xfs_acl_chmod(inode);
+ if (error)
+ return XFS_ERROR(error);
+ }
+
+ return 0;
+
+out_trans_cancel:
+ xfs_trans_cancel(tp, 0);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ return error;
+}
+
+/*
+ * Truncate file. Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+ struct xfs_inode *ip,
+ struct iattr *iattr,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct inode *inode = VFS_I(ip);
+ int mask = iattr->ia_valid;
+ struct xfs_trans *tp;
+ int error;
+ uint lock_flags;
+ uint commit_flags = 0;
+
+ trace_xfs_setattr(ip);
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return XFS_ERROR(EROFS);
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ error = -inode_change_ok(inode, iattr);
+ if (error)
+ return XFS_ERROR(error);
+
+ ASSERT(S_ISREG(ip->i_d.di_mode));
+ ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+ ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+ ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+ lock_flags = XFS_ILOCK_EXCL;
+ if (!(flags & XFS_ATTR_NOLOCK))
+ lock_flags |= XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, lock_flags);
+
+ /*
+ * Short circuit the truncate case for zero length files.
+ */
+ if (iattr->ia_size == 0 &&
+ ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+ if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+ goto out_unlock;
+
+ /*
+ * Use the regular setattr path to update the timestamps.
+ */
+ xfs_iunlock(ip, lock_flags);
+ iattr->ia_valid &= ~ATTR_SIZE;
+ return xfs_setattr_nonsize(ip, iattr, 0);
+ }
+
+ /*
+ * Make sure that the dquots are attached to the inode.
+ */
+ error = xfs_qm_dqattach_locked(ip, 0);
+ if (error)
+ goto out_unlock;
+
+ /*
+ * Now we can make the changes. Before we join the inode to the
+ * transaction, take care of the part of the truncation that must be
+ * done without the inode lock. This needs to be done before joining
+ * the inode to the transaction, because the inode cannot be unlocked
+ * once it is a part of the transaction.
+ */
+ if (iattr->ia_size > ip->i_size) {
+ /*
+ * Do the first part of growing a file: zero any data in the
+ * last block that is beyond the old EOF. We need to do this
+ * before the inode is joined to the transaction to modify
+ * i_size.
+ */
+ error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+ if (error)
+ goto out_unlock;
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ lock_flags &= ~XFS_ILOCK_EXCL;
+
+ /*
+ * We are going to log the inode size change in this transaction so
+ * any previous writes that are beyond the on disk EOF and the new
+ * EOF that have not been written out need to be written here. If we
+ * do not write the data out, we expose ourselves to the null files
+ * problem.
+ *
+ * Only flush from the on disk size to the smaller of the in memory
+ * file size or the new size as that's the range we really care about
+ * here and prevents waiting for other data not within the range we
+ * care about here.
+ */
+ if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+ error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+ XBF_ASYNC, FI_NONE);
+ if (error)
+ goto out_unlock;
+ }
+
+ /*
+ * Wait for all I/O to complete.
+ */
+ xfs_ioend_wait(ip);
+
+ error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+ xfs_get_blocks);
+ if (error)
+ goto out_unlock;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+ error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT);
+ if (error)
+ goto out_trans_cancel;
+
+ truncate_setsize(inode, iattr->ia_size);
+
+ commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+ lock_flags |= XFS_ILOCK_EXCL;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ xfs_trans_ijoin(tp, ip);
+
+ /*
+ * Only change the c/mtime if we are changing the size or we are
+ * explicitly asked to change it. This handles the semantic difference
+ * between truncate() and ftruncate() as implemented in the VFS.
+ *
+ * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+ * special case where we need to update the times despite not having
+ * these flags set. For all other operations the VFS set these flags
+ * explicitly if it wants a timestamp update.
+ */
+ if (iattr->ia_size != ip->i_size &&
+ (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ iattr->ia_ctime = iattr->ia_mtime =
+ current_fs_time(inode->i_sb);
+ mask |= ATTR_CTIME | ATTR_MTIME;
+ }
+
+ if (iattr->ia_size > ip->i_size) {
+ ip->i_d.di_size = iattr->ia_size;
+ ip->i_size = iattr->ia_size;
+ } else if (iattr->ia_size <= ip->i_size ||
+ (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+ error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+ if (error)
+ goto out_trans_abort;
+
+ /*
+ * Truncated "down", so we're removing references to old data
+ * here - if we delay flushing for a long time, we expose
+ * ourselves unduly to the notorious NULL files problem. So,
+ * we mark this inode and flush it when the file is closed,
+ * and do not wait the usual (long) time for writeout.
+ */
+ xfs_iflags_set(ip, XFS_ITRUNCATED);
+ }
+
+ if (mask & ATTR_CTIME) {
+ inode->i_ctime = iattr->ia_ctime;
+ ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ ip->i_update_core = 1;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+ return error;
+
+out_trans_abort:
+ commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+ xfs_trans_cancel(tp, commit_flags);
+ goto out_unlock;
+}
+
STATIC int
xfs_vn_setattr(
struct dentry *dentry,
struct iattr *iattr)
{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
+ if (iattr->ia_valid & ATTR_SIZE)
+ return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+ return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
}
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -591,7 +1022,7 @@ xfs_vn_fiemap(
}
static const struct inode_operations xfs_inode_operations = {
- .check_acl = xfs_check_acl,
+ .get_acl = xfs_get_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -617,7 +1048,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
+ .get_acl = xfs_get_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -642,7 +1073,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
+ .get_acl = xfs_get_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -655,7 +1086,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
.put_link = xfs_vn_put_link,
- .check_acl = xfs_check_acl,
+ .get_acl = xfs_get_acl,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -763,6 +1194,10 @@ xfs_setup_inode(
break;
}
+ /* if there is no attribute fork no ACL can exist on this inode */
+ if (!XFS_IFORK_Q(ip))
+ cache_no_acl(inode);
+
xfs_iflags_clear(ip, XFS_INEW);
barrier();
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521..d42f814 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -33,7 +33,6 @@
#endif
#include <xfs_types.h>
-#include <xfs_arch.h>
#include <kmem.h>
#include <mrlock.h>
@@ -88,6 +87,12 @@
#include <xfs_buf.h>
#include <xfs_message.h>
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
/*
* Feature macros (disable/enable)
*/
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce..9a72dda 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -33,7 +33,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
@@ -627,68 +626,6 @@ xfs_blkdev_put(
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
- xfs_mount_t *mp)
-{
- xfs_buf_t *sbp = xfs_getsb(mp, 0);
- int error;
-
- XFS_BUF_UNDONE(sbp);
- XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
- XFS_BUF_WRITE(sbp);
- XFS_BUF_UNASYNC(sbp);
- XFS_BUF_ORDERED(sbp);
-
- xfsbdstrat(mp, sbp);
- error = xfs_buf_iowait(sbp);
-
- /*
- * Clear all the flags we set and possible error state in the
- * buffer. We only did the write to try out whether barriers
- * worked and shouldn't leave any traces in the superblock
- * buffer.
- */
- XFS_BUF_DONE(sbp);
- XFS_BUF_ERROR(sbp, 0);
- XFS_BUF_UNORDERED(sbp);
-
- xfs_buf_relse(sbp);
- return error;
-}
-
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
- int error;
-
- if (mp->m_logdev_targp != mp->m_ddev_targp) {
- xfs_notice(mp,
- "Disabling barriers, not supported with external log device");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
- xfs_notice(mp,
- "Disabling barriers, underlying device is readonly");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-
- error = xfs_barrier_test(mp);
- if (error) {
- xfs_notice(mp,
- "Disabling barriers, trial barrier write failed");
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- return;
- }
-}
-
void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
@@ -1087,11 +1024,6 @@ xfs_fs_put_super(
{
struct xfs_mount *mp = XFS_M(sb);
- /*
- * Unregister the memory shrinker before we tear down the mount
- * structure so we don't have memory reclaim racing with us here.
- */
- xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
/*
@@ -1240,14 +1172,6 @@ xfs_fs_remount(
switch (token) {
case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
-
- /*
- * Test if barriers are actually working if we can,
- * else delay this check until the filesystem is
- * marked writeable.
- */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY))
- xfs_mountfs_check_barriers(mp);
break;
case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1206,6 @@ xfs_fs_remount(
/* ro -> rw */
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
mp->m_flags &= ~XFS_MOUNT_RDONLY;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
/*
* If this is the first remount to writeable state we
@@ -1465,9 +1387,6 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_mountfs_check_barriers(mp);
-
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
@@ -1487,36 +1406,31 @@ xfs_fs_fill_super(
sb->s_time_gran = 1;
set_posix_acl_flag(sb);
- error = xfs_syncd_init(mp);
+ error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
- xfs_inode_shrinker_register(mp);
-
- error = xfs_mountfs(mp);
+ error = xfs_syncd_init(mp);
if (error)
- goto out_syncd_stop;
+ goto out_unmount;
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = ENOENT;
- goto fail_unmount;
+ goto out_syncd_stop;
}
if (is_bad_inode(root)) {
error = EINVAL;
- goto fail_vnrele;
+ goto out_syncd_stop;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
error = ENOMEM;
- goto fail_vnrele;
+ goto out_iput;
}
return 0;
- out_syncd_stop:
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
out_filestream_unmount:
xfs_filestream_unmount(mp);
out_free_sb:
@@ -1531,18 +1445,11 @@ xfs_fs_fill_super(
out:
return -error;
- fail_vnrele:
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- } else {
- iput(root);
- }
-
- fail_unmount:
- xfs_inode_shrinker_unregister(mp);
+ out_iput:
+ iput(root);
+ out_syncd_stop:
xfs_syncd_stop(mp);
-
+ out_unmount:
/*
* Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive
@@ -1566,6 +1473,21 @@ xfs_fs_mount(
return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
+static int
+xfs_fs_nr_cached_objects(
+ struct super_block *sb)
+{
+ return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+ struct super_block *sb,
+ int nr_to_scan)
+{
+ xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
@@ -1579,6 +1501,8 @@ static const struct super_operations xfs_super_operations = {
.statfs = xfs_fs_statfs,
.remount_fs = xfs_fs_remount,
.show_options = xfs_fs_show_options,
+ .nr_cached_objects = xfs_fs_nr_cached_objects,
+ .free_cached_objects = xfs_fs_free_cached_objects,
};
static struct file_system_type xfs_fs_type = {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 8ecad5f..e4c938a 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -179,6 +179,8 @@ restart:
if (error == EFSCORRUPTED)
break;
+ cond_resched();
+
} while (nr_found && !done);
if (skipped) {
@@ -359,14 +361,12 @@ xfs_quiesce_data(
{
int error, error2 = 0;
- /* push non-blocking */
- xfs_sync_data(mp, 0);
xfs_qm_sync(mp, SYNC_TRYLOCK);
-
- /* push and block till complete */
- xfs_sync_data(mp, SYNC_WAIT);
xfs_qm_sync(mp, SYNC_WAIT);
+ /* force out the newly dirtied log buffers */
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp);
@@ -436,7 +436,7 @@ xfs_quiesce_attr(
WARN_ON(atomic_read(&mp->m_active_trans) != 0);
/* Push the superblock and write an unmount record */
- error = xfs_log_sbcount(mp, 1);
+ error = xfs_log_sbcount(mp);
if (error)
xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
"Frozen image may not be consistent.");
@@ -986,6 +986,8 @@ restart:
*nr_to_scan -= XFS_LOOKUP_BATCH;
+ cond_resched();
+
} while (nr_found && !done && *nr_to_scan > 0);
if (trylock && !done)
@@ -1003,7 +1005,7 @@ restart:
* ensure that when we get more reclaimers than AGs we block rather
* than spin trying to execute reclaim.
*/
- if (trylock && skipped && *nr_to_scan > 0) {
+ if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
trylock = 0;
goto restart;
}
@@ -1021,44 +1023,38 @@ xfs_reclaim_inodes(
}
/*
- * Inode cache shrinker.
+ * Scan a certain number of inodes for reclaim.
*
* When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * progress, while we will throttle the speed of reclaim via doing synchronous
* reclaim of inodes. That means if we come across dirty inodes, we wait for
* them to be cleaned, which we hope will not be very long due to the
* background walker having already kicked the IO off on those dirty inodes.
*/
-static int
-xfs_reclaim_inode_shrink(
- struct shrinker *shrink,
- struct shrink_control *sc)
+void
+xfs_reclaim_inodes_nr(
+ struct xfs_mount *mp,
+ int nr_to_scan)
{
- struct xfs_mount *mp;
- struct xfs_perag *pag;
- xfs_agnumber_t ag;
- int reclaimable;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
- if (nr_to_scan) {
- /* kick background reclaimer and push the AIL */
- xfs_syncd_queue_reclaim(mp);
- xfs_ail_push_all(mp->m_ail);
+ /* kick background reclaimer and push the AIL */
+ xfs_syncd_queue_reclaim(mp);
+ xfs_ail_push_all(mp->m_ail);
- if (!(gfp_mask & __GFP_FS))
- return -1;
+ xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
- xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
- &nr_to_scan);
- /* terminate if we don't exhaust the scan */
- if (nr_to_scan > 0)
- return -1;
- }
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t ag = 0;
+ int reclaimable = 0;
- reclaimable = 0;
- ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable;
@@ -1067,18 +1063,3 @@ xfs_reclaim_inode_shrink(
return reclaimable;
}
-void
-xfs_inode_shrinker_register(
- struct xfs_mount *mp)
-{
- mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
- mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
- register_shrinker(&mp->m_inode_shrink);
-}
-
-void
-xfs_inode_shrinker_unregister(
- struct xfs_mount *mp)
-{
- unregister_shrinker(&mp->m_inode_shrink);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad2..941202e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,14 +21,6 @@
struct xfs_mount;
struct xfs_perag;
-typedef struct xfs_sync_work {
- struct list_head w_list;
- struct xfs_mount *w_mount;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct xfs_mount *, void *);
- struct completion *w_completion;
-} xfs_sync_work_t;
-
#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
@@ -43,6 +35,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
@@ -54,7 +48,4 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags);
-void xfs_inode_shrinker_register(struct xfs_mount *mp);
-void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
-
#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d48b7a5..690fc7a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->buffer_length = bp->b_buffer_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
__entry->caller_ip = caller_ip;
),
@@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite);
DEFINE_BUF_EVENT(xfs_buf_bdwrite);
DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_cond_lock);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
@@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
__entry->flags = flags;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
@@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror,
__entry->buffer_length = bp->b_buffer_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->lockval = bp->b_sema.count;
__entry->error = error;
__entry->flags = bp->b_flags;
__entry->caller_ip = caller_ip;
@@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
__entry->buf_flags = bip->bli_buf->b_flags;
__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
- __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
+ __entry->buf_lockval = bip->bli_buf->b_sema.count;
__entry->li_desc = bip->bli_item.li_desc;
__entry->li_flags = bip->bli_item.li_flags;
),
@@ -571,7 +571,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
DEFINE_INODE_EVENT(xfs_free_file_space);
DEFINE_INODE_EVENT(xfs_readdir);
#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_check_acl);
+DEFINE_INODE_EVENT(xfs_get_acl);
#endif
DEFINE_INODE_EVENT(xfs_vm_bmap);
DEFINE_INODE_EVENT(xfs_file_ioctl);
@@ -998,7 +998,8 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
- __field(loff_t, size)
+ __field(loff_t, isize)
+ __field(loff_t, disize)
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
@@ -1006,16 +1007,18 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
+ __entry->isize = ip->i_size;
+ __entry->disize = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
"offset 0x%llx count %zd",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->size,
+ __entry->isize,
+ __entry->disize,
__entry->new_size,
__entry->offset,
__entry->count)
@@ -1028,40 +1031,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-
-
-TRACE_EVENT(xfs_itruncate_start,
- TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
- xfs_off_t toss_start, xfs_off_t toss_finish),
- TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
- __field(xfs_off_t, toss_start)
- __field(xfs_off_t, toss_finish)
- __field(int, flag)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = new_size;
- __entry->toss_start = toss_start;
- __entry->toss_finish = toss_finish;
- __entry->flag = flag;
- ),
- TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
- "toss start 0x%llx toss finish 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
- __entry->size,
- __entry->new_size,
- __entry->toss_start,
- __entry->toss_finish)
-);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
@@ -1089,8 +1059,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
TRACE_EVENT(xfs_pagecache_inval,
TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
OpenPOWER on IntegriCloud