summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c10
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_super.c1
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c24
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/afs/volume.c7
-rw-r--r--fs/autofs4/root.c5
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/block_dev.c17
-rw-r--r--fs/btrfs/disk-io.c12
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c98
-rw-r--r--fs/cachefiles/security.c4
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.h2
-rw-r--r--fs/ceph/auth_x.c32
-rw-r--r--fs/ceph/caps.c21
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c8
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/messenger.c39
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c26
-rw-r--r--fs/ceph/osd_client.h3
-rw-r--r--fs/ceph/osdmap.c29
-rw-r--r--fs/ceph/osdmap.h2
-rw-r--r--fs/ceph/rados.h1
-rw-r--r--fs/ceph/snap.c24
-rw-r--r--fs/ceph/super.c30
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifs_fs_sb.h3
-rw-r--r--fs/cifs/cifsfs.c10
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/main.c10
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exofs/exofs.h2
-rw-r--r--fs/exofs/super.c8
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/inode.c3
-rw-r--r--fs/ext4/mballoc.c21
-rw-r--r--fs/ioctl.c92
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/jfs_dmap.c16
-rw-r--r--fs/jfs/jfs_dmap.h6
-rw-r--r--fs/jfs/jfs_inode.h1
-rw-r--r--fs/jfs/namei.c4
-rw-r--r--fs/jfs/resize.c6
-rw-r--r--fs/jfs/symlink.c14
-rw-r--r--fs/logfs/gc.c8
-rw-r--r--fs/logfs/journal.c29
-rw-r--r--fs/logfs/logfs.h15
-rw-r--r--fs/logfs/readwrite.c75
-rw-r--r--fs/logfs/segment.c8
-rw-r--r--fs/logfs/super.c11
-rw-r--r--fs/namei.c6
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c86
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/nfs4proc.c5
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfs/write.c55
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nilfs2/super.c1
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c5
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/file.c32
-rw-r--r--fs/ocfs2/inode.c68
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/namei.c58
-rw-r--r--fs/ocfs2/refcounttree.c3
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/task_mmu.c19
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/xattr.c19
-rw-r--r--fs/smbfs/inode.c8
-rw-r--r--fs/squashfs/block.c5
-rw-r--r--fs/squashfs/super.c4
-rw-r--r--fs/squashfs/zlib_wrapper.c3
-rw-r--r--fs/super.c9
-rw-r--r--fs/sync.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c112
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_dfrag.c22
-rw-r--r--fs/xfs/xfs_mount.h1
99 files changed, 955 insertions, 466 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c5bc84..f8b86e9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -238,6 +238,13 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
return ERR_PTR(-ENOMEM);
}
+ rc = bdi_setup_and_register(&v9ses->bdi, "9p", BDI_CAP_MAP_COPY);
+ if (rc) {
+ __putname(v9ses->aname);
+ __putname(v9ses->uname);
+ return ERR_PTR(rc);
+ }
+
spin_lock(&v9fs_sessionlist_lock);
list_add(&v9ses->slist, &v9fs_sessionlist);
spin_unlock(&v9fs_sessionlist_lock);
@@ -301,6 +308,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
return fid;
error:
+ bdi_destroy(&v9ses->bdi);
return ERR_PTR(retval);
}
@@ -326,6 +334,8 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
__putname(v9ses->uname);
__putname(v9ses->aname);
+ bdi_destroy(&v9ses->bdi);
+
spin_lock(&v9fs_sessionlist_lock);
list_del(&v9ses->slist);
spin_unlock(&v9fs_sessionlist_lock);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index a0a8d3d..bec4d0b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,7 @@
* Boston, MA 02111-1301 USA
*
*/
+#include <linux/backing-dev.h>
/**
* enum p9_session_flags - option flags for each 9P session
@@ -102,6 +103,7 @@ struct v9fs_session_info {
u32 uid; /* if ACCESS_SINGLE, the uid that has access */
struct p9_client *clnt; /* 9p client */
struct list_head slist; /* list of sessions registered with v9fs */
+ struct backing_dev_info bdi;
};
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 491108b..806da5d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -77,6 +77,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_blocksize = 1 << sb->s_blocksize_bits;
sb->s_magic = V9FS_MAGIC;
sb->s_op = &v9fs_super_ops;
+ sb->s_bdi = &v9ses->bdi;
sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
MS_NOATIME;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c54dad4e60..a10f258 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/fscache.h>
+#include <linux/backing-dev.h>
#include "afs.h"
#include "afs_vl.h"
@@ -313,6 +314,7 @@ struct afs_volume {
unsigned short rjservers; /* number of servers discarded due to -ENOMEDIUM */
struct afs_server *servers[8]; /* servers on which volume resides (ordered) */
struct rw_semaphore server_sem; /* lock for accessing current server */
+ struct backing_dev_info bdi;
};
/*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 5e813a8..b3feddc 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -138,9 +138,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
{
struct afs_super_info *super;
struct vfsmount *mnt;
- struct page *page = NULL;
+ struct page *page;
size_t size;
- char *buf, *devname = NULL, *options = NULL;
+ char *buf, *devname, *options;
int ret;
_enter("{%s}", mntpt->d_name.name);
@@ -150,22 +150,22 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
ret = -EINVAL;
size = mntpt->d_inode->i_size;
if (size > PAGE_SIZE - 1)
- goto error;
+ goto error_no_devname;
ret = -ENOMEM;
devname = (char *) get_zeroed_page(GFP_KERNEL);
if (!devname)
- goto error;
+ goto error_no_devname;
options = (char *) get_zeroed_page(GFP_KERNEL);
if (!options)
- goto error;
+ goto error_no_options;
/* read the contents of the AFS special symlink */
page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
- goto error;
+ goto error_no_page;
}
ret = -EIO;
@@ -196,12 +196,12 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
return mnt;
error:
- if (page)
- page_cache_release(page);
- if (devname)
- free_page((unsigned long) devname);
- if (options)
- free_page((unsigned long) options);
+ page_cache_release(page);
+error_no_page:
+ free_page((unsigned long) options);
+error_no_options:
+ free_page((unsigned long) devname);
+error_no_devname:
_leave(" = %d", ret);
return ERR_PTR(ret);
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 14f6431..e932e5a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -311,6 +311,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
sb->s_magic = AFS_FS_MAGIC;
sb->s_op = &afs_super_ops;
sb->s_fs_info = as;
+ sb->s_bdi = &as->volume->bdi;
/* allocate the root inode and dentry */
fid.vid = as->volume->vid;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index a353e69..401eeb2 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -106,6 +106,10 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
volume->cell = params->cell;
volume->vid = vlocation->vldb.vid[params->type];
+ ret = bdi_setup_and_register(&volume->bdi, "afs", BDI_CAP_MAP_COPY);
+ if (ret)
+ goto error_bdi;
+
init_rwsem(&volume->server_sem);
/* look up all the applicable server records */
@@ -151,6 +155,8 @@ error:
return ERR_PTR(ret);
error_discard:
+ bdi_destroy(&volume->bdi);
+error_bdi:
up_write(&params->cell->vl_sem);
for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -200,6 +206,7 @@ void afs_put_volume(struct afs_volume *volume)
for (loop = volume->nservers - 1; loop >= 0; loop--)
afs_put_server(volume->servers[loop]);
+ bdi_destroy(&volume->bdi);
kfree(volume);
_leave(" [destroyed]");
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c6..e8e5e63 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
}
/* Trigger mount for path component or follow link */
} else if (ino->flags & AUTOFS_INF_PENDING ||
- autofs4_need_mount(flags) ||
- current->link_count) {
+ autofs4_need_mount(flags)) {
DPRINTK("waiting for mount name=%.*s",
dentry->d_name.len, dentry->d_name.name);
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
spin_unlock(&dcache_lock);
spin_unlock(&sbi->fs_lock);
- status = try_to_fill_dentry(dentry, 0);
+ status = try_to_fill_dentry(dentry, nd->flags);
if (status)
goto out_error;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7ab23e0..2c5f9a0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1005,15 +1005,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
}
} else if (!mm->start_data) {
mm->start_data = seg->addr;
-#ifndef CONFIG_MMU
mm->end_data = seg->addr + phdr->p_memsz;
-#endif
}
-
-#ifdef CONFIG_MMU
- if (seg->addr + phdr->p_memsz > mm->end_data)
- mm->end_data = seg->addr + phdr->p_memsz;
-#endif
}
seg++;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e0e769b..49566c1 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -355,7 +355,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
if (!flat_reloc_valid(r, start_brk - start_data + text_len)) {
printk("BINFMT_FLAT: reloc outside program 0x%x (0 - 0x%x/0x%x)",
- (int) r,(int)(start_brk-start_code),(int)text_len);
+ (int) r,(int)(start_brk-start_data+text_len),(int)text_len);
goto failed;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2a6d019..6dcee88 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -406,16 +406,23 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
- struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+ struct inode *bd_inode = filp->f_mapping->host;
+ struct block_device *bdev = I_BDEV(bd_inode);
int error;
- error = sync_blockdev(bdev);
- if (error)
- return error;
-
+ /*
+ * There is no need to serialise calls to blkdev_issue_flush with
+ * i_mutex and doing so causes performance issues with concurrent
+ * O_SYNC writers to a block device.
+ */
+ mutex_unlock(&bd_inode->i_mutex);
+
error = blkdev_issue_flush(bdev, NULL);
if (error == -EOPNOTSUPP)
error = 0;
+
+ mutex_lock(&bd_inode->i_mutex);
+
return error;
}
EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b8f2c..feca041 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
-static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
-
/*
* end_io_wq structs are used to do processing in task context when an IO is
* complete. This is used during reads to verify checksums, and it is used
@@ -1375,19 +1373,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
int err;
- bdi->name = "btrfs";
bdi->capabilities = BDI_CAP_MAP_COPY;
- err = bdi_init(bdi);
+ err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
if (err)
return err;
- err = bdi_register(bdi, NULL, "btrfs-%d",
- atomic_inc_return(&btrfs_bdi_num));
- if (err) {
- bdi_destroy(bdi);
- return err;
- }
-
bdi->ra_pages = default_backing_dev_info.ra_pages;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
bdi->unplug_io_data = info;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f..a8cd821 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
loff_t i_size; /* object size */
unsigned long flags;
#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
+#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
atomic_t usage; /* object usage count */
uint8_t type; /* object type */
uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1e..f4a7840 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
}
/*
+ * mark the owner of a dentry, if there is one, to indicate that that dentry
+ * has been preemptively deleted
+ * - the caller must hold the i_mutex on the dentry's parent as required to
+ * call vfs_unlink(), vfs_rmdir() or vfs_rename()
+ */
+static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
+ struct dentry *dentry)
+{
+ struct cachefiles_object *object;
+ struct rb_node *p;
+
+ _enter(",'%*.*s'",
+ dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+
+ write_lock(&cache->active_lock);
+
+ p = cache->active_nodes.rb_node;
+ while (p) {
+ object = rb_entry(p, struct cachefiles_object, active_node);
+ if (object->dentry > dentry)
+ p = p->rb_left;
+ else if (object->dentry < dentry)
+ p = p->rb_right;
+ else
+ goto found_dentry;
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [no owner]");
+ return;
+
+ /* found the dentry for */
+found_dentry:
+ kdebug("preemptive burial: OBJ%x [%s] %p",
+ object->fscache.debug_id,
+ fscache_object_states[object->fscache.state],
+ dentry);
+
+ if (object->fscache.state < FSCACHE_OBJECT_DYING) {
+ printk(KERN_ERR "\n");
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Can't preemptively bury live object\n");
+ cachefiles_printk_object(object, NULL);
+ } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ printk(KERN_ERR "CacheFiles: Error:"
+ " Object already preemptively buried\n");
+ }
+
+ write_unlock(&cache->active_lock);
+ _leave(" [owner marked]");
+}
+
+/*
* record the fact that an object is now active
*/
static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
*/
static int cachefiles_bury_object(struct cachefiles_cache *cache,
struct dentry *dir,
- struct dentry *rep)
+ struct dentry *rep,
+ bool preemptive)
{
struct dentry *grave, *trap;
char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
dir->d_name.len, dir->d_name.len, dir->d_name.name,
rep->d_name.len, rep->d_name.len, rep->d_name.name);
+ _debug("remove %p from %p", rep, dir);
+
/* non-directories can just be unlinked */
if (!S_ISDIR(rep->d_inode->i_mode)) {
_debug("unlink stale object");
ret = vfs_unlink(dir->d_inode, rep);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
mutex_unlock(&dir->d_inode->i_mutex);
if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
if (ret != 0 && ret != -ENOMEM)
cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+
unlock_rename(cache->graveyard, dir);
dput(grave);
_leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
struct dentry *dir;
int ret;
- _enter(",{%p}", object->dentry);
+ _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
ASSERT(object->dentry);
ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- /* we need to check that our parent is _still_ our parent - it may have
- * been renamed */
- if (dir == object->dentry->d_parent) {
- ret = cachefiles_bury_object(cache, dir, object->dentry);
- } else {
- /* it got moved, presumably by cachefilesd culling it, so it's
- * no longer in the key path and we can ignore it */
+ if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+ /* object allocation for the same key preemptively deleted this
+ * object's file so that it could create its own file */
+ _debug("object preemptively buried");
mutex_unlock(&dir->d_inode->i_mutex);
ret = 0;
+ } else {
+ /* we need to check that our parent is _still_ our parent - it
+ * may have been renamed */
+ if (dir == object->dentry->d_parent) {
+ ret = cachefiles_bury_object(cache, dir,
+ object->dentry, false);
+ } else {
+ /* it got moved, presumably by cachefilesd culling it,
+ * so it's no longer in the key path and we can ignore
+ * it */
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = 0;
+ }
}
dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
const char *name;
int ret, nlen;
- _enter("{%p},,%s,", parent->dentry, key);
+ _enter("OBJ%x{%p},OBJ%x,%s,",
+ parent->fscache.debug_id, parent->dentry,
+ object->fscache.debug_id, key);
cache = container_of(parent->fscache.cache,
struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
* mutex) */
object->dentry = NULL;
- ret = cachefiles_bury_object(cache, dir, next);
+ ret = cachefiles_bury_object(cache, dir, next, true);
dput(next);
next = NULL;
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
/* actually remove the victim (drops the dir mutex) */
_debug("bury");
- ret = cachefiles_bury_object(cache, dir, victim);
+ ret = cachefiles_bury_object(cache, dir, victim, false);
if (ret < 0)
goto error;
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cd..039b501 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
/*
* check the security details of the on-disk cache
* - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ * error
*/
int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
* which create files */
ret = set_create_files_as(new, root->d_inode);
if (ret < 0) {
+ abort_creds(new);
+ cachefiles_begin_secure(cache, _saved_cred);
_leave(" = %d [cfa]", ret);
return ret;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4125937..a9005d8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
int i;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- struct writeback_control *wbc = req->r_wbc;
__s32 rc = -EIO;
u64 bytes = 0;
struct ceph_client *client = ceph_inode_to_client(inode);
long writeback_stat;
- unsigned issued = __ceph_caps_issued(ci, NULL);
+ unsigned issued = ceph_caps_issued(ci);
/* parse reply */
replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
clear_bdi_congested(&client->backing_dev_info,
BLK_RW_ASYNC);
- if (i >= wrote) {
- dout("inode %p skipping page %p\n", inode, page);
- wbc->pages_skipped++;
- }
ceph_put_snap_context((void *)page->private);
page->private = 0;
ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
alloc_page_vec(client, req);
req->r_callback = writepages_finish;
req->r_inode = inode;
- req->r_wbc = wbc;
}
/* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b9..818afe7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/slab.h>
#include "types.h"
#include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c0553..8164df1 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
#ifndef _FS_CEPH_AUTH_NONE_H
#define _FS_CEPH_AUTH_NONE_H
+#include <linux/slab.h>
+
#include "auth.h"
/*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4..fee5a08d 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
#include "auth.h"
#include "decode.h"
-struct kmem_cache *ceph_x_ticketbuf_cachep;
-
#define TEMP_TICKET_BUF_LEN 256
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
char *ticket_buf;
u8 struct_v;
- dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+ dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!dbuf)
return -ENOMEM;
ret = -ENOMEM;
- ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
- GFP_NOFS | GFP_ATOMIC);
+ ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
if (!ticket_buf)
goto out_dbuf;
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
ret = 0;
out:
- kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+ kfree(ticket_buf);
out_dbuf:
- kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+ kfree(dbuf);
return ret;
bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
remove_ticket_handler(ac, th);
}
- kmem_cache_destroy(ceph_x_ticketbuf_cachep);
-
kfree(ac->private);
ac->private = NULL;
}
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
int ret;
dout("ceph_x_init %p\n", ac);
+ ret = -ENOMEM;
xi = kzalloc(sizeof(*xi), GFP_NOFS);
if (!xi)
- return -ENOMEM;
+ goto out;
- ret = -ENOMEM;
- ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
- TEMP_TICKET_BUF_LEN, 8,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
- NULL);
- if (!ceph_x_ticketbuf_cachep)
- goto done_nomem;
ret = -EINVAL;
if (!ac->secret) {
pr_err("no secret set (for auth_x protocol)\n");
- goto done_nomem;
+ goto out_nomem;
}
ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
if (ret)
- goto done_nomem;
+ goto out_nomem;
xi->starting = true;
xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
ac->ops = &ceph_x_ops;
return 0;
-done_nomem:
+out_nomem:
kfree(xi);
- if (ceph_x_ticketbuf_cachep)
- kmem_cache_destroy(ceph_x_ticketbuf_cachep);
+out:
return ret;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239f..d940053 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
}
/*
+ * Remove a cap. Take steps to deal with a racing iterate_session_caps.
+ *
* caller should hold i_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+ int removed = 0;
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
- /* remove from inode list */
- rb_erase(&cap->ci_node, &ci->i_caps);
- cap->ci = NULL;
- if (ci->i_auth_cap == cap)
- ci->i_auth_cap = NULL;
-
/* remove from session list */
spin_lock(&session->s_cap_lock);
if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
list_del_init(&cap->session_caps);
session->s_nr_caps--;
cap->session = NULL;
+ removed = 1;
}
+ /* protect backpointer with s_cap_lock: see iterate_session_caps */
+ cap->ci = NULL;
spin_unlock(&session->s_cap_lock);
- if (cap->session == NULL)
+ /* remove from inode list */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
+ if (removed)
ceph_put_cap(cap);
if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
- spin_unlock(&inode->i_lock);
}
+ spin_unlock(&inode->i_lock);
}
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e..650d2db 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
* do_request, above). If there is no trace, we need
* to do it here.
*/
+
+ /* d_move screws up d_subdirs order */
+ ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+
d_move(old_dentry, new_dentry);
+
+ /* ensure target dentry is invalidated, despite
+ rehashing bug in vfs_rename_dir */
+ new_dentry->d_time = jiffies;
+ ceph_dentry(new_dentry)->lease_shared_gen = 0;
}
ceph_mdsc_put_request(req);
return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5..ed6f197 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
* throw out any page cache pages in this range. this
* may block.
*/
- truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE-1));
} else {
pages = alloc_page_vector(num_pages);
if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c..85b4d2f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
__ceph_get_fmode(ci, cap_fmode);
spin_unlock(&inode->i_lock);
}
+ } else if (cap_fmode >= 0) {
+ pr_warning("mds issued no caps on %llx.%llx\n",
+ ceph_vinop(inode));
+ __ceph_get_fmode(ci, cap_fmode);
}
/* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dn, dn->d_name.len, dn->d_name.name);
dout("fill_trace doing d_move %p -> %p\n",
req->r_old_dentry, dn);
+
+ /* d_move screws up d_subdirs order */
+ ceph_i_clear(dir, CEPH_I_COMPLETE);
+
d_move(req->r_old_dentry, dn);
dout(" src %p '%.*s' dst %p '%.*s'\n",
req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4a..24561a5 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
}
/*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
*
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
*/
static int iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
struct ceph_mds_session *session = NULL;
struct ceph_msg *reply;
struct rb_node *p;
- int err;
+ int err = -ENOMEM;
struct ceph_pagelist *pagelist;
pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
goto fail;
err = iterate_session_caps(session, encode_caps_cb, pagelist);
if (err < 0)
- goto out;
+ goto fail;
/*
* snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply);
- if (session) {
- session->s_state = CEPH_MDS_SESSION_OPEN;
- __wake_requests(mdsc, &session->s_waiting);
- }
+ session->s_state = CEPH_MDS_SESSION_OPEN;
+ mutex_unlock(&session->s_mutex);
+
+ mutex_lock(&mdsc->mutex);
+ __wake_requests(mdsc, &session->s_waiting);
+ mutex_unlock(&mdsc->mutex);
+
+ ceph_put_mds_session(session);
-out:
up_read(&mdsc->snap_rwsem);
- if (session) {
- mutex_unlock(&session->s_mutex);
- ceph_put_mds_session(session);
- }
mutex_lock(&mdsc->mutex);
return;
fail:
ceph_msg_put(reply);
+ up_read(&mdsc->snap_rwsem);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
fail_nomsg:
ceph_pagelist_release(pagelist);
kfree(pagelist);
fail_nopagelist:
- pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
- goto out;
+ pr_err("error %d preparing reconnect for mds%d\n", err, mds);
+ mutex_lock(&mdsc->mutex);
+ return;
}
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa13..cd4fadb 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
list_move_tail(&m->list_head, &con->out_sent);
}
- m->hdr.seq = cpu_to_le64(++con->out_seq);
+ /*
+ * only assign outgoing seq # if we haven't sent this message
+ * yet. if it is requeued, resend with it's original seq.
+ */
+ if (m->needs_out_seq) {
+ m->hdr.seq = cpu_to_le64(++con->out_seq);
+ m->needs_out_seq = false;
+ }
dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
unsigned front_len, middle_len, data_len, data_off;
int datacrc = con->msgr->nocrc;
int skip;
+ u64 seq;
dout("read_partial_message con %p msg %p\n", con, m);
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
return -EIO;
data_off = le16_to_cpu(con->in_hdr.data_off);
+ /* verify seq# */
+ seq = le64_to_cpu(con->in_hdr.seq);
+ if ((s64)seq - (s64)con->in_seq < 1) {
+ pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+ ENTITY_NAME(con->peer_name),
+ pr_addr(&con->peer_addr.in_addr),
+ seq, con->in_seq + 1);
+ con->in_base_pos = -front_len - middle_len - data_len -
+ sizeof(m->footer);
+ con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
+ return 0;
+ } else if ((s64)seq - (s64)con->in_seq > 1) {
+ pr_err("read_partial_message bad seq %lld expected %lld\n",
+ seq, con->in_seq + 1);
+ con->error_msg = "bad message sequence # for incoming message";
+ return -EBADMSG;
+ }
+
/* allocate message? */
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
return 0;
}
if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+ msg->needs_out_seq = true;
+
/* queue */
mutex_lock(&con->mutex);
BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
con->in_tag = CEPH_MSGR_TAG_READY;
+ con->in_seq++;
} else {
dout("con_revoke_pages %p msg %p pages %p no-op\n",
con, con->in_msg, msg);
@@ -2063,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
kref_init(&m->kref);
INIT_LIST_HEAD(&m->list_head);
+ m->hdr.tid = 0;
m->hdr.type = cpu_to_le16(type);
+ m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.version = 0;
m->hdr.front_len = cpu_to_le32(front_len);
m->hdr.middle_len = 0;
m->hdr.data_len = cpu_to_le32(page_len);
m->hdr.data_off = cpu_to_le16(page_off);
- m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+ m->hdr.reserved = 0;
m->footer.front_crc = 0;
m->footer.middle_crc = 0;
m->footer.data_crc = 0;
+ m->footer.flags = 0;
m->front_max = front_len;
m->front_is_vmalloc = false;
m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae..a5caf91 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
struct kref kref;
bool front_is_vmalloc;
bool more_to_follow;
+ bool needs_out_seq;
int front_max;
struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4ded..3514f71 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
{
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
struct ceph_pg pgid;
- int o = -1;
+ int acting[CEPH_PG_MAX_SIZE];
+ int o = -1, num = 0;
int err;
dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
pgid = reqhead->layout.ol_pgid;
req->r_pgid = pgid;
- o = ceph_calc_pg_primary(osdc->osdmap, pgid);
+ err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
+ if (err > 0) {
+ o = acting[0];
+ num = err;
+ }
if ((req->r_osd && req->r_osd->o_osd == o &&
- req->r_sent >= req->r_osd->o_incarnation) ||
+ req->r_sent >= req->r_osd->o_incarnation &&
+ req->r_num_pg_osds == num &&
+ memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
(req->r_osd == NULL && o == -1))
return 0; /* no change */
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
req->r_osd ? req->r_osd->o_osd : -1);
+ /* record full pg acting set */
+ memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+ req->r_num_pg_osds = num;
+
if (req->r_osd) {
__cancel_request(req);
list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
__remove_osd_from_lru(req->r_osd);
list_add(&req->r_osd_item, &req->r_osd->o_requests);
}
- err = 1; /* osd changed */
+ err = 1; /* osd or pg changed */
out:
return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
struct ceph_osd_request *req;
u64 tid;
int numops, object_len, flags;
+ s32 result;
tid = le64_to_cpu(msg->hdr.tid);
if (msg->front.iov_len < sizeof(*rhead))
goto bad;
numops = le32_to_cpu(rhead->num_ops);
object_len = le32_to_cpu(rhead->object_len);
+ result = le32_to_cpu(rhead->result);
if (msg->front.iov_len != sizeof(*rhead) + object_len +
numops * sizeof(struct ceph_osd_op))
goto bad;
- dout("handle_reply %p tid %llu\n", msg, tid);
+ dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
/* lookup */
mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply tid %llu flags %d\n", tid, flags);
/* either this is a read, or we got the safe response */
- if ((flags & CEPH_OSD_FLAG_ONDISK) ||
+ if (result < 0 ||
+ (flags & CEPH_OSD_FLAG_ONDISK) ||
((flags & CEPH_OSD_FLAG_WRITE) == 0))
__unregister_request(osdc, req);
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b075991..ce77698 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
struct list_head r_osd_item;
struct ceph_osd *r_osd;
struct ceph_pg r_pgid;
+ int r_pg_osds[CEPH_PG_MAX_SIZE];
+ int r_num_pg_osds;
struct ceph_connection *r_con_filling_msg;
@@ -66,7 +68,6 @@ struct ceph_osd_request {
struct list_head r_unsafe_item;
struct inode *r_inode; /* for use by callbacks */
- struct writeback_control *r_wbc; /* ditto */
char r_oid[40]; /* object name */
int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15e..cfdd8f4 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
}
/*
+ * Return acting set for given pgid.
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *acting)
+{
+ int rawosds[CEPH_PG_MAX_SIZE], *osds;
+ int i, o, num = CEPH_PG_MAX_SIZE;
+
+ osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
+ if (!osds)
+ return -1;
+
+ /* primary is first up osd */
+ o = 0;
+ for (i = 0; i < num; i++)
+ if (ceph_osd_is_up(osdmap, osds[i]))
+ acting[o++] = osds[i];
+ return o;
+}
+
+/*
* Return primary osd for given pgid, or -1 if none.
*/
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{
- int rawosds[10], *osds;
- int i, num = ARRAY_SIZE(rawosds);
+ int rawosds[CEPH_PG_MAX_SIZE], *osds;
+ int i, num = CEPH_PG_MAX_SIZE;
osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
/* primary is first up osd */
for (i = 0; i < num; i++)
- if (ceph_osd_is_up(osdmap, osds[i])) {
+ if (ceph_osd_is_up(osdmap, osds[i]))
return osds[i];
- break;
- }
return -1;
}
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e..970b547 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
const char *oid,
struct ceph_file_layout *fl,
struct ceph_osdmap *osdmap);
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+ int *acting);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d0..fd56451 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
#define CEPH_PG_LAYOUT_LINEAR 2
#define CEPH_PG_LAYOUT_HYBRID 3
+#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
/*
* placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b88126..d5114db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
continue;
ci = ceph_inode(inode);
spin_lock(&inode->i_lock);
- if (!ci->i_snap_realm)
- goto split_skip_inode;
- ceph_put_snap_realm(mdsc, ci->i_snap_realm);
- spin_lock(&realm->inodes_with_caps_lock);
- list_add(&ci->i_snap_realm_item,
- &realm->inodes_with_caps);
- ci->i_snap_realm = realm;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_get_snap_realm(mdsc, realm);
-split_skip_inode:
+ if (list_empty(&ci->i_snap_realm_item)) {
+ struct ceph_snap_realm *oldrealm =
+ ci->i_snap_realm;
+
+ dout(" moving %p to split realm %llx %p\n",
+ inode, realm->ino, realm);
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_add(&ci->i_snap_realm_item,
+ &realm->inodes_with_caps);
+ ci->i_snap_realm = realm;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_get_snap_realm(mdsc, realm);
+ ceph_put_snap_realm(mdsc, oldrealm);
+ }
spin_unlock(&inode->i_lock);
iput(inode);
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02ea..110857b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
*/
static void ceph_put_super(struct super_block *s)
{
- struct ceph_client *cl = ceph_client(s);
+ struct ceph_client *client = ceph_sb_to_client(s);
dout("put_super\n");
- ceph_mdsc_close_sessions(&cl->mdsc);
+ ceph_mdsc_close_sessions(&client->mdsc);
+
+ /*
+ * ensure we release the bdi before put_anon_super releases
+ * the device name.
+ */
+ if (s->s_bdi == &client->backing_dev_info) {
+ bdi_unregister(&client->backing_dev_info);
+ s->s_bdi = NULL;
+ }
+
return;
}
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
destroy_workqueue(client->pg_inv_wq);
destroy_workqueue(client->trunc_wq);
+ bdi_destroy(&client->backing_dev_info);
+
if (client->msgr)
ceph_messenger_destroy(client->msgr);
mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
int err;
- sb->s_bdi = &client->backing_dev_info;
-
/* set ra_pages based on rsize mount option? */
if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
client->backing_dev_info.ra_pages =
(client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
+ if (!err)
+ sb->s_bdi = &client->backing_dev_info;
return err;
}
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
dout("kill_sb %p\n", s);
ceph_mdsc_pre_umount(&client->mdsc);
kill_anon_super(s); /* will call put_super after sb is r/o */
- if (s->s_bdi == &client->backing_dev_info)
- bdi_unregister(&client->backing_dev_info);
- bdi_destroy(&client->backing_dev_info);
ceph_destroy_client(client);
}
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
if (ret)
goto out_icache;
- pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
- CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
- CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+ pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
+ CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
+ CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+ CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
return 0;
out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb..13513b8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
#include <linux/fs.h>
#include <linux/mempool.h>
#include <linux/pagemap.h>
+#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/slab.h>
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 4797787..246a167 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -18,6 +18,8 @@
#ifndef _CIFS_FS_SB_H
#define _CIFS_FS_SB_H
+#include <linux/backing-dev.h>
+
#define CIFS_MOUNT_NO_PERM 1 /* do not do client vfs_perm check */
#define CIFS_MOUNT_SET_UID 2 /* set current's euid in create etc. */
#define CIFS_MOUNT_SERVER_INUM 4 /* inode numbers from uniqueid from server */
@@ -50,5 +52,6 @@ struct cifs_sb_info {
#ifdef CONFIG_CIFS_DFS_UPCALL
char *mountdata; /* mount options received at mount time */
#endif
+ struct backing_dev_info bdi;
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ded66be..ad235d6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -103,6 +103,12 @@ cifs_read_super(struct super_block *sb, void *data,
if (cifs_sb == NULL)
return -ENOMEM;
+ rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
+ if (rc) {
+ kfree(cifs_sb);
+ return rc;
+ }
+
#ifdef CONFIG_CIFS_DFS_UPCALL
/* copy mount params to sb for use in submounts */
/* BB: should we move this after the mount so we
@@ -115,6 +121,7 @@ cifs_read_super(struct super_block *sb, void *data,
int len = strlen(data);
cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
if (cifs_sb->mountdata == NULL) {
+ bdi_destroy(&cifs_sb->bdi);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
return -ENOMEM;
@@ -135,6 +142,7 @@ cifs_read_super(struct super_block *sb, void *data,
sb->s_magic = CIFS_MAGIC_NUMBER;
sb->s_op = &cifs_super_ops;
+ sb->s_bdi = &cifs_sb->bdi;
/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
sb->s_blocksize =
cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
@@ -183,6 +191,7 @@ out_mount_failed:
}
#endif
unload_nls(cifs_sb->local_nls);
+ bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb);
}
return rc;
@@ -214,6 +223,7 @@ cifs_put_super(struct super_block *sb)
#endif
unload_nls(cifs_sb->local_nls);
+ bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb);
unlock_kernel();
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index a1695dc..d97f993 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -167,6 +167,10 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
return -EBUSY;
}
+ error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY);
+ if (error)
+ goto bdi_err;
+
vc->vc_sb = sb;
sb->s_fs_info = vc;
@@ -175,6 +179,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
sb->s_blocksize_bits = 12;
sb->s_magic = CODA_SUPER_MAGIC;
sb->s_op = &coda_super_operations;
+ sb->s_bdi = &vc->bdi;
/* get root fid from Venus: this needs the root inode */
error = venus_rootfid(sb, &fid);
@@ -200,6 +205,8 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
return 0;
error:
+ bdi_destroy(&vc->bdi);
+ bdi_err:
if (root)
iput(root);
if (vc)
@@ -210,6 +217,7 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
static void coda_put_super(struct super_block *sb)
{
+ bdi_destroy(&coda_vcp(sb)->bdi);
coda_vcp(sb)->vc_sb = NULL;
sb->s_fs_info = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03..0544873 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
- current->stack_start = current->mm->start_stack;
-
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6..641640d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
#include <linux/nbd.h>
#include <linux/random.h>
#include <linux/filter.h>
-#include <linux/pktcdvd.h>
#include <linux/hiddev.h>
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
COMPATIBLE_IOCTL(PPGETPHASE)
COMPATIBLE_IOCTL(PPGETFLAGS)
COMPATIBLE_IOCTL(PPSETFLAGS)
-/* pktcdvd */
-COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
/* Big A */
/* sparc only */
/* Big Q for sound/OSS */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bc71154..bfc2e0f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -35,6 +35,7 @@
#include <linux/scatterlist.h>
#include <linux/hash.h>
#include <linux/nsproxy.h>
+#include <linux/backing-dev.h>
/* Version verification for shared data structures w/ userspace */
#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -393,6 +394,7 @@ struct ecryptfs_mount_crypt_stat {
struct ecryptfs_sb_info {
struct super_block *wsi_sb;
struct ecryptfs_mount_crypt_stat mount_crypt_stat;
+ struct backing_dev_info bdi;
};
/* file private data. */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index af1a8f0..760983d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -497,17 +497,25 @@ struct kmem_cache *ecryptfs_sb_info_cache;
static int
ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
{
+ struct ecryptfs_sb_info *esi;
int rc = 0;
/* Released in ecryptfs_put_super() */
ecryptfs_set_superblock_private(sb,
kmem_cache_zalloc(ecryptfs_sb_info_cache,
GFP_KERNEL));
- if (!ecryptfs_superblock_to_private(sb)) {
+ esi = ecryptfs_superblock_to_private(sb);
+ if (!esi) {
ecryptfs_printk(KERN_WARNING, "Out of memory\n");
rc = -ENOMEM;
goto out;
}
+
+ rc = bdi_setup_and_register(&esi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
+ if (rc)
+ goto out;
+
+ sb->s_bdi = &esi->bdi;
sb->s_op = &ecryptfs_sops;
/* Released through deactivate_super(sb) from get_sb_nodev */
sb->s_root = d_alloc(NULL, &(const struct qstr) {
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 278743c..0c0ae49 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -122,6 +122,7 @@ static void ecryptfs_put_super(struct super_block *sb)
lock_kernel();
ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
+ bdi_destroy(&sb_info->bdi);
kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
ecryptfs_set_superblock_private(sb, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa1..e6e94c6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
if (retval < 0)
goto out;
- current->stack_start = current->mm->start_stack;
-
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 8442e35..22721b2 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -35,6 +35,7 @@
#include <linux/fs.h>
#include <linux/time.h>
+#include <linux/backing-dev.h>
#include "common.h"
/* FIXME: Remove once pnfs hits mainline
@@ -84,6 +85,7 @@ struct exofs_sb_info {
u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */
uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
+ struct backing_dev_info bdi; /* register our bdi with VFS */
struct pnfs_osd_data_map data_map; /* Default raid to use
* FIXME: Needed ?
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 18e57ea..03149b9a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -302,6 +302,7 @@ static void exofs_put_super(struct super_block *sb)
_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
sbi->layout.s_pid);
+ bdi_destroy(&sbi->bdi);
exofs_free_sbi(sbi);
sb->s_fs_info = NULL;
}
@@ -546,6 +547,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
+ ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+ if (ret)
+ goto free_bdi;
+
/* use mount options to fill superblock */
od = osduld_path_lookup(opts->dev_name);
if (IS_ERR(od)) {
@@ -612,6 +617,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
}
/* set up operation vectors */
+ sb->s_bdi = &sbi->bdi;
sb->s_fs_info = sbi;
sb->s_op = &exofs_sops;
sb->s_export_op = &exofs_export_ops;
@@ -643,6 +649,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
free_sbi:
+ bdi_destroy(&sbi->bdi);
+free_bdi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
opts->dev_name, sbi->layout.s_pid, ret);
exofs_free_sbi(sbi);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94c8ee8..236b834 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
physical += offset;
length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
flags |= FIEMAP_EXTENT_DATA_INLINE;
+ brelse(iloc.bh);
} else { /* external block */
physical = EXT4_I(inode)->i_file_acl << blockbits;
length = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5381802..81d6054 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5375,7 +5375,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
} else {
struct ext4_iloc iloc;
- err = ext4_get_inode_loc(inode, &iloc);
+ err = __ext4_get_inode_loc(inode, &iloc, 0);
if (err)
return err;
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -5386,6 +5386,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
(unsigned long long)iloc.bh->b_blocknr);
err = -EIO;
}
+ brelse(iloc.bh);
}
return err;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bde9d0b..b423a36 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2535,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry);
+ if (test_opt(sb, DISCARD)) {
+ ext4_fsblk_t discard_block;
+
+ discard_block = entry->start_blk +
+ ext4_group_first_block_no(sb, entry->group);
+ trace_ext4_discard_blocks(sb,
+ (unsigned long long)discard_block,
+ entry->count);
+ sb_issue_discard(sb, discard_block, entry->count);
+ }
+
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */
BUG_ON(err != 0);
@@ -2556,16 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
page_cache_release(e4b.bd_bitmap_page);
}
ext4_unlock_group(sb, entry->group);
- if (test_opt(sb, DISCARD)) {
- ext4_fsblk_t discard_block;
-
- discard_block = entry->start_blk +
- ext4_group_first_block_no(sb, entry->group);
- trace_ext4_discard_blocks(sb,
- (unsigned long long)discard_block,
- entry->count);
- sb_issue_discard(sb, discard_block, entry->count);
- }
kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b);
}
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 6c75110..7faefb4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -228,14 +228,23 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
#ifdef CONFIG_BLOCK
-#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
-#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+ return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+ return (blk << inode->i_blkbits);
+}
/**
* __generic_block_fiemap - FIEMAP for block based inodes (no locking)
- * @inode - the inode to map
- * @arg - the pointer to userspace where we copy everything to
- * @get_block - the fs's get_block function
+ * @inode: the inode to map
+ * @fieinfo: the fiemap info struct that will be passed back to userspace
+ * @start: where to start mapping in the inode
+ * @len: how much space to map
+ * @get_block: the fs's get_block function
*
* This does FIEMAP for block based inodes. Basically it will just loop
* through get_block until we hit the number of extents we want to map, or we
@@ -250,58 +259,63 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
*/
int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block)
+ struct fiemap_extent_info *fieinfo, loff_t start,
+ loff_t len, get_block_t *get_block)
{
- struct buffer_head tmp;
- unsigned long long start_blk;
- long long length = 0, map_len = 0;
+ struct buffer_head map_bh;
+ sector_t start_blk, last_blk;
+ loff_t isize = i_size_read(inode);
u64 logical = 0, phys = 0, size = 0;
u32 flags = FIEMAP_EXTENT_MERGED;
- int ret = 0, past_eof = 0, whole_file = 0;
+ bool past_eof = false, whole_file = false;
+ int ret = 0;
- if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
return ret;
- start_blk = logical_to_blk(inode, start);
-
- length = (long long)min_t(u64, len, i_size_read(inode));
- if (length < len)
- whole_file = 1;
+ /*
+ * Either the i_mutex or other appropriate locking needs to be held
+ * since we expect isize to not change at all through the duration of
+ * this call.
+ */
+ if (len >= isize) {
+ whole_file = true;
+ len = isize;
+ }
- map_len = length;
+ start_blk = logical_to_blk(inode, start);
+ last_blk = logical_to_blk(inode, start + len - 1);
do {
/*
* we set b_size to the total size we want so it will map as
* many contiguous blocks as possible at once
*/
- memset(&tmp, 0, sizeof(struct buffer_head));
- tmp.b_size = map_len;
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len;
- ret = get_block(inode, start_blk, &tmp, 0);
+ ret = get_block(inode, start_blk, &map_bh, 0);
if (ret)
break;
/* HOLE */
- if (!buffer_mapped(&tmp)) {
- length -= blk_to_logical(inode, 1);
+ if (!buffer_mapped(&map_bh)) {
start_blk++;
/*
- * we want to handle the case where there is an
+ * We want to handle the case where there is an
* allocated block at the front of the file, and then
* nothing but holes up to the end of the file properly,
* to make sure that extent at the front gets properly
* marked with FIEMAP_EXTENT_LAST
*/
if (!past_eof &&
- blk_to_logical(inode, start_blk) >=
- blk_to_logical(inode, 0)+i_size_read(inode))
+ blk_to_logical(inode, start_blk) >= isize)
past_eof = 1;
/*
- * first hole after going past the EOF, this is our
+ * First hole after going past the EOF, this is our
* last extent
*/
if (past_eof && size) {
@@ -309,15 +323,18 @@ int __generic_block_fiemap(struct inode *inode,
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
- break;
+ } else if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ size = 0;
}
/* if we have holes up to/past EOF then we're done */
- if (length <= 0 || past_eof)
+ if (start_blk > last_blk || past_eof || ret)
break;
} else {
/*
- * we have gone over the length of what we wanted to
+ * We have gone over the length of what we wanted to
* map, and it wasn't the entire file, so add the extent
* we got last time and exit.
*
@@ -331,7 +348,7 @@ int __generic_block_fiemap(struct inode *inode,
* are good to go, just add the extent to the fieinfo
* and break
*/
- if (length <= 0 && !whole_file) {
+ if (start_blk > last_blk && !whole_file) {
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size,
flags);
@@ -351,11 +368,10 @@ int __generic_block_fiemap(struct inode *inode,
}
logical = blk_to_logical(inode, start_blk);
- phys = blk_to_logical(inode, tmp.b_blocknr);
- size = tmp.b_size;
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
flags = FIEMAP_EXTENT_MERGED;
- length -= tmp.b_size;
start_blk += logical_to_blk(inode, size);
/*
@@ -363,15 +379,13 @@ int __generic_block_fiemap(struct inode *inode,
* soon as we find a hole that the last extent we found
* is marked with FIEMAP_EXTENT_LAST
*/
- if (!past_eof &&
- logical+size >=
- blk_to_logical(inode, 0)+i_size_read(inode))
- past_eof = 1;
+ if (!past_eof && logical + size >= isize)
+ past_eof = true;
}
cond_resched();
} while (1);
- /* if ret is 1 then we just hit the end of the extent array */
+ /* If ret is 1 then we just hit the end of the extent array */
if (ret == 1)
ret = 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 9dd1262..ed9ba6f 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -61,7 +61,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &jfs_aops;
} else {
- inode->i_op = &jfs_symlink_inode_operations;
+ inode->i_op = &jfs_fast_symlink_inode_operations;
/*
* The inline data should be null-terminated, but
* don't let on-disk corruption crash the kernel
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 6c4dfcbf..9e2f6a7 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -196,7 +196,7 @@ int dbMount(struct inode *ipbmap)
bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
- bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
+ bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
@@ -288,7 +288,7 @@ int dbSync(struct inode *ipbmap)
dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
- dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
+ dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
@@ -1441,7 +1441,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
* tree index of this allocation group within the control page.
*/
agperlev =
- (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
+ (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
/* dmap control page trees fan-out by 4 and a single allocation
@@ -1460,7 +1460,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
* the subtree to find the leftmost leaf that describes this
* free space.
*/
- for (k = bmp->db_agheigth; k > 0; k--) {
+ for (k = bmp->db_agheight; k > 0; k--) {
for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
if (l2nb <= dcp->stree[m + n]) {
ti = m + n;
@@ -3607,7 +3607,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
}
/*
- * compute db_aglevel, db_agheigth, db_width, db_agstart:
+ * compute db_aglevel, db_agheight, db_width, db_agstart:
* an ag is covered in aglevel dmapctl summary tree,
* at agheight level height (from leaf) with agwidth number of nodes
* each, which starts at agstart index node of the smmary tree node
@@ -3616,9 +3616,9 @@ void dbFinalizeBmap(struct inode *ipbmap)
bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
l2nl =
bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
- bmp->db_agheigth = l2nl >> 1;
- bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
- for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
+ bmp->db_agheight = l2nl >> 1;
+ bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
+ for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
i--) {
bmp->db_agstart += n;
n <<= 2;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 1a6eb41..6dcb906 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -210,7 +210,7 @@ struct dbmap_disk {
__le32 dn_maxag; /* 4: max active alloc group number */
__le32 dn_agpref; /* 4: preferred alloc group (hint) */
__le32 dn_aglevel; /* 4: dmapctl level holding the AG */
- __le32 dn_agheigth; /* 4: height in dmapctl of the AG */
+ __le32 dn_agheight; /* 4: height in dmapctl of the AG */
__le32 dn_agwidth; /* 4: width in dmapctl of the AG */
__le32 dn_agstart; /* 4: start tree index at AG height */
__le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
@@ -229,7 +229,7 @@ struct dbmap {
int dn_maxag; /* max active alloc group number */
int dn_agpref; /* preferred alloc group (hint) */
int dn_aglevel; /* dmapctl level holding the AG */
- int dn_agheigth; /* height in dmapctl of the AG */
+ int dn_agheight; /* height in dmapctl of the AG */
int dn_agwidth; /* width in dmapctl of the AG */
int dn_agstart; /* start tree index at AG height */
int dn_agl2size; /* l2 num of blks per alloc group */
@@ -255,7 +255,7 @@ struct bmap {
#define db_agsize db_bmap.dn_agsize
#define db_agl2size db_bmap.dn_agl2size
#define db_agwidth db_bmap.dn_agwidth
-#define db_agheigth db_bmap.dn_agheigth
+#define db_agheight db_bmap.dn_agheight
#define db_agstart db_bmap.dn_agstart
#define db_numag db_bmap.dn_numag
#define db_maxlevel db_bmap.dn_maxlevel
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 79e2c79..9e6bda3 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -48,5 +48,6 @@ extern const struct file_operations jfs_dir_operations;
extern const struct inode_operations jfs_file_inode_operations;
extern const struct file_operations jfs_file_operations;
extern const struct inode_operations jfs_symlink_inode_operations;
+extern const struct inode_operations jfs_fast_symlink_inode_operations;
extern const struct dentry_operations jfs_ci_dentry_operations;
#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 4a3e9f3..a9cf8e8 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -956,7 +956,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
*/
if (ssize <= IDATASIZE) {
- ip->i_op = &jfs_symlink_inode_operations;
+ ip->i_op = &jfs_fast_symlink_inode_operations;
i_fastsymlink = JFS_IP(ip)->i_inline;
memcpy(i_fastsymlink, name, ssize);
@@ -978,7 +978,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
else {
jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
- ip->i_op = &page_symlink_inode_operations;
+ ip->i_op = &jfs_symlink_inode_operations;
ip->i_mapping->a_ops = &jfs_aops;
/*
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 7f24a0b..1aba003 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -81,6 +81,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
struct inode *iplist[1];
struct jfs_superblock *j_sb, *j_sb2;
uint old_agsize;
+ int agsizechanged = 0;
struct buffer_head *bh, *bh2;
/* If the volume hasn't grown, get out now */
@@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
*/
if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
goto error_out;
+
+ agsizechanged |= (bmp->db_agsize != old_agsize);
+
/*
* the map now has extended to cover additional nblocks:
* dn_mapsize = oldMapsize + nblocks;
@@ -432,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
* will correctly identify the new ag);
*/
/* if new AG size the same as old AG size, done! */
- if (bmp->db_agsize != old_agsize) {
+ if (agsizechanged) {
if ((rc = diExtendFS(ipimap, ipbmap)))
goto error_out;
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 4af1a05..205b946 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-const struct inode_operations jfs_symlink_inode_operations = {
+const struct inode_operations jfs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = jfs_follow_link,
+ .setattr = jfs_setattr,
+ .setxattr = jfs_setxattr,
+ .getxattr = jfs_getxattr,
+ .listxattr = jfs_listxattr,
+ .removexattr = jfs_removexattr,
+};
+
+const struct inode_operations jfs_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = page_follow_link_light,
+ .put_link = page_put_link,
+ .setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
.listxattr = jfs_listxattr,
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 84e36f5..76c242f 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -459,6 +459,14 @@ static void __logfs_gc_pass(struct super_block *sb, int target)
struct logfs_block *block;
int round, progress, last_progress = 0;
+ /*
+ * Doing too many changes to the segfile at once would result
+ * in a large number of aliases. Write the journal before
+ * things get out of hand.
+ */
+ if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
+ logfs_write_anchor(sb);
+
if (no_free_segments(sb) >= target &&
super->s_no_object_aliases < MAX_OBJ_ALIASES)
return;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 33bd260..fb0a613 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -389,7 +389,10 @@ static void journal_get_erase_count(struct logfs_area *area)
static int journal_erase_segment(struct logfs_area *area)
{
struct super_block *sb = area->a_sb;
- struct logfs_segment_header sh;
+ union {
+ struct logfs_segment_header sh;
+ unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
+ } u;
u64 ofs;
int err;
@@ -397,20 +400,21 @@ static int journal_erase_segment(struct logfs_area *area)
if (err)
return err;
- sh.pad = 0;
- sh.type = SEG_JOURNAL;
- sh.level = 0;
- sh.segno = cpu_to_be32(area->a_segno);
- sh.ec = cpu_to_be32(area->a_erase_count);
- sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
- sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+ memset(&u, 0, sizeof(u));
+ u.sh.pad = 0;
+ u.sh.type = SEG_JOURNAL;
+ u.sh.level = 0;
+ u.sh.segno = cpu_to_be32(area->a_segno);
+ u.sh.ec = cpu_to_be32(area->a_erase_count);
+ u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+ u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);
/* This causes a bug in segment.c. Not yet. */
//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
ofs = dev_ofs(sb, area->a_segno, 0);
- area->a_used_bytes = ALIGN(sizeof(sh), 16);
- logfs_buf_write(area, ofs, &sh, sizeof(sh));
+ area->a_used_bytes = sizeof(u);
+ logfs_buf_write(area, ofs, &u, sizeof(u));
return 0;
}
@@ -494,6 +498,8 @@ static void account_shadows(struct super_block *sb)
btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
+ btree_grim_visitor32(&tree->segment_map, 0, NULL);
+ tree->no_shadowed_segments = 0;
if (li->li_block) {
/*
@@ -607,9 +613,9 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
if (len == 0)
return logfs_write_header(super, header, 0, type);
+ BUG_ON(len > sb->s_blocksize);
compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
if (compr_len < 0 || type == JE_ANCHOR) {
- BUG_ON(len > sb->s_blocksize);
memcpy(data, buf, len);
compr_len = len;
compr = COMPR_NONE;
@@ -661,6 +667,7 @@ static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
if (ofs < 0)
return ofs;
logfs_buf_write(area, ofs, super->s_compressed_je, len);
+ BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
return 0;
}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b84b0ee..0a3df1a 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -257,10 +257,14 @@ struct logfs_shadow {
* struct shadow_tree
* @new: shadows where old_ofs==0, indexed by new_ofs
* @old: shadows where old_ofs!=0, indexed by old_ofs
+ * @segment_map: bitfield of segments containing shadows
+ * @no_shadowed_segment: number of segments containing shadows
*/
struct shadow_tree {
struct btree_head64 new;
struct btree_head64 old;
+ struct btree_head32 segment_map;
+ int no_shadowed_segments;
};
struct object_alias_item {
@@ -305,13 +309,14 @@ typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
level_t level, int child_no, __be64 val);
struct logfs_block_ops {
void (*write_block)(struct logfs_block *block);
- gc_level_t (*block_level)(struct logfs_block *block);
void (*free_block)(struct super_block *sb, struct logfs_block*block);
int (*write_alias)(struct super_block *sb,
struct logfs_block *block,
write_alias_t *write_one_alias);
};
+#define MAX_JOURNAL_ENTRIES 256
+
struct logfs_super {
struct mtd_info *s_mtd; /* underlying device */
struct block_device *s_bdev; /* underlying device */
@@ -378,7 +383,7 @@ struct logfs_super {
u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
u64 s_last_version;
struct logfs_area *s_journal_area; /* open journal segment */
- __be64 s_je_array[64];
+ __be64 s_je_array[MAX_JOURNAL_ENTRIES];
int s_no_je;
int s_sum_index; /* for the 12 summaries */
@@ -722,4 +727,10 @@ static inline struct logfs_area *get_area(struct super_block *sb,
return logfs_super(sb)->s_area[(__force u8)gc_level];
}
+static inline void logfs_mempool_destroy(mempool_t *pool)
+{
+ if (pool)
+ mempool_destroy(pool);
+}
+
#endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index bff4025..3159db6 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -430,25 +430,6 @@ static void inode_write_block(struct logfs_block *block)
}
}
-static gc_level_t inode_block_level(struct logfs_block *block)
-{
- BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
- return GC_LEVEL(LOGFS_MAX_LEVELS);
-}
-
-static gc_level_t indirect_block_level(struct logfs_block *block)
-{
- struct page *page;
- struct inode *inode;
- u64 bix;
- level_t level;
-
- page = block->page;
- inode = page->mapping->host;
- logfs_unpack_index(page->index, &bix, &level);
- return expand_level(inode->i_ino, level);
-}
-
/*
* This silences a false, yet annoying gcc warning. I hate it when my editor
* jumps into bitops.h each time I recompile this file.
@@ -587,14 +568,12 @@ static void indirect_free_block(struct super_block *sb,
static struct logfs_block_ops inode_block_ops = {
.write_block = inode_write_block,
- .block_level = inode_block_level,
.free_block = inode_free_block,
.write_alias = inode_write_alias,
};
struct logfs_block_ops indirect_block_ops = {
.write_block = indirect_write_block,
- .block_level = indirect_block_level,
.free_block = indirect_free_block,
.write_alias = indirect_write_alias,
};
@@ -1241,6 +1220,18 @@ static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
mempool_free(shadow, super->s_shadow_pool);
}
+static void mark_segment(struct shadow_tree *tree, u32 segno)
+{
+ int err;
+
+ if (!btree_lookup32(&tree->segment_map, segno)) {
+ err = btree_insert32(&tree->segment_map, segno, (void *)1,
+ GFP_NOFS);
+ BUG_ON(err);
+ tree->no_shadowed_segments++;
+ }
+}
+
/**
* fill_shadow_tree - Propagate shadow tree changes due to a write
* @inode: Inode owning the page
@@ -1288,6 +1279,8 @@ static void fill_shadow_tree(struct inode *inode, struct page *page,
super->s_dirty_used_bytes += shadow->new_len;
super->s_dirty_free_bytes += shadow->old_len;
+ mark_segment(tree, shadow->old_ofs >> super->s_segshift);
+ mark_segment(tree, shadow->new_ofs >> super->s_segshift);
}
}
@@ -1845,19 +1838,37 @@ static int __logfs_truncate(struct inode *inode, u64 size)
return logfs_truncate_direct(inode, size);
}
-int logfs_truncate(struct inode *inode, u64 size)
+/*
+ * Truncate, by changing the segment file, can consume a fair amount
+ * of resources. So back off from time to time and do some GC.
+ * 8 or 2048 blocks should be well within safety limits even if
+ * every single block resided in a different segment.
+ */
+#define TRUNCATE_STEP (8 * 1024 * 1024)
+int logfs_truncate(struct inode *inode, u64 target)
{
struct super_block *sb = inode->i_sb;
- int err;
+ u64 size = i_size_read(inode);
+ int err = 0;
- logfs_get_wblocks(sb, NULL, 1);
- err = __logfs_truncate(inode, size);
- if (!err)
- err = __logfs_write_inode(inode, 0);
- logfs_put_wblocks(sb, NULL, 1);
+ size = ALIGN(size, TRUNCATE_STEP);
+ while (size > target) {
+ if (size > TRUNCATE_STEP)
+ size -= TRUNCATE_STEP;
+ else
+ size = 0;
+ if (size < target)
+ size = target;
+
+ logfs_get_wblocks(sb, NULL, 1);
+ err = __logfs_truncate(inode, target);
+ if (!err)
+ err = __logfs_write_inode(inode, 0);
+ logfs_put_wblocks(sb, NULL, 1);
+ }
if (!err)
- err = vmtruncate(inode, size);
+ err = vmtruncate(inode, target);
/* I don't trust error recovery yet. */
WARN_ON(err);
@@ -2251,8 +2262,6 @@ void logfs_cleanup_rw(struct super_block *sb)
struct logfs_super *super = logfs_super(sb);
destroy_meta_inode(super->s_segfile_inode);
- if (super->s_block_pool)
- mempool_destroy(super->s_block_pool);
- if (super->s_shadow_pool)
- mempool_destroy(super->s_shadow_pool);
+ logfs_mempool_destroy(super->s_block_pool);
+ logfs_mempool_destroy(super->s_shadow_pool);
}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 801a3a1..f77ce2b 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -183,14 +183,8 @@ static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
return 0;
}
-static gc_level_t btree_block_level(struct logfs_block *block)
-{
- return expand_level(block->ino, block->level);
-}
-
static struct logfs_block_ops btree_block_ops = {
.write_block = btree_write_block,
- .block_level = btree_block_level,
.free_block = __free_block,
.write_alias = btree_write_alias,
};
@@ -919,7 +913,7 @@ err:
for (i--; i >= 0; i--)
free_area(super->s_area[i]);
free_area(super->s_journal_area);
- mempool_destroy(super->s_alias_pool);
+ logfs_mempool_destroy(super->s_alias_pool);
return -ENOMEM;
}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b60bfac..5866ee6 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -12,6 +12,7 @@
#include "logfs.h"
#include <linux/bio.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
#include <linux/mtd/mtd.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
@@ -137,6 +138,10 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
sb->s_fs_info = super;
sb->s_mtd = super->s_mtd;
sb->s_bdev = super->s_bdev;
+ if (sb->s_bdev)
+ sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
+ if (sb->s_mtd)
+ sb->s_bdi = sb->s_mtd->backing_dev_info;
return 0;
}
@@ -452,6 +457,8 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
+ btree_init_mempool32(&super->s_shadow_tree.segment_map,
+ super->s_btree_pool);
ret = logfs_init_mapping(sb);
if (ret)
@@ -516,8 +523,8 @@ static void logfs_kill_sb(struct super_block *sb)
if (super->s_erase_page)
__free_page(super->s_erase_page);
super->s_devops->put_device(sb);
- mempool_destroy(super->s_btree_pool);
- mempool_destroy(super->s_alias_pool);
+ logfs_mempool_destroy(super->s_btree_pool);
+ logfs_mempool_destroy(super->s_alias_pool);
kfree(super);
log_super("LogFS: Finished unmounting\n");
}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..16df7277 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (nd->last.name[nd->last.len]) {
if (open_flag & O_CREAT)
goto exit;
- nd->flags |= LOOKUP_DIRECTORY;
+ nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
}
/* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
}
if (open_flag & O_DIRECTORY)
nd.flags |= LOOKUP_DIRECTORY;
+ if (!(open_flag & O_NOFOLLOW))
+ nd.flags |= LOOKUP_FOLLOW;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path holder;
@@ -1837,7 +1839,7 @@ reval:
void *cookie;
error = -ELOOP;
/* S_ISDIR part is a temporary automount kludge */
- if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index cf98da1..fa33851 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -526,10 +526,15 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
sb->s_blocksize_bits = 10;
sb->s_magic = NCP_SUPER_MAGIC;
sb->s_op = &ncp_sops;
+ sb->s_bdi = &server->bdi;
server = NCP_SBP(sb);
memset(server, 0, sizeof(*server));
+ error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
+ if (error)
+ goto out_bdi;
+
server->ncp_filp = ncp_filp;
server->ncp_sock = sock;
@@ -719,6 +724,8 @@ out_fput2:
if (server->info_filp)
fput(server->info_filp);
out_fput:
+ bdi_destroy(&server->bdi);
+out_bdi:
/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
*
* The previously used put_filp(ncp_filp); was bogous, since
@@ -756,6 +763,7 @@ static void ncp_put_super(struct super_block *sb)
kill_pid(server->m.wdog_pid, SIGTERM, 1);
put_pid(server->m.wdog_pid);
+ bdi_destroy(&server->bdi);
kfree(server->priv.data);
kfree(server->auth.object_name);
vfree(server->rxbuf);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a8766c4..acc9c49 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
{
target->flags = source->flags;
+ target->rsize = source->rsize;
+ target->wsize = source->wsize;
target->acregmin = source->acregmin;
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 1567124..ea61d26 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
static void nfs_do_free_delegation(struct nfs_delegation *delegation)
{
+ if (delegation->cred)
+ put_rpccred(delegation->cred);
kfree(delegation);
}
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
static void nfs_free_delegation(struct nfs_delegation *delegation)
{
- struct rpc_cred *cred;
-
- cred = rcu_dereference(delegation->cred);
- rcu_assign_pointer(delegation->cred, NULL);
call_rcu(&delegation->rcu, nfs_free_delegation_callback);
- if (cred)
- put_rpccred(cred);
}
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
*/
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
{
- struct nfs_delegation *delegation = NFS_I(inode)->delegation;
- struct rpc_cred *oldcred;
+ struct nfs_delegation *delegation;
+ struct rpc_cred *oldcred = NULL;
- if (delegation == NULL)
- return;
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
- delegation->type = res->delegation_type;
- delegation->maxsize = res->maxsize;
- oldcred = delegation->cred;
- delegation->cred = get_rpccred(cred);
- clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
- NFS_I(inode)->delegation_state = delegation->type;
- smp_wmb();
- put_rpccred(oldcred);
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL) {
+ spin_lock(&delegation->lock);
+ if (delegation->inode != NULL) {
+ memcpy(delegation->stateid.data, res->delegation.data,
+ sizeof(delegation->stateid.data));
+ delegation->type = res->delegation_type;
+ delegation->maxsize = res->maxsize;
+ oldcred = delegation->cred;
+ delegation->cred = get_rpccred(cred);
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM,
+ &delegation->flags);
+ NFS_I(inode)->delegation_state = delegation->type;
+ spin_unlock(&delegation->lock);
+ put_rpccred(oldcred);
+ rcu_read_unlock();
+ } else {
+ /* We appear to have raced with a delegation return. */
+ spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ nfs_inode_set_delegation(inode, cred, res);
+ }
+ } else {
+ rcu_read_unlock();
+ }
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
return inode;
}
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+ const nfs4_stateid *stateid,
+ struct nfs_client *clp)
{
- struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+ struct nfs_delegation *delegation =
+ rcu_dereference_protected(nfsi->delegation,
+ lockdep_is_held(&clp->cl_lock));
if (delegation == NULL)
goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
- struct nfs_delegation *delegation;
+ struct nfs_delegation *delegation, *old_delegation;
struct nfs_delegation *freeme = NULL;
int status = 0;
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
spin_lock_init(&delegation->lock);
spin_lock(&clp->cl_lock);
- if (rcu_dereference(nfsi->delegation) != NULL) {
- if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
- sizeof(delegation->stateid)) == 0 &&
- delegation->type == nfsi->delegation->type) {
+ old_delegation = rcu_dereference_protected(nfsi->delegation,
+ lockdep_is_held(&clp->cl_lock));
+ if (old_delegation != NULL) {
+ if (memcmp(&delegation->stateid, &old_delegation->stateid,
+ sizeof(old_delegation->stateid)) == 0 &&
+ delegation->type == old_delegation->type) {
goto out;
}
/*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
dfprintk(FILE, "%s: server %s handed out "
"a duplicate delegation!\n",
__func__, clp->cl_hostname);
- if (delegation->type <= nfsi->delegation->type) {
+ if (delegation->type <= old_delegation->type) {
freeme = delegation;
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi, NULL);
+ freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
}
list_add_rcu(&delegation->super_list, &clp->cl_delegations);
nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
if (inode == NULL)
continue;
spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
- if (rcu_dereference(nfsi->delegation) != NULL) {
+ if (rcu_access_pointer(nfsi->delegation) != NULL) {
spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL);
+ delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
struct nfs_delegation *delegation;
int err = 0;
- if (rcu_dereference(nfsi->delegation) != NULL) {
+ if (rcu_access_pointer(nfsi->delegation) != NULL) {
spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(nfsi, NULL);
+ delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
spin_unlock(&clp->cl_lock);
if (delegation != NULL) {
nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
if (inode == NULL)
continue;
spin_lock(&clp->cl_lock);
- delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+ delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index be46f26..a7bb5c6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -837,6 +837,8 @@ out_zap_parent:
/* If we have submounts, don't unhash ! */
if (have_submounts(dentry))
goto out_valid;
+ if (dentry->d_flags & DCACHE_DISCONNECTED)
+ goto out_valid;
shrink_dcache_parent(dentry);
}
d_drop(dentry);
@@ -1050,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *dir;
int openflags, ret = 0;
- if (!is_atomic_open(nd))
+ if (!is_atomic_open(nd) || d_mountpoint(dentry))
goto no_open;
parent = dget_parent(dentry);
dir = parent->d_inode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6380670..071fced 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5218,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
msg.rpc_resp = &calldata->res;
task_setup_data.callback_data = calldata;
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
+ if (IS_ERR(task)) {
status = PTR_ERR(task);
+ goto out;
+ }
rpc_put_task(task);
+ return 0;
out:
dprintk("<-- %s status=%d\n", __func__, status);
return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e016372..b4148fc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
if (data->version == 4) {
error = nfs4_try_mount(flags, dev_name, data, mnt);
kfree(data->client_address);
+ kfree(data->nfs_server.export_path);
goto out;
}
#endif /* CONFIG_NFS_V4 */
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
devname = nfs_path(path->mnt->mnt_devname,
path->mnt->mnt_root, path->dentry,
page, PAGE_SIZE);
- if (devname == NULL)
+ if (IS_ERR(devname))
goto out_freepage;
tmp = kstrdup(devname, GFP_KERNEL);
if (tmp == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63..3aea3ca 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+{
+ if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
+ return 1;
+ if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
+ NFS_INO_COMMIT, nfs_wait_bit_killable,
+ TASK_KILLABLE))
+ return 1;
+ return 0;
+}
+
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+{
+ clear_bit(NFS_INO_COMMIT, &nfsi->flags);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+}
+
+
static void nfs_commitdata_release(void *data)
{
struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
- if (how & FLUSH_SYNC)
- rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
BDI_RECLAIMABLE);
nfs_clear_page_tag_locked(req);
}
+ nfs_commit_clear_lock(NFS_I(inode));
return -ENOMEM;
}
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
next:
nfs_clear_page_tag_locked(req);
}
+ nfs_commit_clear_lock(NFS_I(data->inode));
nfs_commitdata_release(calldata);
}
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
static int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
- int res;
+ int may_wait = how & FLUSH_SYNC;
+ int res = 0;
+ if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+ goto out;
spin_lock(&inode->i_lock);
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
int error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
- }
+ if (may_wait)
+ wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+ nfs_wait_bit_killable,
+ TASK_KILLABLE);
+ } else
+ nfs_commit_clear_lock(NFS_I(inode));
+out:
return res;
}
@@ -1444,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
BUG_ON(!PageLocked(page));
for (;;) {
+ wait_on_page_writeback(page);
req = nfs_page_find_request(page);
if (req == NULL)
break;
@@ -1478,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
.range_start = range_start,
.range_end = range_end,
};
- struct nfs_page *req;
- int need_commit;
int ret;
while(PagePrivate(page)) {
+ wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) {
ret = nfs_writepage_locked(page, &wbc);
if (ret < 0)
goto out_error;
}
- req = nfs_find_and_lock_request(page);
- if (!req)
- break;
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
+ ret = sync_inode(inode, &wbc);
+ if (ret < 0)
goto out_error;
- }
- need_commit = test_bit(PG_CLEAN, &req->wb_flags);
- nfs_clear_page_tag_locked(req);
- if (need_commit) {
- ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (ret < 0)
- goto out_error;
- }
}
return 0;
out_error:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e170317..34ccf81 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -161,10 +161,10 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = p + (argp->pagelen>>2);
+ argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
} else {
- argp->end = p + (PAGE_SIZE>>2);
+ argp->end = argp->p + (PAGE_SIZE>>2);
argp->pagelen -= PAGE_SIZE;
}
memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
@@ -1426,10 +1426,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = p + (argp->pagelen>>2);
+ argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
} else {
- argp->end = p + (PAGE_SIZE>>2);
+ argp->end = argp->p + (PAGE_SIZE>>2);
argp->pagelen -= PAGE_SIZE;
}
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e..48145f5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
sb->s_export_op = &nilfs_export_ops;
sb->s_root = NULL;
sb->s_time_gran = 1;
+ sb->s_bdi = nilfs->ns_bdi;
err = load_nilfs(nilfs, sbi);
if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbf..b3a159b 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
config INOTIFY_USER
bool "Inotify support for userspace"
+ select ANON_INODES
select FSNOTIFY
default y
---help---
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb22..f9d5d3f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh)
{
int ret = 0;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
mlog_entry_void();
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
get_bh(bh); /* for end_buffer_write_sync() */
bh->b_end_io = end_buffer_write_sync;
+ ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
submit_bh(WRITE, bh);
wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb9..12d5eb7 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
BUG_ON(!lksb);
/* only updates if this node masters the lockres */
+ spin_lock(&res->spinlock);
if (res->owner == dlm->node_num) {
-
- spin_lock(&res->spinlock);
/* check the lksb flags for the direction */
if (lksb->flags & DLM_LKSB_GET_LVB) {
mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
* here. In the future we might want to clear it at the time
* the put is actually done.
*/
- spin_unlock(&res->spinlock);
}
+ spin_unlock(&res->spinlock);
/* reset any lvb flags on the lksb */
lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de15..b83d610 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
* O_RDONLY -> PRMODE level
* O_WRONLY -> EXMODE level
*
- * O_NONBLOCK -> LKM_NOQUEUE
+ * O_NONBLOCK -> NOQUEUE
*/
static int dlmfs_decode_open_flags(int open_flags,
int *level,
int *flags)
{
if (open_flags & (O_WRONLY|O_RDWR))
- *level = LKM_EXMODE;
+ *level = DLM_LOCK_EX;
else
- *level = LKM_PRMODE;
+ *level = DLM_LOCK_PR;
*flags = 0;
if (open_flags & O_NONBLOCK)
- *flags |= LKM_NOQUEUE;
+ *flags |= DLM_LKF_NOQUEUE;
return 0;
}
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
* to be able userspace to be able to distinguish a
* valid lock request from one that simply couldn't be
* granted. */
- if (flags & LKM_NOQUEUE && status == -EAGAIN)
+ if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
status = -ETXTBSY;
kfree(fp);
goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
status = 0;
if (fp) {
level = fp->fp_lock_level;
- if (level != LKM_IVMODE)
+ if (level != DLM_LOCK_IV)
user_dlm_cluster_unlock(&ip->ip_lockres, level);
kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
if ((count + *ppos) > i_size_read(inode))
readlen = i_size_read(inode) - *ppos;
else
- readlen = count - *ppos;
+ readlen = count;
lvb_buf = kmalloc(readlen, GFP_NOFS);
if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc..a5fbd9c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
if (why == RESTART_META) {
mlog(0, "restarting function.\n");
restart_func = 1;
+ status = 0;
} else {
BUG_ON(why != RESTART_TRANS);
@@ -1981,18 +1982,18 @@ relock:
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb, rw_level);
- if (direct_io) {
- ret = generic_segment_checks(iov, &nr_segs, &ocount,
- VERIFY_READ);
- if (ret)
- goto out_dio;
+ ret = generic_segment_checks(iov, &nr_segs, &ocount,
+ VERIFY_READ);
+ if (ret)
+ goto out_dio;
- count = ocount;
- ret = generic_write_checks(file, ppos, &count,
- S_ISBLK(inode->i_mode));
- if (ret)
- goto out_dio;
+ count = ocount;
+ ret = generic_write_checks(file, ppos, &count,
+ S_ISBLK(inode->i_mode));
+ if (ret)
+ goto out_dio;
+ if (direct_io) {
written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
ppos, count, ocount);
if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
goto out_dio;
}
} else {
- written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
+ current->backing_dev_info = file->f_mapping->backing_dev_info;
+ written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
+ ppos, count, 0);
+ current->backing_dev_info = NULL;
}
out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
if (ret < 0)
written = ret;
- if (!ret && (old_size != i_size_read(inode) ||
- old_clusters != OCFS2_I(inode)->ip_clusters ||
- has_refcount)) {
+ if (!ret && ((old_size != i_size_read(inode)) ||
+ (old_clusters != OCFS2_I(inode)->ip_clusters) ||
+ has_refcount)) {
ret = jbd2_journal_force_commit(osb->journal->j_journal);
if (ret < 0)
written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb..af18988 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
+ handle = NULL;
mlog_errno(status);
goto out;
}
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail_unlock;
}
- status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
- orphan_dir_bh);
- if (status < 0) {
- mlog_errno(status);
- goto bail_commit;
+ if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+ status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+ orphan_dir_bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail_commit;
+ }
}
/* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
static int ocfs2_wipe_inode(struct inode *inode,
struct buffer_head *di_bh)
{
- int status, orphaned_slot;
+ int status, orphaned_slot = -1;
struct inode *orphan_dir_inode = NULL;
struct buffer_head *orphan_dir_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_dinode *di;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
- di = (struct ocfs2_dinode *) di_bh->b_data;
- orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
+ if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
+ orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
- status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
- if (status)
- return status;
+ status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+ if (status)
+ return status;
- orphan_dir_inode = ocfs2_get_system_file_inode(osb,
- ORPHAN_DIR_SYSTEM_INODE,
- orphaned_slot);
- if (!orphan_dir_inode) {
- status = -EEXIST;
- mlog_errno(status);
- goto bail;
- }
+ orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+ ORPHAN_DIR_SYSTEM_INODE,
+ orphaned_slot);
+ if (!orphan_dir_inode) {
+ status = -EEXIST;
+ mlog_errno(status);
+ goto bail;
+ }
- /* Lock the orphan dir. The lock will be held for the entire
- * delete_inode operation. We do this now to avoid races with
- * recovery completion on other nodes. */
- mutex_lock(&orphan_dir_inode->i_mutex);
- status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
- if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ /* Lock the orphan dir. The lock will be held for the entire
+ * delete_inode operation. We do this now to avoid races with
+ * recovery completion on other nodes. */
+ mutex_lock(&orphan_dir_inode->i_mutex);
+ status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+ if (status < 0) {
+ mutex_unlock(&orphan_dir_inode->i_mutex);
- mlog_errno(status);
- goto bail;
+ mlog_errno(status);
+ goto bail;
+ }
}
/* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
mlog_errno(status);
bail_unlock_dir:
+ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
+ return status;
+
ocfs2_inode_unlock(orphan_dir_inode, 1);
mutex_unlock(&orphan_dir_inode->i_mutex);
brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
/* Do some basic inode verification... */
di = (struct ocfs2_dinode *) di_bh->b_data;
- if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+ if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
+ !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
/*
* Inodes in the orphan dir must have ORPHANED_FL. The only
* inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07..0b28e19 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
/* Does someone have the file open O_DIRECT */
#define OCFS2_INODE_OPEN_DIRECT 0x00000040
+/* Tell the inode wipe code it's not in orphan dir */
+#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080
static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50a..4cbb18f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
}
}
- status = ocfs2_add_entry(handle, dentry, inode,
- OCFS2_I(inode)->ip_blkno, parent_fe_bh,
- &lookup);
- if (status < 0) {
+ /*
+ * Do this before adding the entry to the directory. We add
+ * also set d_op after success so that ->d_iput() will cleanup
+ * the dentry lock even if ocfs2_add_entry() fails below.
+ */
+ status = ocfs2_dentry_attach_lock(dentry, inode,
+ OCFS2_I(dir)->ip_blkno);
+ if (status) {
mlog_errno(status);
goto leave;
}
+ dentry->d_op = &ocfs2_dentry_ops;
- status = ocfs2_dentry_attach_lock(dentry, inode,
- OCFS2_I(dir)->ip_blkno);
- if (status) {
+ status = ocfs2_add_entry(handle, dentry, inode,
+ OCFS2_I(inode)->ip_blkno, parent_fe_bh,
+ &lookup);
+ if (status < 0) {
mlog_errno(status);
goto leave;
}
insert_inode_hash(inode);
- dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
status = 0;
leave:
@@ -445,11 +450,6 @@ leave:
ocfs2_free_dir_lookup_result(&lookup);
- if ((status < 0) && inode) {
- clear_nlink(inode);
- iput(inode);
- }
-
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
@@ -459,6 +459,17 @@ leave:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
+ /*
+ * We should call iput after the i_mutex of the bitmap been
+ * unlocked in ocfs2_free_alloc_context, or the
+ * ocfs2_delete_inode will mutex_lock again.
+ */
+ if ((status < 0) && inode) {
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
+ clear_nlink(inode);
+ iput(inode);
+ }
+
mlog_exit(status);
return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
}
}
- status = ocfs2_add_entry(handle, dentry, inode,
- le64_to_cpu(fe->i_blkno), parent_fe_bh,
- &lookup);
- if (status < 0) {
+ /*
+ * Do this before adding the entry to the directory. We add
+ * also set d_op after success so that ->d_iput() will cleanup
+ * the dentry lock even if ocfs2_add_entry() fails below.
+ */
+ status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+ if (status) {
mlog_errno(status);
goto bail;
}
+ dentry->d_op = &ocfs2_dentry_ops;
- status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
- if (status) {
+ status = ocfs2_add_entry(handle, dentry, inode,
+ le64_to_cpu(fe->i_blkno), parent_fe_bh,
+ &lookup);
+ if (status < 0) {
mlog_errno(status);
goto bail;
}
insert_inode_hash(inode);
- dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
if (xattr_ac)
ocfs2_free_alloc_context(xattr_ac);
if ((status < 0) && inode) {
+ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
clear_nlink(inode);
iput(inode);
}
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
}
le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+ OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
/* Record which orphan dir our inode now resides
* in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c..5cbcd0f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
di->i_attr = s_di->i_attr;
if (preserve) {
+ t_inode->i_uid = s_inode->i_uid;
+ t_inode->i_gid = s_inode->i_gid;
+ t_inode->i_mode = s_inode->i_mode;
di->i_uid = s_di->i_uid;
di->i_gid = s_di->i_gid;
di->i_mode = s_di->i_mode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec..885ab55 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/tracehook.h>
-#include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
rsslim,
mm ? mm->start_code : 0,
mm ? mm->end_code : 0,
- (permitted && mm) ? task->stack_start : 0,
+ (permitted && mm) ? mm->start_stack : 0,
esp,
eip,
/* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7621db8..8418fcc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2909,7 +2909,7 @@ out_no_task:
*/
static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
- DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fd_operations),
+ DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 0705534..47f5b14 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
} else if (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack) {
name = "[stack]";
- } else {
- unsigned long stack_start;
- struct proc_maps_private *pmp;
-
- pmp = m->private;
- stack_start = pmp->task->stack_start;
-
- if (vma->vm_start <= stack_start &&
- vma->vm_end >= stack_start) {
- pad_len_spaces(m, len);
- seq_printf(m,
- "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
- vma->vm_end - stack_start
-#else
- stack_start - vma->vm_start
-#endif
- );
- }
}
} else {
name = "[vdso]";
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index f8a6075a..0793044 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -46,8 +46,6 @@ static inline bool is_privroot_deh(struct dentry *dir,
struct reiserfs_de_head *deh)
{
struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
- if (reiserfs_expose_privroot(dir->d_sb))
- return 0;
return (dir == dir->d_parent && privroot->d_inode &&
deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4f9586b..e7cc00e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -554,7 +554,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (!err && new_size < i_size_read(dentry->d_inode)) {
struct iattr newattrs = {
.ia_ctime = current_fs_time(inode->i_sb),
- .ia_size = buffer_size,
+ .ia_size = new_size,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
@@ -973,21 +973,13 @@ int reiserfs_permission(struct inode *inode, int mask)
return generic_permission(inode, mask, NULL);
}
-/* This will catch lookups from the fs root to .reiserfs_priv */
-static int
-xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
+static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
- if (container_of(q1, struct dentry, d_name) == priv_root)
- return -ENOENT;
- if (q1->len == name->len &&
- !memcmp(q1->name, name->name, name->len))
- return 0;
- return 1;
+ return -EPERM;
}
static const struct dentry_operations xattr_lookup_poison_ops = {
- .d_compare = xattr_lookup_poison,
+ .d_revalidate = xattr_hide_revalidate,
};
int reiserfs_lookup_privroot(struct super_block *s)
@@ -1001,8 +993,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
REISERFS_SB(s)->priv_root = dentry;
- if (!reiserfs_expose_privroot(s))
- s->s_root->d_op = &xattr_lookup_poison_ops;
+ dentry->d_op = &xattr_lookup_poison_ops;
if (dentry->d_inode)
dentry->d_inode->i_flags |= S_PRIVATE;
} else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1c4c8f0..dfa1d67 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -479,6 +479,7 @@ smb_put_super(struct super_block *sb)
if (server->conn_pid)
kill_pid(server->conn_pid, SIGTERM, 1);
+ bdi_destroy(&server->bdi);
kfree(server->ops);
smb_unload_nls(server);
sb->s_fs_info = NULL;
@@ -525,6 +526,11 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
if (!server)
goto out_no_server;
sb->s_fs_info = server;
+
+ if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
+ goto out_bdi;
+
+ sb->s_bdi = &server->bdi;
server->super_block = sb;
server->mnt = NULL;
@@ -624,6 +630,8 @@ out_no_smbiod:
out_bad_option:
kfree(mem);
out_no_mem:
+ bdi_destroy(&server->bdi);
+out_bdi:
if (!server->mnt)
printk(KERN_ERR "smb_fill_super: allocation failure\n");
sb->s_fs_info = NULL;
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 1cb0d81..653c030 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -87,9 +87,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
u64 cur_index = index >> msblk->devblksize_log2;
int bytes, compressed, b = 0, k = 0, page = 0, avail;
-
- bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
- sizeof(*bh), GFP_KERNEL);
+ bh = kcalloc(((srclength + msblk->devblksize - 1)
+ >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3550aec..48b6f4a 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -275,7 +275,8 @@ allocate_root:
err = squashfs_read_inode(root, root_inode);
if (err) {
- iget_failed(root);
+ make_bad_inode(root);
+ iput(root);
goto failed_mount;
}
insert_inode_hash(root);
@@ -353,6 +354,7 @@ static void squashfs_put_super(struct super_block *sb)
kfree(sbi->id_table);
kfree(sbi->fragment_index);
kfree(sbi->meta_index);
+ kfree(sbi->inode_lookup_table);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 15a03d0..7a60387 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -128,8 +128,9 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
goto release_mutex;
}
+ length = stream->total_out;
mutex_unlock(&msblk->read_data_mutex);
- return stream->total_out;
+ return length;
release_mutex:
mutex_unlock(&msblk->read_data_mutex);
diff --git a/fs/super.c b/fs/super.c
index f35ac60..1527e6a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/file.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -693,6 +694,7 @@ int set_anon_super(struct super_block *s, void *data)
return -EMFILE;
}
s->s_dev = MKDEV(0, dev & MINORMASK);
+ s->s_bdi = &noop_backing_dev_info;
return 0;
}
@@ -954,10 +956,11 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
if (error < 0)
goto out_free_secdata;
BUG_ON(!mnt->mnt_sb);
+ WARN_ON(!mnt->mnt_sb->s_bdi);
- error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
- if (error)
- goto out_sb;
+ error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
+ if (error)
+ goto out_sb;
/*
* filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
diff --git a/fs/sync.c b/fs/sync.c
index fc5c3d7..92b2281 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include <linux/backing-dev.h>
#include "internal.h"
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
@@ -32,7 +33,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
* This should be safe, as we require bdi backing to actually
* write out data in the first place
*/
- if (!sb->s_bdi)
+ if (!sb->s_bdi || sb->s_bdi == &noop_backing_dev_info)
return 0;
if (sb->s_qcop && sb->s_qcop->quota_sync)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b4..29f1edc 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
xfs_unmountfs(mp);
xfs_freesb(mp);
+ xfs_inode_shrinker_unregister(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
if (error)
goto fail_vnrele;
+ xfs_inode_shrinker_register(mp);
+
kfree(mtpt);
return 0;
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
goto out_cleanup_procfs;
vfs_initquota();
+ xfs_inode_shrinker_init();
error = register_filesystem(&xfs_fs_type);
if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
+ xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd96982..a427c63 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
struct xfs_perag *pag, int flags),
int flags,
int tag,
- int exclusive)
+ int exclusive,
+ int *nr_to_scan)
{
uint32_t first_index;
int last_error = 0;
@@ -134,7 +135,7 @@ restart:
if (error == EFSCORRUPTED)
break;
- } while (1);
+ } while ((*nr_to_scan)--);
if (skipped) {
delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
struct xfs_perag *pag, int flags),
int flags,
int tag,
- int exclusive)
+ int exclusive,
+ int *nr_to_scan)
{
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
+ int nr;
+ nr = nr_to_scan ? *nr_to_scan : INT_MAX;
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
struct xfs_perag *pag;
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
continue;
}
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
- exclusive);
+ exclusive, &nr);
xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
+ if (nr <= 0)
+ break;
}
+ if (nr_to_scan)
+ *nr_to_scan = nr;
return XFS_ERROR(last_error);
}
@@ -291,7 +299,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG, 0);
+ XFS_ICI_NO_TAG, 0, NULL);
if (error)
return XFS_ERROR(error);
@@ -310,7 +318,7 @@ xfs_sync_attr(
ASSERT((flags & ~SYNC_WAIT) == 0);
return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG, 0);
+ XFS_ICI_NO_TAG, 0, NULL);
}
STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
+ pag->pag_ici_reclaimable++;
}
/*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
{
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ pag->pag_ici_reclaimable--;
}
/*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
int mode)
{
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
- XFS_ICI_RECLAIM_TAG, 1);
+ XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+ int nr_to_scan,
+ gfp_t gfp_mask)
+{
+ struct xfs_mount *mp;
+ struct xfs_perag *pag;
+ xfs_agnumber_t ag;
+ int reclaimable = 0;
+
+ if (nr_to_scan) {
+ if (!(gfp_mask & __GFP_FS))
+ return -1;
+
+ down_read(&xfs_mount_list_lock);
+ list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+ xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+ XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+ if (nr_to_scan <= 0)
+ break;
+ }
+ up_read(&xfs_mount_list_lock);
+ }
+
+ down_read(&xfs_mount_list_lock);
+ list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+ pag = xfs_perag_get(mp, ag);
+ if (!pag->pag_ici_init) {
+ xfs_perag_put(pag);
+ continue;
+ }
+ reclaimable += pag->pag_ici_reclaimable;
+ xfs_perag_put(pag);
+ }
+ }
+ up_read(&xfs_mount_list_lock);
+ return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+ .shrink = xfs_reclaim_inode_shrink,
+ .seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+ init_rwsem(&xfs_mount_list_lock);
+ register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+ ASSERT(list_empty(&xfs_mount_list));
+ unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+ struct xfs_mount *mp)
+{
+ down_write(&xfs_mount_list_lock);
+ list_add_tail(&mp->m_mplist, &xfs_mount_list);
+ up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+ struct xfs_mount *mp)
+{
+ down_write(&xfs_mount_list_lock);
+ list_del(&mp->m_mplist);
+ up_write(&xfs_mount_list_lock);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c34..cdcbaac 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag, int write_lock);
+ int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
#endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d..50bee07 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+ XFS_ICI_NO_TAG, 0, NULL);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1f..abb8222 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
+ int pag_ici_reclaimable; /* reclaimable inodes */
#endif
int pagb_count; /* pagb slots in use */
xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index cd27c9d..5bba29a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -177,16 +177,26 @@ xfs_swap_extents_check_format(
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
return EINVAL;
- /* Check root block of temp in btree form to max in target */
+ /*
+ * If we are in a btree format, check that the temp root block will fit
+ * in the target and that it has enough extents to be in btree format
+ * in the target.
+ *
+ * Note that we have to be careful to allow btree->extent conversions
+ * (a common defrag case) which will occur when the temp inode is in
+ * extent format...
+ */
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_BOFF(ip) &&
- tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+ ((XFS_IFORK_BOFF(ip) &&
+ tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
+ XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
return EINVAL;
- /* Check root block of target in btree form to max in temp */
+ /* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_BOFF(tip) &&
- ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+ ((XFS_IFORK_BOFF(tip) &&
+ ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
+ XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
return EINVAL;
return 0;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7..9ff48a16 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
+ struct list_head m_mplist; /* inode shrinker mount list */
} xfs_mount_t;
/*
OpenPOWER on IntegriCloud