diff options
147 files changed, 1707 insertions, 1834 deletions
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 42ee059..9a3334a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -371,8 +371,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path, int retval = -EINVAL; char *name; - lock_kernel(); - name = getname(path); retval = PTR_ERR(name); if (IS_ERR(name)) @@ -392,7 +390,6 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, char __user *, path, } putname(name); out: - unlock_kernel(); return retval; } diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index 823a629..2cd00b5 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1789,12 +1789,13 @@ static int dv1394_open(struct inode *inode, struct file *file) } else { /* look up the card by ID */ unsigned long flags; + int idx = ieee1394_file_to_instance(file); spin_lock_irqsave(&dv1394_cards_lock, flags); if (!list_empty(&dv1394_cards)) { struct video_card *p; list_for_each_entry(p, &dv1394_cards, list) { - if ((p->id) == ieee1394_file_to_instance(file)) { + if ((p->id) == idx) { video = p; break; } @@ -1803,7 +1804,7 @@ static int dv1394_open(struct inode *inode, struct file *file) spin_unlock_irqrestore(&dv1394_cards_lock, flags); if (!video) { - debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file)); + debug_printk("dv1394: OHCI card %d not found", idx); return -ENODEV; } diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h index 21d50f7..28b9f58 100644 --- a/drivers/ieee1394/ieee1394_core.h +++ b/drivers/ieee1394/ieee1394_core.h @@ -5,6 +5,7 @@ #include <linux/fs.h> #include <linux/list.h> #include <linux/types.h> +#include <linux/cdev.h> #include <asm/atomic.h> #include "hosts.h" @@ -155,7 +156,10 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size, */ static inline unsigned char ieee1394_file_to_instance(struct file *file) { - return file->f_path.dentry->d_inode->i_cindex; + int idx = cdev_index(file->f_path.dentry->d_inode); + if (idx < 0) + idx = 0; + return idx; } extern int hpsb_disable_irm; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index dff5760..ffe75e8 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -39,6 +39,7 @@ #include <linux/parser.h> #include <linux/notifier.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include <asm/byteorder.h> #include "usb.h" #include "hcd.h" @@ -265,9 +266,13 @@ static int remount(struct super_block *sb, int *flags, char *data) return -EINVAL; } + lock_kernel(); + if (usbfs_mount && usbfs_mount->mnt_sb) update_sb(usbfs_mount->mnt_sb); + unlock_kernel(); + return 0; } diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index e0a85db..a6665f3 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -53,6 +53,7 @@ struct adfs_dir_ops { int (*update)(struct adfs_dir *dir, struct object_info *obj); int (*create)(struct adfs_dir *dir, struct object_info *obj); int (*remove)(struct adfs_dir *dir, struct object_info *obj); + int (*sync)(struct adfs_dir *dir); void (*free)(struct adfs_dir *dir); }; @@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations; extern struct adfs_dir_ops adfs_f_dir_ops; extern struct adfs_dir_ops adfs_fplus_dir_ops; -extern int adfs_dir_update(struct super_block *sb, struct object_info *obj); +extern int adfs_dir_update(struct super_block *sb, struct object_info *obj, + int wait); /* file.c */ extern const struct inode_operations adfs_file_inode_operations; diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index e867ccf..4d40734 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -83,7 +83,7 @@ out: } int -adfs_dir_update(struct super_block *sb, struct object_info *obj) +adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait) { int ret = -EINVAL; #ifdef CONFIG_ADFS_FS_RW @@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj) ret = ops->update(&dir, obj); write_unlock(&adfs_dir_lock); + if (wait) { + int err = ops->sync(&dir); + if (!ret) + ret = err; + } + ops->free(&dir); out: #endif @@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .readdir = adfs_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, }; static int diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index ea7df21..31df6ad 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -437,6 +437,22 @@ bad_dir: #endif } +static int +adfs_f_sync(struct adfs_dir *dir) +{ + int err = 0; + int i; + + for (i = dir->nr_buffers - 1; i >= 0; i--) { + struct buffer_head *bh = dir->bh[i]; + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } + + return err; +} + static void adfs_f_free(struct adfs_dir *dir) { @@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = { .setpos = adfs_f_setpos, .getnext = adfs_f_getnext, .update = adfs_f_update, + .sync = adfs_f_sync, .free = adfs_f_free }; diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 1ec644e..139e0f3 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -161,6 +161,22 @@ out: return ret; } +static int +adfs_fplus_sync(struct adfs_dir *dir) +{ + int err = 0; + int i; + + for (i = dir->nr_buffers - 1; i >= 0; i--) { + struct buffer_head *bh = dir->bh[i]; + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } + + return err; +} + static void adfs_fplus_free(struct adfs_dir *dir) { @@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = { .read = adfs_fplus_read, .setpos = adfs_fplus_setpos, .getnext = adfs_fplus_getnext, + .sync = adfs_fplus_sync, .free = adfs_fplus_free }; diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 36e381c..8224d54 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .mmap = generic_file_mmap, - .fsync = file_fsync, + .fsync = simple_fsync, .write = do_sync_write, .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index e647200..05b3a67 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -376,7 +376,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int unused) +int adfs_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj); + ret = adfs_dir_update(sb, &obj, wait); unlock_kernel(); return ret; } diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 92ab4fb..568081b 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock); #define GET_FRAG_ID(_map,_start,_idmask) \ ({ \ unsigned char *_m = _map + (_start >> 3); \ - u32 _frag = get_unaligned((u32 *)_m); \ + u32 _frag = get_unaligned_le32(_m); \ _frag >>= (_start & 7); \ _frag & _idmask; \ }) diff --git a/fs/adfs/super.c b/fs/adfs/super.c index dd9becc..0ec5aaf 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb) int i; struct adfs_sb_info *asb = ADFS_SB(sb); + lock_kernel(); + for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); kfree(asb); sb->s_fs_info = NULL; + + unlock_kernel(); } static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 1a2d5e3..e511dc6 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -182,6 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); +int affs_file_fsync(struct file *, struct dentry *, int); /* dir.c */ diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 7b36904..8ca8f3a 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .readdir = affs_readdir, - .fsync = file_fsync, + .fsync = affs_file_fsync, }; /* diff --git a/fs/affs/file.c b/fs/affs/file.c index 9246cb4..184e55c 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = { .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, - .fsync = file_fsync, + .fsync = affs_file_fsync, .splice_read = generic_file_splice_read, }; @@ -915,3 +915,15 @@ affs_truncate(struct inode *inode) } affs_free_prealloc(inode); } + +int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode * inode = dentry->d_inode; + int ret, err; + + ret = write_inode_now(inode, 0); + err = sync_blockdev(inode->i_sb->s_bdev); + if (!ret) + ret = err; + return ret; +} diff --git a/fs/affs/super.c b/fs/affs/super.c index 63f5183..104fdcb 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -16,6 +16,7 @@ #include <linux/parser.h> #include <linux/magic.h> #include <linux/sched.h> +#include <linux/smp_lock.h> #include "affs.h" extern struct timezone sys_tz; @@ -24,49 +25,67 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void +affs_commit_super(struct super_block *sb, int clean) +{ + struct affs_sb_info *sbi = AFFS_SB(sb); + struct buffer_head *bh = sbi->s_root_bh; + struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); + + tail->bm_flag = cpu_to_be32(clean); + secs_to_datestamp(get_seconds(), &tail->disk_change); + affs_fix_checksum(sb, bh); + mark_buffer_dirty(bh); +} + +static void affs_put_super(struct super_block *sb) { struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); - if (!(sb->s_flags & MS_RDONLY)) { - AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1); - secs_to_datestamp(get_seconds(), - &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change); - affs_fix_checksum(sb, sbi->s_root_bh); - mark_buffer_dirty(sbi->s_root_bh); - } + lock_kernel(); + + if (!(sb->s_flags & MS_RDONLY)) + affs_commit_super(sb, 1); kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); kfree(sbi); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } static void affs_write_super(struct super_block *sb) { int clean = 2; - struct affs_sb_info *sbi = AFFS_SB(sb); + lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) { // if (sbi->s_bitmap[i].bm_bh) { // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { // clean = 0; - AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean); - secs_to_datestamp(get_seconds(), - &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change); - affs_fix_checksum(sb, sbi->s_root_bh); - mark_buffer_dirty(sbi->s_root_bh); + affs_commit_super(sb, clean); sb->s_dirt = !clean; /* redo until bitmap synced */ } else sb->s_dirt = 0; + unlock_super(sb); pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); } +static int +affs_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); + affs_commit_super(sb, 2); + sb->s_dirt = 0; + unlock_super(sb); + return 0; +} + static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) @@ -124,6 +143,7 @@ static const struct super_operations affs_sops = { .clear_inode = affs_clear_inode, .put_super = affs_put_super, .write_super = affs_write_super, + .sync_fs = affs_sync_fs, .statfs = affs_statfs, .remount_fs = affs_remount, .show_options = generic_show_options, @@ -507,6 +527,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } + lock_kernel(); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; @@ -514,8 +535,10 @@ affs_remount(struct super_block *sb, int *flags, char *data) sbi->s_uid = uid; sbi->s_gid = gid; - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) { sb->s_dirt = 1; while (sb->s_dirt) @@ -524,6 +547,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) } else res = affs_init_bitmap(sb, flags); + unlock_kernel(); return res; } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2b9e2d0..c52be53 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/afs/super.c b/fs/afs/super.c index 76828e5..ad0514d 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb) _enter(""); + lock_kernel(); + afs_put_volume(as->volume); + unlock_kernel(); + _leave(""); } diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index 4eb4d8d..2316e94 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c @@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, } path.mnt = mnt; path_get(&path); - if (!follow_down(&path.mnt, &path.dentry)) { + if (!follow_down(&path)) { path_put(&path); DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); continue; } - while (d_mountpoint(path.dentry) && - follow_down(&path.mnt, &path.dentry)) + while (d_mountpoint(path.dentry) && follow_down(&path)); ; umount_ok = may_umount(path.mnt); path_put(&path); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index b7ff33c..8f7cdde 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); -static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static inline int autofs4_follow_mount(struct path *path) { int res = 0; - while (d_mountpoint(*dentry)) { - int followed = follow_down(mnt, dentry); + while (d_mountpoint(path->dentry)) { + int followed = follow_down(path); if (!followed) break; res = 1; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 84168c0..f3da2eb 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp, return 0; } -/* - * Walk down the mount stack looking for an autofs mount that - * has the requested device number (aka. new_encode_dev(sb->s_dev). - */ -static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno) +static int find_autofs_mount(const char *pathname, + struct path *res, + int test(struct path *path, void *data), + void *data) { - struct dentry *dentry; - struct inode *inode; - struct super_block *sb; - dev_t s_dev; - unsigned int err; - + struct path path; + int err = kern_path(pathname, 0, &path); + if (err) + return err; err = -ENOENT; - - /* Lookup the dentry name at the base of our mount point */ - dentry = d_lookup(nd->path.dentry, &nd->last); - if (!dentry) - goto out; - - dput(nd->path.dentry); - nd->path.dentry = dentry; - - /* And follow the mount stack looking for our autofs mount */ - while (follow_down(&nd->path.mnt, &nd->path.dentry)) { - inode = nd->path.dentry->d_inode; - if (!inode) - break; - - sb = inode->i_sb; - s_dev = new_encode_dev(sb->s_dev); - if (devno == s_dev) { - if (sb->s_magic == AUTOFS_SUPER_MAGIC) { + while (path.dentry == path.mnt->mnt_root) { + if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) { + if (test(&path, data)) { + path_get(&path); + if (!err) /* already found some */ + path_put(res); + *res = path; err = 0; - break; } } + if (!follow_up(&path)) + break; } -out: + path_put(&path); return err; } -/* - * Walk down the mount stack looking for an autofs mount that - * has the requested mount type (ie. indirect, direct or offset). - */ -static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type) +static int test_by_dev(struct path *path, void *p) { - struct dentry *dentry; - struct autofs_info *ino; - unsigned int err; - - err = -ENOENT; - - /* Lookup the dentry name at the base of our mount point */ - dentry = d_lookup(nd->path.dentry, &nd->last); - if (!dentry) - goto out; - - dput(nd->path.dentry); - nd->path.dentry = dentry; + return path->mnt->mnt_sb->s_dev == *(dev_t *)p; +} - /* And follow the mount stack looking for our autofs mount */ - while (follow_down(&nd->path.mnt, &nd->path.dentry)) { - ino = autofs4_dentry_ino(nd->path.dentry); - if (ino && ino->sbi->type & type) { - err = 0; - break; - } - } -out: - return err; +static int test_by_type(struct path *path, void *p) +{ + struct autofs_info *ino = autofs4_dentry_ino(path->dentry); + return ino && ino->sbi->type & *(unsigned *)p; } static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) @@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). */ -static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) +static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { - struct file *filp; - struct nameidata nd; int err, fd; fd = get_unused_fd(); if (likely(fd >= 0)) { - /* Get nameidata of the parent directory */ - err = path_lookup(path, LOOKUP_PARENT, &nd); + struct file *filp; + struct path path; + + err = find_autofs_mount(name, &path, test_by_dev, &devid); if (err) goto out; /* - * Search down, within the parent, looking for an - * autofs super block that has the device number + * Find autofs super block that has the device number * corresponding to the autofs fs we want to open. */ - err = autofs_dev_ioctl_find_super(&nd, devid); - if (err) { - path_put(&nd.path); - goto out; - } - filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, + filp = dentry_open(path.dentry, path.mnt, O_RDONLY, current_cred()); if (IS_ERR(filp)) { err = PTR_ERR(filp); @@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp, param->ioctlfd = -1; path = param->path; - devid = param->openmount.devid; + devid = new_decode_dev(param->openmount.devid); err = 0; fd = autofs_dev_ioctl_open_mountpoint(path, devid); @@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, struct autofs_dev_ioctl *param) { struct autofs_info *ino; - struct nameidata nd; - const char *path; + struct path path; dev_t devid; int err = -ENOENT; @@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp, goto out; } - path = param->path; - devid = new_encode_dev(sbi->sb->s_dev); + devid = sbi->sb->s_dev; param->requester.uid = param->requester.gid = -1; - /* Get nameidata of the parent directory */ - err = path_lookup(path, LOOKUP_PARENT, &nd); + err = find_autofs_mount(param->path, &path, test_by_dev, &devid); if (err) goto out; - err = autofs_dev_ioctl_find_super(&nd, devid); - if (err) - goto out_release; - - ino = autofs4_dentry_ino(nd.path.dentry); + ino = autofs4_dentry_ino(path.dentry); if (ino) { err = 0; - autofs4_expire_wait(nd.path.dentry); + autofs4_expire_wait(path.dentry); spin_lock(&sbi->fs_lock); param->requester.uid = ino->uid; param->requester.gid = ino->gid; spin_unlock(&sbi->fs_lock); } - -out_release: - path_put(&nd.path); + path_put(&path); out: return err; } @@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - struct nameidata nd; - const char *path; + struct path path; + const char *name; unsigned int type; unsigned int devid, magic; int err = -ENOENT; @@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, goto out; } - path = param->path; + name = param->path; type = param->ismountpoint.in.type; param->ismountpoint.out.devid = devid = 0; param->ismountpoint.out.magic = magic = 0; if (!fp || param->ioctlfd == -1) { - if (autofs_type_any(type)) { - struct super_block *sb; - - err = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (err) - goto out; - - sb = nd.path.dentry->d_sb; - devid = new_encode_dev(sb->s_dev); - } else { - struct autofs_info *ino; - - err = path_lookup(path, LOOKUP_PARENT, &nd); - if (err) - goto out; - - err = autofs_dev_ioctl_find_sbi_type(&nd, type); - if (err) - goto out_release; - - ino = autofs4_dentry_ino(nd.path.dentry); - devid = autofs4_get_dev(ino->sbi); - } - + if (autofs_type_any(type)) + err = kern_path(name, LOOKUP_FOLLOW, &path); + else + err = find_autofs_mount(name, &path, test_by_type, &type); + if (err) + goto out; + devid = new_encode_dev(path.mnt->mnt_sb->s_dev); err = 0; - if (nd.path.dentry->d_inode && - nd.path.mnt->mnt_root == nd.path.dentry) { + if (path.dentry->d_inode && + path.mnt->mnt_root == path.dentry) { err = 1; - magic = nd.path.dentry->d_inode->i_sb->s_magic; + magic = path.dentry->d_inode->i_sb->s_magic; } } else { - dev_t dev = autofs4_get_dev(sbi); + dev_t dev = sbi->sb->s_dev; - err = path_lookup(path, LOOKUP_PARENT, &nd); + err = find_autofs_mount(name, &path, test_by_dev, &dev); if (err) goto out; - err = autofs_dev_ioctl_find_super(&nd, dev); - if (err) - goto out_release; - - devid = dev; + devid = new_encode_dev(dev); - err = have_submounts(nd.path.dentry); + err = have_submounts(path.dentry); - if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { - if (follow_down(&nd.path.mnt, &nd.path.dentry)) { - struct inode *inode = nd.path.dentry->d_inode; - magic = inode->i_sb->s_magic; - } + if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) { + if (follow_down(&path)) + magic = path.mnt->mnt_sb->s_magic; } } param->ismountpoint.out.devid = devid; param->ismountpoint.out.magic = magic; - -out_release: - path_put(&nd.path); + path_put(&path); out: return err; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 3077d8f..aa39ae8 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry, static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) { struct dentry *top = dentry; + struct path path = {.mnt = mnt, .dentry = dentry}; int status = 1; DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); - mntget(mnt); - dget(dentry); + path_get(&path); - if (!follow_down(&mnt, &dentry)) + if (!follow_down(&path)) goto done; - if (is_autofs4_dentry(dentry)) { - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + if (is_autofs4_dentry(path.dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb); /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) @@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) * Otherwise it's an offset mount and we need to check * if we can umount its mount, if there is one. */ - if (!d_mountpoint(dentry)) { + if (!d_mountpoint(path.dentry)) { status = 0; goto done; } @@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) status = 0; done: DPRINTK("returning = %d", status); - dput(dentry); - mntput(mnt); + path_put(&path); return status; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index e383bf0..b96a3c5 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) nd->flags); /* * For an expire of a covered direct or offset mount we need - * to beeak out of follow_down() at the autofs mount trigger + * to break out of follow_down() at the autofs mount trigger * (d_mounted--), so we can see the expiring flag, and manage * the blocking and following here until the expire is completed. */ @@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) if (ino->flags & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); /* Follow down to our covering mount. */ - if (!follow_down(&nd->path.mnt, &nd->path.dentry)) + if (!follow_down(&nd->path)) goto done; goto follow; } @@ -230,8 +230,7 @@ follow: * to follow it. */ if (d_mountpoint(dentry)) { - if (!autofs4_follow_mount(&nd->path.mnt, - &nd->path.dentry)) { + if (!autofs4_follow_mount(&nd->path)) { status = -ENOENT; goto out_error; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 76afd0d..9367b62 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { + lock_kernel(); + kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - return; + + unlock_kernel(); } /* Allocate private field of the superblock, fill it. diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 4dd1b62..54bd07d 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .readdir = bfs_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, .llseek = generic_file_llseek, }; @@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink = 1; } de->ino = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); inode->i_ctime = dir->i_ctime; @@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_ctime = CURRENT_TIME_SEC; inode_dec_link_count(new_inode); } - mark_buffer_dirty(old_bh); + mark_buffer_dirty_inode(old_bh, old_dir); error = 0; end_rename: @@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name, for (i = 0; i < BFS_NAMELEN; i++) de->name[i] = (i < namelen) ? name[i] : 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); brelse(bh); return 0; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index cc4062d..6f60336 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -30,6 +30,7 @@ MODULE_LICENSE("GPL"); #define dprintf(x...) #endif +static void bfs_write_super(struct super_block *s); void dump_imap(const char *prefix, struct super_block *s); struct inode *bfs_iget(struct super_block *sb, unsigned long ino) @@ -97,14 +98,15 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int unused) +static int bfs_write_inode(struct inode *inode, int wait) { + struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; unsigned long i_sblock; struct bfs_inode *di; struct buffer_head *bh; int block, off; - struct bfs_sb_info *info = BFS_SB(inode->i_sb); + int err = 0; dprintf("ino=%08x\n", ino); @@ -145,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); + if (wait) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + err = -EIO; + } brelse(bh); mutex_unlock(&info->bfs_lock); - return 0; + return err; } static void bfs_delete_inode(struct inode *inode) @@ -209,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode) clear_inode(inode); } +static int bfs_sync_fs(struct super_block *sb, int wait) +{ + struct bfs_sb_info *info = BFS_SB(sb); + + mutex_lock(&info->bfs_lock); + mark_buffer_dirty(info->si_sbh); + sb->s_dirt = 0; + mutex_unlock(&info->bfs_lock); + + return 0; +} + +static void bfs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + bfs_sync_fs(sb, 1); + else + sb->s_dirt = 0; +} + static void bfs_put_super(struct super_block *s) { struct bfs_sb_info *info = BFS_SB(s); @@ -216,11 +243,18 @@ static void bfs_put_super(struct super_block *s) if (!info) return; + lock_kernel(); + + if (s->s_dirt) + bfs_write_super(s); + brelse(info->si_sbh); mutex_destroy(&info->bfs_lock); kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; + + unlock_kernel(); } static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -240,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static void bfs_write_super(struct super_block *s) -{ - struct bfs_sb_info *info = BFS_SB(s); - - mutex_lock(&info->bfs_lock); - if (!(s->s_flags & MS_RDONLY)) - mark_buffer_dirty(info->si_sbh); - s->s_dirt = 0; - mutex_unlock(&info->bfs_lock); -} - static struct kmem_cache *bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) @@ -298,6 +321,7 @@ static const struct super_operations bfs_sops = { .delete_inode = bfs_delete_inode, .put_super = bfs_put_super, .write_super = bfs_write_super, + .sync_fs = bfs_sync_fs, .statfs = bfs_statfs, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 931f6b8..3a6d4fb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, iov, offset, nr_segs, blkdev_get_blocks, NULL); } +int __sync_blockdev(struct block_device *bdev, int wait) +{ + if (!bdev) + return 0; + if (!wait) + return filemap_flush(bdev->bd_inode->i_mapping); + return filemap_write_and_wait(bdev->bd_inode->i_mapping); +} + /* * Write out and wait upon all the dirty data associated with a block * device via its mapping. Does not take the superblock lock. */ int sync_blockdev(struct block_device *bdev) { - int ret = 0; - - if (bdev) - ret = filemap_write_and_wait(bdev->bd_inode->i_mapping); - return ret; + return __sync_blockdev(bdev, 1); } EXPORT_SYMBOL(sync_blockdev); @@ -199,7 +204,7 @@ int fsync_bdev(struct block_device *bdev) { struct super_block *sb = get_super(bdev); if (sb) { - int res = fsync_super(sb); + int res = sync_filesystem(sb); drop_super(sb); return res; } @@ -241,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb->s_frozen = SB_FREEZE_WRITE; smp_wmb(); - __fsync_super(sb); + sync_filesystem(sb); sb->s_frozen = SB_FREEZE_TRANS; smp_wmb(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5b68330..8612b3a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2322,7 +2322,6 @@ err: btrfs_update_inode(trans, root, dir); btrfs_drop_nlink(inode); ret = btrfs_update_inode(trans, root, inode); - dir->i_sb->s_dirt = 1; out: return ret; } @@ -2806,7 +2805,6 @@ error: pending_del_nr); } btrfs_free_path(path); - inode->i_sb->s_dirt = 1; return ret; } @@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); out_unlock: @@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) drop_inode = 1; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); @@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); @@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); btrfs_update_inode_block_group(trans, dir); if (drop_inode) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 708ac06..9f179d4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -394,10 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_root *root = btrfs_sb(sb); int ret; - if (sb->s_flags & MS_RDONLY) - return 0; - - sb->s_dirt = 0; if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; @@ -408,7 +404,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); - sb->s_dirt = 0; return ret; } @@ -454,11 +449,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -} - static int btrfs_test_super(struct super_block *s, void *data) { struct btrfs_fs_devices *test_fs_devices = data; @@ -689,7 +679,6 @@ static int btrfs_unfreeze(struct super_block *sb) static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, - .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .write_inode = btrfs_write_inode, diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 1e96234..431accd 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) /* make sure all pages pinned by operations on behalf of the netfs are * written to disc */ cachefiles_begin_secure(cache, &saved_cred); - ret = fsync_super(cache->mnt->mnt_sb); + down_read(&cache->mnt->mnt_sb->s_umount); + ret = sync_filesystem(cache->mnt->mnt_sb); + up_read(&cache->mnt->mnt_sb->s_umount); cachefiles_end_secure(cache, saved_cred); if (ret == -EIO) diff --git a/fs/char_dev.c b/fs/char_dev.c index 38f7122..b7c9d51 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp) p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; - inode->i_cindex = idx; list_add(&inode->i_devices, &p->list); new = NULL; } else if (!cdev_get(p)) @@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; } +int cdev_index(struct inode *inode) +{ + int idx; + struct kobject *kobj; + + kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); + if (!kobj) + return -1; + kobject_put(kobj); + return idx; +} + void cd_forget(struct inode *inode) { spin_lock(&cdev_lock); @@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); +EXPORT_SYMBOL(cdev_index); EXPORT_SYMBOL(register_chrdev); EXPORT_SYMBOL(unregister_chrdev); EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 83d6275..3bb11be 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, case -EBUSY: /* someone else made a mount here whilst we were busy */ while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; default: diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0a10a59..0d92114 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb) cFYI(1, ("Empty cifs superblock info passed to unmount")); return; } + + lock_kernel(); + rc = cifs_umount(sb, cifs_sb); if (rc) cERROR(1, ("cifs_umount failed with return code %d", rc)); @@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb) unload_nls(cifs_sb->local_nls); kfree(cifs_sb); - return; + + unlock_kernel(); } static int diff --git a/fs/compat.c b/fs/compat.c index bb2a9b2..6aefb77 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, } } - lock_kernel(); retval = do_mount((char*)dev_page, dir_page, (char*)type_page, flags, (void*)data_page); - unlock_kernel(); out4: free_page(data_page); diff --git a/fs/dcache.c b/fs/dcache.c index 75659a6..9e5cd3c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root, spin_lock(&vfsmount_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; @@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) spin_lock(&dcache_lock); prepend(&end, &buflen, "\0", 1); - if (!IS_ROOT(dentry) && d_unhashed(dentry) && + if (d_unlinked(dentry) && (prepend(&end, &buflen, "//deleted", 9) != 0)) goto Elong; if (buflen < 1) @@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) read_unlock(¤t->fs->lock); error = -ENOENT; - /* Has the current directory has been unlinked? */ spin_lock(&dcache_lock); - if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) { + if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index fa4c7e7..12d6496 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -27,6 +27,7 @@ #include <linux/mount.h> #include <linux/key.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include <linux/file.h> #include <linux/crypto.h> #include "ecryptfs_kernel.h" @@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb) { struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb); + lock_kernel(); + ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat); kmem_cache_free(ecryptfs_sb_info_cache, sb_info); ecryptfs_set_superblock_private(sb, NULL); + + unlock_kernel(); } /** diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 9f1985e..8216c5b 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -200,20 +200,21 @@ static const struct export_operations exofs_export_ops; /* * Write the superblock to the OSD */ -static void exofs_write_super(struct super_block *sb) +static int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; struct osd_request *or; struct osd_obj_id obj; - int ret; + int ret = -ENOMEM; fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL); if (!fscb) { EXOFS_ERR("exofs_write_super: memory allocation failed.\n"); - return; + return -ENOMEM; } + lock_super(sb); lock_kernel(); sbi = sb->s_fs_info; fscb->s_nextid = cpu_to_le64(sbi->s_nextid); @@ -246,7 +247,17 @@ out: if (or) osd_end_request(or); unlock_kernel(); + unlock_super(sb); kfree(fscb); + return ret; +} + +static void exofs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + exofs_sync_fs(sb, 1); + else + sb->s_dirt = 0; } /* @@ -258,6 +269,11 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; + lock_kernel(); + + if (sb->s_dirt) + exofs_write_super(sb); + /* make sure there are no pending commands */ for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; num_pend = atomic_read(&sbi->s_curr_pending)) { @@ -271,6 +287,8 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } /* @@ -484,6 +502,7 @@ static const struct super_operations exofs_sops = { .delete_inode = exofs_delete_inode, .put_super = exofs_put_super, .write_super = exofs_write_super, + .sync_fs = exofs_sync_fs, .statfs = exofs_statfs, }; diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index e0b2b43..f42af45 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT2_FS) += ext2.o -ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \ +ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2999d72..0035004 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif - .fsync = ext2_sync_file, + .fsync = simple_fsync, }; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 3203042..b2bbf45 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); -/* fsync.c */ -extern int ext2_sync_file (struct file *, struct dentry *, int); - /* ialloc.c */ extern struct inode * ext2_new_inode (struct inode *, int); extern void ext2_free_inode (struct inode *); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 45ed071..2b9e47d 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; @@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = { .mmap = xip_file_mmap, .open = generic_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, + .fsync = simple_fsync, }; #endif diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c deleted file mode 100644 index fc66c93..0000000 --- a/fs/ext2/fsync.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * linux/fs/ext2/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * - * ext2fs fsync primitive - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. - * Andi Kleen, 1997 - * - * Major simplications and cleanup - we only need to do the metadata, because - * we can depend on generic_block_fdatasync() to sync the data blocks. - */ - -#include "ext2.h" -#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ - - -/* - * File may be NULL when we are called. Perhaps we shouldn't - * even pass file to fsync ? - */ - -int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = ext2_sync_inode(inode); - if (ret == 0) - ret = err; - return ret; -} diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index acf6788..29ed682 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); -static int ext2_update_inode(struct inode * inode, int do_sync); - /* * Test whether an inode is a fast symlink. */ @@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_update_inode(inode, inode_needs_sync(inode)); + ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1337,7 +1335,7 @@ bad_inode: return ERR_PTR(ret); } -static int ext2_update_inode(struct inode * inode, int do_sync) +int ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync) return err; } -int ext2_write_inode(struct inode *inode, int wait) -{ - return ext2_update_inode(inode, wait); -} - int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e3c748f..4589996 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); +static int ext2_sync_fs(struct super_block *sb, int wait); void ext2_error (struct super_block * sb, const char * function, const char * fmt, ...) @@ -114,6 +115,11 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + ext2_write_super(sb); + ext2_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; @@ -135,7 +141,7 @@ static void ext2_put_super (struct super_block * sb) kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * ext2_inode_cachep; @@ -304,6 +310,7 @@ static const struct super_operations ext2_sops = { .delete_inode = ext2_delete_inode, .put_super = ext2_put_super, .write_super = ext2_write_super, + .sync_fs = ext2_sync_fs, .statfs = ext2_statfs, .remount_fs = ext2_remount, .clear_inode = ext2_clear_inode, @@ -1127,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) * set s_state to EXT2_VALID_FS after some corrections. */ -void ext2_write_super (struct super_block * sb) +static int ext2_sync_fs(struct super_block *sb, int wait) { - struct ext2_super_block * es; + struct ext2_super_block *es = EXT2_SB(sb)->s_es; + lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { - es = EXT2_SB(sb)->s_es; - - if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { - ext2_debug ("setting valid to 0\n"); - es->s_state &= cpu_to_le16(~EXT2_VALID_FS); - es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_mtime = cpu_to_le32(get_seconds()); - ext2_sync_super(sb, es); - } else - ext2_commit_super (sb, es); + if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { + ext2_debug("setting valid to 0\n"); + es->s_state &= cpu_to_le16(~EXT2_VALID_FS); + es->s_free_blocks_count = + cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = + cpu_to_le32(ext2_count_free_inodes(sb)); + es->s_mtime = cpu_to_le32(get_seconds()); + ext2_sync_super(sb, es); + } else { + ext2_commit_super(sb, es); } sb->s_dirt = 0; unlock_kernel(); + + return 0; +} + + +void ext2_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ext2_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static int ext2_remount (struct super_block * sb, int * flags, char * data) @@ -1157,6 +1175,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; + lock_kernel(); + /* Store the old options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; @@ -1192,12 +1212,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || - !(sbi->s_mount_state & EXT2_VALID_FS)) + !(sbi->s_mount_state & EXT2_VALID_FS)) { + unlock_kernel(); return 0; + } /* * OK, we are remounting a valid rw partition rdonly, so set * the rdonly flag and then mark the partition as valid again. @@ -1224,12 +1248,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sb->s_flags &= ~MS_RDONLY; } ext2_sync_super(sb, es); + unlock_kernel(); return 0; restore_opts: sbi->s_mount_opt = old_opts.s_mount_opt; sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sb->s_flags = old_sb_flags; + unlock_kernel(); return err; } diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 225202d..27967f9 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -649,7 +649,7 @@ do_more: count = overflow; goto do_more; } - sb->s_dirt = 1; + error_return: brelse(bitmap_bh); ext3_std_error(sb, err); @@ -1708,7 +1708,6 @@ allocated: if (!fatal) fatal = err; - sb->s_dirt = 1; if (fatal) goto out; diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index dd13d60..b399912 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) err = ext3_journal_dirty_metadata(handle, bitmap_bh); if (!fatal) fatal = err; - sb->s_dirt = 1; + error_return: brelse(bitmap_bh); ext3_std_error(sb, fatal); @@ -537,7 +537,6 @@ got: percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); - sb->s_dirt = 1; inode->i_uid = current_fsuid(); if (test_opt (sb, GRPID)) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fcfa243..b0248c6 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle, ext3_update_dynamic_rev(sb); EXT3_SET_RO_COMPAT_FEATURE(sb, EXT3_FEATURE_RO_COMPAT_LARGE_FILE); - sb->s_dirt = 1; handle->h_sync = 1; err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 78fdf38..8a0b263 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) EXT3_INODES_PER_GROUP(sb)); ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; exit_journal: unlock_super(sb); @@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, } es->s_blocks_count = cpu_to_le32(o_blocks_count + add); ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - sb->s_dirt = 1; unlock_super(sb); ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c70d52..26aa64d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, static int ext3_remount (struct super_block * sb, int * flags, char * data); static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext3_unfreeze(struct super_block *sb); -static void ext3_write_super (struct super_block * sb); static int ext3_freeze(struct super_block *sb); /* @@ -399,6 +398,8 @@ static void ext3_put_super (struct super_block * sb) struct ext3_super_block *es = sbi->s_es; int i, err; + lock_kernel(); + ext3_xattr_put_super(sb); err = journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -447,7 +448,8 @@ static void ext3_put_super (struct super_block * sb) sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; + + unlock_kernel(); } static struct kmem_cache *ext3_inode_cachep; @@ -761,7 +763,6 @@ static const struct super_operations ext3_sops = { .dirty_inode = ext3_dirty_inode, .delete_inode = ext3_delete_inode, .put_super = ext3_put_super, - .write_super = ext3_write_super, .sync_fs = ext3_sync_fs, .freeze_fs = ext3_freeze, .unfreeze_fs = ext3_unfreeze, @@ -1785,7 +1786,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) #else es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); #endif - sb->s_dirt = 1; } if (sbi->s_blocks_per_group > blocksize * 8) { @@ -2265,7 +2265,6 @@ static int ext3_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ ext3_commit_super(sb, es, 1); @@ -2308,7 +2307,6 @@ static int ext3_create_journal(struct super_block * sb, EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); es->s_journal_inum = cpu_to_le32(journal_inum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ ext3_commit_super(sb, es, 1); @@ -2354,7 +2352,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb, if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; ext3_commit_super(sb, es, 1); } unlock_super(sb); @@ -2413,29 +2410,14 @@ int ext3_force_commit(struct super_block *sb) return 0; journal = EXT3_SB(sb)->s_journal; - sb->s_dirt = 0; ret = ext3_journal_force_commit(journal); return ret; } -/* - * Ext3 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point. (We can probably nuke this function altogether, and remove - * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...) - */ -static void ext3_write_super (struct super_block * sb) -{ - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; -} - static int ext3_sync_fs(struct super_block *sb, int wait) { tid_t target; - sb->s_dirt = 0; if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); @@ -2451,7 +2433,6 @@ static int ext3_freeze(struct super_block *sb) { int error = 0; journal_t *journal; - sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { journal = EXT3_SB(sb)->s_journal; @@ -2509,7 +2490,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) int i; #endif + lock_kernel(); + /* Store the original options */ + lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_resuid = sbi->s_resuid; @@ -2617,6 +2601,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); #endif + unlock_super(sb); + unlock_kernel(); return 0; restore_opts: sb->s_flags = old_sb_flags; @@ -2633,6 +2619,8 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif + unlock_super(sb); + unlock_kernel(); return err; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 83b7be8..545e37c 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle, if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f016707..012c425 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -576,6 +576,11 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + lock_super(sb); + lock_kernel(); + if (sb->s_dirt) + ext4_commit_super(sb, 1); + ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); @@ -642,8 +647,6 @@ static void ext4_put_super(struct super_block *sb) unlock_super(sb); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - lock_super(sb); - lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); } @@ -3333,7 +3336,9 @@ int ext4_force_commit(struct super_block *sb) static void ext4_write_super(struct super_block *sb) { + lock_super(sb); ext4_commit_super(sb, 1); + unlock_super(sb); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -3417,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) int i; #endif + lock_kernel(); + /* Store the original options */ + lock_super(sb); old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; old_opts.s_resuid = sbi->s_resuid; @@ -3551,6 +3559,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_qf_names[i] != sbi->s_qf_names[i]) kfree(old_opts.s_qf_names[i]); #endif + unlock_super(sb); + unlock_kernel(); return 0; restore_opts: @@ -3570,6 +3580,8 @@ restore_opts: sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } #endif + unlock_super(sb); + unlock_kernel(); return err; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3a7f603..f350029 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = fat_compat_dir_ioctl, #endif - .fsync = file_fsync, + .fsync = fat_file_fsync, }; static int fat_get_short_entry(struct inode *dir, loff_t *pos, @@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) de++; nr_slots--; } - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) de--; nr_slots--; } - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bh); brelse(bh); @@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, } memset(bhs[n]->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bhs[n]); - mark_buffer_dirty(bhs[n]); + mark_buffer_dirty_inode(bhs[n], dir); n++; blknr++; @@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].size = de[1].size = 0; memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); set_buffer_uptodate(bhs[0]); - mark_buffer_dirty(bhs[0]); + mark_buffer_dirty_inode(bhs[0], dir); err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); if (err) @@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, slots += copy; size -= copy; set_buffer_uptodate(bhs[n]); - mark_buffer_dirty(bhs[n]); + mark_buffer_dirty_inode(bhs[n], dir); if (!size) break; n++; @@ -1293,7 +1293,7 @@ found: for (i = 0; i < long_bhs; i++) { int copy = min_t(int, sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty(bhs[i]); + mark_buffer_dirty_inode(bhs[i], dir); offset = 0; slots += copy; size -= copy; @@ -1304,7 +1304,7 @@ found: /* Fill the short name slot. */ int copy = min_t(int, sb->s_blocksize - offset, size); memcpy(bhs[i]->b_data + offset, slots, copy); - mark_buffer_dirty(bhs[i]); + mark_buffer_dirty_inode(bhs[i], dir); if (IS_DIRSYNC(dir)) err = sync_dirty_buffer(bhs[i]); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index ea440d6..e4d8852 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -74,6 +74,7 @@ struct msdos_sb_info { int fatent_shift; struct fatent_operations *fatent_ops; + struct inode *fat_inode; spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[FAT_HASH_SIZE]; @@ -251,6 +252,7 @@ struct fat_entry { } u; int nr_bhs; struct buffer_head *bhs[2]; + struct inode *fat_inode; }; static inline void fatent_init(struct fat_entry *fatent) @@ -259,6 +261,7 @@ static inline void fatent_init(struct fat_entry *fatent) fatent->entry = 0; fatent->u.ent32_p = NULL; fatent->bhs[0] = fatent->bhs[1] = NULL; + fatent->fat_inode = NULL; } static inline void fatent_set_entry(struct fat_entry *fatent, int entry) @@ -275,6 +278,7 @@ static inline void fatent_brelse(struct fat_entry *fatent) brelse(fatent->bhs[i]); fatent->nr_bhs = 0; fatent->bhs[0] = fatent->bhs[1] = NULL; + fatent->fat_inode = NULL; } extern void fat_ent_access_init(struct super_block *sb); @@ -296,6 +300,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); +extern int fat_file_fsync(struct file *file, struct dentry *dentry, + int datasync); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index da6eea4..618f530 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, struct buffer_head **bhs = fatent->bhs; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); + fatent->fat_inode = MSDOS_SB(sb)->fat_inode; + bhs[0] = sb_bread(sb, blocknr); if (!bhs[0]) goto err; @@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops; WARN_ON(blocknr < MSDOS_SB(sb)->fat_start); + fatent->fat_inode = MSDOS_SB(sb)->fat_inode; fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", @@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new) } spin_unlock(&fat12_entry_lock); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); if (fatent->nr_bhs == 2) - mark_buffer_dirty(fatent->bhs[1]); + mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode); } static void fat16_ent_put(struct fat_entry *fatent, int new) @@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new) new = EOF_FAT16; *fatent->u.ent16_p = cpu_to_le16(new); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static void fat32_ent_put(struct fat_entry *fatent, int new) @@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new) WARN_ON(new & 0xf0000000); new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; *fatent->u.ent32_p = cpu_to_le32(new); - mark_buffer_dirty(fatent->bhs[0]); + mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode); } static int fat12_ent_next(struct fat_entry *fatent) @@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, } memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); - mark_buffer_dirty(c_bh); + mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & MS_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); brelse(c_bh); diff --git a/fs/fat/file.c b/fs/fat/file.c index 0a7f4a9..e955a56 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -133,6 +133,18 @@ static int fat_file_release(struct inode *inode, struct file *filp) return 0; } +int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int res, err; + + res = simple_fsync(filp, dentry, datasync); + err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); + + return res ? res : err; +} + + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -142,7 +154,7 @@ const struct file_operations fat_file_operations = { .mmap = generic_file_mmap, .release = fat_file_release, .ioctl = fat_generic_ioctl, - .fsync = file_fsync, + .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 296785a..51a5ecf 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -441,16 +441,35 @@ static void fat_clear_inode(struct inode *inode) static void fat_write_super(struct super_block *sb) { + lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) fat_clusters_flush(sb); + unlock_super(sb); +} + +static int fat_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); + fat_clusters_flush(sb); + sb->s_dirt = 0; + unlock_super(sb); + + return 0; } static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + fat_write_super(sb); + + iput(sbi->fat_inode); + if (sbi->nls_disk) { unload_nls(sbi->nls_disk); sbi->nls_disk = NULL; @@ -467,6 +486,8 @@ static void fat_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } static struct kmem_cache *fat_inode_cachep; @@ -632,6 +653,7 @@ static const struct super_operations fat_sops = { .delete_inode = fat_delete_inode, .put_super = fat_put_super, .write_super = fat_write_super, + .sync_fs = fat_sync_fs, .statfs = fat_statfs, .clear_inode = fat_clear_inode, .remount_fs = fat_remount, @@ -1174,7 +1196,7 @@ static int fat_read_root(struct inode *inode) int fat_fill_super(struct super_block *sb, void *data, int silent, const struct inode_operations *fs_dir_inode_ops, int isvfat) { - struct inode *root_inode = NULL; + struct inode *root_inode = NULL, *fat_inode = NULL; struct buffer_head *bh; struct fat_boot_sector *b; struct msdos_sb_info *sbi; @@ -1414,6 +1436,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, } error = -ENOMEM; + fat_inode = new_inode(sb); + if (!fat_inode) + goto out_fail; + MSDOS_I(fat_inode)->i_pos = 0; + sbi->fat_inode = fat_inode; root_inode = new_inode(sb); if (!root_inode) goto out_fail; @@ -1439,6 +1466,8 @@ out_invalid: " on dev %s.\n", sb->s_id); out_fail: + if (fat_inode) + iput(fat_inode); if (root_inode) iput(root_inode); if (sbi->nls_io) diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index da3f361..20f5228 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, int start = MSDOS_I(new_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) @@ -586,7 +586,7 @@ error_dotdot: int start = MSDOS_I(old_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } error_inode: diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a0e00e3..b50ecbe 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int start = MSDOS_I(new_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) @@ -1009,7 +1009,7 @@ error_dotdot: int start = MSDOS_I(old_dir)->i_logstart; dotdot_de->start = cpu_to_le16(start); dotdot_de->starthi = cpu_to_le16(start >> 16); - mark_buffer_dirty(dotdot_bh); + mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } error_inode: diff --git a/fs/file_table.c b/fs/file_table.c index 54018fe..334ce39 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, */ if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) { file_take_write(file); - error = mnt_want_write(mnt); + error = mnt_clone_write(mnt); WARN_ON(error); } return error; @@ -399,6 +399,44 @@ too_bad: return 0; } +/** + * mark_files_ro - mark all files read-only + * @sb: superblock in question + * + * All files are marked read-only. We don't care about pending + * delete files so this should be used in 'force' mode only. + */ +void mark_files_ro(struct super_block *sb) +{ + struct file *f; + +retry: + file_list_lock(); + list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + f->f_mode &= ~FMODE_WRITE; + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + file_list_unlock(); + /* + * This can sleep, so we can't hold + * the file_list_lock() spinlock. + */ + mnt_drop_write(mnt); + mntput(mnt); + goto retry; + } + file_list_unlock(); +} + void __init files_init(unsigned long mempages) { int n; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1dacda8..cdbd165 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp) { struct vxfs_sb_info *infp = VXFS_SBI(sbp); + lock_kernel(); + vxfs_put_fake_inode(infp->vsi_fship); vxfs_put_fake_inode(infp->vsi_ilist); vxfs_put_fake_inode(infp->vsi_stilist); brelse(infp->vsi_bp); kfree(infp); + + unlock_kernel(); } /** diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 91013ff..40308e9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi) clear_bit(BDI_pdflush, &bdi->state); } +static noinline void block_dump___mark_inode_dirty(struct inode *inode) +{ + if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { + struct dentry *dentry; + const char *name = "?"; + + dentry = d_find_alias(inode); + if (dentry) { + spin_lock(&dentry->d_lock); + name = (const char *) dentry->d_name.name; + } + printk(KERN_DEBUG + "%s(%d): dirtied inode %lu (%s) on %s\n", + current->comm, task_pid_nr(current), inode->i_ino, + name, inode->i_sb->s_id); + if (dentry) { + spin_unlock(&dentry->d_lock); + dput(dentry); + } + } +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return; - if (unlikely(block_dump)) { - struct dentry *dentry = NULL; - const char *name = "?"; - - if (!list_empty(&inode->i_dentry)) { - dentry = list_entry(inode->i_dentry.next, - struct dentry, d_alias); - if (dentry && dentry->d_name.name) - name = (const char *) dentry->d_name.name; - } - - if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) - printk(KERN_DEBUG - "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, task_pid_nr(current), inode->i_ino, - name, inode->i_sb->s_id); - } + if (unlikely(block_dump)) + block_dump___mark_inode_dirty(inode); spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { @@ -289,7 +296,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) int ret; BUG_ON(inode->i_state & I_SYNC); - WARN_ON(inode->i_state & I_NEW); /* Set I_SYNC, reset I_DIRTY */ dirty = inode->i_state & I_DIRTY; @@ -314,7 +320,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } spin_lock(&inode_lock); - WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { if (!(inode->i_state & I_DIRTY) && @@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait) } /** - * sync_inodes - writes all inodes to disk - * @wait: wait for completion - * - * sync_inodes() goes through each super block's dirty inode list, writes the - * inodes out, waits on the writeout and puts the inodes back on the normal - * list. - * - * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle - * part of the sync functions is that the blockdev "superblock" is processed - * last. This is because the write_inode() function of a typical fs will - * perform no I/O, but will mark buffers in the blockdev mapping as dirty. - * What we want to do is to perform all that dirtying first, and then write - * back all those inode blocks via the blockdev mapping in one sweep. So the - * additional (somewhat redundant) sync_blockdev() calls here are to make - * sure that really happens. Because if we call sync_inodes_sb(wait=1) with - * outstanding dirty inodes, the writeback goes block-at-a-time within the - * filesystem's write_inode(). This is extremely slow. - */ -static void __sync_inodes(int wait) -{ - struct super_block *sb; - - spin_lock(&sb_lock); -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (sb->s_root) { - sync_inodes_sb(sb, wait); - sync_blockdev(sb->s_bdev); - } - up_read(&sb->s_umount); - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); -} - -void sync_inodes(int wait) -{ - __sync_inodes(0); - - if (wait) - __sync_inodes(1); -} - -/** * write_inode_now - write an inode to disk * @inode: inode to write to disk * @sync: whether the write should be synchronous or not diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index aa62cf5..f2e449c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -764,7 +764,6 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } gfs2_log_unlock(sdp); - sdp->sd_vfs->s_dirt = 0; up_write(&sdp->sd_log_flush_lock); kfree(ai); @@ -823,7 +822,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) log_refund(sdp, tr); buf_lo_incore_commit(sdp, tr); - sdp->sd_vfs->s_dirt = 1; up_read(&sdp->sd_log_flush_lock); gfs2_log_lock(sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 40bcc37..c8930b3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -719,6 +719,8 @@ static void gfs2_put_super(struct super_block *sb) int error; struct gfs2_jdesc *jd; + lock_kernel(); + /* Unfreeze the filesystem, if we need to */ mutex_lock(&sdp->sd_freeze_lock); @@ -785,17 +787,8 @@ restart: /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); -} - -/** - * gfs2_write_super - * @sb: the superblock - * - */ -static void gfs2_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; + unlock_kernel(); } /** @@ -807,7 +800,6 @@ static void gfs2_write_super(struct super_block *sb) static int gfs2_sync_fs(struct super_block *sb, int wait) { - sb->s_dirt = 0; if (wait && sb->s_fs_info) gfs2_log_flush(sb->s_fs_info, NULL); return 0; @@ -1324,7 +1316,6 @@ const struct super_operations gfs2_super_ops = { .write_inode = gfs2_write_inode, .delete_inode = gfs2_delete_inode, .put_super = gfs2_put_super, - .write_super = gfs2_write_super, .sync_fs = gfs2_sync_fs, .freeze_fs = gfs2_freeze, .unfreeze_fs = gfs2_unfreeze, diff --git a/fs/hfs/super.c b/fs/hfs/super.c index a36bb74..6f833dc 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -49,11 +49,23 @@ MODULE_LICENSE("GPL"); */ static void hfs_write_super(struct super_block *sb) { + lock_super(sb); sb->s_dirt = 0; - if (sb->s_flags & MS_RDONLY) - return; + /* sync everything to the buffers */ + if (!(sb->s_flags & MS_RDONLY)) + hfs_mdb_commit(sb); + unlock_super(sb); +} + +static int hfs_sync_fs(struct super_block *sb, int wait) +{ + lock_super(sb); hfs_mdb_commit(sb); + sb->s_dirt = 0; + unlock_super(sb); + + return 0; } /* @@ -65,9 +77,15 @@ static void hfs_write_super(struct super_block *sb) */ static void hfs_put_super(struct super_block *sb) { + lock_kernel(); + + if (sb->s_dirt) + hfs_write_super(sb); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); + + unlock_kernel(); } /* @@ -164,6 +182,7 @@ static const struct super_operations hfs_super_operations = { .clear_inode = hfs_clear_inode, .put_super = hfs_put_super, .write_super = hfs_write_super, + .sync_fs = hfs_sync_fs, .statfs = hfs_statfs, .remount_fs = hfs_remount, .show_options = hfs_show_options, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index f2a6402..9fc3af0c 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -152,15 +152,14 @@ static void hfsplus_clear_inode(struct inode *inode) } } -static void hfsplus_write_super(struct super_block *sb) +static int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; dprint(DBG_SUPER, "hfsplus_write_super\n"); + + lock_super(sb); sb->s_dirt = 0; - if (sb->s_flags & MS_RDONLY) - /* warn? */ - return; vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); @@ -192,6 +191,16 @@ static void hfsplus_write_super(struct super_block *sb) } HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; } + unlock_super(sb); + return 0; +} + +static void hfsplus_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + hfsplus_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static void hfsplus_put_super(struct super_block *sb) @@ -199,6 +208,11 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); if (!sb->s_fs_info) return; + + lock_kernel(); + + if (sb->s_dirt) + hfsplus_write_super(sb); if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; @@ -218,6 +232,8 @@ static void hfsplus_put_super(struct super_block *sb) unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -279,6 +295,7 @@ static const struct super_operations hfsplus_sops = { .clear_inode = hfsplus_clear_inode, .put_super = hfsplus_put_super, .write_super = hfsplus_write_super, + .sync_fs = hfsplus_sync_fs, .statfs = hfsplus_statfs, .remount_fs = hfsplus_remount, .show_options = hfsplus_show_options, diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fc77965..f2feaa0 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -13,6 +13,7 @@ #include <linux/statfs.h> #include <linux/magic.h> #include <linux/sched.h> +#include <linux/smp_lock.h> /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ @@ -99,11 +100,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, static void hpfs_put_super(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); + + lock_kernel(); + kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); unmark_dirty(s); s->s_fs_info = NULL; kfree(sbi); + + unlock_kernel(); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -393,6 +399,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) *flags |= MS_NOATIME; + lock_kernel(); + lock_super(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; @@ -425,9 +433,13 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) replace_mount_options(s, new_opts); + unlock_super(s); + unlock_kernel(); return 0; out_err: + unlock_super(s); + unlock_kernel(); kfree(new_opts); return -EINVAL; } @@ -1422,7 +1422,7 @@ void file_update_time(struct file *file) if (IS_NOCMTIME(inode)) return; - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) return; diff --git a/fs/internal.h b/fs/internal.h index b4dac4f..d55ef56 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) return sb == blockdev_superblock; } +extern int __sync_blockdev(struct block_device *bdev, int wait); + #else static inline void bdev_cache_init(void) { @@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) { return 0; } + +static inline int __sync_blockdev(struct block_device *bdev, int wait) +{ + return 0; +} #endif /* @@ -66,3 +73,13 @@ extern void __init mnt_init(void); * fs_struct.c */ extern void chroot_fs_refs(struct path *, struct path *); + +/* + * file_table.c + */ +extern void mark_files_ro(struct super_block *); + +/* + * super.c + */ +extern int do_remount_sb(struct super_block *, int, void *, int); diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index b4cbe96..068b34b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst static void isofs_put_super(struct super_block *sb) { struct isofs_sb_info *sbi = ISOFS_SB(sb); + #ifdef CONFIG_JOLIET + lock_kernel(); + if (sbi->s_nls_iocharset) { unload_nls(sbi->s_nls_iocharset); sbi->s_nls_iocharset = NULL; } + + unlock_kernel(); #endif kfree(sbi); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 249305d..3451a81 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -20,6 +20,7 @@ #include <linux/vmalloc.h> #include <linux/vfs.h> #include <linux/crc32.h> +#include <linux/smp_lock.h> #include "nodelist.h" static int jffs2_flash_setup(struct jffs2_sb_info *c); @@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) This also catches the case where it was stopped and this is just a remount to restart it. Flush the writebuffer, if neccecary, else we loose it */ + lock_kernel(); if (!(sb->s_flags & MS_RDONLY)) { jffs2_stop_garbage_collect_thread(c); mutex_lock(&c->alloc_sem); @@ -399,24 +401,10 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) *flags |= MS_NOATIME; + unlock_kernel(); return 0; } -void jffs2_write_super (struct super_block *sb) -{ - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - sb->s_dirt = 0; - - if (sb->s_flags & MS_RDONLY) - return; - - D1(printk(KERN_DEBUG "jffs2_write_super()\n")); - jffs2_garbage_collect_trigger(c); - jffs2_erase_pending_blocks(c, 0); - jffs2_flush_wbuf_gc(c, 0); -} - - /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. */ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 5e194a5..2228380 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); -void jffs2_write_super (struct super_block *); int jffs2_remount_fs (struct super_block *, int *, char *); int jffs2_do_fill_super(struct super_block *sb, void *data, int silent); void jffs2_gc_release_inode(struct jffs2_sb_info *c, diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 4c4e18c..07a22ca 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -53,10 +53,29 @@ static void jffs2_i_init_once(void *foo) inode_init_once(&f->vfs_inode); } +static void jffs2_write_super(struct super_block *sb) +{ + struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + + lock_super(sb); + sb->s_dirt = 0; + + if (!(sb->s_flags & MS_RDONLY)) { + D1(printk(KERN_DEBUG "jffs2_write_super()\n")); + jffs2_garbage_collect_trigger(c); + jffs2_erase_pending_blocks(c, 0); + jffs2_flush_wbuf_gc(c, 0); + } + + unlock_super(sb); +} + static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); @@ -174,6 +193,11 @@ static void jffs2_put_super (struct super_block *sb) D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); + lock_kernel(); + + if (sb->s_dirt) + jffs2_write_super(sb); + mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); @@ -192,6 +216,8 @@ static void jffs2_put_super (struct super_block *sb) if (c->mtd->sync) c->mtd->sync(c->mtd); + unlock_kernel(); + D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index d9b0e92..09b1b6e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -32,6 +32,7 @@ #include <linux/crc32.h> #include <asm/uaccess.h> #include <linux/seq_file.h> +#include <linux/smp_lock.h> #include "jfs_incore.h" #include "jfs_filsys.h" @@ -183,6 +184,9 @@ static void jfs_put_super(struct super_block *sb) int rc; jfs_info("In jfs_put_super"); + + lock_kernel(); + rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); @@ -195,6 +199,8 @@ static void jfs_put_super(struct super_block *sb) sbi->direct_inode = NULL; kfree(sbi); + + unlock_kernel(); } enum { @@ -370,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) s64 newLVSize = 0; int rc = 0; int flag = JFS_SBI(sb)->flag; + int ret; if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } + lock_kernel(); if (newLVSize) { if (sb->s_flags & MS_RDONLY) { printk(KERN_ERR "JFS: resize requires volume to be mounted read-write\n"); + unlock_kernel(); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); - if (rc) + if (rc) { + unlock_kernel(); return rc; + } } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { @@ -393,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + unlock_kernel(); + return ret; } if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; + unlock_kernel(); return rc; } if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) if (!(sb->s_flags & MS_RDONLY)) { rc = jfs_umount_rw(sb); - if (rc) + if (rc) { + unlock_kernel(); return rc; + } JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + unlock_kernel(); + return ret; } JFS_SBI(sb)->flag = flag; + unlock_kernel(); return 0; } @@ -9,6 +9,8 @@ #include <linux/vfs.h> #include <linux/mutex.h> #include <linux/exportfs.h> +#include <linux/writeback.h> +#include <linux/buffer_head.h> #include <asm/uaccess.h> @@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, } EXPORT_SYMBOL_GPL(generic_fh_to_parent); +int simple_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* metadata-only; caller takes care of data */ + }; + struct inode *inode = dentry->d_inode; + int err; + int ret; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return ret; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return ret; + + err = sync_inode(inode, &wbc); + if (ret == 0) + ret = err; + return ret; +} +EXPORT_SYMBOL(simple_fsync); + EXPORT_SYMBOL(dcache_dir_close); EXPORT_SYMBOL(dcache_dir_lseek); EXPORT_SYMBOL(dcache_dir_open); diff --git a/fs/minix/dir.c b/fs/minix/dir.c index d4946c4..e5f2064 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t); const struct file_operations minix_dir_operations = { .read = generic_read_dir, .readdir = minix_readdir, - .fsync = minix_sync_file, + .fsync = simple_fsync, }; static inline void dir_put_page(struct page *page) diff --git a/fs/minix/file.c b/fs/minix/file.c index 17765f6..3eec3e6 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -6,15 +6,12 @@ * minix regular file handling primitives */ -#include <linux/buffer_head.h> /* for fsync_inode_buffers() */ #include "minix.h" /* * We have mostly NULLs here: the current defaults are OK for * the minix filesystem. */ -int minix_sync_file(struct file *, struct dentry *, int); - const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = minix_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; @@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = { .truncate = minix_truncate, .getattr = minix_getattr, }; - -int minix_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= minix_sync_inode(inode); - return err ? -EIO : 0; -} diff --git a/fs/minix/inode.c b/fs/minix/inode.c index daad3c2..f91a236 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); + lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb) sb->s_fs_info = NULL; kfree(sbi); - return; + unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; @@ -554,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static struct buffer_head *minix_update_inode(struct inode *inode) -{ - if (INODE_VERSION(inode) == MINIX_V1) - return V1_minix_update_inode(inode); - else - return V2_minix_update_inode(inode); -} - -static int minix_write_inode(struct inode * inode, int wait) -{ - brelse(minix_update_inode(inode)); - return 0; -} - -int minix_sync_inode(struct inode * inode) +static int minix_write_inode(struct inode *inode, int wait) { int err = 0; struct buffer_head *bh; - bh = minix_update_inode(inode); - if (bh && buffer_dirty(bh)) - { + if (INODE_VERSION(inode) == MINIX_V1) + bh = V1_minix_update_inode(inode); + else + bh = V2_minix_update_inode(inode); + if (!bh) + return -EIO; + if (wait && buffer_dirty(bh)) { sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { + if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", inode->i_sb->s_id, inode->i_ino); - err = -1; + err = -EIO; } } - else if (!bh) - err = -1; brelse (bh); return err; } diff --git a/fs/minix/minix.h b/fs/minix/minix.h index e6a0b19..cb7fdd1 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping, extern void V1_minix_truncate(struct inode *); extern void V2_minix_truncate(struct inode *); extern void minix_truncate(struct inode *); -extern int minix_sync_inode(struct inode *); extern void minix_set_inode(struct inode *, dev_t); extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int); extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int); @@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*); extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*); extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**); extern ino_t minix_inode_by_name(struct dentry*); -extern int minix_sync_file(struct file *, struct dentry *, int); extern const struct inode_operations minix_file_inode_operations; extern const struct inode_operations minix_dir_inode_operations; @@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd return result; } +static __always_inline void set_root(struct nameidata *nd) +{ + if (!nd->root.mnt) { + struct fs_struct *fs = current->fs; + read_lock(&fs->lock); + nd->root = fs->root; + path_get(&nd->root); + read_unlock(&fs->lock); + } +} + static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) { int res = 0; @@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { - struct fs_struct *fs = current->fs; - + set_root(nd); path_put(&nd->path); - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + nd->path = nd->root; + path_get(&nd->root); } res = link_path_walk(link, nd); @@ -668,23 +675,23 @@ loop: return err; } -int follow_up(struct vfsmount **mnt, struct dentry **dentry) +int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; spin_lock(&vfsmount_lock); - parent=(*mnt)->mnt_parent; - if (parent == *mnt) { + parent = path->mnt->mnt_parent; + if (parent == path->mnt) { spin_unlock(&vfsmount_lock); return 0; } mntget(parent); - mountpoint=dget((*mnt)->mnt_mountpoint); + mountpoint = dget(path->mnt->mnt_mountpoint); spin_unlock(&vfsmount_lock); - dput(*dentry); - *dentry = mountpoint; - mntput(*mnt); - *mnt = parent; + dput(path->dentry); + path->dentry = mountpoint; + mntput(path->mnt); + path->mnt = parent; return 1; } @@ -695,7 +702,7 @@ static int __follow_mount(struct path *path) { int res = 0; while (d_mountpoint(path->dentry)) { - struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; dput(path->dentry); @@ -708,32 +715,32 @@ static int __follow_mount(struct path *path) return res; } -static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static void follow_mount(struct path *path) { - while (d_mountpoint(*dentry)) { - struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); + while (d_mountpoint(path->dentry)) { + struct vfsmount *mounted = lookup_mnt(path); if (!mounted) break; - dput(*dentry); - mntput(*mnt); - *mnt = mounted; - *dentry = dget(mounted->mnt_root); + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); } } /* no need for dcache_lock, as serialization is taken care in * namespace.c */ -int follow_down(struct vfsmount **mnt, struct dentry **dentry) +int follow_down(struct path *path) { struct vfsmount *mounted; - mounted = lookup_mnt(*mnt, *dentry); + mounted = lookup_mnt(path); if (mounted) { - dput(*dentry); - mntput(*mnt); - *mnt = mounted; - *dentry = dget(mounted->mnt_root); + dput(path->dentry); + mntput(path->mnt); + path->mnt = mounted; + path->dentry = dget(mounted->mnt_root); return 1; } return 0; @@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) static __always_inline void follow_dotdot(struct nameidata *nd) { - struct fs_struct *fs = current->fs; + set_root(nd); while(1) { struct vfsmount *parent; struct dentry *old = nd->path.dentry; - read_lock(&fs->lock); - if (nd->path.dentry == fs->root.dentry && - nd->path.mnt == fs->root.mnt) { - read_unlock(&fs->lock); + if (nd->path.dentry == nd->root.dentry && + nd->path.mnt == nd->root.mnt) { break; } - read_unlock(&fs->lock); spin_lock(&dcache_lock); if (nd->path.dentry != nd->path.mnt->mnt_root) { nd->path.dentry = dget(nd->path.dentry->d_parent); @@ -775,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) mntput(nd->path.mnt); nd->path.mnt = parent; } - follow_mount(&nd->path.mnt, &nd->path.dentry); + follow_mount(&nd->path); } /* @@ -1017,25 +1021,23 @@ static int path_walk(const char *name, struct nameidata *nd) return link_path_walk(name, nd); } -/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int do_path_lookup(int dfd, const char *name, - unsigned int flags, struct nameidata *nd) +static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; int fput_needed; struct file *file; - struct fs_struct *fs = current->fs; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; + nd->root.mnt = NULL; if (*name=='/') { - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); + set_root(nd); + nd->path = nd->root; + path_get(&nd->root); } else if (dfd == AT_FDCWD) { + struct fs_struct *fs = current->fs; read_lock(&fs->lock); nd->path = fs->pwd; path_get(&fs->pwd); @@ -1063,17 +1065,29 @@ static int do_path_lookup(int dfd, const char *name, fput_light(file, fput_needed); } + return 0; - retval = path_walk(name, nd); +fput_fail: + fput_light(file, fput_needed); +out_fail: + return retval; +} + +/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ +static int do_path_lookup(int dfd, const char *name, + unsigned int flags, struct nameidata *nd) +{ + int retval = path_init(dfd, name, flags, nd); + if (!retval) + retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); -out_fail: + if (nd->root.mnt) { + path_put(&nd->root); + nd->root.mnt = NULL; + } return retval; - -fput_fail: - fput_light(file, fput_needed); - goto out_fail; } int path_lookup(const char *name, unsigned int flags, @@ -1113,14 +1127,18 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, nd->path.dentry = dentry; nd->path.mnt = mnt; path_get(&nd->path); + nd->root = nd->path; + path_get(&nd->root); retval = path_walk(name, nd); if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); - return retval; + path_put(&nd->root); + nd->root.mnt = NULL; + return retval; } /** @@ -1676,9 +1694,14 @@ struct file *do_filp_open(int dfd, const char *pathname, /* * Create - we need to know the parent. */ - error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); + error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); + error = path_walk(pathname, &nd); + if (error) + return ERR_PTR(error); + if (unlikely(!audit_dummy_context())) + audit_inode(pathname, nd.path.dentry); /* * We have the parent and last component. First of all, check @@ -1806,6 +1829,8 @@ exit: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); exit_parent: + if (nd.root.mnt) + path_put(&nd.root); path_put(&nd.path); return ERR_PTR(error); diff --git a/fs/namespace.c b/fs/namespace.c index 134d494..2dd333b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); - atomic_set(&mnt->__mnt_writers, 0); +#ifdef CONFIG_SMP + mnt->mnt_writers = alloc_percpu(int); + if (!mnt->mnt_writers) + goto out_free_devname; +#else + mnt->mnt_writers = 0; +#endif } return mnt; +#ifdef CONFIG_SMP +out_free_devname: + kfree(mnt->mnt_devname); +#endif out_free_id: mnt_free_id(mnt); out_free_cache: @@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt) } EXPORT_SYMBOL_GPL(__mnt_is_readonly); -struct mnt_writer { - /* - * If holding multiple instances of this lock, they - * must be ordered by cpu number. - */ - spinlock_t lock; - struct lock_class_key lock_class; /* compiles out with !lockdep */ - unsigned long count; - struct vfsmount *mnt; -} ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct mnt_writer, mnt_writers); +static inline void inc_mnt_writers(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; +#else + mnt->mnt_writers++; +#endif +} -static int __init init_mnt_writers(void) +static inline void dec_mnt_writers(struct vfsmount *mnt) { - int cpu; - for_each_possible_cpu(cpu) { - struct mnt_writer *writer = &per_cpu(mnt_writers, cpu); - spin_lock_init(&writer->lock); - lockdep_set_class(&writer->lock, &writer->lock_class); - writer->count = 0; - } - return 0; +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; +#else + mnt->mnt_writers--; +#endif } -fs_initcall(init_mnt_writers); -static void unlock_mnt_writers(void) +static unsigned int count_mnt_writers(struct vfsmount *mnt) { +#ifdef CONFIG_SMP + unsigned int count = 0; int cpu; - struct mnt_writer *cpu_writer; for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_unlock(&cpu_writer->lock); + count += *per_cpu_ptr(mnt->mnt_writers, cpu); } -} -static inline void __clear_mnt_count(struct mnt_writer *cpu_writer) -{ - if (!cpu_writer->mnt) - return; - /* - * This is in case anyone ever leaves an invalid, - * old ->mnt and a count of 0. - */ - if (!cpu_writer->count) - return; - atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers); - cpu_writer->count = 0; -} - /* - * must hold cpu_writer->lock - */ -static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, - struct vfsmount *mnt) -{ - if (cpu_writer->mnt == mnt) - return; - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = mnt; + return count; +#else + return mnt->mnt_writers; +#endif } /* @@ -253,74 +236,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer, int mnt_want_write(struct vfsmount *mnt) { int ret = 0; - struct mnt_writer *cpu_writer; - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); + preempt_disable(); + inc_mnt_writers(mnt); + /* + * The store to inc_mnt_writers must be visible before we pass + * MNT_WRITE_HOLD loop below, so that the slowpath can see our + * incremented count after it has set MNT_WRITE_HOLD. + */ + smp_mb(); + while (mnt->mnt_flags & MNT_WRITE_HOLD) + cpu_relax(); + /* + * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will + * be set to match its requirements. So we must not load that until + * MNT_WRITE_HOLD is cleared. + */ + smp_rmb(); if (__mnt_is_readonly(mnt)) { + dec_mnt_writers(mnt); ret = -EROFS; goto out; } - use_cpu_writer_for_mount(cpu_writer, mnt); - cpu_writer->count++; out: - spin_unlock(&cpu_writer->lock); - put_cpu_var(mnt_writers); + preempt_enable(); return ret; } EXPORT_SYMBOL_GPL(mnt_want_write); -static void lock_mnt_writers(void) -{ - int cpu; - struct mnt_writer *cpu_writer; - - for_each_possible_cpu(cpu) { - cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - __clear_mnt_count(cpu_writer); - cpu_writer->mnt = NULL; - } +/** + * mnt_clone_write - get write access to a mount + * @mnt: the mount on which to take a write + * + * This is effectively like mnt_want_write, except + * it must only be used to take an extra write reference + * on a mountpoint that we already know has a write reference + * on it. This allows some optimisation. + * + * After finished, mnt_drop_write must be called as usual to + * drop the reference. + */ +int mnt_clone_write(struct vfsmount *mnt) +{ + /* superblock may be r/o */ + if (__mnt_is_readonly(mnt)) + return -EROFS; + preempt_disable(); + inc_mnt_writers(mnt); + preempt_enable(); + return 0; } +EXPORT_SYMBOL_GPL(mnt_clone_write); -/* - * These per-cpu write counts are not guaranteed to have - * matched increments and decrements on any given cpu. - * A file open()ed for write on one cpu and close()d on - * another cpu will imbalance this count. Make sure it - * does not get too far out of whack. +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already */ -static void handle_write_count_underflow(struct vfsmount *mnt) +int mnt_want_write_file(struct file *file) { - if (atomic_read(&mnt->__mnt_writers) >= - MNT_WRITER_UNDERFLOW_LIMIT) - return; - /* - * It isn't necessary to hold all of the locks - * at the same time, but doing it this way makes - * us share a lot more code. - */ - lock_mnt_writers(); - /* - * vfsmount_lock is for mnt_flags. - */ - spin_lock(&vfsmount_lock); - /* - * If coalescing the per-cpu writer counts did not - * get us back to a positive writer count, we have - * a bug. - */ - if ((atomic_read(&mnt->__mnt_writers) < 0) && - !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { - WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " - "count: %d\n", - mnt, atomic_read(&mnt->__mnt_writers)); - /* use the flag to keep the dmesg spam down */ - mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; - } - spin_unlock(&vfsmount_lock); - unlock_mnt_writers(); + if (!(file->f_mode & FMODE_WRITE)) + return mnt_want_write(file->f_path.mnt); + else + return mnt_clone_write(file->f_path.mnt); } +EXPORT_SYMBOL_GPL(mnt_want_write_file); /** * mnt_drop_write - give up write access to a mount @@ -332,37 +314,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) */ void mnt_drop_write(struct vfsmount *mnt) { - int must_check_underflow = 0; - struct mnt_writer *cpu_writer; - - cpu_writer = &get_cpu_var(mnt_writers); - spin_lock(&cpu_writer->lock); - - use_cpu_writer_for_mount(cpu_writer, mnt); - if (cpu_writer->count > 0) { - cpu_writer->count--; - } else { - must_check_underflow = 1; - atomic_dec(&mnt->__mnt_writers); - } - - spin_unlock(&cpu_writer->lock); - /* - * Logically, we could call this each time, - * but the __mnt_writers cacheline tends to - * be cold, and makes this expensive. - */ - if (must_check_underflow) - handle_write_count_underflow(mnt); - /* - * This could be done right after the spinlock - * is taken because the spinlock keeps us on - * the cpu, and disables preemption. However, - * putting it here bounds the amount that - * __mnt_writers can underflow. Without it, - * we could theoretically wrap __mnt_writers. - */ - put_cpu_var(mnt_writers); + preempt_disable(); + dec_mnt_writers(mnt); + preempt_enable(); } EXPORT_SYMBOL_GPL(mnt_drop_write); @@ -370,24 +324,41 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - lock_mnt_writers(); + spin_lock(&vfsmount_lock); + mnt->mnt_flags |= MNT_WRITE_HOLD; /* - * With all the locks held, this value is stable + * After storing MNT_WRITE_HOLD, we'll read the counters. This store + * should be visible before we do. */ - if (atomic_read(&mnt->__mnt_writers) > 0) { - ret = -EBUSY; - goto out; - } + smp_mb(); + /* - * nobody can do a successful mnt_want_write() with all - * of the counts in MNT_DENIED_WRITE and the locks held. + * With writers on hold, if this value is zero, then there are + * definitely no active writers (although held writers may subsequently + * increment the count, they'll have to wait, and decrement it after + * seeing MNT_READONLY). + * + * It is OK to have counter incremented on one CPU and decremented on + * another: the sum will add up correctly. The danger would be when we + * sum up each counter, if we read a counter before it is incremented, + * but then read another CPU's count which it has been subsequently + * decremented from -- we would see more decrements than we should. + * MNT_WRITE_HOLD protects against this scenario, because + * mnt_want_write first increments count, then smp_mb, then spins on + * MNT_WRITE_HOLD, so it can't be decremented by another CPU while + * we're counting up here. */ - spin_lock(&vfsmount_lock); - if (!ret) + if (count_mnt_writers(mnt) > 0) + ret = -EBUSY; + else mnt->mnt_flags |= MNT_READONLY; + /* + * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers + * that become unheld will see MNT_READONLY. + */ + smp_wmb(); + mnt->mnt_flags &= ~MNT_WRITE_HOLD; spin_unlock(&vfsmount_lock); -out: - unlock_mnt_writers(); return ret; } @@ -410,6 +381,9 @@ void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); mnt_free_id(mnt); +#ifdef CONFIG_SMP + free_percpu(mnt->mnt_writers); +#endif kmem_cache_free(mnt_cache, mnt); } @@ -442,11 +416,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, * lookup_mnt increments the ref count before returning * the vfsmount struct. */ -struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; spin_lock(&vfsmount_lock); - if ((child_mnt = __lookup_mnt(mnt, dentry, 1))) + if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); spin_unlock(&vfsmount_lock); return child_mnt; @@ -604,38 +578,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, static inline void __mntput(struct vfsmount *mnt) { - int cpu; struct super_block *sb = mnt->mnt_sb; /* - * We don't have to hold all of the locks at the - * same time here because we know that we're the - * last reference to mnt and that no new writers - * can come in. - */ - for_each_possible_cpu(cpu) { - struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); - spin_lock(&cpu_writer->lock); - if (cpu_writer->mnt != mnt) { - spin_unlock(&cpu_writer->lock); - continue; - } - atomic_add(cpu_writer->count, &mnt->__mnt_writers); - cpu_writer->count = 0; - /* - * Might as well do this so that no one - * ever sees the pointer and expects - * it to be valid. - */ - cpu_writer->mnt = NULL; - spin_unlock(&cpu_writer->lock); - } - /* * This probably indicates that somebody messed * up a mnt_want/drop_write() pair. If this * happens, the filesystem was probably unable * to make r/w->r/o transitions. */ - WARN_ON(atomic_read(&mnt->__mnt_writers)); + /* + * atomic_dec_and_lock() used to deal with ->mnt_count decrements + * provides barriers, so count_mnt_writers() below is safe. AV + */ + WARN_ON(count_mnt_writers(mnt)); dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); @@ -1106,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags) * we just try to remount it readonly. */ down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) { - lock_kernel(); + if (!(sb->s_flags & MS_RDONLY)) retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); - unlock_kernel(); - } up_write(&sb->s_umount); return retval; } @@ -1253,11 +1204,11 @@ Enomem: return NULL; } -struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry) +struct vfsmount *collect_mounts(struct path *path) { struct vfsmount *tree; down_write(&namespace_sem); - tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE); + tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); return tree; } @@ -1430,7 +1381,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) goto out_unlock; err = -ENOENT; - if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry)) + if (!d_unlinked(path->dentry)) err = attach_recursive_mnt(mnt, path, NULL); out_unlock: mutex_unlock(&path->dentry->d_inode->i_mutex); @@ -1601,7 +1552,7 @@ static int do_move_mount(struct path *path, char *old_name) down_write(&namespace_sem); while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) @@ -1612,7 +1563,7 @@ static int do_move_mount(struct path *path, char *old_name) if (IS_DEADDIR(path->dentry->d_inode)) goto out1; - if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) + if (d_unlinked(path->dentry)) goto out1; err = -EINVAL; @@ -1676,7 +1627,9 @@ static int do_new_mount(struct path *path, char *type, int flags, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + lock_kernel(); mnt = do_kern_mount(type, flags, name, data); + unlock_kernel(); if (IS_ERR(mnt)) return PTR_ERR(mnt); @@ -1695,10 +1648,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, down_write(&namespace_sem); /* Something was mounted here while we slept */ while (d_mountpoint(path->dentry) && - follow_down(&path->mnt, &path->dentry)) + follow_down(path)) ; err = -EINVAL; - if (!check_mnt(path->mnt)) + if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ @@ -2092,10 +2045,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, if (retval < 0) goto out3; - lock_kernel(); retval = do_mount((char *)dev_page, dir_page, (char *)type_page, flags, (void *)data_page); - unlock_kernel(); free_page(data_page); out3: @@ -2175,9 +2126,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, error = -ENOENT; if (IS_DEADDIR(new.dentry->d_inode)) goto out2; - if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) + if (d_unlinked(new.dentry)) goto out2; - if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) + if (d_unlinked(old.dentry)) goto out2; error = -EBUSY; if (new.mnt == root.mnt || diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d642f0e..b99ce20 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); + lock_kernel(); + ncp_lock_server(server); ncp_disconnect(server); ncp_unlock_server(server); @@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb) vfree(server->packet); sb->s_fs_info = NULL; kfree(server); + + unlock_kernel(); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 64a288e..f01caec 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -154,7 +154,7 @@ out_err: goto out; out_follow: while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + follow_down(&nd->path)) ; err = 0; goto out; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d2d6778..26127b6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) if (data == NULL) return -ENOMEM; + lock_kernel(); /* fill out struct with values from existing mount */ data->flags = nfss->flags; data->rsize = nfss->rsize; @@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) error = nfs_compare_remount_data(nfss, data); out: kfree(data); + unlock_kernel(); return error; } diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5839b22..8b1f8ef 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid) return exp_find_key(clp, FSID_NUM, fsidv, NULL); } -static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, + struct cache_req *reqp) { struct svc_export *exp, key; int err; @@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, return ERR_PTR(-ENOENT); key.ex_client = clp; - key.ex_path.mnt = mnt; - key.ex_path.dentry = dentry; + key.ex_path = *path; exp = svc_export_lookup(&key); if (exp == NULL) @@ -873,24 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt, /* * Find the export entry for a given dentry. */ -static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt, - struct dentry *dentry, - struct cache_req *reqp) +static struct svc_export *exp_parent(svc_client *clp, struct path *path) { - svc_export *exp; - - dget(dentry); - exp = exp_get_by_name(clp, mnt, dentry, reqp); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = exp_get_by_name(clp, mnt, dentry, reqp); + struct dentry *saved = dget(path->dentry); + svc_export *exp = exp_get_by_name(clp, path, NULL); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = exp_get_by_name(clp, path, NULL); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } @@ -1018,7 +1011,7 @@ exp_export(struct nfsctl_export *nxp) goto out_put_clp; err = -EINVAL; - exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(clp, &path, NULL); memset(&new, 0, sizeof(new)); @@ -1135,7 +1128,7 @@ exp_unexport(struct nfsctl_export *nxp) goto out_domain; err = -EINVAL; - exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); + exp = exp_get_by_name(dom, &path, NULL); path_put(&path); if (IS_ERR(exp)) goto out_domain; @@ -1177,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, path.mnt, path.dentry, NULL); + exp = exp_parent(clp, &path); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -1207,7 +1200,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, if (IS_ERR(ek)) return ERR_CAST(ek); - exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp); + exp = exp_get_by_name(clp, &ek->ek_path, reqp); cache_put(&ek->h, &svc_expkey_cache); if (IS_ERR(exp)) @@ -1247,8 +1240,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) * use exp_get_by_name() or exp_find(). */ struct svc_export * -rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path) { struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); @@ -1256,8 +1248,7 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt, goto gss; /* First try the auth_unix client: */ - exp = exp_get_by_name(rqstp->rq_client, mnt, dentry, - &rqstp->rq_chandle); + exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1269,8 +1260,7 @@ gss: /* Otherwise, try falling back on gss client */ if (rqstp->rq_gssclient == NULL) return exp; - gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry, - &rqstp->rq_chandle); + gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1309,23 +1299,19 @@ gss: } struct svc_export * -rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt, - struct dentry *dentry) +rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) { - struct svc_export *exp; - - dget(dentry); - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); - - while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) { - struct dentry *parent; - - parent = dget_parent(dentry); - dput(dentry); - dentry = parent; - exp = rqst_exp_get_by_name(rqstp, mnt, dentry); + struct dentry *saved = dget(path->dentry); + struct svc_export *exp = rqst_exp_get_by_name(rqstp, path); + + while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) { + struct dentry *parent = dget_parent(path->dentry); + dput(path->dentry); + path->dentry = parent; + exp = rqst_exp_get_by_name(rqstp, path); } - dput(dentry); + dput(path->dentry); + path->dentry = saved; return exp; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bd584bc..99f8357 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -101,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, { struct svc_export *exp = *expp, *exp2 = NULL; struct dentry *dentry = *dpp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - struct dentry *mounts = dget(dentry); + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dentry)}; int err = 0; - while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + while (d_mountpoint(path.dentry) && follow_down(&path)) + ; - exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); + exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { if (PTR_ERR(exp2) != -ENOENT) err = PTR_ERR(exp2); - dput(mounts); - mntput(mnt); + path_put(&path); goto out; } if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { /* successfully crossed mount point */ /* - * This is subtle: dentry is *not* under mnt at this point. - * The only reason we are safe is that original mnt is pinned - * down by exp, so we should dput before putting exp. + * This is subtle: path.dentry is *not* on path.mnt + * at this point. The only reason we are safe is that + * original mnt is pinned down by exp, so we should + * put path *before* putting exp */ - dput(dentry); - *dpp = mounts; - exp_put(exp); + *dpp = path.dentry; + path.dentry = dentry; *expp = exp2; - } else { - exp_put(exp2); - dput(mounts); + exp2 = exp; } - mntput(mnt); + path_put(&path); + exp_put(exp2); out: return err; } @@ -169,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, /* checking mountpoint crossing is very different when stepping up */ struct svc_export *exp2 = NULL; struct dentry *dp; - struct vfsmount *mnt = mntget(exp->ex_path.mnt); - dentry = dget(dparent); - while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) + struct path path = {.mnt = mntget(exp->ex_path.mnt), + .dentry = dget(dparent)}; + + while (path.dentry == path.mnt->mnt_root && + follow_up(&path)) ; - dp = dget_parent(dentry); - dput(dentry); - dentry = dp; + dp = dget_parent(path.dentry); + dput(path.dentry); + path.dentry = dp; - exp2 = rqst_exp_parent(rqstp, mnt, dentry); + exp2 = rqst_exp_parent(rqstp, &path); if (PTR_ERR(exp2) == -ENOENT) { - dput(dentry); dentry = dget(dparent); } else if (IS_ERR(exp2)) { host_err = PTR_ERR(exp2); - dput(dentry); - mntput(mnt); + path_put(&path); goto out_nfserr; } else { + dentry = dget(path.dentry); exp_put(exp); exp = exp2; } - mntput(mnt); + path_put(&path); } } else { fh_lock(fhp); diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 300f1cd..cadd36b 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -864,11 +864,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) case NILFS_CHECKPOINT: /* * Check for protecting existing snapshot mounts: - * bd_mount_sem is used to make this operation atomic and + * ns_mount_mutex is used to make this operation atomic and * exclusive with a new mount job. Though it doesn't cover * umount, it's enough for the purpose. */ - down(&nilfs->ns_bdev->bd_mount_sem); + mutex_lock(&nilfs->ns_mount_mutex); if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { /* Current implementation does not have to protect plain read-only mounts since they are exclusive @@ -877,7 +877,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) ret = -EBUSY; } else ret = nilfs_cpfile_clear_snapshot(cpfile, cno); - up(&nilfs->ns_bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); return ret; case NILFS_SNAPSHOT: return nilfs_cpfile_set_snapshot(cpfile, cno); diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index adccd4f..0776ccc 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h @@ -60,6 +60,7 @@ struct nilfs_sb_info { struct super_block *s_super; /* reverse pointer to super_block */ struct the_nilfs *s_nilfs; struct list_head s_list; /* list head for nilfs->ns_supers */ + atomic_t s_count; /* reference count */ /* Segment constructor */ struct list_head s_dirty_files; /* dirty files list */ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6989b03..1777a34 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -65,9 +65,8 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); +static void nilfs_write_super(struct super_block *sb); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags); /** * nilfs_error() - report failure condition on a filesystem @@ -315,6 +314,11 @@ static void nilfs_put_super(struct super_block *sb) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; + lock_kernel(); + + if (sb->s_dirt) + nilfs_write_super(sb); + nilfs_detach_segment_constructor(sbi); if (!(sb->s_flags & MS_RDONLY)) { @@ -323,12 +327,18 @@ static void nilfs_put_super(struct super_block *sb) nilfs_commit_super(sbi, 1); up_write(&nilfs->ns_sem); } + down_write(&nilfs->ns_super_sem); + if (nilfs->ns_current == sbi) + nilfs->ns_current = NULL; + up_write(&nilfs->ns_super_sem); nilfs_detach_checkpoint(sbi); put_nilfs(sbi->s_nilfs); sbi->s_super = NULL; sb->s_fs_info = NULL; - kfree(sbi); + nilfs_put_sbinfo(sbi); + + unlock_kernel(); } /** @@ -383,6 +393,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) { int err = 0; + nilfs_write_super(sb); + /* This function is called when super block should be written back */ if (wait) err = nilfs_construct_segment(sb); @@ -396,9 +408,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) struct buffer_head *bh_cp; int err; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_add(&sbi->s_list, &nilfs->ns_supers); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); sbi->s_ifile = nilfs_mdt_new( nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); @@ -436,9 +448,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); return err; } @@ -450,9 +462,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) nilfs_mdt_clear(sbi->s_ifile); nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; - down_write(&nilfs->ns_sem); + down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); - up_write(&nilfs->ns_sem); + up_write(&nilfs->ns_super_sem); } static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) @@ -752,7 +764,7 @@ int nilfs_store_magic_and_option(struct super_block *sb, * @silent: silent mode flag * @nilfs: the_nilfs struct * - * This function is called exclusively by bd_mount_mutex. + * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int @@ -773,6 +785,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, get_nilfs(nilfs); sbi->s_nilfs = nilfs; sbi->s_super = sb; + atomic_set(&sbi->s_count, 1); err = init_nilfs(nilfs, sbi, (char *)data); if (err) @@ -870,6 +883,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, goto failed_root; } + down_write(&nilfs->ns_super_sem); + if (!nilfs_test_opt(sbi, SNAPSHOT)) + nilfs->ns_current = sbi; + up_write(&nilfs->ns_super_sem); + return 0; failed_root: @@ -885,7 +903,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, failed_sbi: put_nilfs(nilfs); sb->s_fs_info = NULL; - kfree(sbi); + nilfs_put_sbinfo(sbi); return err; } @@ -898,6 +916,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) struct nilfs_mount_options old_opts; int err; + lock_kernel(); + + down_write(&nilfs->ns_super_sem); old_sb_flags = sb->s_flags; old_opts.mount_opt = sbi->s_mount_opt; old_opts.snapshot_cno = sbi->s_snapshot_cno; @@ -945,14 +966,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) */ - down(&sb->s_bdev->bd_mount_sem); - /* Check existing RW-mount */ - if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { + if (nilfs->ns_current && nilfs->ns_current != sbi) { printk(KERN_WARNING "NILFS (device %s): couldn't " - "remount because a RW-mount exists.\n", + "remount because an RW-mount exists.\n", sb->s_id); err = -EBUSY; - goto rw_remount_failed; + goto restore_opts; } if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { printk(KERN_WARNING "NILFS (device %s): couldn't " @@ -960,7 +979,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) "the latest one.\n", sb->s_id); err = -EINVAL; - goto rw_remount_failed; + goto restore_opts; } sb->s_flags &= ~MS_RDONLY; nilfs_clear_opt(sbi, SNAPSHOT); @@ -968,28 +987,31 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = nilfs_attach_segment_constructor(sbi); if (err) - goto rw_remount_failed; + goto restore_opts; down_write(&nilfs->ns_sem); nilfs_setup_super(sbi); up_write(&nilfs->ns_sem); - up(&sb->s_bdev->bd_mount_sem); + nilfs->ns_current = sbi; } out: + up_write(&nilfs->ns_super_sem); + unlock_kernel(); return 0; - rw_remount_failed: - up(&sb->s_bdev->bd_mount_sem); restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.mount_opt; sbi->s_snapshot_cno = old_opts.snapshot_cno; + up_write(&nilfs->ns_super_sem); + unlock_kernel(); return err; } struct nilfs_super_data { struct block_device *bdev; + struct nilfs_sb_info *sbi; __u64 cno; int flags; }; @@ -1048,33 +1070,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data) { struct nilfs_super_data *sd = data; - return s->s_bdev == sd->bdev; -} - -static int nilfs_test_bdev_super2(struct super_block *s, void *data) -{ - struct nilfs_super_data *sd = data; - int ret; - - if (s->s_bdev != sd->bdev) - return 0; - - if (!((s->s_flags | sd->flags) & MS_RDONLY)) - return 1; /* Reuse an old R/W-mode super_block */ - - if (s->s_flags & sd->flags & MS_RDONLY) { - if (down_read_trylock(&s->s_umount)) { - ret = s->s_root && - (sd->cno == NILFS_SB(s)->s_snapshot_cno); - up_read(&s->s_umount); - /* - * This path is locked with sb_lock by sget(). - * So, drop_super() causes deadlock. - */ - return ret; - } - } - return 0; + return sd->sbi && s->s_fs_info == (void *)sd->sbi; } static int @@ -1082,8 +1078,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { struct nilfs_super_data sd; - struct super_block *s, *s2; - struct the_nilfs *nilfs = NULL; + struct super_block *s; + struct the_nilfs *nilfs; int err, need_to_close = 1; sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); @@ -1095,7 +1091,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, * much more information than normal filesystems to identify mount * instance. For snapshot mounts, not only a mount type (ro-mount * or rw-mount) but also a checkpoint number is required. - * The results are passed in sget() using nilfs_super_data. */ sd.cno = 0; sd.flags = flags; @@ -1104,64 +1099,59 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, goto failed; } - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - down(&sd.bdev->bd_mount_sem); - if (!sd.cno && - (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) { - err = (err < 0) ? : -EBUSY; - goto failed_unlock; + nilfs = find_or_create_nilfs(sd.bdev); + if (!nilfs) { + err = -ENOMEM; + goto failed; } - /* - * Phase-1: search any existent instance and get the_nilfs - */ - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); - if (IS_ERR(s)) - goto error_s; - - if (!s->s_root) { - err = -ENOMEM; - nilfs = alloc_nilfs(sd.bdev); - if (!nilfs) - goto cancel_new; - } else { - struct nilfs_sb_info *sbi = NILFS_SB(s); + mutex_lock(&nilfs->ns_mount_mutex); + if (!sd.cno) { /* - * s_umount protects super_block from unmount process; - * It covers pointers of nilfs_sb_info and the_nilfs. + * Check if an exclusive mount exists or not. + * Snapshot mounts coexist with a current mount + * (i.e. rw-mount or ro-mount), whereas rw-mount and + * ro-mount are mutually exclusive. */ - nilfs = sbi->s_nilfs; - get_nilfs(nilfs); - up_write(&s->s_umount); + down_read(&nilfs->ns_super_sem); + if (nilfs->ns_current && + ((nilfs->ns_current->s_super->s_flags ^ flags) + & MS_RDONLY)) { + up_read(&nilfs->ns_super_sem); + err = -EBUSY; + goto failed_unlock; + } + up_read(&nilfs->ns_super_sem); + } - /* - * Phase-2: search specified snapshot or R/W mode super_block - */ - if (!sd.cno) - /* trying to get the latest checkpoint. */ - sd.cno = nilfs_last_cno(nilfs); + /* + * Find existing nilfs_sb_info struct + */ + sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); - s2 = sget(fs_type, nilfs_test_bdev_super2, - nilfs_set_bdev_super, &sd); - deactivate_super(s); - /* - * Although deactivate_super() invokes close_bdev_exclusive() at - * kill_block_super(). Here, s is an existent mount; we need - * one more close_bdev_exclusive() call. - */ - s = s2; - if (IS_ERR(s)) - goto error_s; + if (!sd.cno) + /* trying to get the latest checkpoint. */ + sd.cno = nilfs_last_cno(nilfs); + + /* + * Get super block instance holding the nilfs_sb_info struct. + * A new instance is allocated if no existing mount is present or + * existing instance has been unmounted. + */ + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); + if (sd.sbi) + nilfs_put_sbinfo(sd.sbi); + + if (IS_ERR(s)) { + err = PTR_ERR(s); + goto failed_unlock; } if (!s->s_root) { char b[BDEVNAME_SIZE]; + /* New superblock instance created */ s->s_flags = flags; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); @@ -1172,26 +1162,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; need_to_close = 0; - } else if (!(s->s_flags & MS_RDONLY)) { - err = -EBUSY; } - up(&sd.bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); put_nilfs(nilfs); if (need_to_close) close_bdev_exclusive(sd.bdev, flags); simple_set_mnt(mnt, s); return 0; - error_s: - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); - close_bdev_exclusive(sd.bdev, flags); - return PTR_ERR(s); - failed_unlock: - up(&sd.bdev->bd_mount_sem); + mutex_unlock(&nilfs->ns_mount_mutex); + put_nilfs(nilfs); failed: close_bdev_exclusive(sd.bdev, flags); @@ -1199,70 +1181,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, cancel_new: /* Abandoning the newly allocated superblock */ - up(&sd.bdev->bd_mount_sem); - if (nilfs) - put_nilfs(nilfs); + mutex_unlock(&nilfs->ns_mount_mutex); + put_nilfs(nilfs); up_write(&s->s_umount); deactivate_super(s); /* * deactivate_super() invokes close_bdev_exclusive(). * We must finish all post-cleaning before this call; - * put_nilfs() and unlocking bd_mount_sem need the block device. + * put_nilfs() needs the block device. */ return err; } -static int nilfs_test_bdev_super3(struct super_block *s, void *data) -{ - struct nilfs_super_data *sd = data; - int ret; - - if (s->s_bdev != sd->bdev) - return 0; - if (down_read_trylock(&s->s_umount)) { - ret = (s->s_flags & MS_RDONLY) && s->s_root && - nilfs_test_opt(NILFS_SB(s), SNAPSHOT); - up_read(&s->s_umount); - if (ret) - return 0; /* ignore snapshot mounts */ - } - return !((sd->flags ^ s->s_flags) & MS_RDONLY); -} - -static int __false_bdev_super(struct super_block *s, void *data) -{ -#if 0 /* XXX: workaround for lock debug. This is not good idea */ - up_write(&s->s_umount); -#endif - return -EFAULT; -} - -/** - * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not. - * fs_type: filesystem type - * bdev: block device - * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount) - * res: pointer to an integer to store result - * - * This function must be called within a section protected by bd_mount_mutex. - */ -static int test_exclusive_mount(struct file_system_type *fs_type, - struct block_device *bdev, int flags) -{ - struct super_block *s; - struct nilfs_super_data sd = { .flags = flags, .bdev = bdev }; - - s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd); - if (IS_ERR(s)) { - if (PTR_ERR(s) != -EFAULT) - return PTR_ERR(s); - return 0; /* Not found */ - } - up_write(&s->s_umount); - deactivate_super(s); - return 1; /* Found */ -} - struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a91f15b..e4e5c78 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -35,6 +35,10 @@ #include "seglist.h" #include "segbuf.h" + +static LIST_HEAD(nilfs_objects); +static DEFINE_SPINLOCK(nilfs_lock); + void nilfs_set_last_segment(struct the_nilfs *nilfs, sector_t start_blocknr, u64 seq, __u64 cno) { @@ -55,7 +59,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, * Return Value: On success, pointer to the_nilfs is returned. * On error, NULL is returned. */ -struct the_nilfs *alloc_nilfs(struct block_device *bdev) +static struct the_nilfs *alloc_nilfs(struct block_device *bdev) { struct the_nilfs *nilfs; @@ -68,7 +72,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) atomic_set(&nilfs->ns_writer_refcount, -1); atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); + init_rwsem(&nilfs->ns_super_sem); + mutex_init(&nilfs->ns_mount_mutex); mutex_init(&nilfs->ns_writer_mutex); + INIT_LIST_HEAD(&nilfs->ns_list); INIT_LIST_HEAD(&nilfs->ns_supers); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_gc_inodes_h = NULL; @@ -78,6 +85,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) } /** + * find_or_create_nilfs - find or create nilfs object + * @bdev: block device to which the_nilfs is related + * + * find_nilfs() looks up an existent nilfs object created on the + * device and gets the reference count of the object. If no nilfs object + * is found on the device, a new nilfs object is allocated. + * + * Return Value: On success, pointer to the nilfs object is returned. + * On error, NULL is returned. + */ +struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) +{ + struct the_nilfs *nilfs, *new = NULL; + + retry: + spin_lock(&nilfs_lock); + list_for_each_entry(nilfs, &nilfs_objects, ns_list) { + if (nilfs->ns_bdev == bdev) { + get_nilfs(nilfs); + spin_unlock(&nilfs_lock); + if (new) + put_nilfs(new); + return nilfs; /* existing object */ + } + } + if (new) { + list_add_tail(&new->ns_list, &nilfs_objects); + spin_unlock(&nilfs_lock); + return new; /* new object */ + } + spin_unlock(&nilfs_lock); + + new = alloc_nilfs(bdev); + if (new) + goto retry; + return NULL; /* insufficient memory */ +} + +/** * put_nilfs - release a reference to the_nilfs * @nilfs: the_nilfs structure to be released * @@ -86,13 +132,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev) */ void put_nilfs(struct the_nilfs *nilfs) { - if (!atomic_dec_and_test(&nilfs->ns_count)) + spin_lock(&nilfs_lock); + if (!atomic_dec_and_test(&nilfs->ns_count)) { + spin_unlock(&nilfs_lock); return; + } + list_del_init(&nilfs->ns_list); + spin_unlock(&nilfs_lock); + /* - * Increment of ns_count never occur below because the caller + * Increment of ns_count never occurs below because the caller * of get_nilfs() holds at least one reference to the_nilfs. * Thus its exclusion control is not required here. */ + might_sleep(); if (nilfs_loaded(nilfs)) { nilfs_mdt_clear(nilfs->ns_sufile); @@ -613,13 +666,63 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) return ret; } +/** + * nilfs_find_sbinfo - find existing nilfs_sb_info structure + * @nilfs: nilfs object + * @rw_mount: mount type (non-zero value for read/write mount) + * @cno: checkpoint number (zero for read-only mount) + * + * nilfs_find_sbinfo() returns the nilfs_sb_info structure which + * @rw_mount and @cno (in case of snapshots) matched. If no instance + * was found, NULL is returned. Although the super block instance can + * be unmounted after this function returns, the nilfs_sb_info struct + * is kept on memory until nilfs_put_sbinfo() is called. + */ +struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, + int rw_mount, __u64 cno) +{ + struct nilfs_sb_info *sbi; + + down_read(&nilfs->ns_super_sem); + /* + * The SNAPSHOT flag and sb->s_flags are supposed to be + * protected with nilfs->ns_super_sem. + */ + sbi = nilfs->ns_current; + if (rw_mount) { + if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) + goto found; /* read/write mount */ + else + goto out; + } else if (cno == 0) { + if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) + goto found; /* read-only mount */ + else + goto out; + } + + list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { + if (nilfs_test_opt(sbi, SNAPSHOT) && + sbi->s_snapshot_cno == cno) + goto found; /* snapshot mount */ + } + out: + up_read(&nilfs->ns_super_sem); + return NULL; + + found: + atomic_inc(&sbi->s_count); + up_read(&nilfs->ns_super_sem); + return sbi; +} + int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, int snapshot_mount) { struct nilfs_sb_info *sbi; int ret = 0; - down_read(&nilfs->ns_sem); + down_read(&nilfs->ns_super_sem); if (cno == 0 || cno > nilfs->ns_cno) goto out_unlock; @@ -636,6 +739,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, ret++; out_unlock: - up_read(&nilfs->ns_sem); + up_read(&nilfs->ns_super_sem); return ret; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 30fe587..e8adbff 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -43,12 +43,16 @@ enum { * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags * @ns_count: reference count + * @ns_list: list head for nilfs_list * @ns_bdev: block device * @ns_bdi: backing dev info * @ns_writer: back pointer to writable nilfs_sb_info * @ns_sem: semaphore for shared states + * @ns_super_sem: semaphore for global operations across super block instances + * @ns_mount_mutex: mutex protecting mount process of nilfs * @ns_writer_mutex: mutex protecting ns_writer attach/detach * @ns_writer_refcount: number of referrers on ns_writer + * @ns_current: back pointer to current mount * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data * @ns_sbwtime: previous write time of super blocks @@ -88,15 +92,24 @@ enum { struct the_nilfs { unsigned long ns_flags; atomic_t ns_count; + struct list_head ns_list; struct block_device *ns_bdev; struct backing_dev_info *ns_bdi; struct nilfs_sb_info *ns_writer; struct rw_semaphore ns_sem; + struct rw_semaphore ns_super_sem; + struct mutex ns_mount_mutex; struct mutex ns_writer_mutex; atomic_t ns_writer_refcount; /* + * components protected by ns_super_sem + */ + struct nilfs_sb_info *ns_current; + struct list_head ns_supers; + + /* * used for * - loading the latest checkpoint exclusively. * - allocating a new full segment. @@ -108,7 +121,6 @@ struct the_nilfs { time_t ns_sbwtime[2]; unsigned ns_sbsize; unsigned ns_mount_state; - struct list_head ns_supers; /* * Following fields are dedicated to a writable FS-instance. @@ -191,11 +203,12 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) #define NILFS_ALTSB_FREQ 60 /* spare superblock */ void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *alloc_nilfs(struct block_device *); +struct the_nilfs *find_or_create_nilfs(struct block_device *); void put_nilfs(struct the_nilfs *); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); +struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); @@ -238,6 +251,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) mutex_unlock(&nilfs->ns_writer_mutex); } +static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) +{ + if (!atomic_dec_and_test(&sbi->s_count)) + kfree(sbi); +} + static inline void nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, sector_t *seg_start, sector_t *seg_end) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6aa7c47..abaaa1c 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering with remount options string: %s", opt); + + lock_kernel(); #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. */ *flags |= MS_RDONLY; @@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) if (NVolErrors(vol)) { ntfs_error(sb, "Volume has errors and is read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_IS_DIRTY) { ntfs_error(sb, "Volume is dirty and read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { ntfs_error(sb, "Volume has been modified by chkdsk " "and is read-only%s", es); + unlock_kernel(); return -EROFS; } if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { @@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) "(0x%x) and is read-only%s", (unsigned)le16_to_cpu(vol->vol_flags), es); + unlock_kernel(); return -EROFS; } if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { ntfs_error(sb, "Failed to set dirty bit in volume " "information flags%s", es); + unlock_kernel(); return -EROFS; } #if 0 @@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_error(sb, "Failed to empty journal $LogFile%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } if (!ntfs_mark_quotas_out_of_date(vol)) { ntfs_error(sb, "Failed to mark quotas out of date%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } if (!ntfs_stamp_usnjrnl(vol)) { ntfs_error(sb, "Failed to stamp transation log " "($UsnJrnl)%s", es); NVolSetErrors(vol); + unlock_kernel(); return -EROFS; } } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { @@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) // TODO: Deal with *flags. - if (!parse_options(vol, opt)) + if (!parse_options(vol, opt)) { + unlock_kernel(); return -EINVAL; + } + unlock_kernel(); ntfs_debug("Done."); return 0; } @@ -2246,6 +2259,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_volume *vol = NTFS_SB(sb); ntfs_debug("Entering."); + + lock_kernel(); + #ifdef NTFS_RW /* * Commit all inodes while they are still open in case some of them @@ -2373,39 +2389,12 @@ static void ntfs_put_super(struct super_block *sb) vol->mftmirr_ino = NULL; } /* - * If any dirty inodes are left, throw away all mft data page cache - * pages to allow a clean umount. This should never happen any more - * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as - * the underlying mft records are written out and cleaned. If it does, - * happen anyway, we want to know... + * We should have no dirty inodes left, due to + * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as + * the underlying mft records are written out and cleaned. */ ntfs_commit_inode(vol->mft_ino); write_inode_now(vol->mft_ino, 1); - if (sb_has_dirty_inodes(sb)) { - const char *s1, *s2; - - mutex_lock(&vol->mft_ino->i_mutex); - truncate_inode_pages(vol->mft_ino->i_mapping, 0); - mutex_unlock(&vol->mft_ino->i_mutex); - write_inode_now(vol->mft_ino, 1); - if (sb_has_dirty_inodes(sb)) { - static const char *_s1 = "inodes"; - static const char *_s2 = ""; - s1 = _s1; - s2 = _s2; - } else { - static const char *_s1 = "mft pages"; - static const char *_s2 = "They have been thrown " - "away. "; - s1 = _s1; - s2 = _s2; - } - ntfs_error(sb, "Dirty %s found at umount time. %sYou should " - "run chkdsk. Please email " - "linux-ntfs-dev@lists.sourceforge.net and say " - "that you saw this message. Thank you.", s1, - s2); - } #endif /* NTFS_RW */ iput(vol->mft_ino); @@ -2444,7 +2433,8 @@ static void ntfs_put_super(struct super_block *sb) } sb->s_fs_info = NULL; kfree(vol); - return; + + unlock_kernel(); } /** diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5c6163f..201b40a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> +#include <linux/smp_lock.h> #define MLOG_MASK_PREFIX ML_SUPER #include <cluster/masklog.h> @@ -126,7 +127,6 @@ static int ocfs2_get_sector(struct super_block *sb, struct buffer_head **bh, int block, int sect_size); -static void ocfs2_write_super(struct super_block *sb); static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_destroy_inode(struct inode *inode); static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); @@ -141,7 +141,6 @@ static const struct super_operations ocfs2_sops = { .clear_inode = ocfs2_clear_inode, .delete_inode = ocfs2_delete_inode, .sync_fs = ocfs2_sync_fs, - .write_super = ocfs2_write_super, .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, @@ -365,24 +364,12 @@ static struct file_operations ocfs2_osb_debug_fops = { .llseek = generic_file_llseek, }; -/* - * write_super and sync_fs ripped right out of ext3. - */ -static void ocfs2_write_super(struct super_block *sb) -{ - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; -} - static int ocfs2_sync_fs(struct super_block *sb, int wait) { int status; tid_t target; struct ocfs2_super *osb = OCFS2_SB(sb); - sb->s_dirt = 0; - if (ocfs2_is_hard_readonly(osb)) return -EROFS; @@ -595,6 +582,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); + lock_kernel(); + if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { ret = -EINVAL; goto out; @@ -698,6 +687,7 @@ unlock_osb: ocfs2_set_journal_params(osb); } out: + unlock_kernel(); return ret; } @@ -1550,9 +1540,13 @@ static void ocfs2_put_super(struct super_block *sb) { mlog_entry("(0x%p)\n", sb); + lock_kernel(); + ocfs2_sync_blockdev(sb); ocfs2_dismount_volume(sb, 0); + unlock_kernel(); + mlog_exit_void(); } diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 834b233..d17e774e 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -11,21 +11,6 @@ #include <linux/mpage.h> #include "omfs.h" -static int omfs_sync_file(struct file *file, struct dentry *dentry, - int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - err |= omfs_sync_inode(inode); - return err ? -EIO : 0; -} - static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) { return (sbi->s_sys_blocksize - offset - @@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = omfs_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; @@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) audit_inode(NULL, dentry); - err = mnt_want_write(file->f_path.mnt); + err = mnt_want_write_file(file); if (err) goto out_putf; mutex_lock(&inode->i_mutex); @@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) if (!file) goto out; - error = mnt_want_write(file->f_path.mnt); + error = mnt_want_write_file(file); if (error) goto out_fput; dentry = file->f_path.dentry; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index f6db961..753ca37 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -92,3 +92,28 @@ struct pde_opener { struct list_head lh; }; void pde_users_dec(struct proc_dir_entry *pde); + +extern spinlock_t proc_subdir_lock; + +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); +unsigned long task_vsize(struct mm_struct *); +int task_statm(struct mm_struct *, int *, int *, int *, int *); +void task_mem(struct seq_file *, struct mm_struct *); + +struct proc_dir_entry *de_get(struct proc_dir_entry *de); +void de_put(struct proc_dir_entry *de); + +extern struct vfsmount *proc_mnt; +int proc_fill_super(struct super_block *); +struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); + +/* + * These are generic /proc routines that use the internal + * "struct proc_dir_entry" tree to traverse the filesystem. + * + * The /proc root directory has extended versions to take care + * of the /proc/<pid> subdirectories. + */ +int proc_readdir(struct file *, void *, filldir_t); +struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de2bba5..fc6c302 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include <asm/prom.h> #include <asm/uaccess.h> +#include "internal.h" #ifndef HAVE_ARCH_DEVTREE_FIXUPS static inline void set_node_proc_entry(struct device_node *np, diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index 502d7fe..e4d408c 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4.o -qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o +qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c index 8425cf6..e1cd061 100644 --- a/fs/qnx4/bitmap.c +++ b/fs/qnx4/bitmap.c @@ -13,14 +13,9 @@ * 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) . */ -#include <linux/time.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/stat.h> -#include <linux/kernel.h> -#include <linux/string.h> #include <linux/buffer_head.h> #include <linux/bitops.h> +#include "qnx4.h" #if 0 int qnx4_new_block(struct super_block *sb) diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index ea9ffef..003c68f 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -11,14 +11,9 @@ * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. */ -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/stat.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> - +#include "qnx4.h" static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -84,7 +79,7 @@ const struct file_operations qnx4_dir_operations = { .read = generic_read_dir, .readdir = qnx4_readdir, - .fsync = file_fsync, + .fsync = simple_fsync, }; const struct inode_operations qnx4_dir_inode_operations = diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 867f42b..09b170a 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -12,8 +12,7 @@ * 27-06-1998 by Frank Denis : file overwriting. */ -#include <linux/fs.h> -#include <linux/qnx4_fs.h> +#include "qnx4.h" /* * We have mostly NULL's here: the current defaults are ok for @@ -29,7 +28,7 @@ const struct file_operations qnx4_file_operations = #ifdef CONFIG_QNX4FS_RW .write = do_sync_write, .aio_write = generic_file_aio_write, - .fsync = qnx4_sync_file, + .fsync = simple_fsync, #endif }; diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c deleted file mode 100644 index aa3b195..0000000 --- a/fs/qnx4/fsync.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * QNX4 file system, Linux implementation. - * - * Version : 0.1 - * - * Using parts of the xiafs filesystem. - * - * History : - * - * 24-03-1998 by Richard Frowijn : first release. - */ - -#include <linux/errno.h> -#include <linux/time.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/smp_lock.h> -#include <linux/buffer_head.h> - -#include <linux/fs.h> -#include <linux/qnx4_fs.h> - -#include <asm/system.h> - -/* - * The functions for qnx4 fs file synchronization. - */ - -#ifdef CONFIG_QNX4FS_RW - -static int sync_block(struct inode *inode, unsigned short *block, int wait) -{ - struct buffer_head *bh; - unsigned short tmp; - - if (!*block) - return 0; - tmp = *block; - bh = sb_find_get_block(inode->i_sb, *block); - if (!bh) - return 0; - if (*block != tmp) { - brelse(bh); - return 1; - } - if (wait && buffer_req(bh) && !buffer_uptodate(bh)) { - brelse(bh); - return -1; - } - if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) { - brelse(bh); - return 0; - } - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - return 0; -} - -#ifdef WTF -static int sync_iblock(struct inode *inode, unsigned short *iblock, - struct buffer_head **bh, int wait) -{ - int rc; - unsigned short tmp; - - *bh = NULL; - tmp = *iblock; - if (!tmp) - return 0; - rc = sync_block(inode, iblock, wait); - if (rc) - return rc; - *bh = sb_bread(inode->i_sb, tmp); - if (tmp != *iblock) { - brelse(*bh); - *bh = NULL; - return 1; - } - if (!*bh) - return -1; - return 0; -} -#endif - -static int sync_direct(struct inode *inode, int wait) -{ - int i; - int rc, err = 0; - - for (i = 0; i < 7; i++) { - rc = sync_block(inode, - (unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - return err; -} - -#ifdef WTF -static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait) -{ - int i; - struct buffer_head *ind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, iblock, &ind_bh, wait); - if (rc || !ind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_block(inode, - ((unsigned short *) ind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(ind_bh); - return err; -} - -static int sync_dindirect(struct inode *inode, unsigned short *diblock, - int wait) -{ - int i; - struct buffer_head *dind_bh; - int rc, err = 0; - - rc = sync_iblock(inode, diblock, &dind_bh, wait); - if (rc || !dind_bh) - return rc; - - for (i = 0; i < 512; i++) { - rc = sync_indirect(inode, - ((unsigned short *) dind_bh->b_data) + i, - wait); - if (rc > 0) - break; - if (rc) - err = rc; - } - brelse(dind_bh); - return err; -} -#endif - -int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused) -{ - struct inode *inode = dentry->d_inode; - int wait, err = 0; - - (void) file; - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return -EINVAL; - - lock_kernel(); - for (wait = 0; wait <= 1; wait++) { - err |= sync_direct(inode, wait); - } - err |= qnx4_sync_inode(inode); - unlock_kernel(); - return (err < 0) ? -EIO : 0; -} - -#endif diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index fe1f0f3..681df5f 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -13,19 +13,15 @@ */ #include <linux/module.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/highuid.h> #include <linux/smp_lock.h> #include <linux/pagemap.h> #include <linux/buffer_head.h> -#include <linux/vfs.h> -#include <asm/uaccess.h> +#include <linux/writeback.h> +#include <linux/statfs.h> +#include "qnx4.h" #define QNX4_VERSION 4 #define QNX4_BMNAME ".bitmap" @@ -34,31 +30,6 @@ static const struct super_operations qnx4_sops; #ifdef CONFIG_QNX4FS_RW -int qnx4_sync_inode(struct inode *inode) -{ - int err = 0; -# if 0 - struct buffer_head *bh; - - bh = qnx4_update_inode(inode); - if (bh && buffer_dirty(bh)) - { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - { - printk ("IO error syncing qnx4 inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -1; - } - brelse (bh); - } else if (!bh) { - err = -1; - } -# endif - - return err; -} - static void qnx4_delete_inode(struct inode *inode) { QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino)); @@ -70,15 +41,7 @@ static void qnx4_delete_inode(struct inode *inode) unlock_kernel(); } -static void qnx4_write_super(struct super_block *sb) -{ - lock_kernel(); - QNX4DEBUG(("qnx4: write_super\n")); - sb->s_dirt = 0; - unlock_kernel(); -} - -static int qnx4_write_inode(struct inode *inode, int unused) +static int qnx4_write_inode(struct inode *inode, int do_sync) { struct qnx4_inode_entry *raw_inode; int block, ino; @@ -115,6 +78,16 @@ static int qnx4_write_inode(struct inode *inode, int unused) raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks); mark_buffer_dirty(bh); + if (do_sync) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk("qnx4: IO error syncing inode [%s:%08x]\n", + inode->i_sb->s_id, ino); + brelse(bh); + unlock_kernel(); + return -EIO; + } + } brelse(bh); unlock_kernel(); return 0; @@ -138,7 +111,6 @@ static const struct super_operations qnx4_sops = #ifdef CONFIG_QNX4FS_RW .write_inode = qnx4_write_inode, .delete_inode = qnx4_delete_inode, - .write_super = qnx4_write_super, #endif }; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 775eed3..5972ed2 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -12,16 +12,9 @@ * 04-07-1998 by Frank Denis : first step for rmdir/unlink. */ -#include <linux/time.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include <linux/errno.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> +#include "qnx4.h" /* @@ -187,7 +180,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); clear_nlink(inode); mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; @@ -232,7 +225,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry) de->di_status = 0; memset(de->di_fname, 0, sizeof de->di_fname); de->di_mode = 0; - mark_buffer_dirty(bh); + mark_buffer_dirty_inode(bh, dir); dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); inode->i_ctime = dir->i_ctime; diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h new file mode 100644 index 0000000..9efc089 --- /dev/null +++ b/fs/qnx4/qnx4.h @@ -0,0 +1,57 @@ +#include <linux/fs.h> +#include <linux/qnx4_fs.h> + +#define QNX4_DEBUG 0 + +#if QNX4_DEBUG +#define QNX4DEBUG(X) printk X +#else +#define QNX4DEBUG(X) (void) 0 +#endif + +struct qnx4_sb_info { + struct buffer_head *sb_buf; /* superblock buffer */ + struct qnx4_super_block *sb; /* our superblock */ + unsigned int Version; /* may be useful */ + struct qnx4_inode_entry *BitMap; /* useful */ +}; + +struct qnx4_inode_info { + struct qnx4_inode_entry raw; + loff_t mmu_private; + struct inode vfs_inode; +}; + +extern struct inode *qnx4_iget(struct super_block *, unsigned long); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); +extern unsigned long qnx4_count_free_blocks(struct super_block *sb); +extern unsigned long qnx4_block_map(struct inode *inode, long iblock); + +extern struct buffer_head *qnx4_bread(struct inode *, int, int); + +extern const struct inode_operations qnx4_file_inode_operations; +extern const struct inode_operations qnx4_dir_inode_operations; +extern const struct file_operations qnx4_file_operations; +extern const struct file_operations qnx4_dir_operations; +extern int qnx4_is_free(struct super_block *sb, long block); +extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); +extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); +extern void qnx4_truncate(struct inode *inode); +extern void qnx4_free_inode(struct inode *inode); +extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); +extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); + +static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) +{ + return container_of(inode, struct qnx4_inode_info, vfs_inode); +} + +static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) +{ + return &qnx4_i(inode)->raw; +} diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c index 6437c1c..d94d9ee 100644 --- a/fs/qnx4/truncate.c +++ b/fs/qnx4/truncate.c @@ -10,12 +10,8 @@ * 30-06-1998 by Frank DENIS : ugly filler. */ -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/qnx4_fs.h> #include <linux/smp_lock.h> -#include <asm/uaccess.h> +#include "qnx4.h" #ifdef CONFIG_QNX4FS_RW diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b7f5a46..95c5b42 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, return error; } -static void quota_sync_sb(struct super_block *sb, int type) +#ifdef CONFIG_QUOTA +void sync_quota_sb(struct super_block *sb, int type) { int cnt; + if (!sb->s_qcop->quota_sync) + return; + sb->s_qcop->quota_sync(sb, type); if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) @@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type) } mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); } +#endif -void sync_dquots(struct super_block *sb, int type) +static void sync_dquots(int type) { + struct super_block *sb; int cnt; - if (sb) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - return; - } - spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { @@ -222,8 +222,8 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - if (sb->s_root && sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); + if (sb->s_root) + sync_quota_sb(sb, type); up_read(&sb->s_umount); spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) @@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return sb->s_qcop->set_dqblk(sb, type, id, &idq); } case Q_SYNC: - sync_dquots(sb, type); + if (sb) + sync_quota_sb(sb, type); + else + sync_dquots(type); return 0; case Q_XQUOTAON: diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 45ee3d3..6d2668f 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, static inline bool is_privroot_deh(struct dentry *dir, struct reiserfs_de_head *deh) { - int ret = 0; -#ifdef CONFIG_REISERFS_FS_XATTR struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; - ret = (dir == dir->d_parent && privroot->d_inode && - deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); -#endif - return ret; + if (reiserfs_expose_privroot(dir->d_sb)) + return 0; + return (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); } int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3567fb9..2969773 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -28,6 +28,7 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/crc32.h> +#include <linux/smp_lock.h> struct file_system_type reiserfs_fs_type; @@ -64,18 +65,15 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); static int reiserfs_sync_fs(struct super_block *s, int wait) { - if (!(s->s_flags & MS_RDONLY)) { - struct reiserfs_transaction_handle th; - reiserfs_write_lock(s); - if (!journal_begin(&th, s, 1)) - if (!journal_end_sync(&th, s, 1)) - reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ - reiserfs_write_unlock(s); - } else { - s->s_dirt = 0; - } + struct reiserfs_transaction_handle th; + + reiserfs_write_lock(s); + if (!journal_begin(&th, s, 1)) + if (!journal_end_sync(&th, s, 1)) + reiserfs_flush_old_commits(s); + s->s_dirt = 0; /* Even if it's not true. + * We'll loop forever in sync_supers otherwise */ + reiserfs_write_unlock(s); return 0; } @@ -468,6 +466,11 @@ static void reiserfs_put_super(struct super_block *s) struct reiserfs_transaction_handle th; th.t_trans_id = 0; + lock_kernel(); + + if (s->s_dirt) + reiserfs_write_super(s); + /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -500,7 +503,7 @@ static void reiserfs_put_super(struct super_block *s) kfree(s->s_fs_info); s->s_fs_info = NULL; - return; + unlock_kernel(); } static struct kmem_cache *reiserfs_inode_cachep; @@ -898,6 +901,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin {"conv",.setmask = 1 << REISERFS_CONVERT}, {"attrs",.setmask = 1 << REISERFS_ATTRS}, {"noattrs",.clrmask = 1 << REISERFS_ATTRS}, + {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT}, #ifdef CONFIG_REISERFS_FS_XATTR {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER}, {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER}, @@ -1193,6 +1197,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); #endif + lock_kernel(); rs = SB_DISK_SUPER_BLOCK(s); if (!reiserfs_parse_options @@ -1315,10 +1320,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) out_ok: replace_mount_options(s, new_opts); + unlock_kernel(); return 0; out_err: kfree(new_opts); + unlock_kernel(); return err; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8e7deb0..f3d47d8 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s) strlen(PRIVROOT_NAME)); if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; - s->s_root->d_op = &xattr_lookup_poison_ops; + if (!reiserfs_expose_privroot(s)) + s->s_root->d_op = &xattr_lookup_poison_ops; if (dentry->d_inode) dentry->d_inode->i_flags |= S_PRIVATE; } else diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fc27fbf..1402d2d 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb) { struct smb_sb_info *server = SMB_SB(sb); + lock_kernel(); + smb_lock_server(server); server->state = CONN_INVALID; smbiod_unregister_server(server); @@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb) smb_unlock_server(server); put_pid(server->conn_pid); kfree(server); + + unlock_kernel(); } static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 0adc624..3b52770 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data) static void squashfs_put_super(struct super_block *sb) { + lock_kernel(); + if (sb->s_fs_info) { struct squashfs_sb_info *sbi = sb->s_fs_info; squashfs_cache_delete(sbi->block_cache); @@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; } + + unlock_kernel(); } @@ -28,7 +28,6 @@ #include <linux/blkdev.h> #include <linux/quotaops.h> #include <linux/namei.h> -#include <linux/buffer_head.h> /* for fsync_super() */ #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> @@ -38,7 +37,6 @@ #include <linux/kobject.h> #include <linux/mutex.h> #include <linux/file.h> -#include <linux/async.h> #include <asm/uaccess.h> #include "internal.h" @@ -72,7 +70,6 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); - INIT_LIST_HEAD(&s->s_async_list); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -285,38 +282,6 @@ void unlock_super(struct super_block * sb) EXPORT_SYMBOL(lock_super); EXPORT_SYMBOL(unlock_super); -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. Requires a second blkdev - * flush by the caller to complete the operation. - */ -void __fsync_super(struct super_block *sb) -{ - sync_inodes_sb(sb, 0); - vfs_dq_sync(sb); - lock_super(sb); - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - sync_inodes_sb(sb, 1); -} - -/* - * Write out and wait upon all dirty data associated with this - * superblock. Filesystem data as well as the underlying block - * device. Takes the superblock lock. - */ -int fsync_super(struct super_block *sb) -{ - __fsync_super(sb); - return sync_blockdev(sb->s_bdev); -} -EXPORT_SYMBOL_GPL(fsync_super); - /** * generic_shutdown_super - common helper for ->kill_sb() * @sb: superblock to kill @@ -338,21 +303,13 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); - fsync_super(sb); - lock_super(sb); + sync_filesystem(sb); + get_fs_excl(); sb->s_flags &= ~MS_ACTIVE; - /* - * wait for asynchronous fs operations to finish before going further - */ - async_synchronize_full_domain(&sb->s_async_list); - /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); - lock_kernel(); - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); if (sop->put_super) sop->put_super(sb); @@ -362,9 +319,7 @@ void generic_shutdown_super(struct super_block *sb) "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } - - unlock_kernel(); - unlock_super(sb); + put_fs_excl(); } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ @@ -441,16 +396,14 @@ void drop_super(struct super_block *sb) EXPORT_SYMBOL(drop_super); -static inline void write_super(struct super_block *sb) -{ - lock_super(sb); - if (sb->s_root && sb->s_dirt) - if (sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); -} - -/* +/** + * sync_supers - helper for periodic superblock writeback + * + * Call the write_super method if present on all dirty superblocks in + * the system. This is for the periodic writeback used by most older + * filesystems. For data integrity superblock writeback use + * sync_filesystems() instead. + * * Note: check the dirty flag before waiting, so we don't * hold up the sync while mounting a device. (The newly * mounted device won't need syncing.) @@ -462,12 +415,15 @@ void sync_supers(void) spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { - if (sb->s_dirt) { + if (sb->s_op->write_super && sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); + down_read(&sb->s_umount); - write_super(sb); + if (sb->s_root && sb->s_dirt) + sb->s_op->write_super(sb); up_read(&sb->s_umount); + spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) goto restart; @@ -476,60 +432,6 @@ restart: spin_unlock(&sb_lock); } -/* - * Call the ->sync_fs super_op against all filesystems which are r/w and - * which implement it. - * - * This operation is careful to avoid the livelock which could easily happen - * if two or more filesystems are being continuously dirtied. s_need_sync_fs - * is used only here. We set it against all filesystems and then clear it as - * we sync them. So redirtied filesystems are skipped. - * - * But if process A is currently running sync_filesystems and then process B - * calls sync_filesystems as well, process B will set all the s_need_sync_fs - * flags again, which will cause process A to resync everything. Fix that with - * a local mutex. - * - * (Fabian) Avoid sync_fs with clean fs & wait mode 0 - */ -void sync_filesystems(int wait) -{ - struct super_block *sb; - static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); /* Could be down_interruptible */ - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_op->sync_fs) - continue; - if (sb->s_flags & MS_RDONLY) - continue; - sb->s_need_sync_fs = 1; - } - -restart: - list_for_each_entry(sb, &super_blocks, s_list) { - if (!sb->s_need_sync_fs) - continue; - sb->s_need_sync_fs = 0; - if (sb->s_flags & MS_RDONLY) - continue; /* hm. Was remounted r/o meanwhile */ - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - async_synchronize_full_domain(&sb->s_async_list); - if (sb->s_root && (wait || sb->s_dirt)) - sb->s_op->sync_fs(sb, wait); - up_read(&sb->s_umount); - /* restart only when sb is no longer on the list */ - spin_lock(&sb_lock); - if (__put_super_and_need_restart(sb)) - goto restart; - } - spin_unlock(&sb_lock); - mutex_unlock(&mutex); -} - /** * get_super - get the superblock of a device * @bdev: device to get the superblock for @@ -616,45 +518,6 @@ out: } /** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ - -static void mark_files_ro(struct super_block *sb) -{ - struct file *f; - -retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - f->f_mode &= ~FMODE_WRITE; - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ - mnt_drop_write(mnt); - mntput(mnt); - goto retry; - } - file_list_unlock(); -} - -/** * do_remount_sb - asks filesystem to change mount options. * @sb: superblock in question * @flags: numeric part of options @@ -675,27 +538,31 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); - fsync_super(sb); + sync_filesystem(sb); /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) + else if (!fs_may_remount_ro(sb)) { + unlock_kernel(); return -EBUSY; + } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) + if (retval < 0 && retval != -ENOSYS) { + unlock_kernel(); return -EBUSY; + } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { - lock_super(sb); retval = sb->s_op->remount_fs(sb, &flags, data); - unlock_super(sb); - if (retval) + if (retval) { + unlock_kernel(); return retval; + } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) @@ -711,18 +578,17 @@ static void do_emergency_remount(struct work_struct *work) list_for_each_entry(sb, &super_blocks, s_list) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + down_write(&sb->s_umount); if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { /* * ->remount_fs needs lock_kernel(). * * What lock protects sb->s_flags?? */ - lock_kernel(); do_remount_sb(sb, MS_RDONLY, NULL, 1); - unlock_kernel(); } - drop_super(sb); + up_write(&sb->s_umount); + put_super(sb); spin_lock(&sb_lock); } spin_unlock(&sb_lock); @@ -13,38 +13,123 @@ #include <linux/pagemap.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> +#include "internal.h" #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ SYNC_FILE_RANGE_WAIT_AFTER) /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. + * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) + * just dirties buffers with inodes so we have to submit IO for these buffers + * via __sync_blockdev(). This also speeds up the wait == 1 case since in that + * case write_inode() functions do sync_dirty_buffer() and thus effectively + * write one block at a time. */ -static void do_sync(unsigned long wait) +static int __sync_filesystem(struct super_block *sb, int wait) { - wakeup_pdflush(0); - sync_inodes(0); /* All mappings, inodes and their blockdevs */ - vfs_dq_sync(NULL); - sync_supers(); /* Write the superblocks */ - sync_filesystems(0); /* Start syncing the filesystems */ - sync_filesystems(wait); /* Waitingly sync the filesystems */ - sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */ + /* Avoid doing twice syncing and cache pruning for quota sync */ if (!wait) - printk("Emergency Sync complete\n"); - if (unlikely(laptop_mode)) - laptop_sync_completion(); + writeout_quota_sb(sb, -1); + else + sync_quota_sb(sb, -1); + sync_inodes_sb(sb, wait); + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, wait); + return __sync_blockdev(sb->s_bdev, wait); +} + +/* + * Write out and wait upon all dirty data associated with this + * superblock. Filesystem data as well as the underlying block + * device. Takes the superblock lock. + */ +int sync_filesystem(struct super_block *sb) +{ + int ret; + + /* + * We need to be protected against the filesystem going from + * r/o to r/w or vice versa. + */ + WARN_ON(!rwsem_is_locked(&sb->s_umount)); + + /* + * No point in syncing out anything if the filesystem is read-only. + */ + if (sb->s_flags & MS_RDONLY) + return 0; + + ret = __sync_filesystem(sb, 0); + if (ret < 0) + return ret; + return __sync_filesystem(sb, 1); +} +EXPORT_SYMBOL_GPL(sync_filesystem); + +/* + * Sync all the data for all the filesystems (called by sys_sync() and + * emergency sync) + * + * This operation is careful to avoid the livelock which could easily happen + * if two or more filesystems are being continuously dirtied. s_need_sync + * is used only here. We set it against all filesystems and then clear it as + * we sync them. So redirtied filesystems are skipped. + * + * But if process A is currently running sync_filesystems and then process B + * calls sync_filesystems as well, process B will set all the s_need_sync + * flags again, which will cause process A to resync everything. Fix that with + * a local mutex. + */ +static void sync_filesystems(int wait) +{ + struct super_block *sb; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); /* Could be down_interruptible */ + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) + sb->s_need_sync = 1; + +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_need_sync) + continue; + sb->s_need_sync = 0; + sb->s_count++; + spin_unlock(&sb_lock); + + down_read(&sb->s_umount); + if (!(sb->s_flags & MS_RDONLY) && sb->s_root) + __sync_filesystem(sb, wait); + up_read(&sb->s_umount); + + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); + mutex_unlock(&mutex); } SYSCALL_DEFINE0(sync) { - do_sync(1); + sync_filesystems(0); + sync_filesystems(1); + if (unlikely(laptop_mode)) + laptop_sync_completion(); return 0; } static void do_sync_work(struct work_struct *work) { - do_sync(0); + /* + * Sync twice to reduce the possibility we skipped some inodes / pages + * because they were temporarily locked + */ + sync_filesystems(0); + sync_filesystems(0); + printk("Emergency Sync complete\n"); kfree(work); } @@ -75,10 +160,8 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) /* sync the superblock to buffers */ sb = inode->i_sb; - lock_super(sb); if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); - unlock_super(sb); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 56f6552..c779807 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -24,7 +24,7 @@ static int sysv_readdir(struct file *, void *, filldir_t); const struct file_operations sysv_dir_operations = { .read = generic_read_dir, .readdir = sysv_readdir, - .fsync = sysv_sync_file, + .fsync = simple_fsync, }; static inline void dir_put_page(struct page *page) diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 589be21..96340c0 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .fsync = sysv_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; @@ -34,18 +34,3 @@ const struct inode_operations sysv_file_inode_operations = { .truncate = sysv_truncate, .getattr = sysv_getattr, }; - -int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= sysv_sync_inode(inode); - return err ? -EIO : 0; -} diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index da20b48..4799234 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -31,15 +31,13 @@ #include <asm/byteorder.h> #include "sysv.h" -/* This is only called on sync() and umount(), when s_dirt=1. */ -static void sysv_write_super(struct super_block *sb) +static int sysv_sync_fs(struct super_block *sb, int wait) { struct sysv_sb_info *sbi = SYSV_SB(sb); unsigned long time = get_seconds(), old_time; + lock_super(sb); lock_kernel(); - if (sb->s_flags & MS_RDONLY) - goto clean; /* * If we are going to write out the super block, @@ -53,18 +51,30 @@ static void sysv_write_super(struct super_block *sb) *sbi->s_sb_time = cpu_to_fs32(sbi, time); mark_buffer_dirty(sbi->s_bh2); } -clean: - sb->s_dirt = 0; + unlock_kernel(); + unlock_super(sb); + + return 0; +} + +static void sysv_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + sysv_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); + lock_super(sb); if (sbi->s_forced_ro) *flags |= MS_RDONLY; if (!(*flags & MS_RDONLY)) sb->s_dirt = 1; + unlock_super(sb); return 0; } @@ -72,6 +82,11 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); + lock_kernel(); + + if (sb->s_dirt) + sysv_write_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); @@ -84,6 +99,8 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); + + unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -236,7 +253,7 @@ bad_inode: return ERR_PTR(-EIO); } -static struct buffer_head * sysv_update_inode(struct inode * inode) +int sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -244,19 +261,21 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) struct sysv_inode * raw_inode; struct sysv_inode_info * si; unsigned int ino, block; + int err = 0; ino = inode->i_ino; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", inode->i_sb->s_id, ino); - return NULL; + return -EIO; } raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("unable to read i-node block\n"); - return NULL; + return -EIO; } + lock_kernel(); raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); @@ -272,38 +291,23 @@ static struct buffer_head * sysv_update_inode(struct inode * inode) for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); + unlock_kernel(); mark_buffer_dirty(bh); - return bh; -} - -int sysv_write_inode(struct inode * inode, int wait) -{ - struct buffer_head *bh; - lock_kernel(); - bh = sysv_update_inode(inode); + if (wait) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + printk ("IO error syncing sysv inode [%s:%08x]\n", + sb->s_id, ino); + err = -EIO; + } + } brelse(bh); - unlock_kernel(); return 0; } -int sysv_sync_inode(struct inode * inode) +int sysv_sync_inode(struct inode *inode) { - int err = 0; - struct buffer_head *bh; - - bh = sysv_update_inode(inode); - if (bh && buffer_dirty(bh)) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk ("IO error syncing sysv inode [%s:%08lx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -1; - } - } - else if (!bh) - err = -1; - brelse (bh); - return err; + return sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) @@ -347,6 +351,7 @@ const struct super_operations sysv_sops = { .delete_inode = sysv_delete_inode, .put_super = sysv_put_super, .write_super = sysv_write_super, + .sync_fs = sysv_sync_fs, .remount_fs = sysv_remount, .statfs = sysv_statfs, }; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 5784a31..53786eb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -144,7 +144,6 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, int); extern int sysv_sync_inode(struct inode *); -extern int sysv_sync_file(struct file *, struct dentry *, int); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int sysv_init_icache(void); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e9f7a75..3589eab 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -36,6 +36,7 @@ #include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> +#include <linux/smp_lock.h> #include "ubifs.h" /* @@ -447,9 +448,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - if (sb->s_flags & MS_RDONLY) - return 0; - /* * VFS calls '->sync_fs()' before synchronizing all dirty inodes and * pages, so synchronize them first, then commit the journal. Strictly @@ -1687,6 +1685,9 @@ static void ubifs_put_super(struct super_block *sb) ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + + lock_kernel(); + /* * The following asserts are only valid if there has not been a failure * of the media. For example, there will be dirty inodes if we failed @@ -1753,6 +1754,8 @@ static void ubifs_put_super(struct super_block *sb) ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); kfree(c); + + unlock_kernel(); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -1768,17 +1771,22 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } + lock_kernel(); if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); + unlock_kernel(); return -EROFS; } err = ubifs_remount_rw(c); - if (err) + if (err) { + unlock_kernel(); return err; + } } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { if (c->ro_media) { ubifs_msg("cannot re-mount due to prior errors"); + unlock_kernel(); return -EROFS; } ubifs_remount_ro(c); @@ -1793,6 +1801,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } ubifs_assert(c->lst.taken_empty_lebs > 0); + unlock_kernel(); return 0; } diff --git a/fs/udf/Makefile b/fs/udf/Makefile index 0d4503f..eb880f6 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile @@ -5,5 +5,5 @@ obj-$(CONFIG_UDF_FS) += udf.o udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \ - partition.o super.o truncate.o symlink.o fsync.o \ + partition.o super.o truncate.o symlink.o \ directory.o misc.o udftime.o unicode.o diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 2efd4d5..61d9a76 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -210,5 +210,5 @@ const struct file_operations udf_dir_operations = { .read = generic_read_dir, .readdir = udf_readdir, .ioctl = udf_ioctl, - .fsync = udf_fsync_file, + .fsync = simple_fsync, }; diff --git a/fs/udf/file.c b/fs/udf/file.c index eb91f3b..7464305 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = { .write = do_sync_write, .aio_write = udf_file_aio_write, .release = udf_release_file, - .fsync = udf_fsync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, .llseek = generic_file_llseek, }; diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c deleted file mode 100644 index b2c472b..0000000 --- a/fs/udf/fsync.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * fsync.c - * - * PURPOSE - * Fsync handling routines for the OSTA-UDF(tm) filesystem. - * - * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * - * (C) 1999-2001 Ben Fennema - * (C) 1999-2000 Stelias Computing Inc - * - * HISTORY - * - * 05/22/99 blf Created. - */ - -#include "udfdecl.h" - -#include <linux/fs.h> - -static int udf_fsync_inode(struct inode *, int); - -/* - * File may be NULL when we are called. Perhaps we shouldn't - * even pass file to fsync ? - */ - -int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - - return udf_fsync_inode(inode, datasync); -} - -static int udf_fsync_inode(struct inode *inode, int datasync) -{ - int err; - - err = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return err; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return err; - - err |= udf_sync_inode(inode); - - return err ? -EIO : 0; -} diff --git a/fs/udf/super.c b/fs/udf/super.c index 0ba4410..6832135 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -568,6 +568,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) if (!udf_parse_options(options, &uopt, true)) return -EINVAL; + lock_kernel(); sbi->s_flags = uopt.flags; sbi->s_uid = uopt.uid; sbi->s_gid = uopt.gid; @@ -581,13 +582,16 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) *flags |= MS_RDONLY; } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + unlock_kernel(); return 0; + } if (*flags & MS_RDONLY) udf_close_lvid(sb); else udf_open_lvid(sb); + unlock_kernel(); return 0; } @@ -2062,6 +2066,9 @@ static void udf_put_super(struct super_block *sb) struct udf_sb_info *sbi; sbi = UDF_SB(sb); + + lock_kernel(); + if (sbi->s_vat_inode) iput(sbi->s_vat_inode); if (sbi->s_partitions) @@ -2077,6 +2084,8 @@ static void udf_put_super(struct super_block *sb) kfree(sbi->s_partmaps); kfree(sb->s_fs_info); sb->s_fs_info = NULL; + + unlock_kernel(); } static int udf_sync_fs(struct super_block *sb, int wait) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index cac51b7..8d46f42 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -223,9 +223,6 @@ extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t, extern int udf_new_block(struct super_block *, struct inode *, uint16_t, uint32_t, int *); -/* fsync.c */ -extern int udf_fsync_file(struct file *, struct dentry *, int); - /* directory.c */ extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *, struct udf_fileident_bh *, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 6321b79..6f671f1 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -666,6 +666,6 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, - .fsync = ufs_sync_file, + .fsync = simple_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 2bd3a16..73655c6 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,31 +24,10 @@ */ #include <linux/fs.h> -#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ #include "ufs_fs.h" #include "ufs.h" - -int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = ufs_sync_inode(inode); - if (ret == 0) - ret = err; - return ret; -} - - /* * We have mostly NULL's here: the current defaults are ok for * the ufs filesystem. @@ -62,6 +41,6 @@ const struct file_operations ufs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .open = generic_file_open, - .fsync = ufs_sync_file, + .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 6035929..5faed79 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -263,6 +263,7 @@ void ufs_panic (struct super_block * sb, const char * function, struct ufs_super_block_first * usb1; va_list args; + lock_kernel(); uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); @@ -594,6 +595,9 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("ENTER\n"); + + lock_kernel(); + ufs_put_cstotal(sb); size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -621,6 +625,9 @@ static void ufs_put_super_internal(struct super_block *sb) brelse (sbi->s_ucg[i]); kfree (sbi->s_ucg); kfree (base); + + unlock_kernel(); + UFSD("EXIT\n"); } @@ -1118,32 +1125,45 @@ failed_nomem: return -ENOMEM; } -static void ufs_write_super(struct super_block *sb) +static int ufs_sync_fs(struct super_block *sb, int wait) { struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; struct ufs_super_block_third * usb3; unsigned flags; + lock_super(sb); lock_kernel(); + UFSD("ENTER\n"); + flags = UFS_SB(sb)->s_flags; uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); usb3 = ubh_get_usb_third(uspi); - if (!(sb->s_flags & MS_RDONLY)) { - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); - if ((flags & UFS_ST_MASK) == UFS_ST_SUN - || (flags & UFS_ST_MASK) == UFS_ST_SUNOS - || (flags & UFS_ST_MASK) == UFS_ST_SUNx86) - ufs_set_fs_state(sb, usb1, usb3, - UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ufs_put_cstotal(sb); - } + usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + if ((flags & UFS_ST_MASK) == UFS_ST_SUN || + (flags & UFS_ST_MASK) == UFS_ST_SUNOS || + (flags & UFS_ST_MASK) == UFS_ST_SUNx86) + ufs_set_fs_state(sb, usb1, usb3, + UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); + ufs_put_cstotal(sb); sb->s_dirt = 0; + UFSD("EXIT\n"); unlock_kernel(); + unlock_super(sb); + + return 0; +} + +static void ufs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ufs_sync_fs(sb, 1); + else + sb->s_dirt = 0; } static void ufs_put_super(struct super_block *sb) @@ -1152,6 +1172,9 @@ static void ufs_put_super(struct super_block *sb) UFSD("ENTER\n"); + if (sb->s_dirt) + ufs_write_super(sb); + if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); @@ -1171,7 +1194,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) struct ufs_super_block_third * usb3; unsigned new_mount_opt, ufstype; unsigned flags; - + + lock_kernel(); + lock_super(sb); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; usb1 = ubh_get_usb_first(uspi); @@ -1184,17 +1209,24 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE; new_mount_opt = 0; ufs_set_opt (new_mount_opt, ONERROR_LOCK); - if (!ufs_parse_options (data, &new_mount_opt)) + if (!ufs_parse_options (data, &new_mount_opt)) { + unlock_super(sb); + unlock_kernel(); return -EINVAL; + } if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { new_mount_opt |= ufstype; } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { printk("ufstype can't be changed during remount\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; + unlock_super(sb); + unlock_kernel(); return 0; } @@ -1219,6 +1251,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) #ifndef CONFIG_UFS_FS_WRITE printk("ufs was compiled with read-only support, " "can't be mounted as read-write\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; #else if (ufstype != UFS_MOUNT_UFSTYPE_SUN && @@ -1227,16 +1261,22 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && ufstype != UFS_MOUNT_UFSTYPE_UFS2) { printk("this ufstype is read-only supported\n"); + unlock_super(sb); + unlock_kernel(); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { printk("failed during remounting\n"); + unlock_super(sb); + unlock_kernel(); return -EPERM; } sb->s_flags &= ~MS_RDONLY; #endif } UFS_SB(sb)->s_mount_opt = new_mount_opt; + unlock_super(sb); + unlock_kernel(); return 0; } @@ -1352,6 +1392,7 @@ static const struct super_operations ufs_super_ops = { .delete_inode = ufs_delete_inode, .put_super = ufs_put_super, .write_super = ufs_write_super, + .sync_fs = ufs_sync_fs, .statfs = ufs_statfs, .remount_fs = ufs_remount, .show_options = ufs_show_options, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index d0c4acd..644e77e 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -99,7 +99,6 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, extern const struct inode_operations ufs_file_inode_operations; extern const struct file_operations ufs_file_operations; extern const struct address_space_operations ufs_aops; -extern int ufs_sync_file(struct file *, struct dentry *, int); /* ialloc.c */ extern void ufs_free_inode (struct inode *inode); @@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = setxattr(dentry, name, value, size, flags); mnt_drop_write(f->f_path.mnt); @@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) return error; dentry = f->f_path.dentry; audit_inode(NULL, dentry); - error = mnt_want_write(f->f_path.mnt); + error = mnt_want_write_file(f); if (!error) { error = removexattr(dentry, name); mnt_drop_write(f->f_path.mnt); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bb68526..08d6bd9 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1104,15 +1104,6 @@ xfs_fs_put_super( kfree(mp); } -STATIC void -xfs_fs_write_super( - struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - xfs_sync_fsdata(XFS_M(sb), 0); - sb->s_dirt = 0; -} - STATIC int xfs_fs_sync_super( struct super_block *sb, @@ -1137,7 +1128,6 @@ xfs_fs_sync_super( error = xfs_quiesce_data(mp); else error = xfs_sync_fsdata(mp, 0); - sb->s_dirt = 0; if (unlikely(laptop_mode)) { int prev_sync_seq = mp->m_sync_seq; @@ -1443,7 +1433,6 @@ xfs_fs_fill_super( XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); - sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1533,7 +1522,6 @@ static struct super_operations xfs_super_operations = { .write_inode = xfs_fs_write_inode, .clear_inode = xfs_fs_clear_inode, .put_super = xfs_fs_put_super, - .write_super = xfs_fs_write_super, .sync_fs = xfs_fs_sync_super, .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8570b82..bcc39d3 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -628,8 +628,6 @@ xfs_trans_apply_sb_deltas( xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), offsetof(xfs_dsb_t, sb_frextents) + sizeof(sbp->sb_frextents) - 1); - - tp->t_mountp->m_super->s_dirt = 1; } /* diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f0eaa3..b3afd22 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -135,6 +135,7 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h +header-y += qnx4_fs.h header-y += radeonfb.h header-y += raw.h header-y += resource.h @@ -308,7 +309,6 @@ unifdef-y += poll.h unifdef-y += ppp_defs.h unifdef-y += ppp-comp.h unifdef-y += ptrace.h -unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h unifdef-y += irqnr.h diff --git a/include/linux/cdev.h b/include/linux/cdev.h index fb45919..f389e31 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned); void cdev_del(struct cdev *); +int cdev_index(struct inode *inode); + void cd_forget(struct inode *); extern struct backing_dev_info directly_mappable_cdev_bdi; diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h index 3be4e5a..6fc2bed 100644 --- a/include/linux/cramfs_fs.h +++ b/include/linux/cramfs_fs.h @@ -2,9 +2,8 @@ #define __CRAMFS_H #include <linux/types.h> +#include <linux/magic.h> -#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ -#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define CRAMFS_SIGNATURE "Compressed ROMFS" /* diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9797800..30b93b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -353,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry) return (dentry->d_flags & DCACHE_UNHASHED); } +static inline int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; @@ -370,7 +375,7 @@ static inline int d_mountpoint(struct dentry *dentry) return dentry->d_mounted; } -extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *); +extern struct vfsmount *lookup_mnt(struct path *); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; diff --git a/include/linux/fs.h b/include/linux/fs.h index 323b5ce..ede84fa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -729,8 +729,8 @@ struct inode { struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; blkcnt_t i_blocks; + unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ @@ -751,7 +751,6 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; }; - int i_cindex; __u32 i_generation; @@ -1321,7 +1320,7 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_need_sync_fs; + int s_need_sync; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; @@ -1372,11 +1371,6 @@ struct super_block { * generic_show_options() */ char *s_options; - - /* - * storage for asynchronous operations - */ - struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -1800,7 +1794,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); +extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); @@ -1947,8 +1941,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_super(struct super_block *); -extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -1964,6 +1956,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; @@ -2082,12 +2075,8 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); -extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif @@ -2356,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); +extern int simple_fsync(struct file *, struct dentry *, int); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); diff --git a/include/linux/magic.h b/include/linux/magic.h index 927138c..1923327 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -6,6 +6,8 @@ #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 #define CODA_SUPER_MAGIC 0x73757245 +#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 diff --git a/include/linux/mount.h b/include/linux/mount.h index 51f55f9..5d52753 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,7 +30,7 @@ struct mnt_namespace; #define MNT_STRICTATIME 0x80 #define MNT_SHRINKABLE 0x100 -#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */ +#define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ @@ -65,13 +65,22 @@ struct vfsmount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ - atomic_t __mnt_writers; +#ifdef CONFIG_SMP + int *mnt_writers; +#else + int mnt_writers; +#endif }; +static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + return mnt->mnt_writers; +#else + return &mnt->mnt_writers; +#endif +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) @@ -79,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); diff --git a/include/linux/namei.h b/include/linux/namei.h index 518098f..d870ae2 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 }; struct nameidata { struct path path; struct qstr last; + struct path root; unsigned int flags; int last_type; unsigned depth; @@ -77,8 +78,8 @@ extern void release_open_intent(struct nameidata *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); -extern int follow_down(struct vfsmount **, struct dentry **); -extern int follow_up(struct vfsmount **, struct dentry **); +extern int follow_down(struct path *); +extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcd0201..a6d9ef2 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -125,11 +125,9 @@ void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct vfsmount *, - struct dentry *); + struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct vfsmount *mnt, - struct dentry *dentry); + struct path *); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index fbfa3d4..e6e77d3 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -93,20 +93,9 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern spinlock_t proc_subdir_lock; - extern void proc_root_init(void); void proc_flush_task(struct task_struct *task); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); -unsigned long task_vsize(struct mm_struct *); -int task_statm(struct mm_struct *, int *, int *, int *, int *); -void task_mem(struct seq_file *, struct mm_struct *); -void clear_refs_smap(struct mm_struct *mm); - -struct proc_dir_entry *de_get(struct proc_dir_entry *de); -void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); @@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); -extern struct vfsmount *proc_mnt; struct pid_namespace; -extern int proc_fill_super(struct super_block *); -extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); - -/* - * These are generic /proc routines that use the internal - * "struct proc_dir_entry" tree to traverse the filesystem. - * - * The /proc root directory has extended versions to take care - * of the /proc/<pid> subdirectories. - */ -extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 787d19e..8b9aee1 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -85,65 +85,4 @@ struct qnx4_super_block { struct qnx4_inode_entry AltBoot; }; -#ifdef __KERNEL__ - -#define QNX4_DEBUG 0 - -#if QNX4_DEBUG -#define QNX4DEBUG(X) printk X -#else -#define QNX4DEBUG(X) (void) 0 -#endif - -struct qnx4_sb_info { - struct buffer_head *sb_buf; /* superblock buffer */ - struct qnx4_super_block *sb; /* our superblock */ - unsigned int Version; /* may be useful */ - struct qnx4_inode_entry *BitMap; /* useful */ -}; - -struct qnx4_inode_info { - struct qnx4_inode_entry raw; - loff_t mmu_private; - struct inode vfs_inode; -}; - -extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); -extern unsigned long qnx4_count_free_blocks(struct super_block *sb); -extern unsigned long qnx4_block_map(struct inode *inode, long iblock); - -extern struct buffer_head *qnx4_bread(struct inode *, int, int); - -extern const struct inode_operations qnx4_file_inode_operations; -extern const struct inode_operations qnx4_dir_inode_operations; -extern const struct file_operations qnx4_file_operations; -extern const struct file_operations qnx4_dir_operations; -extern int qnx4_is_free(struct super_block *sb, long block); -extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); -extern void qnx4_truncate(struct inode *inode); -extern void qnx4_free_inode(struct inode *inode); -extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); -extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); -extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int); -extern int qnx4_sync_inode(struct inode *inode); - -static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) -{ - return container_of(inode, struct qnx4_inode_info, vfs_inode); -} - -static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) -{ - return &qnx4_i(inode)->raw; -} - -#endif /* __KERNEL__ */ - #endif diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 36353d9..7bc4575 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,7 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* * declaration of quota_function calls in kernel. */ -void sync_dquots(struct super_block *sb, int type); +void sync_quota_sb(struct super_block *sb, int type); +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); +} int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -253,12 +258,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) inode->i_sb->dq_op->free_inode(inode, 1); } -/* The following two functions cannot be called inside a transaction */ -static inline void vfs_dq_sync(struct super_block *sb) -{ - sync_dquots(sb, -1); -} - +/* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -334,7 +334,11 @@ static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void vfs_dq_sync(struct super_block *sb) +static inline void sync_quota_sb(struct super_block *sb, int type) +{ +} + +static inline void writeout_quota_sb(struct super_block *sb, int type) { } diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6473650..dab68bb 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -453,6 +453,7 @@ enum reiserfs_mount_options { REISERFS_ATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, + REISERFS_EXPOSE_PRIVROOT, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, @@ -490,6 +491,7 @@ enum reiserfs_mount_options { #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) +#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9344547..3224820 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,7 +79,6 @@ struct writeback_control { void writeback_inodes(struct writeback_control *wbc); int inode_wait(void *); void sync_inodes_sb(struct super_block *, int wait); -void sync_inodes(int wait); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 6e73517..1f6396d7 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -568,7 +568,7 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(path.mnt, path.dentry); + root_mnt = collect_mounts(&path); path_put(&path); if (!root_mnt) goto skip_it; @@ -660,7 +660,7 @@ int audit_add_tree_rule(struct audit_krule *rule) err = kern_path(tree->pathname, 0, &path); if (err) goto Err; - mnt = collect_mounts(path.mnt, path.dentry); + mnt = collect_mounts(&path); path_put(&path); if (!mnt) { err = -ENOMEM; @@ -720,7 +720,7 @@ int audit_tag_tree(char *old, char *new) err = kern_path(new, 0, &path); if (err) return err; - tagged = collect_mounts(path.mnt, path.dentry); + tagged = collect_mounts(&path); path_put(&path); if (!tagged) return -ENOMEM; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7267bf..3fb789f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -46,6 +46,7 @@ #include <linux/cgroupstats.h> #include <linux/hash.h> #include <linux/namei.h> +#include <linux/smp_lock.h> #include <asm/atomic.h> @@ -900,6 +901,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) struct cgroup *cgrp = &root->top_cgroup; struct cgroup_sb_opts opts; + lock_kernel(); mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); @@ -927,6 +929,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) kfree(opts.release_agent); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); + unlock_kernel(); return ret; } |