summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/main.c4
-rw-r--r--fs/aio.c13
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c5
-rw-r--r--fs/btrfs/extent-tree.c5
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/raid56.c5
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/sysfs.c32
-rw-r--r--fs/btrfs/sysfs.h4
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/btrfs/volumes.c32
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/direct-io.c23
-rw-r--r--fs/ext4/balloc.c16
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/ialloc.c37
-rw-r--r--fs/ext4/indirect.c24
-rw-r--r--fs/ext4/mballoc.c12
-rw-r--r--fs/ext4/super.c60
-rw-r--r--fs/f2fs/data.c23
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h6
-rw-r--r--fs/f2fs/file.c12
-rw-r--r--fs/f2fs/inode.c1
-rw-r--r--fs/f2fs/namei.c13
-rw-r--r--fs/f2fs/node.c2
-rw-r--r--fs/f2fs/segment.c5
-rw-r--r--fs/f2fs/super.c4
-rw-r--r--fs/fuse/dev.c51
-rw-r--r--fs/fuse/dir.c41
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c27
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c14
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/lock_dlm.c4
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--fs/kernfs/file.c69
-rw-r--r--fs/kernfs/mount.c30
-rw-r--r--fs/locks.c26
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3acl.c43
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/pagelist.c20
-rw-r--r--fs/nfs/write.c335
-rw-r--r--fs/nfsd/nfs4proc.c9
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/open.c5
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/seq_file.c30
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_btree.c82
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_sb.c25
69 files changed, 979 insertions, 421 deletions
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 42dd2e4..35de0c0 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void)
afs_uuid.time_low = uuidtime;
afs_uuid.time_mid = uuidtime >> 32;
afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
- afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME;
+ afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME;
get_random_bytes(&clockseq, 2);
afs_uuid.clock_seq_low = clockseq;
afs_uuid.clock_seq_hi_and_reserved =
(clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
- afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD;
+ afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD;
_debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
afs_uuid.time_low,
diff --git a/fs/aio.c b/fs/aio.c
index 955947e..bd7ec2c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work)
aio_free_ring(ctx);
free_percpu(ctx->cpu);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
}
@@ -715,8 +717,8 @@ err_ctx:
err:
mutex_unlock(&ctx->ring_lock);
free_percpu(ctx->cpu);
- free_percpu(ctx->reqs.pcpu_count);
- free_percpu(ctx->users.pcpu_count);
+ percpu_ref_exit(&ctx->reqs);
+ percpu_ref_exit(&ctx->users);
kmem_cache_free(kioctx_cachep, ctx);
pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
@@ -830,16 +832,20 @@ void exit_aio(struct mm_struct *mm)
static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
struct kioctx_cpu *kcpu;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
kcpu->reqs_available += nr;
+
while (kcpu->reqs_available >= ctx->req_batch * 2) {
kcpu->reqs_available -= ctx->req_batch;
atomic_add(ctx->req_batch, &ctx->reqs_available);
}
+ local_irq_restore(flags);
preempt_enable();
}
@@ -847,10 +853,12 @@ static bool get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
+ unsigned long flags;
preempt_disable();
kcpu = this_cpu_ptr(ctx->cpu);
+ local_irq_save(flags);
if (!kcpu->reqs_available) {
int old, avail = atomic_read(&ctx->reqs_available);
@@ -869,6 +877,7 @@ static bool get_reqs_available(struct kioctx *ctx)
ret = true;
kcpu->reqs_available--;
out:
+ local_irq_restore(flags);
preempt_enable();
return ret;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395..1c55388 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
- int pgrp;
+ int pgrp = 0;
bool pgrp_set = false;
int ret = -EINVAL;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 92371c4..1daea0b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace)
spin_lock(workspace_lock);
if (*num_workspace < num_online_cpus()) {
- list_add_tail(workspace, idle_workspace);
+ list_add(workspace, idle_workspace);
(*num_workspace)++;
spin_unlock(workspace_lock);
goto wake;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2af6e66..eea26e1 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -36,6 +36,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
+#include "sysfs.h"
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
@@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
+ /* replace the sysfs entry */
+ btrfs_kobj_rm_device(fs_info, src_device);
+ btrfs_kobj_add_device(fs_info, tgt_device);
+
btrfs_rm_dev_replace_blocked(fs_info);
btrfs_rm_dev_replace_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8bb4aa1..08e65e9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state, GFP_NOFS);
- btrfs_tree_read_unlock_blocking(eb);
+ if (need_lock)
+ btrfs_tree_read_unlock_blocking(eb);
return ret;
}
@@ -2904,7 +2905,9 @@ retry_root_backup:
if (ret)
goto fail_qgroup;
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
printk(KERN_WARNING
"BTRFS: failed to recover relocation\n");
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 99c2539..813537f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5678,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
struct btrfs_block_group_cache *cache;
- struct btrfs_space_info *space_info;
down_write(&fs_info->commit_root_sem);
@@ -5701,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
up_write(&fs_info->commit_root_sem);
- list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
- percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
update_global_block_rsv(fs_info);
}
@@ -5741,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
spin_lock(&cache->lock);
cache->pinned -= len;
space_info->bytes_pinned -= len;
+ percpu_counter_add(&space_info->total_bytes_pinned, -len);
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0d321c2..47aceb4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
void btrfs_update_iflags(struct inode *inode)
{
struct btrfs_inode *ip = BTRFS_I(inode);
-
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ unsigned int new_fl = 0;
if (ip->flags & BTRFS_INODE_SYNC)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (ip->flags & BTRFS_INODE_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (ip->flags & BTRFS_INODE_APPEND)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (ip->flags & BTRFS_INODE_NOATIME)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (ip->flags & BTRFS_INODE_DIRSYNC)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+
+ set_mask_bits(&inode->i_flags,
+ S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
+ new_fl);
}
/*
@@ -3139,7 +3142,6 @@ out:
static void clone_update_extent_map(struct inode *inode,
const struct btrfs_trans_handle *trans,
const struct btrfs_path *path,
- struct btrfs_file_extent_item *fi,
const u64 hole_offset,
const u64 hole_len)
{
@@ -3154,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode,
return;
}
- if (fi) {
+ if (path) {
+ struct btrfs_file_extent_item *fi;
+
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
em->generation = -1;
if (btrfs_file_extent_type(path->nodes[0], fi) ==
@@ -3508,18 +3514,15 @@ process_slot:
btrfs_item_ptr_offset(leaf, slot),
size);
inode_add_bytes(inode, datal);
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
}
/* If we have an implicit hole (NO_HOLES feature). */
if (drop_start < new_key.offset)
clone_update_extent_map(inode, trans,
- path, NULL, drop_start,
+ NULL, drop_start,
new_key.offset - drop_start);
- clone_update_extent_map(inode, trans, path,
- extent, 0, 0);
+ clone_update_extent_map(inode, trans, path, 0, 0);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -3562,12 +3565,10 @@ process_slot:
btrfs_end_transaction(trans, root);
goto out;
}
+ clone_update_extent_map(inode, trans, NULL, last_dest_end,
+ destoff + len - last_dest_end);
ret = clone_finish_inode_update(trans, inode, destoff + len,
destoff, olen);
- if (ret)
- goto out;
- clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
- destoff + len - last_dest_end);
}
out:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e12441c..7187b14 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
+
+ if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
+ !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
+ struct inode *inode = ordered->inode;
+ u64 start = ordered->file_offset;
+ u64 end = ordered->file_offset + ordered->len - 1;
+
+ WARN_ON(!inode);
+ filemap_fdatawrite_range(inode->i_mapping, start, end);
+ }
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
+
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 6efd70d..9626b4a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
btrfs_extent_data_ref_count(eb, ref));
}
-static void print_extent_item(struct extent_buffer *eb, int slot)
+static void print_extent_item(struct extent_buffer *eb, int slot, int type)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
@@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
struct btrfs_disk_key key;
unsigned long end;
unsigned long ptr;
- int type;
u32 item_size = btrfs_item_size_nr(eb, slot);
u64 flags;
u64 offset;
@@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
flags);
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if ((type == BTRFS_EXTENT_ITEM_KEY) &&
+ flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
btrfs_tree_block_key(eb, info, &key);
@@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_disk_root_refs(l, ri));
break;
case BTRFS_EXTENT_ITEM_KEY:
- print_extent_item(l, i);
+ case BTRFS_METADATA_ITEM_KEY:
+ print_extent_item(l, i, type);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
printk(KERN_INFO "\t\ttree block backref\n");
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4055291..4a88f07 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
* pages are going to be uptodate.
*/
for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
- if (rbio->faila == stripe ||
- rbio->failb == stripe)
+ if (rbio->faila == stripe || rbio->failb == stripe) {
+ atomic_inc(&rbio->bbio->error);
continue;
+ }
for (pagenr = 0; pagenr < nr_pages; pagenr++) {
struct page *p;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4662d92..8e16bca 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_ssd_spread:
btrfs_set_and_info(root, SSD_SPREAD,
"use spread ssd allocation scheme");
+ btrfs_set_opt(info->mount_opt, SSD);
break;
case Opt_nossd:
- btrfs_clear_and_info(root, NOSSD,
+ btrfs_set_and_info(root, NOSSD,
"not using ssd allocation scheme");
btrfs_clear_opt(info->mount_opt, SSD);
break;
@@ -1467,7 +1468,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
/* recover relocation */
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret)
goto restore;
@@ -1808,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
list_for_each_entry(dev, head, dev_list) {
if (dev->missing)
continue;
+ if (!dev->name)
+ continue;
if (!first_dev || dev->devid < first_dev->devid)
first_dev = dev;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index df39458..7869936 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -605,14 +605,37 @@ static void init_feature_attrs(void)
}
}
-static int add_device_membership(struct btrfs_fs_info *fs_info)
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device)
+{
+ struct hd_struct *disk;
+ struct kobject *disk_kobj;
+
+ if (!fs_info->device_dir_kobj)
+ return -EINVAL;
+
+ if (one_device) {
+ disk = one_device->bdev->bd_part;
+ disk_kobj = &part_to_dev(disk)->kobj;
+
+ sysfs_remove_link(fs_info->device_dir_kobj,
+ disk_kobj->name);
+ }
+
+ return 0;
+}
+
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device)
{
int error = 0;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *dev;
- fs_info->device_dir_kobj = kobject_create_and_add("devices",
+ if (!fs_info->device_dir_kobj)
+ fs_info->device_dir_kobj = kobject_create_and_add("devices",
&fs_info->super_kobj);
+
if (!fs_info->device_dir_kobj)
return -ENOMEM;
@@ -623,6 +646,9 @@ static int add_device_membership(struct btrfs_fs_info *fs_info)
if (!dev->bdev)
continue;
+ if (one_device && one_device != dev)
+ continue;
+
disk = dev->bdev->bd_part;
disk_kobj = &part_to_dev(disk)->kobj;
@@ -666,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
if (error)
goto failure;
- error = add_device_membership(fs_info);
+ error = btrfs_kobj_add_device(fs_info, NULL);
if (error)
goto failure;
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9ab5763..ac46df3 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -66,4 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
extern const char * const btrfs_feature_set_names[3];
extern struct kobj_type space_info_ktype;
extern struct kobj_type btrfs_raid_ktype;
+int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device);
+int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
+ struct btrfs_device *one_device);
#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 511839c..5f379af 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
bool reloc_reserved = false;
int ret;
+ /* Send isn't supposed to start transactions. */
+ ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB);
+
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
- if (current->journal_info &&
- current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
+ if (current->journal_info) {
WARN_ON(type & TRANS_EXTWRITERS);
h = current->journal_info;
h->use_count++;
@@ -491,6 +493,7 @@ again:
smp_mb();
if (cur_trans->state >= TRANS_STATE_BLOCKED &&
may_wait_transaction(root, type)) {
+ current->journal_info = h;
btrfs_commit_transaction(h, root);
goto again;
}
@@ -1615,11 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_run_delayed_items(trans, root);
- /*
- * running the delayed items may have added new refs. account
- * them now so that they hinder processing of more delayed refs
- * as little as possible.
- */
if (ret)
return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c83b242..6cb82f6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,7 @@
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
+#include "sysfs.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -554,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* This is ok to do without rcu read locked because we hold the
* uuid mutex so nothing we touch in here is going to disappear.
*/
- name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
- if (!name) {
- kfree(device);
- goto error;
+ if (orig_dev->name) {
+ name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ if (!name) {
+ kfree(device);
+ goto error;
+ }
+ rcu_assign_pointer(device->name, name);
}
- rcu_assign_pointer(device->name, name);
list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
@@ -1677,8 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
if (device->bdev == root->fs_info->fs_devices->latest_bdev)
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
- if (device->bdev)
+ if (device->bdev) {
device->fs_devices->open_devices--;
+ /* remove sysfs entry */
+ btrfs_kobj_rm_device(root->fs_info, device);
+ }
call_rcu(&device->rcu, free_device);
@@ -2143,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
btrfs_set_super_num_devices(root->fs_info->super_copy,
total_bytes + 1);
+
+ /* add sysfs device entry */
+ btrfs_kobj_add_device(root->fs_info, device);
+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
+ char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
ret = init_first_rw_device(trans, root, device);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -2156,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_abort_transaction(trans, root, ret);
goto error_trans;
}
+
+ /* Sprouting would change fsid of the mounted root,
+ * so rename the fsid on the sysfs
+ */
+ snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
+ root->fs_info->fsid);
+ if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
+ goto error_trans;
} else {
ret = btrfs_add_device(trans, root, device);
if (ret) {
@@ -2205,6 +2224,7 @@ error_trans:
unlock_chunks(root);
btrfs_end_transaction(trans, root);
rcu_string_free(device->name);
+ btrfs_kobj_rm_device(root->fs_info, device);
kfree(device);
error:
blkdev_put(bdev, FMODE_EXCL);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 4f19631..b67d8fc 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws,
if (workspace->def_strm.total_in > 8192 &&
workspace->def_strm.total_in <
workspace->def_strm.total_out) {
- ret = -EIO;
+ ret = -E2BIG;
goto out;
}
/* we need another page for writing out. Test this
diff --git a/fs/coredump.c b/fs/coredump.c
index 0b2528f..a93f7e6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(nr < 0))
return nr;
- tsk->flags = PF_DUMPCORE;
+ tsk->flags |= PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 98040ba..17e39b0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -71,7 +71,6 @@ struct dio_submit {
been performed at the start of a
write */
int pages_in_io; /* approximate total IO pages */
- size_t size; /* total request size (doesn't change)*/
sector_t block_in_file; /* Current offset into the underlying
file in dio_block units. */
unsigned blocks_available; /* At block_in_file. changes */
@@ -198,9 +197,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
* L1 cache.
*/
static inline struct page *dio_get_page(struct dio *dio,
- struct dio_submit *sdio, size_t *from, size_t *to)
+ struct dio_submit *sdio)
{
- int n;
if (dio_pages_present(sdio) == 0) {
int ret;
@@ -209,10 +207,7 @@ static inline struct page *dio_get_page(struct dio *dio,
return ERR_PTR(ret);
BUG_ON(dio_pages_present(sdio) == 0);
}
- n = sdio->head++;
- *from = n ? 0 : sdio->from;
- *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
- return dio->pages[n];
+ return dio->pages[sdio->head];
}
/**
@@ -911,11 +906,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
while (sdio->block_in_file < sdio->final_block_in_request) {
struct page *page;
size_t from, to;
- page = dio_get_page(dio, sdio, &from, &to);
+
+ page = dio_get_page(dio, sdio);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
+ from = sdio->head ? 0 : sdio->from;
+ to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+ sdio->head++;
while (from < to) {
unsigned this_chunk_bytes; /* # of bytes mapped */
@@ -1104,7 +1103,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
unsigned blkbits = i_blkbits;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
- loff_t end = offset + iov_iter_count(iter);
+ size_t count = iov_iter_count(iter);
+ loff_t end = offset + count;
struct dio *dio;
struct dio_submit sdio = { 0, };
struct buffer_head map_bh = { 0, };
@@ -1287,10 +1287,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
*/
BUG_ON(retval == -EIOCBQUEUED);
if (dio->is_async && retval == 0 && dio->result &&
- ((rw == READ) || (dio->result == sdio.size)))
+ (rw == READ || dio->result == count))
retval = -EIOCBQUEUED;
-
- if (retval != -EIOCBQUEUED)
+ else
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0762d14..fca3820 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -194,7 +194,16 @@ static void ext4_init_block_bitmap(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
@@ -359,6 +368,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
{
ext4_fsblk_t blk;
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
if (buffer_verified(bh))
return;
@@ -369,6 +379,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
block_group, blk);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
@@ -376,6 +389,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
desc, bh))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3f5c188..0b7e28e 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -966,10 +966,10 @@ retry:
continue;
}
- if (ei->i_es_lru_nr == 0 || ei == locked_ei)
+ if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
+ !write_trylock(&ei->i_es_lock))
continue;
- write_lock(&ei->i_es_lock);
shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
if (ei->i_es_lru_nr == 0)
list_del_init(&ei->i_es_lru);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee59a6..5b87fc3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -71,6 +71,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct ext4_group_desc *gdp)
{
struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks and inodes use to prevent
@@ -78,7 +79,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return 0;
}
@@ -116,6 +126,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
struct ext4_group_info *grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
@@ -185,6 +196,12 @@ verify:
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
"inode_bitmap = %llu", block_group, bitmap_blk);
grp = ext4_get_group_info(sb, block_group);
+ if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, desc);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return NULL;
}
@@ -321,6 +338,12 @@ out:
fatal = err;
} else {
ext4_error(sb, "bit already cleared for inode %lu", ino);
+ if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+ int count;
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
}
@@ -851,6 +874,13 @@ got:
goto out;
}
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
/* We may have to initialize the block bitmap if it isn't already */
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -887,13 +917,6 @@ got:
}
}
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, group_desc_bh);
- if (err) {
- ext4_std_error(sb, err);
- goto out;
- }
-
/* Update the relevant bg descriptor fields */
if (ext4_has_group_desc_csum(sb)) {
int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 8a57e9f..fd69da1 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return 0;
failed:
for (; i >= 0; i--) {
- if (i != indirect_blks && branch[i].bh)
+ /*
+ * We want to ext4_forget() only freshly allocated indirect
+ * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
+ * buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called.
+ */
+ if (i > 0 && i != indirect_blks && branch[i].bh)
ext4_forget(handle, 1, inode, branch[i].bh,
branch[i].bh->b_blocknr);
ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1310,16 +1316,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
blk = *i_data;
if (level > 0) {
ext4_lblk_t first2;
+ ext4_lblk_t count2;
+
bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
if (!bh) {
EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
"Read failure");
return -EIO;
}
- first2 = (first > offset) ? first - offset : 0;
+ if (first > offset) {
+ first2 = first - offset;
+ count2 = count;
+ } else {
+ first2 = 0;
+ count2 = count - (offset - first);
+ }
ret = free_hole_blocks(handle, inode, bh,
(__le32 *)bh->b_data, level - 1,
- first2, count - offset,
+ first2, count2,
inode->i_sb->s_blocksize >> 2);
if (ret) {
brelse(bh);
@@ -1329,8 +1343,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
if (level == 0 ||
(bh && all_zeroes((__le32 *)bh->b_data,
(__le32 *)bh->b_data + addr_per_block))) {
- ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
- *i_data = 0;
+ ext4_free_data(handle, inode, parent_bh,
+ i_data, i_data + 1);
}
brelse(bh);
bh = NULL;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59e3162..2dcb936 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -722,6 +722,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
ext4_grpblk_t first;
@@ -751,14 +752,17 @@ void ext4_mb_generate_buddy(struct super_block *sb,
if (free != grp->bb_free) {
ext4_grp_locked_error(sb, group, 0, 0,
- "%u clusters in bitmap, %u in gd; "
- "block bitmap corrupt.",
+ "block bitmap and bg descriptor "
+ "inconsistent: %u vs %u free clusters",
free, grp->bb_free);
/*
* If we intend to continue, we consider group descriptor
* corrupt and update bb_free using bitmap value
*/
grp->bb_free = free;
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
}
mb_set_largest_free_order(sb, grp);
@@ -1431,6 +1435,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
if (unlikely(block != -1)) {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1441,6 +1446,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
"freeing already freed block "
"(bit %u); block bitmap corrupt.",
block);
+ if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ e4b->bd_info->bb_free);
/* Mark the block group as corrupt. */
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&e4b->bd_info->bb_state);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b9b9aab..6df7bc6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1525,8 +1525,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
sbi->s_max_batch_time = arg;
} else if (token == Opt_min_batch_time) {
sbi->s_min_batch_time = arg;
@@ -2809,10 +2807,11 @@ static void print_daily_error_info(unsigned long arg)
es = sbi->s_es;
if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ /* fsck newer than v1.41.13 is needed to clean this condition. */
+ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
@@ -2826,7 +2825,7 @@ static void print_daily_error_info(unsigned long arg)
printk("\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
@@ -3880,38 +3879,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
-
- /*
- * set up enough so that it can read an inode,
- * and create new inode for buddy allocator
- */
- sbi->s_gdb_count = db_count;
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- sb->s_op = &ext4_sops;
- else
- sb->s_op = &ext4_nojournal_sops;
-
- ext4_ext_init(sb);
- err = ext4_mb_init(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
- err);
- goto failed_mount2;
- }
-
if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- goto failed_mount2a;
+ goto failed_mount2;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
if (!ext4_fill_flex_info(sb)) {
ext4_msg(sb, KERN_ERR,
"unable to initialize "
"flex_bg meta info!");
- goto failed_mount2a;
+ goto failed_mount2;
}
+ sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock);
@@ -3946,6 +3926,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_extent_max_zeroout_kb = 32;
+ /*
+ * set up enough so that it can read an inode
+ */
+ if (!test_opt(sb, NOLOAD) &&
+ EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+ sb->s_op = &ext4_sops;
+ else
+ sb->s_op = &ext4_nojournal_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_QUOTA
@@ -4135,13 +4123,21 @@ no_journal:
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
"reserved pool", ext4_calculate_resv_clusters(sb));
- goto failed_mount5;
+ goto failed_mount4a;
}
err = ext4_setup_system_zone(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system "
"zone (%d)", err);
+ goto failed_mount4a;
+ }
+
+ ext4_ext_init(sb);
+ err = ext4_mb_init(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+ err);
goto failed_mount5;
}
@@ -4218,8 +4214,11 @@ failed_mount8:
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
- ext4_release_system_zone(sb);
+ ext4_mb_release(sb);
failed_mount5:
+ ext4_ext_release(sb);
+ ext4_release_system_zone(sb);
+failed_mount4a:
dput(sb->s_root);
sb->s_root = NULL;
failed_mount4:
@@ -4243,14 +4242,11 @@ failed_mount3:
percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
-failed_mount2a:
- ext4_mb_release(sb);
failed_mount2:
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
ext4_kvfree(sbi->s_group_desc);
failed_mount:
- ext4_ext_release(sb);
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
if (sbi->s_proc) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0924521..f8cf619 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -608,8 +608,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create, bool fiemap)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
@@ -637,7 +637,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
err = 0;
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR)
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
@@ -671,7 +671,7 @@ get_next:
err = 0;
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR)
+ if (dn.data_blkaddr == NEW_ADDR && !fiemap)
goto put_out;
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -708,10 +708,23 @@ out:
return err;
}
+static int get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, false);
+}
+
+static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ return __get_data_block(inode, iblock, bh_result, create, true);
+}
+
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);
+ return generic_block_fiemap(inode, fieinfo,
+ start, len, get_data_block_fiemap);
}
static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 966acb0..a4addd7 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -376,11 +376,11 @@ static struct page *init_inode_metadata(struct inode *inode,
put_error:
f2fs_put_page(page, 1);
+error:
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0);
remove_dirty_dir_inode(inode);
-error:
remove_inode_page(inode);
return ERR_PTR(err);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e51c732..58df97e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -342,9 +342,6 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
- struct list_head wblist_head; /* list of under-writeback pages */
- spinlock_t wblist_lock; /* lock for checkpoint */
-
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -644,7 +641,8 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
*/
static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
- WARN_ON((nid >= NM_I(sbi)->max_nid));
+ if (unlikely(nid < F2FS_ROOT_INO(sbi)))
+ return -EINVAL;
if (unlikely(nid >= NM_I(sbi)->max_nid))
return -EINVAL;
return 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c58e330..7d8b962 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -659,16 +659,19 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
off_start = offset & (PAGE_CACHE_SIZE - 1);
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ f2fs_lock_op(sbi);
+
for (index = pg_start; index <= pg_end; index++) {
struct dnode_of_data dn;
- f2fs_lock_op(sbi);
+ if (index == pg_end && !off_end)
+ goto noalloc;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_reserve_block(&dn, index);
- f2fs_unlock_op(sbi);
if (ret)
break;
-
+noalloc:
if (pg_start == pg_end)
new_size = offset + len;
else if (index == pg_start && off_start)
@@ -683,8 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
i_size_read(inode) < new_size) {
i_size_write(inode, new_size);
mark_inode_dirty(inode);
- f2fs_write_inode(inode, NULL);
+ update_inode_page(inode);
}
+ f2fs_unlock_op(sbi);
return ret;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index adc622c..2cf6962 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -78,6 +78,7 @@ static int do_read_inode(struct inode *inode)
if (check_nid_range(sbi, inode->i_ino)) {
f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
(unsigned long) inode->i_ino);
+ WARN_ON(1);
return -EINVAL;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9138c32..a6bdddc 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -417,9 +417,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
- down_write(&F2FS_I(old_inode)->i_sem);
- F2FS_I(old_inode)->i_pino = new_dir->i_ino;
- up_write(&F2FS_I(old_inode)->i_sem);
new_inode->i_ctime = CURRENT_TIME;
down_write(&F2FS_I(new_inode)->i_sem);
@@ -448,6 +445,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
+ down_write(&F2FS_I(old_inode)->i_sem);
+ file_lost_pino(old_inode);
+ up_write(&F2FS_I(old_inode)->i_sem);
+
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
@@ -457,9 +458,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
- down_write(&F2FS_I(old_inode)->i_sem);
- F2FS_I(old_inode)->i_pino = new_dir->i_ino;
- up_write(&F2FS_I(old_inode)->i_sem);
update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
@@ -474,7 +472,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
put_out_dir:
- f2fs_put_page(new_page, 1);
+ kunmap(new_page);
+ f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9dfb9a0..4b697cc 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -42,6 +42,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f25f0e0..d04613d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -272,14 +272,15 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return -ENOMEM;
spin_lock_init(&fcc->issue_lock);
init_waitqueue_head(&fcc->flush_wait_queue);
+ sbi->sm_info->cmd_control_info = fcc;
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
return err;
}
- sbi->sm_info->cmd_control_info = fcc;
return err;
}
@@ -1885,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
/* init sm info */
sbi->sm_info = sm_info;
- INIT_LIST_HEAD(&sm_info->wblist_head);
- spin_lock_init(&sm_info->wblist_lock);
sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b2b1863..8f96d93 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -689,9 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
- if (unlikely(ino < F2FS_ROOT_INO(sbi)))
- return ERR_PTR(-ESTALE);
- if (unlikely(ino >= NM_I(sbi)->max_nid))
+ if (check_nid_range(sbi, ino))
return ERR_PTR(-ESTALE);
/*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 098f97b..ca88731 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
unsigned long seglen;
unsigned long addr;
struct page *pg;
- void *mapaddr;
- void *buf;
unsigned len;
+ unsigned offset;
unsigned move_pages:1;
};
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
if (cs->currbuf) {
struct pipe_buffer *buf = cs->currbuf;
- if (!cs->write) {
- kunmap_atomic(cs->mapaddr);
- } else {
- kunmap_atomic(cs->mapaddr);
+ if (cs->write)
buf->len = PAGE_SIZE - cs->len;
- }
cs->currbuf = NULL;
- cs->mapaddr = NULL;
- } else if (cs->mapaddr) {
- kunmap_atomic(cs->mapaddr);
+ } else if (cs->pg) {
if (cs->write) {
flush_dcache_page(cs->pg);
set_page_dirty_lock(cs->pg);
}
put_page(cs->pg);
- cs->mapaddr = NULL;
}
+ cs->pg = NULL;
}
/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
*/
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
- unsigned long offset;
+ struct page *page;
int err;
unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
- cs->mapaddr = kmap_atomic(buf->page);
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
cs->len = buf->len;
- cs->buf = cs->mapaddr + buf->offset;
cs->pipebufs++;
cs->nr_segs--;
} else {
- struct page *page;
-
if (cs->nr_segs == cs->pipe->buffers)
return -EIO;
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
buf->len = 0;
cs->currbuf = buf;
- cs->mapaddr = kmap_atomic(page);
- cs->buf = cs->mapaddr;
+ cs->pg = page;
+ cs->offset = 0;
cs->len = PAGE_SIZE;
cs->pipebufs++;
cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->iov++;
cs->nr_segs--;
}
- err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
if (err < 0)
return err;
BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap_atomic(cs->pg);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->pg = page;
+ cs->offset = cs->addr % PAGE_SIZE;
+ cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
cs->seglen -= cs->len;
cs->addr += cs->len;
}
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
unsigned ncpy = min(*size, cs->len);
if (val) {
+ void *pgaddr = kmap_atomic(cs->pg);
+ void *buf = pgaddr + cs->offset;
+
if (cs->write)
- memcpy(cs->buf, *val, ncpy);
+ memcpy(buf, *val, ncpy);
else
- memcpy(*val, cs->buf, ncpy);
+ memcpy(*val, buf, ncpy);
+
+ kunmap_atomic(pgaddr);
*val += ncpy;
}
*size -= ncpy;
cs->len -= ncpy;
- cs->buf += ncpy;
+ cs->offset += ncpy;
return ncpy;
}
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
out_fallback_unlock:
unlock_page(newpage);
out_fallback:
- cs->mapaddr = kmap_atomic(buf->page);
- cs->buf = cs->mapaddr + buf->offset;
+ cs->pg = buf->page;
+ cs->offset = buf->offset;
err = lock_request(cs->fc, cs->req);
if (err)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4219835..0c60482 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
goto invalid;
- else if (fuse_dentry_time(entry) < get_jiffies_64()) {
+ else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+ (flags & LOOKUP_REVAL)) {
int err;
struct fuse_entry_out outarg;
struct fuse_req *req;
@@ -814,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
return err;
}
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
- struct inode *newdir, struct dentry *newent)
-{
- return fuse_rename_common(olddir, oldent, newdir, newent, 0,
- FUSE_RENAME, sizeof(struct fuse_rename_in));
-}
-
static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
struct inode *newdir, struct dentry *newent,
unsigned int flags)
@@ -831,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
return -EINVAL;
- if (fc->no_rename2 || fc->minor < 23)
- return -EINVAL;
+ if (flags) {
+ if (fc->no_rename2 || fc->minor < 23)
+ return -EINVAL;
- err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
- FUSE_RENAME2, sizeof(struct fuse_rename2_in));
- if (err == -ENOSYS) {
- fc->no_rename2 = 1;
- err = -EINVAL;
+ err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+ FUSE_RENAME2,
+ sizeof(struct fuse_rename2_in));
+ if (err == -ENOSYS) {
+ fc->no_rename2 = 1;
+ err = -EINVAL;
+ }
+ } else {
+ err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
+ FUSE_RENAME,
+ sizeof(struct fuse_rename_in));
}
+
return err;
+}
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ return fuse_rename2(olddir, oldent, newdir, newent, 0);
}
static int fuse_link(struct dentry *entry, struct inode *newdir,
@@ -985,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
int err;
bool r;
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
err = fuse_do_getattr(inode, stat, file);
} else {
@@ -1171,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
- if (fi->i_time < get_jiffies_64()) {
+ if (time_before64(fi->i_time, get_jiffies_64())) {
refreshed = true;
err = fuse_perm_getattr(inode, mask);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6e16dad..40ac262 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1687,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
error = -EIO;
req->ff = fuse_write_file_get(fc, fi);
if (!req->ff)
- goto err_free;
+ goto err_nofile;
fuse_write_fill(req, req->ff, page_offset(page), 0);
@@ -1715,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
return 0;
+err_nofile:
+ __free_page(tmp_page);
err_free:
fuse_request_free(req);
err:
@@ -1955,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
- data.orig_pages = kzalloc(sizeof(struct page *) *
- FUSE_MAX_PAGES_PER_REQ,
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
+ sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 754dcf2..03246cd 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -478,6 +478,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), value);
+ d->user_id = make_kuid(current_user_ns(), uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), value);
+ d->group_id = make_kgid(current_user_ns(), uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
@@ -895,9 +907,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->writeback_cache = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
- else
- fc->sb->s_time_gran = 1000000000;
-
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -926,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
- FUSE_WRITEBACK_CACHE;
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1006,7 +1015,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
- if (!parse_fuse_opt((char *) data, &d, is_bdev))
+ if (!parse_fuse_opt(data, &d, is_bdev))
goto err;
if (is_bdev) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4fc3a30..26b3f95 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int error = 0;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
- flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
+ flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT;
mutex_lock(&fp->f_fl_mutex);
@@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
goto out;
flock_lock_file_wait(file,
&(struct file_lock){.fl_type = F_UNLCK});
- gfs2_glock_dq_wait(fl_gh);
+ gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c355f73..ee4e04f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
cachep = gfs2_glock_aspace_cachep;
else
cachep = gfs2_glock_cachep;
- gl = kmem_cache_alloc(cachep, GFP_KERNEL);
+ gl = kmem_cache_alloc(cachep, GFP_NOFS);
if (!gl)
return -ENOMEM;
memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
if (glops->go_flags & GLOF_LVB) {
- gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
+ gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
if (!gl->gl_lksb.sb_lvbptr) {
kmem_cache_free(cachep, gl);
return -ENOMEM;
@@ -1404,12 +1404,16 @@ __acquires(&lru_lock)
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
if (!spin_trylock(&gl->gl_spin)) {
+add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
atomic_inc(&lru_count);
continue;
}
+ if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ spin_unlock(&gl->gl_spin);
+ goto add_back_to_lru;
+ }
clear_bit(GLF_LRU, &gl->gl_flags);
- spin_unlock(&lru_lock);
gl->gl_lockref.count++;
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
@@ -1417,7 +1421,7 @@ __acquires(&lru_lock)
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gl->gl_lockref.count--;
spin_unlock(&gl->gl_spin);
- spin_lock(&lru_lock);
+ cond_resched_lock(&lru_lock);
}
}
@@ -1442,7 +1446,7 @@ static long gfs2_scan_glock_lru(int nr)
gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
/* Test for being demotable */
- if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
+ if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
freed++;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index fc11007..2ffc67d 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl)
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
- *
- * Normally we invlidate everything, but if we are moving into
+ *
+ * Normally we invalidate everything, but if we are moving into
* LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
* can keep hold of the metadata, since it won't have changed.
*
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 91f274d..4fafea1 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1036,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
new_size = old_size + RECOVER_SIZE_INC;
- submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
- result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+ submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
+ result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
if (!submit || !result) {
kfree(submit);
kfree(result);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index db629d1..f4cb9c0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
/**
* gfs2_free_extlen - Return extent length of free blocks
- * @rbm: Starting position
+ * @rrbm: Starting position
* @len: Max length to check
*
* Starting at the block specified by the rbm, see how many free blocks
@@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
/**
* gfs2_rlist_free - free a resource group list
- * @list: the list of resource groups
+ * @rlist: the list of resource groups
*
*/
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 38cfcf5..6f0f590 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1588,9 +1588,12 @@ int jbd2_journal_stop(handle_t *handle)
* to perform a synchronous write. We do this to detect the
* case where a single process is doing a stream of sync
* writes. No point in waiting for joiners in that case.
+ *
+ * Setting max_batch_time to 0 disables this completely.
*/
pid = current->pid;
- if (handle->h_sync && journal->j_last_sync_writer != pid) {
+ if (handle->h_sync && journal->j_last_sync_writer != pid &&
+ journal->j_max_batch_time) {
u64 commit_time, trans_time;
journal->j_last_sync_writer = pid;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f6..d895b4b 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
struct list_head files; /* goes through kernfs_open_file.list */
};
+/*
+ * kernfs_notify() may be called from any context and bounces notifications
+ * through a work item. To minimize space overhead in kernfs_node, the
+ * pending queue is implemented as a singly linked list of kernfs_nodes.
+ * The list is terminated with the self pointer so that whether a
+ * kernfs_node is on the list or not can be determined by testing the next
+ * pointer for NULL.
+ */
+#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
+
+static DEFINE_SPINLOCK(kernfs_notify_lock);
+static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+
static struct kernfs_open_file *kernfs_of(struct file *file)
{
return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}
-/**
- * kernfs_notify - notify a kernfs file
- * @kn: file to notify
- *
- * Notify @kn such that poll(2) on @kn wakes up.
- */
-void kernfs_notify(struct kernfs_node *kn)
+static void kernfs_notify_workfn(struct work_struct *work)
{
- struct kernfs_root *root = kernfs_root(kn);
+ struct kernfs_node *kn;
struct kernfs_open_node *on;
struct kernfs_super_info *info;
- unsigned long flags;
-
- if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+repeat:
+ /* pop one off the notify_list */
+ spin_lock_irq(&kernfs_notify_lock);
+ kn = kernfs_notify_list;
+ if (kn == KERNFS_NOTIFY_EOL) {
+ spin_unlock_irq(&kernfs_notify_lock);
return;
+ }
+ kernfs_notify_list = kn->attr.notify_next;
+ kn->attr.notify_next = NULL;
+ spin_unlock_irq(&kernfs_notify_lock);
/* kick poll */
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ spin_lock_irq(&kernfs_open_node_lock);
on = kn->attr.open;
if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
wake_up_interruptible(&on->poll);
}
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ spin_unlock_irq(&kernfs_open_node_lock);
/* kick fsnotify */
mutex_lock(&kernfs_mutex);
- list_for_each_entry(info, &root->supers, node) {
+ list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct inode *inode;
struct dentry *dentry;
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
}
mutex_unlock(&kernfs_mutex);
+ kernfs_put(kn);
+ goto repeat;
+}
+
+/**
+ * kernfs_notify - notify a kernfs file
+ * @kn: file to notify
+ *
+ * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any
+ * context.
+ */
+void kernfs_notify(struct kernfs_node *kn)
+{
+ static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
+ unsigned long flags;
+
+ if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+ return;
+
+ spin_lock_irqsave(&kernfs_notify_lock, flags);
+ if (!kn->attr.notify_next) {
+ kernfs_get(kn);
+ kn->attr.notify_next = kernfs_notify_list;
+ kernfs_notify_list = kn;
+ schedule_work(&kernfs_notify_work);
+ }
+ spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98..f973ae9 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
kernfs_put(root_kn);
}
+/**
+ * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
+ * @kernfs_root: the kernfs_root in question
+ * @ns: the namespace tag
+ *
+ * Pin the superblock so the superblock won't be destroyed in subsequent
+ * operations. This can be used to block ->kill_sb() which may be useful
+ * for kernfs users which dynamically manage superblocks.
+ *
+ * Returns NULL if there's no superblock associated to this kernfs_root, or
+ * -EINVAL if the superblock is being freed.
+ */
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
+{
+ struct kernfs_super_info *info;
+ struct super_block *sb = NULL;
+
+ mutex_lock(&kernfs_mutex);
+ list_for_each_entry(info, &root->supers, node) {
+ if (info->ns == ns) {
+ sb = info->sb;
+ if (!atomic_inc_not_zero(&info->sb->s_active))
+ sb = ERR_PTR(-EINVAL);
+ break;
+ }
+ }
+ mutex_unlock(&kernfs_mutex);
+ return sb;
+}
+
void __init kernfs_init(void)
{
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
diff --git a/fs/locks.c b/fs/locks.c
index 717fbc4..a6f5480 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -325,7 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
return -ENOMEM;
fl->fl_file = filp;
- fl->fl_owner = (fl_owner_t)filp;
+ fl->fl_owner = filp;
fl->fl_pid = current->tgid;
fl->fl_flags = FL_FLOCK;
fl->fl_type = type;
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
if (assign_type(fl, type) != 0)
return -EINVAL;
- fl->fl_owner = (fl_owner_t)current->files;
+ fl->fl_owner = current->files;
fl->fl_pid = current->tgid;
fl->fl_file = filp;
@@ -1155,7 +1155,6 @@ EXPORT_SYMBOL(posix_lock_file_wait);
int locks_mandatory_locked(struct file *file)
{
struct inode *inode = file_inode(file);
- fl_owner_t owner = current->files;
struct file_lock *fl;
/*
@@ -1165,7 +1164,8 @@ int locks_mandatory_locked(struct file *file)
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!IS_POSIX(fl))
continue;
- if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)
+ if (fl->fl_owner != current->files &&
+ fl->fl_owner != file)
break;
}
spin_unlock(&inode->i_lock);
@@ -1205,7 +1205,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
for (;;) {
if (filp) {
- fl.fl_owner = (fl_owner_t)filp;
+ fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
error = __posix_lock_file(inode, &fl, NULL);
if (!error)
@@ -1948,7 +1948,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
cmd = F_GETLK;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2103,7 +2103,7 @@ again:
cmd = F_SETLK;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2112,7 +2112,7 @@ again:
cmd = F_SETLKW;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW:
file_lock->fl_flags |= FL_SLEEP;
@@ -2170,7 +2170,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
cmd = F_GETLK64;
file_lock.fl_flags |= FL_OFDLCK;
- file_lock.fl_owner = (fl_owner_t)filp;
+ file_lock.fl_owner = filp;
}
error = vfs_test_lock(filp, &file_lock);
@@ -2242,7 +2242,7 @@ again:
cmd = F_SETLK64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
break;
case F_OFD_SETLKW:
error = -EINVAL;
@@ -2251,7 +2251,7 @@ again:
cmd = F_SETLKW64;
file_lock->fl_flags |= FL_OFDLCK;
- file_lock->fl_owner = (fl_owner_t)filp;
+ file_lock->fl_owner = filp;
/* Fallthrough */
case F_SETLKW64:
file_lock->fl_flags |= FL_SLEEP;
@@ -2324,11 +2324,11 @@ void locks_remove_file(struct file *filp)
if (!inode->i_flock)
return;
- locks_remove_posix(filp, (fl_owner_t)filp);
+ locks_remove_posix(filp, filp);
if (filp->f_op->flock) {
struct file_lock fl = {
- .fl_owner = (fl_owner_t)filp,
+ .fl_owner = filp,
.fl_pid = current->tgid,
.fl_file = filp,
.fl_flags = FL_FLOCK,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index bf166e3..187477d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -73,6 +73,7 @@
#include <linux/mbcache.h>
#include <linux/init.h>
#include <linux/blockgroup_lock.h>
+#include <linux/log2.h>
#ifdef MB_CACHE_DEBUG
# define mb_debug(f...) do { \
@@ -93,7 +94,7 @@
#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
-#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS)
+#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
diff --git a/fs/namei.c b/fs/namei.c
index 985c6f3..9eb787e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2256,9 +2256,10 @@ done:
goto out;
}
path->dentry = dentry;
- path->mnt = mntget(nd->path.mnt);
+ path->mnt = nd->path.mnt;
if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
return 1;
+ mntget(path->mnt);
follow_mount(path);
error = 0;
out:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8f98138..f11b9ee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
- bool do_destroy = true;
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
@@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
case NFS_IOHDR_NEED_COMMIT:
kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
- do_destroy = false;
}
nfs_unlock_and_release_request(req);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 82ddbf4..f415cbf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
const struct rpc_call_ops *, int, int);
+void nfs_free_request(struct nfs_page *req);
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 871d6ed..8f854dd 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = {
&posix_acl_default_xattr_handler,
NULL,
};
+
+static int
+nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
+ size_t size, ssize_t *result)
+{
+ struct posix_acl *acl;
+ char *p = data + *result;
+
+ acl = get_acl(inode, type);
+ if (!acl)
+ return 0;
+
+ posix_acl_release(acl);
+
+ *result += strlen(name);
+ *result += 1;
+ if (!size)
+ return 0;
+ if (*result > size)
+ return -ERANGE;
+
+ strcpy(p, name);
+ return 0;
+}
+
+ssize_t
+nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ ssize_t result = 0;
+ int error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
+ POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ if (error)
+ return error;
+
+ error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
+ POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ if (error)
+ return error;
+ return result;
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e7daa42..f0afa29 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = {
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
- .listxattr = generic_listxattr,
+ .listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
@@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = {
.getattr = nfs_getattr,
.setattr = nfs_setattr,
#ifdef CONFIG_NFS_V3_ACL
- .listxattr = generic_listxattr,
+ .listxattr = nfs3_listxattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
.removexattr = generic_removexattr,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b6ee3a6..17fab89 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,8 +29,6 @@
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
-static void nfs_free_request(struct nfs_page *);
-
static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
p->npages = pagecount;
@@ -239,20 +237,28 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
WARN_ON_ONCE(prev == req);
if (!prev) {
+ /* a head request */
req->wb_head = req;
req->wb_this_page = req;
} else {
+ /* a subrequest */
WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
req->wb_head = prev->wb_head;
req->wb_this_page = prev->wb_this_page;
prev->wb_this_page = req;
+ /* All subrequests take a ref on the head request until
+ * nfs_page_group_destroy is called */
+ kref_get(&req->wb_head->wb_kref);
+
/* grab extra ref if head request has extra ref from
* the write/commit path to handle handoff between write
* and commit lists */
- if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+ set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
+ }
}
}
@@ -269,6 +275,10 @@ nfs_page_group_destroy(struct kref *kref)
struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
struct nfs_page *tmp, *next;
+ /* subrequests must release the ref on the head request */
+ if (req->wb_head != req)
+ nfs_release_request(req->wb_head);
+
if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
return;
@@ -394,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req)
*
* Note: Should never be called with the spinlock held!
*/
-static void nfs_free_request(struct nfs_page *req)
+void nfs_free_request(struct nfs_page *req)
{
WARN_ON_ONCE(req->wb_this_page != req);
@@ -925,7 +935,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
nfs_pageio_doio(desc);
if (desc->pg_error < 0)
return 0;
- desc->pg_moreio = 0;
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
@@ -972,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
desc->pg_count = 0;
desc->pg_base = 0;
desc->pg_recoalesce = 0;
+ desc->pg_moreio = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 98ff061..5e2f1030 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
+static void nfs_clear_request_commit(struct nfs_page *req);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -91,8 +92,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
+/*
+ * nfs_page_find_head_request_locked - find head request associated with @page
+ *
+ * must be called while holding the inode lock.
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
static struct nfs_page *
-nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
+nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
@@ -104,25 +112,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
- req = freq;
+ req = freq->wb_head;
break;
}
}
}
- if (req)
+ if (req) {
+ WARN_ON_ONCE(req->wb_head != req);
+
kref_get(&req->wb_kref);
+ }
return req;
}
-static struct nfs_page *nfs_page_find_request(struct page *page)
+/*
+ * nfs_page_find_head_request - find head request associated with @page
+ *
+ * returns matching head request with reference held, or NULL if not found.
+ */
+static struct nfs_page *nfs_page_find_head_request(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -274,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
-static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
+
+/* nfs_page_group_clear_bits
+ * @req - an nfs request
+ * clears all page group related bits from @req
+ */
+static void
+nfs_page_group_clear_bits(struct nfs_page *req)
+{
+ clear_bit(PG_TEARDOWN, &req->wb_flags);
+ clear_bit(PG_UNLOCKPAGE, &req->wb_flags);
+ clear_bit(PG_UPTODATE, &req->wb_flags);
+ clear_bit(PG_WB_END, &req->wb_flags);
+ clear_bit(PG_REMOVE, &req->wb_flags);
+}
+
+
+/*
+ * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req
+ *
+ * this is a helper function for nfs_lock_and_join_requests
+ *
+ * @inode - inode associated with request page group, must be holding inode lock
+ * @head - head request of page group, must be holding head lock
+ * @req - request that couldn't lock and needs to wait on the req bit lock
+ * @nonblock - if true, don't actually wait
+ *
+ * NOTE: this must be called holding page_group bit lock and inode spin lock
+ * and BOTH will be released before returning.
+ *
+ * returns 0 on success, < 0 on error.
+ */
+static int
+nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
+ struct nfs_page *req, bool nonblock)
+ __releases(&inode->i_lock)
+{
+ struct nfs_page *tmp;
+ int ret;
+
+ /* relinquish all the locks successfully grabbed this run */
+ for (tmp = head ; tmp != req; tmp = tmp->wb_this_page)
+ nfs_unlock_request(tmp);
+
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+
+ /* grab a ref on the request that will be waited on */
+ kref_get(&req->wb_kref);
+
+ nfs_page_group_unlock(head);
+ spin_unlock(&inode->i_lock);
+
+ /* release ref from nfs_page_find_head_request_locked */
+ nfs_release_request(head);
+
+ if (!nonblock)
+ ret = nfs_wait_on_request(req);
+ else
+ ret = -EAGAIN;
+ nfs_release_request(req);
+
+ return ret;
+}
+
+/*
+ * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
+ *
+ * @destroy_list - request list (using wb_this_page) terminated by @old_head
+ * @old_head - the old head of the list
+ *
+ * All subrequests must be locked and removed from all lists, so at this point
+ * they are only "active" in this function, and possibly in nfs_wait_on_request
+ * with a reference held by some other context.
+ */
+static void
+nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+ struct nfs_page *old_head)
+{
+ while (destroy_list) {
+ struct nfs_page *subreq = destroy_list;
+
+ destroy_list = (subreq->wb_this_page == old_head) ?
+ NULL : subreq->wb_this_page;
+
+ WARN_ON_ONCE(old_head != subreq->wb_head);
+
+ /* make sure old group is not used */
+ subreq->wb_head = subreq;
+ subreq->wb_this_page = subreq;
+
+ nfs_clear_request_commit(subreq);
+
+ /* subreq is now totally disconnected from page group or any
+ * write / commit lists. last chance to wake any waiters */
+ nfs_unlock_request(subreq);
+
+ if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ /* release ref on old head request */
+ nfs_release_request(old_head);
+
+ nfs_page_group_clear_bits(subreq);
+
+ /* release the PG_INODE_REF reference */
+ if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
+ nfs_release_request(subreq);
+ else
+ WARN_ON_ONCE(1);
+ } else {
+ WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
+ /* zombie requests have already released the last
+ * reference and were waiting on the rest of the
+ * group to complete. Since it's no longer part of a
+ * group, simply free the request */
+ nfs_page_group_clear_bits(subreq);
+ nfs_free_request(subreq);
+ }
+ }
+}
+
+/*
+ * nfs_lock_and_join_requests - join all subreqs to the head req and return
+ * a locked reference, cancelling any pending
+ * operations for this page.
+ *
+ * @page - the page used to lookup the "page group" of nfs_page structures
+ * @nonblock - if true, don't block waiting for request locks
+ *
+ * This function joins all sub requests to the head request by first
+ * locking all requests in the group, cancelling any pending operations
+ * and finally updating the head request to cover the whole range covered by
+ * the (former) group. All subrequests are removed from any write or commit
+ * lists, unlinked from the group and destroyed.
+ *
+ * Returns a locked, referenced pointer to the head request - which after
+ * this call is guaranteed to be the only request associated with the page.
+ * Returns NULL if no requests are found for @page, or a ERR_PTR if an
+ * error was encountered.
+ */
+static struct nfs_page *
+nfs_lock_and_join_requests(struct page *page, bool nonblock)
{
struct inode *inode = page_file_mapping(page)->host;
- struct nfs_page *req;
+ struct nfs_page *head, *subreq;
+ struct nfs_page *destroy_list = NULL;
+ unsigned int total_bytes;
int ret;
+try_again:
+ total_bytes = 0;
+
+ WARN_ON_ONCE(destroy_list);
+
spin_lock(&inode->i_lock);
- for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req))
- break;
- /* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request() will always
- * succeed provided that someone hasn't already marked the
- * request as dirty (in which case we don't care).
- */
+
+ /*
+ * A reference is taken only on the head request which acts as a
+ * reference to the whole page group - the group will not be destroyed
+ * until the head reference is released.
+ */
+ head = nfs_page_find_head_request_locked(NFS_I(inode), page);
+
+ if (!head) {
spin_unlock(&inode->i_lock);
- if (!nonblock)
- ret = nfs_wait_on_request(req);
- else
- ret = -EAGAIN;
- nfs_release_request(req);
- if (ret != 0)
+ return NULL;
+ }
+
+ /* lock each request in the page group */
+ nfs_page_group_lock(head);
+ subreq = head;
+ do {
+ /*
+ * Subrequests are always contiguous, non overlapping
+ * and in order. If not, it's a programming error.
+ */
+ WARN_ON_ONCE(subreq->wb_offset !=
+ (head->wb_offset + total_bytes));
+
+ /* keep track of how many bytes this group covers */
+ total_bytes += subreq->wb_bytes;
+
+ if (!nfs_lock_request(subreq)) {
+ /* releases page group bit lock and
+ * inode spin lock and all references */
+ ret = nfs_unroll_locks_and_wait(inode, head,
+ subreq, nonblock);
+
+ if (ret == 0)
+ goto try_again;
+
return ERR_PTR(ret);
- spin_lock(&inode->i_lock);
+ }
+
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* Now that all requests are locked, make sure they aren't on any list.
+ * Commit list removal accounting is done after locks are dropped */
+ subreq = head;
+ do {
+ nfs_list_remove_request(subreq);
+ subreq = subreq->wb_this_page;
+ } while (subreq != head);
+
+ /* unlink subrequests from head, destroy them later */
+ if (head->wb_this_page != head) {
+ /* destroy list will be terminated by head */
+ destroy_list = head->wb_this_page;
+ head->wb_this_page = head;
+
+ /* change head request to cover whole range that
+ * the former page group covered */
+ head->wb_bytes = total_bytes;
}
+
+ /*
+ * prepare head request to be added to new pgio descriptor
+ */
+ nfs_page_group_clear_bits(head);
+
+ /*
+ * some part of the group was still on the inode list - otherwise
+ * the group wouldn't be involved in async write.
+ * grab a reference for the head request, iff it needs one.
+ */
+ if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
+ kref_get(&head->wb_kref);
+
+ nfs_page_group_unlock(head);
+
+ /* drop lock to clear_request_commit the head req and clean up
+ * requests on destroy list */
spin_unlock(&inode->i_lock);
- return req;
+
+ nfs_destroy_unlinked_subrequests(destroy_list, head);
+
+ /* clean up commit list state */
+ nfs_clear_request_commit(head);
+
+ /* still holds ref on head from nfs_page_find_head_request_locked
+ * and still has lock on head from lock loop */
+ return head;
}
/*
@@ -316,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req;
int ret = 0;
- req = nfs_find_and_lock_request(page, nonblock);
+ req = nfs_lock_and_join_requests(page, nonblock);
if (!req)
goto out;
ret = PTR_ERR(req);
@@ -448,7 +674,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
- set_bit(PG_INODE_REF, &req->wb_flags);
+ /* this a head request for a page group - mark it as having an
+ * extra reference so sub groups can follow suit */
+ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -474,7 +702,9 @@ static void nfs_inode_remove_request(struct nfs_page *req)
nfsi->npages--;
spin_unlock(&inode->i_lock);
}
- nfs_release_request(req);
+
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ nfs_release_request(req);
}
static void
@@ -638,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
- bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -668,7 +897,6 @@ remove_req:
next:
nfs_unlock_request(req);
nfs_end_page_writeback(req);
- do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -769,7 +997,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(NFS_I(inode), page);
+ req = nfs_page_find_head_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
@@ -877,7 +1105,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
* dropped page.
*/
do {
- req = nfs_page_find_request(page);
+ req = nfs_page_find_head_request(page);
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
@@ -1569,27 +1797,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
struct nfs_page *req;
int ret = 0;
- for (;;) {
- wait_on_page_writeback(page);
- req = nfs_page_find_request(page);
- if (req == NULL)
- break;
- if (nfs_lock_request(req)) {
- nfs_clear_request_commit(req);
- nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
- nfs_unlock_and_release_request(req);
- break;
- }
- ret = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (ret < 0)
- break;
+ wait_on_page_writeback(page);
+
+ /* blocking call to cancel all requests and join to a single (head)
+ * request */
+ req = nfs_lock_and_join_requests(page, false);
+
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ } else if (req) {
+ /* all requests from this page have been cancelled by
+ * nfs_lock_and_join_requests, so just remove the head
+ * request from the inode / page_private pointer and
+ * release it */
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_and_release_request(req);
}
+
return ret;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6851b00..8f029db 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -617,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (create->cr_type) {
case NF4LNK:
- /* ugh! we have to null-terminate the linktext, or
- * vfs_symlink() will choke. it is always safe to
- * null-terminate by brute force, since at worst we
- * will overwrite the first byte of the create namelen
- * in the XDR buffer, which has already been extracted
- * during XDR decode.
- */
- create->cr_linkname[create->cr_linklen] = 0;
-
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
create->cr_linkname, create->cr_linklen,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 83baf2b..944275c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -600,7 +600,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
READ_BUF(4);
create->cr_linklen = be32_to_cpup(p++);
READ_BUF(create->cr_linklen);
- SAVEMEM(create->cr_linkname, create->cr_linklen);
+ /*
+ * The VFS will want a null-terminated string, and
+ * null-terminating in place isn't safe since this might
+ * end on a page boundary:
+ */
+ create->cr_linkname =
+ kmalloc(create->cr_linklen + 1, GFP_KERNEL);
+ if (!create->cr_linkname)
+ return nfserr_jukebox;
+ memcpy(create->cr_linkname, p, create->cr_linklen);
+ create->cr_linkname[create->cr_linklen] = '\0';
+ defer_free(argp, kfree, create->cr_linkname);
break;
case NF4BLK:
case NF4CHR:
@@ -2630,7 +2641,7 @@ nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
__be32 *p;
- p = xdr_reserve_space(xdr, 6);
+ p = xdr_reserve_space(xdr, 20);
if (!p)
return NULL;
*p++ = htonl(2);
@@ -2868,6 +2879,7 @@ again:
* return the conflicting open:
*/
if (conf->len) {
+ kfree(conf->data);
conf->len = 0;
conf->data = NULL;
goto again;
@@ -2880,6 +2892,7 @@ again:
if (conf->len) {
p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
p = xdr_encode_opaque(p, conf->data, conf->len);
+ kfree(conf->data);
} else { /* non - nfsv4 lock in conflict, no clientid nor owner */
p = xdr_encode_hyper(p, (u64)0); /* clientid */
*p++ = cpu_to_be32(0); /* length of owner name */
@@ -2896,7 +2909,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
else if (nfserr == nfserr_denied)
nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
- kfree(lock->lk_denied.ld_owner.data);
+
return nfserr;
}
@@ -3267,7 +3280,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
wire_count = htonl(maxcount);
write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
- xdr_truncate_encode(xdr, length_offset + 4 + maxcount);
+ xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
if (maxcount & 3)
write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
&zero, 4 - (maxcount&3));
diff --git a/fs/open.c b/fs/open.c
index 36662d0..d6fd3ac 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -263,11 +263,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EPERM;
/*
- * We can not allow to do any fallocate operation on an active
- * swapfile
+ * We cannot allow any fallocate operation on an active swapfile
*/
if (IS_SWAPFILE(inode))
- ret = -ETXTBSY;
+ return -ETXTBSY;
/*
* Revalidate the write permissions, in case security policy has
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9d231e9..bf2d03f 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_possible_cpus();
- char *buf;
- struct seq_file *m;
- int res;
+ size_t size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
-
- /* don't ask for more than the kmalloc() max size */
- if (size > KMALLOC_MAX_SIZE)
- size = KMALLOC_MAX_SIZE;
- buf = kmalloc(size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = single_open(file, show_stat, NULL);
- if (!res) {
- m = file->private_data;
- m->buf = buf;
- m->size = ksize(buf);
- } else
- kfree(buf);
- return res;
+ return single_open_size(file, show_stat, NULL, size);
}
static const struct file_operations proc_stat_operations = {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9cd5f63..7f30bdc 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
struct dquot *dquot;
unsigned long freed = 0;
+ spin_lock(&dq_list_lock);
head = free_dquots.prev;
while (head != &free_dquots && sc->nr_to_scan) {
dquot = list_entry(head, struct dquot, dq_free);
@@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
freed++;
head = free_dquots.prev;
}
+ spin_unlock(&dq_list_lock);
return freed;
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb..3857b72 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
m->count = m->size;
}
+static void *seq_buf_alloc(unsigned long size)
+{
+ void *buf;
+
+ buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf && size > PAGE_SIZE)
+ buf = vmalloc(size);
+ return buf;
+}
+
/**
* seq_open - initialize sequential file
* @file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
return 0;
}
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
return -ENOMEM;
}
@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset)
Eoverflow:
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
return !m->buf ? -ENOMEM : -EAGAIN;
}
@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* grab buffer if we didn't have one */
if (!m->buf) {
- m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
goto Enomem;
}
@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (m->count < m->size)
goto Fill;
m->op->stop(m, p);
- kfree(m->buf);
+ kvfree(m->buf);
m->count = 0;
- m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+ m->buf = seq_buf_alloc(m->size <<= 1);
if (!m->buf)
goto Enomem;
m->version = 0;
@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
int seq_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
- kfree(m->buf);
+ kvfree(m->buf);
kfree(m);
return 0;
}
@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
void *data, size_t size)
{
- char *buf = kmalloc(size, GFP_KERNEL);
+ char *buf = seq_buf_alloc(size);
int ret;
if (!buf)
return -ENOMEM;
ret = single_open(file, show, data);
if (ret) {
- kfree(buf);
+ kvfree(buf);
return ret;
}
((struct seq_file *)file->private_data)->buf = buf;
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff..c69e6d4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
/* wrap around? */
len = sizeof(*new_xattr) + size;
- if (len <= sizeof(*new_xattr))
+ if (len < sizeof(*new_xattr))
return NULL;
new_xattr = kmalloc(len, GFP_KERNEL);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 96175df..75c3fe5 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
}
-int
-__xfs_bmapi_allocate(
+static int
+xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
- if (flags & XFS_BMAPI_STACK_SWITCH)
- bma.stack_switch = 1;
-
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 38ba36e..b879ca5 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
-#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
- { XFS_BMAPI_CONVERT, "CONVERT" }, \
- { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
+ { XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 703b3ec..64731ef 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
}
/*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
- struct work_struct *work)
-{
- struct xfs_bmalloca *args = container_of(work,
- struct xfs_bmalloca, work);
- unsigned long pflags;
- unsigned long new_pflags = PF_FSTRANS;
-
- /*
- * we are in a transaction context here, but may also be doing work
- * in kswapd context, and hence we may need to inherit that state
- * temporarily to ensure that we don't block waiting for memory reclaim
- * in any way.
- */
- if (args->kswapd)
- new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
- current_set_flags_nested(&pflags, new_pflags);
-
- args->result = __xfs_bmapi_allocate(args);
- complete(args->done);
-
- current_restore_flags_nested(&pflags, new_pflags);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
- struct xfs_bmalloca *args)
-{
- DECLARE_COMPLETION_ONSTACK(done);
-
- if (!args->stack_switch)
- return __xfs_bmapi_allocate(args);
-
-
- args->done = &done;
- args->kswapd = current_is_kswapd();
- INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
- queue_work(xfs_alloc_wq, &args->work);
- wait_for_completion(&done);
- destroy_work_on_stack(&args->work);
- return args->result;
-}
-
-/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
* the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f722..2fdb72d 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
bool userdata;/* set if is user data */
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
- bool stack_switch;
- bool kswapd; /* allocation in kswapd context */
int flags;
struct completion *done;
struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
-int xfs_bmapi_allocate(struct xfs_bmalloca *args);
-int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index bf810c6..cf893bc 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -33,6 +33,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
+#include "xfs_alloc.h"
/*
* Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
* record (to be inserted into parent).
*/
STATIC int /* error */
-xfs_btree_split(
+__xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
return error;
}
+struct xfs_btree_split_args {
+ struct xfs_btree_cur *cur;
+ int level;
+ union xfs_btree_ptr *ptrp;
+ union xfs_btree_key *key;
+ struct xfs_btree_cur **curp;
+ int *stat; /* success/failure */
+ int result;
+ bool kswapd; /* allocation in kswapd context */
+ struct completion *done;
+ struct work_struct work;
+};
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_btree_split_worker(
+ struct work_struct *work)
+{
+ struct xfs_btree_split_args *args = container_of(work,
+ struct xfs_btree_split_args, work);
+ unsigned long pflags;
+ unsigned long new_pflags = PF_FSTRANS;
+
+ /*
+ * we are in a transaction context here, but may also be doing work
+ * in kswapd context, and hence we may need to inherit that state
+ * temporarily to ensure that we don't block waiting for memory reclaim
+ * in any way.
+ */
+ if (args->kswapd)
+ new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+ current_set_flags_nested(&pflags, new_pflags);
+
+ args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+ args->key, args->curp, args->stat);
+ complete(args->done);
+
+ current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int /* error */
+xfs_btree_split(
+ struct xfs_btree_cur *cur,
+ int level,
+ union xfs_btree_ptr *ptrp,
+ union xfs_btree_key *key,
+ struct xfs_btree_cur **curp,
+ int *stat) /* success/failure */
+{
+ struct xfs_btree_split_args args;
+ DECLARE_COMPLETION_ONSTACK(done);
+
+ if (cur->bc_btnum != XFS_BTNUM_BMAP)
+ return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
+
+ args.cur = cur;
+ args.level = level;
+ args.ptrp = ptrp;
+ args.key = key;
+ args.curp = curp;
+ args.stat = stat;
+ args.done = &done;
+ args.kswapd = current_is_kswapd();
+ INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+ queue_work(xfs_alloc_wq, &args.work);
+ wait_for_completion(&done);
+ destroy_work_on_stack(&args.work);
+ return args.result;
+}
+
+
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6c5eb4c..6d3ec2b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
- count_fsb,
- XFS_BMAPI_STACK_SWITCH,
+ count_fsb, 0,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index c3453b1..7703fa6 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
}
/*
- * GQUOTINO and PQUOTINO cannot be used together in versions
- * of superblock that do not have pquotino. from->sb_flags
- * tells us which quota is active and should be copied to
- * disk.
+ * GQUOTINO and PQUOTINO cannot be used together in versions of
+ * superblock that do not have pquotino. from->sb_flags tells us which
+ * quota is active and should be copied to disk. If neither are active,
+ * make sure we write NULLFSINO to the sb_gquotino field as a quota
+ * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
+ * bit is set.
+ *
+ * Note that we don't need to handle the sb_uquotino or sb_pquotino here
+ * as they do not require any translation. Hence the main sb field loop
+ * will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+ else {
+ /*
+ * We can't rely on just the fields being logged to tell us
+ * that it is safe to write NULLFSINO - we should only do that
+ * if quotas are not actually enabled. Hence only write
+ * NULLFSINO if both in-core quota inodes are NULL.
+ */
+ if (from->sb_gquotino == NULLFSINO &&
+ from->sb_pquotino == NULLFSINO)
+ to->sb_gquotino = cpu_to_be64(NULLFSINO);
+ }
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}
OpenPOWER on IntegriCloud