summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/Kconfig.binfmt4
-rw-r--r--fs/aio.c207
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/block_dev.c272
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent_io.c16
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/super.c26
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/buffer.c26
-rw-r--r--fs/ceph/dir.c24
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/cifs/cifsencrypt.c11
-rw-r--r--fs/cifs/cifssmb.c4
-rw-r--r--fs/cifs/connect.c26
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/coredump.c3
-rw-r--r--fs/dax.c10
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/dlm/ast.c2
-rw-r--r--fs/dlm/config.c2
-rw-r--r--fs/dlm/debug_fs.c2
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/dlm/lowcomms.c28
-rw-r--r--fs/dlm/main.c2
-rw-r--r--fs/dlm/netlink.c18
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/page-io.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/acl.c2
-rw-r--r--fs/f2fs/checkpoint.c38
-rw-r--r--fs/f2fs/data.c208
-rw-r--r--fs/f2fs/debug.c29
-rw-r--r--fs/f2fs/dir.c30
-rw-r--r--fs/f2fs/extent_cache.c2
-rw-r--r--fs/f2fs/f2fs.h199
-rw-r--r--fs/f2fs/file.c67
-rw-r--r--fs/f2fs/gc.c35
-rw-r--r--fs/f2fs/inline.c16
-rw-r--r--fs/f2fs/inode.c47
-rw-r--r--fs/f2fs/namei.c6
-rw-r--r--fs/f2fs/node.c230
-rw-r--r--fs/f2fs/node.h13
-rw-r--r--fs/f2fs/recovery.c46
-rw-r--r--fs/f2fs/segment.c240
-rw-r--r--fs/f2fs/segment.h28
-rw-r--r--fs/f2fs/shrinker.c10
-rw-r--r--fs/f2fs/super.c283
-rw-r--r--fs/f2fs/xattr.c4
-rw-r--r--fs/fs-writeback.c16
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c3
-rw-r--r--fs/gfs2/dir.c1
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/meta_io.c7
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/isofs/compress.c1
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd2/checkpoint.c2
-rw-r--r--fs/jbd2/commit.c9
-rw-r--r--fs/jbd2/journal.c15
-rw-r--r--fs/jbd2/revoke.c2
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/kernfs/inode.c4
-rw-r--r--fs/lockd/netns.h2
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/logfs/dev_bdev.c106
-rw-r--r--fs/mpage.c6
-rw-r--r--fs/nfs/callback.c2
-rw-r--r--fs/nfs/client.c3
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/netns.h2
-rw-r--r--fs/nfs/nfs4_fs.h7
-rw-r--r--fs/nfs/nfs4proc.c38
-rw-r--r--fs/nfs/nfs4session.c12
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs_common/grace.c2
-rw-r--r--fs/nfsd/netns.h2
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/nsfs.c2
-rw-r--r--fs/ntfs/aops.c1
-rw-r--r--fs/ntfs/dir.c2
-rw-r--r--fs/ntfs/logfile.c1
-rw-r--r--fs/ntfs/mft.c1
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/buffer_head_io.c1
-rw-r--r--fs/ocfs2/cluster/heartbeat.c4
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c11
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/journal.c4
-rw-r--r--fs/ocfs2/mmap.c3
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/refcounttree.c1
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/orangefs/inode.c1
-rw-r--r--fs/orangefs/orangefs-debugfs.c149
-rw-r--r--fs/orangefs/orangefs-mod.c6
-rw-r--r--fs/overlayfs/super.c6
-rw-r--r--fs/proc/array.c7
-rw-r--r--fs/proc/base.c31
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/inode.c37
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/root.c1
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/pstore/Kconfig2
-rw-r--r--fs/pstore/ftrace.c11
-rw-r--r--fs/pstore/inode.c15
-rw-r--r--fs/pstore/internal.h34
-rw-r--r--fs/pstore/platform.c5
-rw-r--r--fs/pstore/ram.c327
-rw-r--r--fs/pstore/ram_core.c27
-rw-r--r--fs/quota/netlink.c10
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/stree.c1
-rw-r--r--fs/splice.c9
-rw-r--r--fs/squashfs/block.c1
-rw-r--r--fs/udf/dir.c1
-rw-r--r--fs/udf/directory.c1
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/ufs/balloc.c1
-rw-r--r--fs/xattr.c22
-rw-r--r--fs/xfs/libxfs/xfs_defer.c17
-rw-r--r--fs/xfs/xfs_aops.c7
-rw-r--r--fs/xfs/xfs_buf.c2
144 files changed, 2194 insertions, 1176 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6181ad7..5ca1fb0 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -34,6 +34,7 @@
#include <linux/idr.h>
#include <linux/sched.h>
#include <linux/uio.h>
+#include <linux/bvec.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 4c09d93..b2f82cf 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -170,8 +170,8 @@ config BINFMT_MISC
You can do other nice things, too. Read the file
<file:Documentation/binfmt_misc.txt> to learn how to use this
- feature, <file:Documentation/java.txt> for information about how
- to include Java support. and <file:Documentation/mono.txt> for
+ feature, <file:Documentation/admin-guide/java.rst> for information about how
+ to include Java support. and <file:Documentation/admin-guide/mono.rst> for
information about how to include Mono-based .NET support.
To use binfmt_misc, you will need to mount it:
diff --git a/fs/aio.c b/fs/aio.c
index 1157e13..428484f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1078,6 +1078,17 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
unsigned tail, pos, head;
unsigned long flags;
+ if (kiocb->ki_flags & IOCB_WRITE) {
+ struct file *file = kiocb->ki_filp;
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ file_end_write(file);
+ }
+
/*
* Special case handling for sync iocbs:
* - events go directly into the iocb for fast handling
@@ -1392,122 +1403,106 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
-typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
-
-static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
- struct iovec **iovec,
- bool compat,
- struct iov_iter *iter)
+static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+ bool vectored, bool compat, struct iov_iter *iter)
{
+ void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
+ size_t len = iocb->aio_nbytes;
+
+ if (!vectored) {
+ ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
+ *iovec = NULL;
+ return ret;
+ }
#ifdef CONFIG_COMPAT
if (compat)
- return compat_import_iovec(rw,
- (struct compat_iovec __user *)buf,
- len, UIO_FASTIOV, iovec, iter);
+ return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
+ iter);
#endif
- return import_iovec(rw, (struct iovec __user *)buf,
- len, UIO_FASTIOV, iovec, iter);
+ return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
-/*
- * aio_run_iocb:
- * Performs the initial checks and io submission.
- */
-static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, size_t len, bool compat)
+static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+{
+ switch (ret) {
+ case -EIOCBQUEUED:
+ return ret;
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /*
+ * There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ /*FALLTHRU*/
+ default:
+ aio_complete(req, ret, 0);
+ return 0;
+ }
+}
+
+static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
+ bool compat)
{
struct file *file = req->ki_filp;
- ssize_t ret;
- int rw;
- fmode_t mode;
- rw_iter_op *iter_op;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ ssize_t ret;
- switch (opcode) {
- case IOCB_CMD_PREAD:
- case IOCB_CMD_PREADV:
- mode = FMODE_READ;
- rw = READ;
- iter_op = file->f_op->read_iter;
- goto rw_common;
-
- case IOCB_CMD_PWRITE:
- case IOCB_CMD_PWRITEV:
- mode = FMODE_WRITE;
- rw = WRITE;
- iter_op = file->f_op->write_iter;
- goto rw_common;
-rw_common:
- if (unlikely(!(file->f_mode & mode)))
- return -EBADF;
-
- if (!iter_op)
- return -EINVAL;
-
- if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
- ret = aio_setup_vectored_rw(rw, buf, len,
- &iovec, compat, &iter);
- else {
- ret = import_single_range(rw, buf, len, iovec, &iter);
- iovec = NULL;
- }
- if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos,
- iov_iter_count(&iter));
- if (ret < 0) {
- kfree(iovec);
- return ret;
- }
-
- if (rw == WRITE)
- file_start_write(file);
-
- ret = iter_op(req, &iter);
-
- if (rw == WRITE)
- file_end_write(file);
- kfree(iovec);
- break;
-
- case IOCB_CMD_FDSYNC:
- if (!file->f_op->aio_fsync)
- return -EINVAL;
-
- ret = file->f_op->aio_fsync(req, 1);
- break;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ return -EBADF;
+ if (unlikely(!file->f_op->read_iter))
+ return -EINVAL;
- case IOCB_CMD_FSYNC:
- if (!file->f_op->aio_fsync)
- return -EINVAL;
+ ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
+ if (ret)
+ return ret;
+ ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
+ if (!ret)
+ ret = aio_ret(req, file->f_op->read_iter(req, &iter));
+ kfree(iovec);
+ return ret;
+}
- ret = file->f_op->aio_fsync(req, 0);
- break;
+static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
+ bool compat)
+{
+ struct file *file = req->ki_filp;
+ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+ struct iov_iter iter;
+ ssize_t ret;
- default:
- pr_debug("EINVAL: no operation provided\n");
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ return -EBADF;
+ if (unlikely(!file->f_op->write_iter))
return -EINVAL;
- }
- if (ret != -EIOCBQUEUED) {
+ ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
+ if (ret)
+ return ret;
+ ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
+ if (!ret) {
+ req->ki_flags |= IOCB_WRITE;
+ file_start_write(file);
+ ret = aio_ret(req, file->f_op->write_iter(req, &iter));
/*
- * There's no easy way to restart the syscall since other AIO's
- * may be already running. Just fail this IO with EINTR.
+ * We release freeze protection in aio_complete(). Fool lockdep
+ * by telling it the lock got released so that it doesn't
+ * complain about held lock when we return to userspace.
*/
- if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
- ret == -ERESTARTNOHAND ||
- ret == -ERESTART_RESTARTBLOCK))
- ret = -EINTR;
- aio_complete(req, ret, 0);
+ __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
-
- return 0;
+ kfree(iovec);
+ return ret;
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
struct aio_kiocb *req;
+ struct file *file;
ssize_t ret;
/* enforce forwards compatibility on users */
@@ -1530,7 +1525,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->common.ki_filp = fget(iocb->aio_fildes);
+ req->common.ki_filp = file = fget(iocb->aio_fildes);
if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
@@ -1565,13 +1560,29 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
- ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
- (char __user *)(unsigned long)iocb->aio_buf,
- iocb->aio_nbytes,
- compat);
- if (ret)
- goto out_put_req;
+ get_file(file);
+ switch (iocb->aio_lio_opcode) {
+ case IOCB_CMD_PREAD:
+ ret = aio_read(&req->common, iocb, false, compat);
+ break;
+ case IOCB_CMD_PWRITE:
+ ret = aio_write(&req->common, iocb, false, compat);
+ break;
+ case IOCB_CMD_PREADV:
+ ret = aio_read(&req->common, iocb, true, compat);
+ break;
+ case IOCB_CMD_PWRITEV:
+ ret = aio_write(&req->common, iocb, true, compat);
+ break;
+ default:
+ pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+ ret = -EINVAL;
+ break;
+ }
+ fput(file);
+ if (ret && ret != -EIOCBQUEUED)
+ goto out_put_req;
return 0;
out_put_req:
put_reqs_available(ctx, 1);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2472af2..e6c1bd4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2204,7 +2204,9 @@ static int elf_core_dump(struct coredump_params *cprm)
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
- vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+ if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
+ goto end_coredump;
+ vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
if (!vma_filesz)
goto end_coredump;
@@ -2311,7 +2313,7 @@ end_coredump:
cleanup:
free_note_info(&info);
kfree(shdr4extnum);
- kfree(vma_filesz);
+ vfree(vma_filesz);
kfree(phdr4note);
kfree(elf);
out:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 05b5533..95acbd2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -30,6 +30,7 @@
#include <linux/cleancache.h>
#include <linux/dax.h>
#include <linux/badblocks.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -175,17 +176,273 @@ static struct inode *bdev_file_inode(struct file *file)
return file->f_mapping->host;
}
+static unsigned int dio_bio_write_op(struct kiocb *iocb)
+{
+ unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+
+ /* avoid the need for a I/O completion work item */
+ if (iocb->ki_flags & IOCB_DSYNC)
+ op |= REQ_FUA;
+ return op;
+}
+
+#define DIO_INLINE_BIO_VECS 4
+
+static void blkdev_bio_end_io_simple(struct bio *bio)
+{
+ struct task_struct *waiter = bio->bi_private;
+
+ WRITE_ONCE(bio->bi_private, NULL);
+ wake_up_process(waiter);
+}
+
static ssize_t
-blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
+ int nr_pages)
+{
+ struct file *file = iocb->ki_filp;
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
+ loff_t pos = iocb->ki_pos;
+ bool should_dirty = false;
+ struct bio bio;
+ ssize_t ret;
+ blk_qc_t qc;
+ int i;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ if (nr_pages <= DIO_INLINE_BIO_VECS)
+ vecs = inline_vecs;
+ else {
+ vecs = kmalloc(nr_pages * sizeof(struct bio_vec), GFP_KERNEL);
+ if (!vecs)
+ return -ENOMEM;
+ }
+
+ bio_init(&bio, vecs, nr_pages);
+ bio.bi_bdev = bdev;
+ bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_private = current;
+ bio.bi_end_io = blkdev_bio_end_io_simple;
+
+ ret = bio_iov_iter_get_pages(&bio, iter);
+ if (unlikely(ret))
+ return ret;
+ ret = bio.bi_iter.bi_size;
+
+ if (iov_iter_rw(iter) == READ) {
+ bio.bi_opf = REQ_OP_READ;
+ if (iter_is_iovec(iter))
+ should_dirty = true;
+ } else {
+ bio.bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(ret);
+ }
+
+ qc = submit_bio(&bio);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(bio.bi_private))
+ break;
+ if (!(iocb->ki_flags & IOCB_HIPRI) ||
+ !blk_mq_poll(bdev_get_queue(bdev), qc))
+ io_schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ bio_for_each_segment_all(bvec, &bio, i) {
+ if (should_dirty && !PageCompound(bvec->bv_page))
+ set_page_dirty_lock(bvec->bv_page);
+ put_page(bvec->bv_page);
+ }
+
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
+ if (unlikely(bio.bi_error))
+ return bio.bi_error;
+ iocb->ki_pos += ret;
+ return ret;
+}
+
+struct blkdev_dio {
+ union {
+ struct kiocb *iocb;
+ struct task_struct *waiter;
+ };
+ size_t size;
+ atomic_t ref;
+ bool multi_bio : 1;
+ bool should_dirty : 1;
+ bool is_sync : 1;
+ struct bio bio;
+};
+
+static struct bio_set *blkdev_dio_pool __read_mostly;
+
+static void blkdev_bio_end_io(struct bio *bio)
+{
+ struct blkdev_dio *dio = bio->bi_private;
+ bool should_dirty = dio->should_dirty;
+
+ if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
+ if (bio->bi_error && !dio->bio.bi_error)
+ dio->bio.bi_error = bio->bi_error;
+ } else {
+ if (!dio->is_sync) {
+ struct kiocb *iocb = dio->iocb;
+ ssize_t ret = dio->bio.bi_error;
+
+ if (likely(!ret)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ }
+
+ dio->iocb->ki_complete(iocb, ret, 0);
+ bio_put(&dio->bio);
+ } else {
+ struct task_struct *waiter = dio->waiter;
+
+ WRITE_ONCE(dio->waiter, NULL);
+ wake_up_process(waiter);
+ }
+ }
+
+ if (should_dirty) {
+ bio_check_pages_dirty(bio);
+ } else {
+ struct bio_vec *bvec;
+ int i;
+
+ bio_for_each_segment_all(bvec, bio, i)
+ put_page(bvec->bv_page);
+ bio_put(bio);
+ }
+}
+
+static ssize_t
+__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
{
struct file *file = iocb->ki_filp;
struct inode *inode = bdev_file_inode(file);
+ struct block_device *bdev = I_BDEV(inode);
+ struct blkdev_dio *dio;
+ struct bio *bio;
+ bool is_read = (iov_iter_rw(iter) == READ);
+ loff_t pos = iocb->ki_pos;
+ blk_qc_t qc = BLK_QC_T_NONE;
+ int ret;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
+ bio_get(bio); /* extra ref for the completion handler */
+
+ dio = container_of(bio, struct blkdev_dio, bio);
+ dio->is_sync = is_sync_kiocb(iocb);
+ if (dio->is_sync)
+ dio->waiter = current;
+ else
+ dio->iocb = iocb;
+
+ dio->size = 0;
+ dio->multi_bio = false;
+ dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+
+ for (;;) {
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_private = dio;
+ bio->bi_end_io = blkdev_bio_end_io;
+
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
+ bio->bi_error = ret;
+ bio_endio(bio);
+ break;
+ }
+
+ if (is_read) {
+ bio->bi_opf = REQ_OP_READ;
+ if (dio->should_dirty)
+ bio_set_pages_dirty(bio);
+ } else {
+ bio->bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(bio->bi_iter.bi_size);
+ }
+
+ dio->size += bio->bi_iter.bi_size;
+ pos += bio->bi_iter.bi_size;
+
+ nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
+ if (!nr_pages) {
+ qc = submit_bio(bio);
+ break;
+ }
+
+ if (!dio->multi_bio) {
+ dio->multi_bio = true;
+ atomic_set(&dio->ref, 2);
+ } else {
+ atomic_inc(&dio->ref);
+ }
- return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter,
- blkdev_get_block, NULL, NULL,
- DIO_SKIP_DIO_COUNT);
+ submit_bio(bio);
+ bio = bio_alloc(GFP_KERNEL, nr_pages);
+ }
+
+ if (!dio->is_sync)
+ return -EIOCBQUEUED;
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(dio->waiter))
+ break;
+
+ if (!(iocb->ki_flags & IOCB_HIPRI) ||
+ !blk_mq_poll(bdev_get_queue(bdev), qc))
+ io_schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ ret = dio->bio.bi_error;
+ if (likely(!ret)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ }
+
+ bio_put(&dio->bio);
+ return ret;
}
+static ssize_t
+blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+ int nr_pages;
+
+ nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
+ if (!nr_pages)
+ return 0;
+ if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
+ return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+
+ return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
+}
+
+static __init int blkdev_init(void)
+{
+ blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+ if (!blkdev_dio_pool)
+ return -ENOMEM;
+ return 0;
+}
+module_init(blkdev_init);
+
int __sync_blockdev(struct block_device *bdev, int wait)
{
if (!bdev)
@@ -1950,6 +2207,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
spin_lock(&blockdev_superblock->s_inode_list_lock);
list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
struct address_space *mapping = inode->i_mapping;
+ struct block_device *bdev;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
@@ -1970,8 +2228,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
*/
iput(old_inode);
old_inode = inode;
+ bdev = I_BDEV(inode);
- func(I_BDEV(inode), arg);
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_openers)
+ func(bdev, arg);
+ mutex_unlock(&bdev->bd_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3a57f99..fe10afd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -930,7 +930,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
atomic_inc(&fs_info->nr_async_submits);
- if (bio->bi_opf & REQ_SYNC)
+ if (op_is_sync(bio->bi_opf))
btrfs_set_work_high_priority(&async->work);
btrfs_queue_work(fs_info->workers, &async->work);
@@ -3485,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device,
* to go down lazy.
*/
if (i == 0)
- ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
else
- ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
if (ret)
errors++;
}
@@ -3551,7 +3551,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
device->flush_bio = bio;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8ed05d9..1e67723 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -127,7 +127,7 @@ struct extent_page_data {
*/
unsigned int extent_locked:1;
- /* tells the submit_bio code to use a WRITE_SYNC */
+ /* tells the submit_bio code to use REQ_SYNC */
unsigned int sync_io:1;
};
@@ -2047,7 +2047,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
return -EIO;
}
bio->bi_bdev = dev->bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(bio, page, length, pg_offset);
if (btrfsic_submit_bio_wait(bio)) {
@@ -2388,7 +2388,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct bio *bio;
- int read_mode;
+ int read_mode = 0;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2404,9 +2404,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
}
if (failed_bio->bi_vcnt > 1)
- read_mode = READ_SYNC | REQ_FAILFAST_DEV;
- else
- read_mode = READ_SYNC;
+ read_mode |= REQ_FAILFAST_DEV;
phy_offset >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
@@ -3484,7 +3482,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
unsigned long nr_written = 0;
if (wbc->sync_mode == WB_SYNC_ALL)
- write_flags = WRITE_SYNC;
+ write_flags = REQ_SYNC;
trace___extent_writepage(page, inode, wbc);
@@ -3729,7 +3727,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
unsigned long i, num_pages;
unsigned long bio_flags = 0;
unsigned long start, end;
- int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
+ int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4076,7 +4074,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
int ret;
bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
- epd->sync_io ? WRITE_SYNC : 0);
+ epd->sync_io ? REQ_SYNC : 0);
ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
BUG_ON(ret < 0); /* -ENOMEM */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8e3a5a2..a4c8796 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7917,7 +7917,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec;
struct bio *bio;
int isector;
- int read_mode;
+ int read_mode = 0;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -7936,9 +7936,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
if ((failed_bio->bi_vcnt > 1)
|| (failed_bio->bi_io_vec->bv_len
> BTRFS_I(inode)->root->sectorsize))
- read_mode = READ_SYNC | REQ_FAILFAST_DEV;
- else
- read_mode = READ_SYNC;
+ read_mode |= REQ_FAILFAST_DEV;
isector = start - btrfs_io_bio(failed_bio)->logical;
isector >>= inode->i_sb->s_blocksize_bits;
@@ -8427,7 +8425,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
if (!bio)
return -ENOMEM;
- bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
+ bio->bi_opf = orig_bio->bi_opf;
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8465,8 +8463,7 @@ next_block:
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
- bio_set_op_attrs(bio, bio_op(orig_bio),
- bio_flags(orig_bio));
+ bio->bi_opf = orig_bio->bi_opf;
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fffb9ab..ff30782 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4440,7 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
leave_with_eio:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 74ed5aa..180f910 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = {
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
struct super_block *sb = fs_info->sb;
- char lvl[4];
+ char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1];
struct va_format vaf;
va_list args;
- const char *type = logtypes[4];
+ const char *type = NULL;
int kern_level;
struct ratelimit_state *ratelimit;
va_start(args, fmt);
- kern_level = printk_get_level(fmt);
- if (kern_level) {
+ while ((kern_level = printk_get_level(fmt)) != 0) {
size_t size = printk_skip_level(fmt) - fmt;
- memcpy(lvl, fmt, size);
- lvl[size] = '\0';
+
+ if (kern_level >= '0' && kern_level <= '7') {
+ memcpy(lvl, fmt, size);
+ lvl[size] = '\0';
+ type = logtypes[kern_level - '0'];
+ ratelimit = &printk_limits[kern_level - '0'];
+ }
fmt += size;
- type = logtypes[kern_level - '0'];
- ratelimit = &printk_limits[kern_level - '0'];
- } else {
+ }
+
+ if (!type) {
*lvl = '\0';
- /* Default to debug output */
- ratelimit = &printk_limits[7];
+ type = logtypes[4];
+ ratelimit = &printk_limits[4];
}
vaf.fmt = fmt;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 71a60cc..0d7d635 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6023,7 +6023,7 @@ static void btrfs_end_bio(struct bio *bio)
else
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_READ_ERRS);
- if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH)
+ if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
btrfs_dev_stat_print_on_error(dev);
@@ -6100,7 +6100,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
bio->bi_next = NULL;
spin_lock(&device->io_lock);
- if (bio->bi_opf & REQ_SYNC)
+ if (op_is_sync(bio->bi_opf))
pending_bios = &device->pending_sync_bios;
else
pending_bios = &device->pending_bios;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 09ed29c..f137ffe 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -62,7 +62,7 @@ struct btrfs_device {
int running_pending;
/* regular prio bios */
struct btrfs_pending_bios pending_bios;
- /* WRITE_SYNC bios */
+ /* sync bios */
struct btrfs_pending_bios pending_sync_bios;
struct block_device *bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index b205a62..a3bfd57 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -753,7 +753,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
* still in flight on potentially older
* contents.
*/
- write_dirty_buffer(bh, WRITE_SYNC);
+ write_dirty_buffer(bh, REQ_SYNC);
/*
* Kick off IO for the previous mapping. Note
@@ -1684,7 +1684,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode *
* prevents this contention from occurring.
*
* If block_write_full_page() is called with wbc->sync_mode ==
- * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
+ * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
* causes the writes to be flagged as synchronous writes.
*/
int __block_write_full_page(struct inode *inode, struct page *page,
@@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
- int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int write_flags = wbc_to_write_flags(wbc);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -3118,7 +3118,7 @@ EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
* @op: whether to %READ or %WRITE
- * @op_flags: rq_flag_bits
+ * @op_flags: req_flag_bits
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
@@ -3210,7 +3210,7 @@ EXPORT_SYMBOL(__sync_dirty_buffer);
int sync_dirty_buffer(struct buffer_head *bh)
{
- return __sync_dirty_buffer(bh, WRITE_SYNC);
+ return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
@@ -3403,7 +3403,7 @@ void free_buffer_head(struct buffer_head *bh)
}
EXPORT_SYMBOL(free_buffer_head);
-static void buffer_exit_cpu(int cpu)
+static int buffer_exit_cpu_dead(unsigned int cpu)
{
int i;
struct bh_lru *b = &per_cpu(bh_lrus, cpu);
@@ -3414,14 +3414,7 @@ static void buffer_exit_cpu(int cpu)
}
this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
per_cpu(bh_accounting, cpu).nr = 0;
-}
-
-static int buffer_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
- buffer_exit_cpu((unsigned long)hcpu);
- return NOTIFY_OK;
+ return 0;
}
/**
@@ -3471,6 +3464,7 @@ EXPORT_SYMBOL(bh_submit_read);
void __init buffer_init(void)
{
unsigned long nrpages;
+ int ret;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
@@ -3483,5 +3477,7 @@ void __init buffer_init(void)
*/
nrpages = (nr_free_buffer_pages() * 10) / 100;
max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
- hotcpu_notifier(buffer_cpu_notify, 0);
+ ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
+ NULL, buffer_exit_cpu_dead);
+ WARN_ON(ret < 0);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 78180d1..a594c78 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1261,26 +1261,30 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
op = ceph_snap(dir) == CEPH_SNAPDIR ?
- CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (!IS_ERR(req)) {
req->r_dentry = dget(dentry);
- req->r_num_caps = 2;
+ req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = mask;
- req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req);
- if (err == 0 || err == -ENOENT) {
- if (dentry == req->r_dentry) {
- valid = !d_unhashed(dentry);
- } else {
- d_invalidate(req->r_dentry);
- err = -EAGAIN;
- }
+ switch (err) {
+ case 0:
+ if (d_really_is_positive(dentry) &&
+ d_inode(dentry) == req->r_target_inode)
+ valid = 1;
+ break;
+ case -ENOENT:
+ if (d_really_is_negative(dentry))
+ valid = 1;
+ /* Fallthrough */
+ default:
+ break;
}
ceph_mdsc_put_request(req);
dout("d_revalidate %p lookup result=%d\n",
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 18630e8..f995e35 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1770,7 +1770,6 @@ const struct file_operations ceph_file_fops = {
.fsync = ceph_fsync,
.lock = ceph_lock,
.flock = ceph_flock,
- .splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
.compat_ioctl = ceph_ioctl,
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 8347c90..5eb0412 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -808,7 +808,11 @@ calc_seckey(struct cifs_ses *ses)
struct crypto_skcipher *tfm_arc4;
struct scatterlist sgin, sgout;
struct skcipher_request *req;
- unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+ unsigned char *sec_key;
+
+ sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL);
+ if (sec_key == NULL)
+ return -ENOMEM;
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
@@ -816,7 +820,7 @@ calc_seckey(struct cifs_ses *ses)
if (IS_ERR(tfm_arc4)) {
rc = PTR_ERR(tfm_arc4);
cifs_dbg(VFS, "could not allocate crypto API arc4\n");
- return rc;
+ goto out;
}
rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
@@ -854,7 +858,8 @@ calc_seckey(struct cifs_ses *ses)
out_free_cipher:
crypto_free_skcipher(tfm_arc4);
-
+out:
+ kfree(sec_key);
return rc;
}
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3f3185f..e3fed92 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3427,6 +3427,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
__u16 rc = 0;
struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data;
struct posix_acl_xattr_header *local_acl = (void *)pACL;
+ struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1);
int count;
int i;
@@ -3453,8 +3454,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
for (i = 0; i < count; i++) {
- rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i],
- (struct posix_acl_xattr_entry *)(local_acl + 1));
+ rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]);
if (rc != 0) {
/* ACE not converted */
break;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index aab5227..f7563c8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -41,6 +41,7 @@
#include <keys/user-type.h>
#include <net/ipv6.h>
#include <linux/parser.h>
+#include <linux/bvec.h>
#include "cifspdu.h"
#include "cifsglob.h"
@@ -412,6 +413,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
} while (server->tcpStatus == CifsNeedReconnect);
+ if (server->tcpStatus == CifsNeedNegotiate)
+ mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
return rc;
}
@@ -421,17 +425,25 @@ cifs_echo_request(struct work_struct *work)
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
- unsigned long echo_interval = server->echo_interval;
+ unsigned long echo_interval;
+
+ /*
+ * If we need to renegotiate, set echo interval to zero to
+ * immediately call echo service where we can renegotiate.
+ */
+ if (server->tcpStatus == CifsNeedNegotiate)
+ echo_interval = 0;
+ else
+ echo_interval = server->echo_interval;
/*
- * We cannot send an echo if it is disabled or until the
- * NEGOTIATE_PROTOCOL request is done, which is indicated by
- * server->ops->need_neg() == true. Also, no need to ping if
- * we got a response recently.
+ * We cannot send an echo if it is disabled.
+ * Also, no need to ping if we got a response recently.
*/
if (server->tcpStatus == CifsNeedReconnect ||
- server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
+ server->tcpStatus == CifsExiting ||
+ server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
@@ -442,7 +454,7 @@ cifs_echo_request(struct work_struct *work)
server->hostname);
requeue_echo:
- queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
+ queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval);
}
static bool
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 206a597..5f02edc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -28,6 +28,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/tcp.h>
+#include <linux/bvec.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index 281b768..eb9c92c 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1,6 +1,7 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/freezer.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
@@ -423,7 +424,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
if (core_waiters > 0) {
struct core_thread *ptr;
+ freezer_do_not_count();
wait_for_completion(&core_state->startup);
+ freezer_count();
/*
* Wait for all the threads to become inactive, so that
* all the thread context (extended register state, like
diff --git a/fs/dax.c b/fs/dax.c
index 5bfd27b..5ae8e11 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -183,7 +183,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry |= RADIX_DAX_ENTRY_LOCK;
- radix_tree_replace_slot(slot, (void *)entry);
+ radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry;
}
@@ -197,7 +197,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
- radix_tree_replace_slot(slot, (void *)entry);
+ radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry;
}
@@ -588,12 +588,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
+ struct radix_tree_node *node;
void **slot;
void *ret;
- ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
+ ret = __radix_tree_lookup(page_tree, index, &node, &slot);
WARN_ON_ONCE(ret != entry);
- radix_tree_replace_slot(slot, new_entry);
+ __radix_tree_replace(page_tree, node, slot,
+ new_entry, NULL, NULL);
}
if (vmf->flags & FAULT_FLAG_WRITE)
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index fb9aa16..835e23a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -457,7 +457,7 @@ static struct bio *dio_await_one(struct dio *dio)
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
@@ -1209,7 +1209,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio->inode = inode;
if (iov_iter_rw(iter) == WRITE) {
dio->op = REQ_OP_WRITE;
- dio->op_flags = WRITE_ODIRECT;
+ dio->op_flags = REQ_SYNC | REQ_IDLE;
} else {
dio->op = REQ_OP_READ;
}
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index dcea1e3..07fed83 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -268,7 +268,7 @@ void dlm_callback_work(struct work_struct *work)
int dlm_callback_start(struct dlm_ls *ls)
{
ls->ls_callback_wq = alloc_workqueue("dlm_callback",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+ WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
if (!ls->ls_callback_wq) {
log_print("can't start dlm_callback workqueue");
return -ENOMEM;
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index df955d2..7211e82 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -12,7 +12,7 @@
******************************************************************************/
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/configfs.h>
#include <linux/slab.h>
#include <linux/in.h>
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 466f7d6..ca7089a 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -12,7 +12,7 @@
#include <linux/pagemap.h>
#include <linux/seq_file.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 216b616..b670f56 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -18,7 +18,6 @@
* This is the main header file to be included in each DLM source file.
*/
-#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/types.h>
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f3e7278..91592b7 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -11,6 +11,8 @@
*******************************************************************************
******************************************************************************/
+#include <linux/module.h>
+
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609998d..7d398d3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -519,29 +519,25 @@ out:
/* Note: sk_callback_lock must be locked before calling this function. */
static void save_callbacks(struct connection *con, struct sock *sk)
{
- lock_sock(sk);
con->orig_data_ready = sk->sk_data_ready;
con->orig_state_change = sk->sk_state_change;
con->orig_write_space = sk->sk_write_space;
con->orig_error_report = sk->sk_error_report;
- release_sock(sk);
}
static void restore_callbacks(struct connection *con, struct sock *sk)
{
write_lock_bh(&sk->sk_callback_lock);
- lock_sock(sk);
sk->sk_user_data = NULL;
sk->sk_data_ready = con->orig_data_ready;
sk->sk_state_change = con->orig_state_change;
sk->sk_write_space = con->orig_write_space;
sk->sk_error_report = con->orig_error_report;
- release_sock(sk);
write_unlock_bh(&sk->sk_callback_lock);
}
/* Make a socket active */
-static void add_sock(struct socket *sock, struct connection *con)
+static void add_sock(struct socket *sock, struct connection *con, bool save_cb)
{
struct sock *sk = sock->sk;
@@ -549,7 +545,7 @@ static void add_sock(struct socket *sock, struct connection *con)
con->sock = sock;
sk->sk_user_data = con;
- if (!test_bit(CF_IS_OTHERCON, &con->flags))
+ if (save_cb)
save_callbacks(con, sk);
/* Install a data_ready callback */
sk->sk_data_ready = lowcomms_data_ready;
@@ -806,7 +802,7 @@ static int tcp_accept_from_sock(struct connection *con)
newcon->othercon = othercon;
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
- add_sock(newsock, othercon);
+ add_sock(newsock, othercon, false);
addcon = othercon;
}
else {
@@ -819,7 +815,10 @@ static int tcp_accept_from_sock(struct connection *con)
else {
newsock->sk->sk_user_data = newcon;
newcon->rx_action = receive_from_sock;
- add_sock(newsock, newcon);
+ /* accept copies the sk after we've saved the callbacks, so we
+ don't want to save them a second time or comm errors will
+ result in calling sk_error_report recursively. */
+ add_sock(newsock, newcon, false);
addcon = newcon;
}
@@ -880,7 +879,8 @@ static int sctp_accept_from_sock(struct connection *con)
}
make_sockaddr(&prim.ssp_addr, 0, &addr_len);
- if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+ ret = addr_to_nodeid(&prim.ssp_addr, &nodeid);
+ if (ret) {
unsigned char *b = (unsigned char *)&prim.ssp_addr;
log_print("reject connect from unknown addr");
@@ -919,7 +919,7 @@ static int sctp_accept_from_sock(struct connection *con)
newcon->othercon = othercon;
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
- add_sock(newsock, othercon);
+ add_sock(newsock, othercon, false);
addcon = othercon;
} else {
printk("Extra connection from node %d attempted\n", nodeid);
@@ -930,7 +930,7 @@ static int sctp_accept_from_sock(struct connection *con)
} else {
newsock->sk->sk_user_data = newcon;
newcon->rx_action = receive_from_sock;
- add_sock(newsock, newcon);
+ add_sock(newsock, newcon, false);
addcon = newcon;
}
@@ -1058,7 +1058,7 @@ static void sctp_connect_to_sock(struct connection *con)
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
con->connect_action = sctp_connect_to_sock;
- add_sock(sock, con);
+ add_sock(sock, con, true);
/* Bind to all addresses. */
if (sctp_bind_addrs(con, 0))
@@ -1146,7 +1146,7 @@ static void tcp_connect_to_sock(struct connection *con)
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
con->connect_action = tcp_connect_to_sock;
- add_sock(sock, con);
+ add_sock(sock, con, true);
/* Bind to our cluster-known address connecting to avoid
routing problems */
@@ -1366,7 +1366,7 @@ static int tcp_listen_for_all(void)
sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
if (sock) {
- add_sock(sock, con);
+ add_sock(sock, con, true);
result = 0;
}
else {
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 079c0bd..8e1b618 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -11,6 +11,8 @@
*******************************************************************************
******************************************************************************/
+#include <linux/module.h>
+
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 1e6e227..43a96c3 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -16,11 +16,7 @@
static uint32_t dlm_nl_seqnum;
static uint32_t listener_nlportid;
-static struct genl_family family = {
- .id = GENL_ID_GENERATE,
- .name = DLM_GENL_NAME,
- .version = DLM_GENL_VERSION,
-};
+static struct genl_family family;
static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
{
@@ -69,16 +65,24 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-static struct genl_ops dlm_nl_ops[] = {
+static const struct genl_ops dlm_nl_ops[] = {
{
.cmd = DLM_CMD_HELLO,
.doit = user_cmd,
},
};
+static struct genl_family family __ro_after_init = {
+ .name = DLM_GENL_NAME,
+ .version = DLM_GENL_VERSION,
+ .ops = dlm_nl_ops,
+ .n_ops = ARRAY_SIZE(dlm_nl_ops),
+ .module = THIS_MODULE,
+};
+
int __init dlm_netlink_init(void)
{
- return genl_register_family_with_ops(&family, dlm_nl_ops);
+ return genl_register_family(&family);
}
void dlm_netlink_exit(void)
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 58c2f4a..1ce908c 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -9,7 +9,6 @@
#include <linux/miscdevice.h>
#include <linux/init.h>
#include <linux/wait.h>
-#include <linux/module.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/poll.h>
diff --git a/fs/exec.c b/fs/exec.c
index 4e497b9..923c57d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1169,8 +1169,10 @@ no_thread_group:
/* we have changed execution domain */
tsk->exit_signal = SIGCHLD;
+#ifdef CONFIG_POSIX_TIMERS
exit_itimers(sig);
flush_itimer_signals();
+#endif
if (atomic_read(&oldsighand->count) != 1) {
struct sighand_struct *newsighand;
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index d89754e..eb98356 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
}
/*
- * Write the MMP block using WRITE_SYNC to try to get the block on-disk
+ * Write the MMP block using REQ_SYNC to try to get the block on-disk
* faster.
*/
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
@@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
@@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
get_bh(*bh);
lock_buffer(*bh);
(*bh)->b_end_io = end_buffer_read_sync;
- submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh);
+ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
ret = -EIO;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 902a3e3..e2332a6 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : 0;
+ REQ_SYNC : 0;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 79af71d4..dfc8309 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4616,7 +4616,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
if (sync) {
unlock_buffer(sbh);
error = __sync_dirty_buffer(sbh,
- test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
+ test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);
if (error)
return error;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 6fe23af..8f48769 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
if (error)
return error;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
if (default_acl) {
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7e9b504..f73ee95 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.sbi = sbi,
.type = META,
.op = REQ_OP_READ,
- .op_flags = READ_SYNC | REQ_META | REQ_PRIO,
+ .op_flags = REQ_META | REQ_PRIO,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
@@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.sbi = sbi,
.type = META,
.op = REQ_OP_READ,
- .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
+ .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
};
struct blk_plug plug;
@@ -228,7 +228,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
- ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
+ ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
@@ -770,7 +770,12 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
- goto fail_no_cp;
+ goto free_fail_no_cp;
+
+ if (cur_page == cp1)
+ sbi->cur_cp_pack = 1;
+ else
+ sbi->cur_cp_pack = 2;
if (cp_blks <= 1)
goto done;
@@ -793,6 +798,9 @@ done:
f2fs_put_page(cp2, 1);
return 0;
+free_fail_no_cp:
+ f2fs_put_page(cp1, 1);
+ f2fs_put_page(cp2, 1);
fail_no_cp:
kfree(sbi->ckpt);
return -EINVAL;
@@ -921,7 +929,11 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
if (inode) {
- update_inode_page(inode);
+ sync_inode_metadata(inode, 0);
+
+ /* it's on eviction */
+ if (is_inode_flag_set(inode, FI_DIRTY_INODE))
+ update_inode_page(inode);
iput(inode);
}
};
@@ -987,7 +999,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
- build_free_nids(sbi);
+ build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
@@ -998,7 +1010,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&sbi->nr_wb_bios))
+ if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
io_schedule_timeout(5*HZ);
@@ -1123,7 +1135,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
- start_blk = __start_cp_addr(sbi);
+ start_blk = __start_cp_next_addr(sbi);
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
@@ -1184,9 +1196,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
- clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
+ __set_cp_next_pack(sbi);
/*
* redirty superblock if metadata like node page or inode cache is
@@ -1261,8 +1273,12 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
-
- f2fs_wait_all_discard_bio(sbi);
+ if (err) {
+ release_discard_addrs(sbi);
+ } else {
+ clear_prefree_segments(sbi, cpc);
+ f2fs_wait_all_discard_bio(sbi);
+ }
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9f0ba90..9ac2625 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -29,6 +29,26 @@
#include "trace.h"
#include <trace/events/f2fs.h>
+static bool __is_cp_guaranteed(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode;
+ struct f2fs_sb_info *sbi;
+
+ if (!mapping)
+ return false;
+
+ inode = mapping->host;
+ sbi = F2FS_I_SB(inode);
+
+ if (inode->i_ino == F2FS_META_INO(sbi) ||
+ inode->i_ino == F2FS_NODE_INO(sbi) ||
+ S_ISDIR(inode->i_mode) ||
+ is_cold_data(page))
+ return true;
+ return false;
+}
+
static void f2fs_read_end_io(struct bio *bio)
{
struct bio_vec *bvec;
@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
+ enum count_type type = WB_DATA_TYPE(page);
fscrypt_pullback_bio_page(&page, true);
@@ -78,9 +99,11 @@ static void f2fs_write_end_io(struct bio *bio)
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
+ dec_page_count(sbi, type);
+ clear_cold_data(page);
end_page_writeback(page);
}
- if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
+ if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
@@ -88,6 +111,46 @@ static void f2fs_write_end_io(struct bio *bio)
}
/*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio)
+{
+ struct block_device *bdev = sbi->sb->s_bdev;
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
+ }
+ if (bio) {
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+ }
+ return bdev;
+}
+
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+ return i;
+ return 0;
+}
+
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+ block_t blk_addr, struct bio *bio)
+{
+ return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
+/*
* Low-level block read/write IO operations.
*/
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
@@ -97,8 +160,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio = f2fs_bio_alloc(npages);
- bio->bi_bdev = sbi->sb->s_bdev;
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+ f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
bio->bi_private = is_read ? NULL : sbi;
@@ -109,8 +171,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
- atomic_inc(&sbi->nr_wb_bios);
- if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
+ if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
}
@@ -198,11 +259,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.op = REQ_OP_WRITE;
- if (test_opt(sbi, NOBARRIER))
- io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO;
- else
- io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META |
- REQ_PRIO;
+ io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+ if (!test_opt(sbi, NOBARRIER))
+ io->fio.op_flags |= REQ_FUA;
}
__submit_merged_bio(io);
out:
@@ -270,22 +329,24 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
verify_block_addr(sbi, fio->old_blkaddr);
verify_block_addr(sbi, fio->new_blkaddr);
+ bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+ if (!is_read)
+ inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+
down_write(&io->io_rwsem);
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
- (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+ (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+ !__same_bdev(sbi, fio->new_blkaddr, io->bio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
- int bio_blocks = MAX_BIO_BLOCKS(sbi);
-
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
- bio_blocks, is_read);
+ BIO_MAX_PAGES, is_read);
io->fio = *fio;
}
- bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
-
if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
PAGE_SIZE) {
__submit_merged_bio(io);
@@ -483,7 +544,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index)
return page;
f2fs_put_page(page, 0);
- page = get_read_data_page(inode, index, READ_SYNC, false);
+ page = get_read_data_page(inode, index, 0, false);
if (IS_ERR(page))
return page;
@@ -509,7 +570,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct page *page;
repeat:
- page = get_read_data_page(inode, index, READ_SYNC, for_write);
+ page = get_read_data_page(inode, index, 0, for_write);
if (IS_ERR(page))
return page;
@@ -590,7 +651,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
- int seg = CURSEG_WARM_DATA;
pgoff_t fofs;
blkcnt_t count = 1;
@@ -608,11 +668,8 @@ alloc:
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
- if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
- seg = CURSEG_DIRECT_IO;
-
allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
- &sum, seg);
+ &sum, CURSEG_WARM_DATA);
set_data_blkaddr(dn);
/* update i_size */
@@ -624,11 +681,18 @@ alloc:
return 0;
}
-ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
+static inline bool __force_buffered_io(struct inode *inode, int rw)
+{
+ return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+ (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+ F2FS_I_SB(inode)->s_ndevs);
+}
+
+int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct f2fs_map_blocks map;
- ssize_t ret = 0;
+ int err = 0;
map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
@@ -640,19 +704,22 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
if (iocb->ki_flags & IOCB_DIRECT) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
- return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ return f2fs_map_blocks(inode, &map, 1,
+ __force_buffered_io(inode, WRITE) ?
+ F2FS_GET_BLOCK_PRE_AIO :
+ F2FS_GET_BLOCK_PRE_DIO);
}
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
}
if (!f2fs_has_inline_data(inode))
return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
- return ret;
+ return err;
}
/*
@@ -676,7 +743,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
struct extent_info ei;
- bool allocated = false;
block_t blkaddr;
if (!maxblocks)
@@ -716,7 +782,7 @@ next_dnode:
}
prealloc = 0;
- ofs_in_node = dn.ofs_in_node;
+ last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block:
@@ -735,10 +801,8 @@ next_block:
}
} else {
err = __allocate_data_block(&dn);
- if (!err) {
+ if (!err)
set_inode_flag(inode, FI_APPEND_WRITE);
- allocated = true;
- }
}
if (err)
goto sync_out;
@@ -793,7 +857,6 @@ skip:
err = reserve_new_blocks(&dn, prealloc);
if (err)
goto sync_out;
- allocated = dn.node_changed;
map->m_len += dn.ofs_in_node - ofs_in_node;
if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -812,9 +875,8 @@ skip:
if (create) {
f2fs_unlock_op(sbi);
- f2fs_balance_fs(sbi, allocated);
+ f2fs_balance_fs(sbi, dn.node_changed);
}
- allocated = false;
goto next_dnode;
sync_out:
@@ -822,7 +884,7 @@ sync_out:
unlock_out:
if (create) {
f2fs_unlock_op(sbi);
- f2fs_balance_fs(sbi, allocated);
+ f2fs_balance_fs(sbi, dn.node_changed);
}
out:
trace_f2fs_map_blocks(inode, map, err);
@@ -834,19 +896,19 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
pgoff_t *next_pgofs)
{
struct f2fs_map_blocks map;
- int ret;
+ int err;
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
map.m_next_pgofs = next_pgofs;
- ret = f2fs_map_blocks(inode, &map, create, flag);
- if (!ret) {
+ err = f2fs_map_blocks(inode, &map, create, flag);
+ if (!err) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = map.m_len << inode->i_blkbits;
}
- return ret;
+ return err;
}
static int get_data_block(struct inode *inode, sector_t iblock,
@@ -891,7 +953,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct buffer_head map_bh;
sector_t start_blk, last_blk;
pgoff_t next_pgofs;
- loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
@@ -908,13 +969,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
inode_lock(inode);
- isize = i_size_read(inode);
- if (start >= isize)
- goto out;
-
- if (start + len > isize)
- len = isize - start;
-
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
@@ -933,13 +987,11 @@ next:
/* HOLE */
if (!buffer_mapped(&map_bh)) {
start_blk = next_pgofs;
- /* Go through holes util pass the EOF */
- if (blk_to_logical(inode, start_blk) < isize)
+
+ if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+ F2FS_I_SB(inode)->max_file_blocks))
goto prep_next;
- /* Found a hole beyond isize means no more extents.
- * Note that the premise is that filesystems don't
- * punch holes beyond isize and keep size unchanged.
- */
+
flags |= FIEMAP_EXTENT_LAST;
}
@@ -982,7 +1034,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct fscrypt_ctx *ctx = NULL;
- struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -1000,8 +1051,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
fscrypt_release_ctx(ctx);
return ERR_PTR(-ENOMEM);
}
- bio->bi_bdev = bdev;
- bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+ f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = ctx;
@@ -1096,7 +1146,8 @@ got_it:
* This page will go to BIO. Do we need to send this
* BIO off first?
*/
- if (bio && (last_block_in_bio != block_nr - 1)) {
+ if (bio && (last_block_in_bio != block_nr - 1 ||
+ !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
__submit_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
@@ -1253,7 +1304,7 @@ static int f2fs_write_data_page(struct page *page,
.sbi = sbi,
.type = DATA,
.op = REQ_OP_WRITE,
- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+ .op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
};
@@ -1313,7 +1364,6 @@ done:
if (err && err != -ENOENT)
goto redirty_out;
- clear_cold_data(page);
out:
inode_dec_dirty_pages(inode);
if (err)
@@ -1334,6 +1384,8 @@ out:
redirty_out:
redirty_page_for_writepage(wbc, page);
+ if (!err)
+ return AOP_WRITEPAGE_ACTIVATE;
unlock_page(page);
return err;
}
@@ -1429,6 +1481,15 @@ continue_unlock:
ret = mapping->a_ops->writepage(page, wbc);
if (unlikely(ret)) {
+ /*
+ * keep nr_to_write, since vfs uses this to
+ * get # of written pages.
+ */
+ if (ret == AOP_WRITEPAGE_ACTIVATE) {
+ unlock_page(page);
+ ret = 0;
+ continue;
+ }
done_index = page->index + 1;
done = 1;
break;
@@ -1665,7 +1726,7 @@ repeat:
err = PTR_ERR(bio);
goto fail;
}
- bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
+ bio->bi_opf = REQ_OP_READ;
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
err = -EFAULT;
@@ -1716,7 +1777,6 @@ static int f2fs_write_end(struct file *file,
goto unlock_out;
set_page_dirty(page);
- clear_cold_data(page);
if (pos + copied > i_size_read(inode))
f2fs_i_size_write(inode, pos + copied);
@@ -1753,9 +1813,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (err)
return err;
- if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- return 0;
- if (test_opt(F2FS_I_SB(inode), LFS))
+ if (__force_buffered_io(inode, rw))
return 0;
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
@@ -1787,12 +1845,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
return;
if (PageDirty(page)) {
- if (inode->i_ino == F2FS_META_INO(sbi))
+ if (inode->i_ino == F2FS_META_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_META);
- else if (inode->i_ino == F2FS_NODE_INO(sbi))
+ } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
dec_page_count(sbi, F2FS_DIRTY_NODES);
- else
+ } else {
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
+ }
}
/* This is atomic written page, keep Private */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index fb245bd..fbd5184 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,7 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
- si->wb_bios = atomic_read(&sbi->nr_wb_bios);
+ si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
+ si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -74,7 +75,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
- si->fnids = NM_I(sbi)->fcnt;
+ si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+ si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -194,7 +196,9 @@ get_cache:
si->cache_mem += sizeof(struct flush_cmd_control);
/* free nids */
- si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
+ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
+ NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
+ sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
@@ -310,22 +314,22 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n",
- si->inmem_pages, si->wb_bios);
- seq_printf(s, " - nodes: %4lld in %4d\n",
+ seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
+ si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+ seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
- seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n",
+ seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
- seq_printf(s, " - datas: %4lld in files:%4d\n",
+ seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
- seq_printf(s, " - meta: %4lld in %4d\n",
+ seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
- seq_printf(s, " - imeta: %4lld\n",
+ seq_printf(s, " - imeta: %4d\n",
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
- seq_printf(s, " - free_nids: %9d\n",
- si->fnids);
+ seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n",
+ si->free_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
@@ -373,6 +377,7 @@ static int stat_open(struct inode *inode, struct file *file)
}
static const struct file_operations stat_fops = {
+ .owner = THIS_MODULE,
.open = stat_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 369f451..827c5da 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -136,7 +136,7 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
/* show encrypted name */
if (fname->hash) {
- if (de->hash_code == fname->hash)
+ if (de->hash_code == cpu_to_le32(fname->hash))
goto found;
} else if (de_name.len == name->len &&
de->hash_code == namehash &&
@@ -313,7 +313,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = current_time(dir);
- f2fs_mark_inode_dirty_sync(dir);
+ f2fs_mark_inode_dirty_sync(dir, false);
f2fs_put_page(page, 1);
}
@@ -466,7 +466,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(inode, FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = current_time(dir);
- f2fs_mark_inode_dirty_sync(dir);
+ f2fs_mark_inode_dirty_sync(dir, false);
if (F2FS_I(dir)->i_current_depth != current_depth)
f2fs_i_depth_write(dir, current_depth);
@@ -731,7 +731,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = current_time(dir);
- f2fs_mark_inode_dirty_sync(dir);
+ f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
@@ -742,6 +742,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
ClearPagePrivate(page);
ClearPageUptodate(page);
inode_dec_dirty_pages(dir);
+ remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
}
@@ -784,7 +785,7 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
-bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
@@ -819,7 +820,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
(u32)de->hash_code, 0,
&de_name, fstr);
if (err)
- return true;
+ return err;
de_name = *fstr;
fstr->len = save_len;
@@ -827,12 +828,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (!dir_emit(ctx, de_name.name, de_name.len,
le32_to_cpu(de->ino), d_type))
- return true;
+ return 1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
- return false;
+ return 0;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -871,17 +872,21 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
- if (err == -ENOENT)
+ if (err == -ENOENT) {
+ err = 0;
continue;
- else
+ } else {
goto out;
+ }
}
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
- if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
+ err = f2fs_fill_dentries(ctx, &d,
+ n * NR_DENTRY_IN_BLOCK, &fstr);
+ if (err) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
break;
@@ -891,10 +896,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
- err = 0;
out:
fscrypt_fname_free_buffer(&fstr);
- return err;
+ return err < 0 ? err : 0;
}
static int f2fs_dir_open(struct inode *inode, struct file *filp)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2b06d4f..4db44da 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -172,7 +172,7 @@ static void __drop_largest_extent(struct inode *inode,
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
largest->len = 0;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8e94b7b..2da8c3a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -103,7 +103,7 @@ struct f2fs_mount_info {
};
#define F2FS_FEATURE_ENCRYPT 0x0001
-#define F2FS_FEATURE_HMSMR 0x0002
+#define F2FS_FEATURE_BLKZONED 0x0002
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -401,6 +401,7 @@ struct f2fs_map_blocks {
#define FADVISE_LOST_PINO_BIT 0x02
#define FADVISE_ENCRYPT_BIT 0x04
#define FADVISE_ENC_NAME_BIT 0x08
+#define FADVISE_KEEP_SIZE_BIT 0x10
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
@@ -413,6 +414,8 @@ struct f2fs_map_blocks {
#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
+#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
+#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
#define DEF_DIR_LEVEL 0
@@ -428,7 +431,7 @@ struct f2fs_inode_info {
/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
- struct percpu_counter dirty_pages; /* # of dirty pages */
+ atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
@@ -493,20 +496,26 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front);
}
-extern void f2fs_mark_inode_dirty_sync(struct inode *);
+extern void f2fs_mark_inode_dirty_sync(struct inode *, bool);
static inline void __try_update_largest_extent(struct inode *inode,
struct extent_tree *et, struct extent_node *en)
{
if (en->ei.len > et->largest.len) {
et->largest = en->ei;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
}
+enum nid_list {
+ FREE_NID_LIST,
+ ALLOC_NID_LIST,
+ MAX_NID_LIST,
+};
+
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
- nid_t available_nids; /* maximum available node ids */
+ nid_t available_nids; /* # of available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
@@ -522,9 +531,9 @@ struct f2fs_nm_info {
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
- struct list_head free_nid_list; /* a list for free nids */
- spinlock_t free_nid_list_lock; /* protect free nid list */
- unsigned int fcnt; /* the number of free node id */
+ struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
+ unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
+ spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
/* for checkpoint */
@@ -585,7 +594,6 @@ enum {
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NO_CHECK_TYPE,
- CURSEG_DIRECT_IO, /* to use for the direct IO path */
};
struct flush_cmd {
@@ -649,6 +657,7 @@ struct f2fs_sm_info {
* f2fs monitors the number of several block types such as on-writeback,
* dirty dentry blocks, dirty node blocks, and dirty meta blocks.
*/
+#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
@@ -656,6 +665,8 @@ enum count_type {
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
F2FS_DIRTY_IMETA,
+ F2FS_WB_CP_DATA,
+ F2FS_WB_DATA,
NR_COUNT_TYPE,
};
@@ -688,7 +699,7 @@ struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int op; /* contains REQ_OP_ */
- int op_flags; /* rq_flag_bits */
+ int op_flags; /* req_flag_bits */
block_t new_blkaddr; /* new block address to be written */
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
@@ -704,6 +715,20 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
+#define FDEV(i) (sbi->devs[i])
+#define RDEV(i) (raw_super->devs[i])
+struct f2fs_dev_info {
+ struct block_device *bdev;
+ char path[MAX_PATH_LEN];
+ unsigned int total_segments;
+ block_t start_blk;
+ block_t end_blk;
+#ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int nr_blkz; /* Total number of zones */
+ u8 *blkz_type; /* Array of zones type */
+#endif
+};
+
enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
@@ -750,6 +775,12 @@ struct f2fs_sb_info {
u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
u8 key_prefix_size;
#endif
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int blocks_per_blkz; /* F2FS blocks per zone */
+ unsigned int log_blocks_per_blkz; /* log2 F2FS blocks per zone */
+#endif
+
/* for node-related operations */
struct f2fs_nm_info *nm_info; /* node manager */
struct inode *node_inode; /* cache node blocks */
@@ -764,6 +795,7 @@ struct f2fs_sb_info {
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
+ int cur_cp_pack; /* remain current cp pack */
spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
@@ -815,10 +847,9 @@ struct f2fs_sb_info {
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
u32 s_next_generation; /* for NFS support */
- atomic_t nr_wb_bios; /* # of writeback bios */
/* # of pages, see count_type */
- struct percpu_counter nr_pages[NR_COUNT_TYPE];
+ atomic_t nr_pages[NR_COUNT_TYPE];
/* # of allocated blocks */
struct percpu_counter alloc_valid_block_count;
@@ -863,6 +894,8 @@ struct f2fs_sb_info {
/* For shrinker support */
struct list_head s_list;
+ int s_ndevs; /* number of devices */
+ struct f2fs_dev_info *devs; /* for device list */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
@@ -1105,13 +1138,6 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
spin_unlock(&sbi->cp_lock);
}
-static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
-{
- struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
-
- return blk_queue_discard(q);
-}
-
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
down_read(&sbi->cp_rwsem);
@@ -1232,9 +1258,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
- percpu_counter_inc(&sbi->nr_pages[count_type]);
+ atomic_inc(&sbi->nr_pages[count_type]);
- if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES)
+ if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
+ count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA)
return;
set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -1242,14 +1269,14 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_pages(struct inode *inode)
{
- percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
+ atomic_inc(&F2FS_I(inode)->dirty_pages);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
{
- percpu_counter_dec(&sbi->nr_pages[count_type]);
+ atomic_dec(&sbi->nr_pages[count_type]);
}
static inline void inode_dec_dirty_pages(struct inode *inode)
@@ -1258,19 +1285,19 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
!S_ISLNK(inode->i_mode))
return;
- percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
+ atomic_dec(&F2FS_I(inode)->dirty_pages);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
{
- return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
+ return atomic_read(&sbi->nr_pages[count_type]);
}
-static inline s64 get_dirty_pages(struct inode *inode)
+static inline int get_dirty_pages(struct inode *inode)
{
- return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
+ return atomic_read(&F2FS_I(inode)->dirty_pages);
}
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
@@ -1329,22 +1356,27 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
{
- block_t start_addr;
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- unsigned long long ckpt_version = cur_cp_version(ckpt);
-
- start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+ block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
- /*
- * odd numbered checkpoint should at cp segment 0
- * and even segment must be at cp segment 1
- */
- if (!(ckpt_version & 1))
+ if (sbi->cur_cp_pack == 2)
start_addr += sbi->blocks_per_seg;
+ return start_addr;
+}
+
+static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
+{
+ block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+ if (sbi->cur_cp_pack == 1)
+ start_addr += sbi->blocks_per_seg;
return start_addr;
}
+static inline void __set_cp_next_pack(struct f2fs_sb_info *sbi)
+{
+ sbi->cur_cp_pack = (sbi->cur_cp_pack == 1) ? 2 : 1;
+}
+
static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
{
return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
@@ -1621,7 +1653,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
return;
case FI_DATA_EXIST:
case FI_INLINE_DOTS:
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
}
@@ -1648,7 +1680,7 @@ static inline void set_acl_inode(struct inode *inode, umode_t mode)
{
F2FS_I(inode)->i_acl_mode = mode;
set_inode_flag(inode, FI_ACL_MODE);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
}
static inline void f2fs_i_links_write(struct inode *inode, bool inc)
@@ -1657,7 +1689,7 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
inc_nlink(inode);
else
drop_nlink(inode);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_blocks_write(struct inode *inode,
@@ -1668,7 +1700,7 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
inode->i_blocks = add ? inode->i_blocks + diff :
inode->i_blocks - diff;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
}
@@ -1682,34 +1714,27 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
return;
i_size_write(inode, i_size);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
}
-static inline bool f2fs_skip_inode_update(struct inode *inode)
-{
- if (!is_inode_flag_set(inode, FI_AUTO_RECOVER))
- return false;
- return F2FS_I(inode)->last_disk_size == i_size_read(inode);
-}
-
static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
{
F2FS_I(inode)->i_current_depth = depth;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid)
{
F2FS_I(inode)->i_xattr_nid = xnid;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino)
{
F2FS_I(inode)->i_pino = pino;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
@@ -1837,13 +1862,31 @@ static inline int is_file(struct inode *inode, int type)
static inline void set_file(struct inode *inode, int type)
{
F2FS_I(inode)->i_advise |= type;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
}
static inline void clear_file(struct inode *inode, int type)
{
F2FS_I(inode)->i_advise &= ~type;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
+}
+
+static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
+{
+ if (dsync) {
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ bool ret;
+
+ spin_lock(&sbi->inode_lock[DIRTY_META]);
+ ret = list_empty(&F2FS_I(inode)->gdirty_list);
+ spin_unlock(&sbi->inode_lock[DIRTY_META]);
+ return ret;
+ }
+ if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) ||
+ file_keep_isize(inode) ||
+ i_size_read(inode) & PAGE_MASK)
+ return false;
+ return F2FS_I(inode)->last_disk_size == i_size_read(inode);
}
static inline int f2fs_readonly(struct super_block *sb)
@@ -1955,7 +1998,7 @@ void set_de_type(struct f2fs_dir_entry *, umode_t);
unsigned char get_de_type(struct f2fs_dir_entry *);
struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
-bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
+int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
unsigned int, struct fscrypt_str *);
void do_make_empty_dir(struct inode *, struct inode *,
struct f2fs_dentry_ptr *);
@@ -1995,7 +2038,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
/*
* super.c
*/
-int f2fs_inode_dirtied(struct inode *);
+int f2fs_inode_dirtied(struct inode *, bool);
void f2fs_inode_synced(struct inode *);
int f2fs_commit_super(struct f2fs_sb_info *, bool);
int f2fs_sync_fs(struct super_block *, int);
@@ -2034,7 +2077,7 @@ void move_node_page(struct page *, int);
int fsync_node_pages(struct f2fs_sb_info *, struct inode *,
struct writeback_control *, bool);
int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
-void build_free_nids(struct f2fs_sb_info *);
+void build_free_nids(struct f2fs_sb_info *, bool);
bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
@@ -2060,7 +2103,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
-void destroy_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *, bool);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
@@ -2132,12 +2175,15 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
void f2fs_flush_merged_bios(struct f2fs_sb_info *);
int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *);
+struct block_device *f2fs_target_device(struct f2fs_sb_info *,
+ block_t, struct bio *);
+int f2fs_target_device_index(struct f2fs_sb_info *, block_t);
void set_data_blkaddr(struct dnode_of_data *);
void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
-ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
+int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
struct page *find_data_page(struct inode *, pgoff_t);
@@ -2160,7 +2206,7 @@ int f2fs_migrate_page(struct address_space *, struct page *, struct page *,
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
block_t start_bidx_of_node(unsigned int, struct inode *);
-int f2fs_gc(struct f2fs_sb_info *, bool);
+int f2fs_gc(struct f2fs_sb_info *, bool, bool);
void build_gc_manager(struct f2fs_sb_info *);
/*
@@ -2181,12 +2227,12 @@ struct f2fs_stat_info {
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, zombie_tree, ext_node;
- s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
- s64 inmem_pages;
+ int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
+ int inmem_pages;
unsigned int ndirty_dirs, ndirty_files, ndirty_all;
- int nats, dirty_nats, sits, dirty_sits, fnids;
+ int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids;
int total_count, utilization;
- int bg_gc, wb_bios;
+ int bg_gc, nr_wb_cp_data, nr_wb_data;
int inline_xattr, inline_inode, inline_dir, orphans;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
@@ -2412,9 +2458,30 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb)
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
}
-static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb)
+static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb)
+{
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED);
+}
+
+#ifdef CONFIG_BLK_DEV_ZONED
+static inline int get_blkz_type(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t blkaddr)
+{
+ unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (FDEV(i).bdev == bdev)
+ return FDEV(i).blkz_type[zno];
+ return -EINVAL;
+}
+#endif
+
+static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi)
{
- return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR);
+ struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev);
+
+ return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb);
}
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f0c83f7..49f10dc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -94,8 +94,6 @@ mapped:
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
- /* if gced page is attached, don't write to cold segment */
- clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
@@ -210,7 +208,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
/* if the inode is dirty, let's recover all the time */
- if (!datasync && !f2fs_skip_inode_update(inode)) {
+ if (!f2fs_skip_inode_update(inode, datasync)) {
f2fs_write_inode(inode, NULL);
goto go_write;
}
@@ -264,7 +262,7 @@ sync_nodes:
}
if (need_inode_block_update(sbi, ino)) {
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
f2fs_write_inode(inode, NULL);
goto sync_nodes;
}
@@ -632,7 +630,7 @@ int f2fs_truncate(struct inode *inode)
return err;
inode->i_mtime = inode->i_ctime = current_time(inode);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
@@ -679,6 +677,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
+ bool size_changed = false;
err = setattr_prepare(dentry, attr);
if (err)
@@ -694,7 +693,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
err = f2fs_truncate(inode);
if (err)
return err;
- f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
@@ -710,6 +708,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
inode->i_mtime = inode->i_ctime = current_time(inode);
}
+
+ size_changed = true;
}
__setattr_copy(inode, attr);
@@ -722,7 +722,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- f2fs_mark_inode_dirty_sync(inode);
+ /* file size may changed here */
+ f2fs_mark_inode_dirty_sync(inode, size_changed);
+
+ /* inode change will produce dirty node pages flushed by checkpoint */
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
+
return err;
}
@@ -967,7 +972,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
new_size = (dst + i) << PAGE_SHIFT;
if (dst_inode->i_size < new_size)
f2fs_i_size_write(dst_inode, new_size);
- } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen);
+ } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
f2fs_put_dnode(&dn);
} else {
@@ -1218,6 +1223,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
+
+ f2fs_balance_fs(sbi, dn.node_changed);
+
if (ret)
goto out;
@@ -1313,15 +1321,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
- int ret;
+ int err;
- ret = inode_newsize_ok(inode, (len + offset));
- if (ret)
- return ret;
+ err = inode_newsize_ok(inode, (len + offset));
+ if (err)
+ return err;
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- return ret;
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
f2fs_balance_fs(sbi, true);
@@ -1333,12 +1341,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
- ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
- if (ret) {
+ err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ if (err) {
pgoff_t last_off;
if (!map.m_len)
- return ret;
+ return err;
last_off = map.m_lblk + map.m_len - 1;
@@ -1352,7 +1360,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size);
- return ret;
+ return err;
}
static long f2fs_fallocate(struct file *file, int mode,
@@ -1393,7 +1401,9 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = current_time(inode);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ file_set_keep_isize(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1526,7 +1536,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Unexpected flush for atomic writes: ino=%lu, npages=%lld",
+ "Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
@@ -1827,7 +1837,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
mutex_lock(&sbi->gc_mutex);
}
- ret = f2fs_gc(sbi, sync);
+ ret = f2fs_gc(sbi, sync, true);
out:
mnt_drop_write_file(filp);
return ret;
@@ -2241,12 +2251,15 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
- ret = f2fs_preallocate_blocks(iocb, from);
- if (!ret) {
- blk_start_plug(&plug);
- ret = __generic_file_write_iter(iocb, from);
- blk_finish_plug(&plug);
+ int err = f2fs_preallocate_blocks(iocb, from);
+
+ if (err) {
+ inode_unlock(inode);
+ return err;
}
+ blk_start_plug(&plug);
+ ret = __generic_file_write_iter(iocb, from);
+ blk_finish_plug(&plug);
}
inode_unlock(inode);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6f14ee9..88bfc3d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -82,7 +82,7 @@ static int gc_thread_func(void *data)
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
+ if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
@@ -544,13 +544,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
-static void move_encrypted_block(struct inode *inode, block_t bidx)
+static void move_encrypted_block(struct inode *inode, block_t bidx,
+ unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
.op = REQ_OP_READ,
- .op_flags = READ_SYNC,
+ .op_flags = 0,
.encrypted_page = NULL,
};
struct dnode_of_data dn;
@@ -565,6 +566,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
if (!page)
return;
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ goto out;
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
@@ -625,7 +629,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
fio.op = REQ_OP_WRITE;
- fio.op_flags = WRITE_SYNC;
+ fio.op_flags = REQ_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
@@ -645,7 +649,8 @@ out:
f2fs_put_page(page, 1);
}
-static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
+static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
+ unsigned int segno, int off)
{
struct page *page;
@@ -653,6 +658,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
if (IS_ERR(page))
return;
+ if (!check_valid_map(F2FS_I_SB(inode), segno, off))
+ goto out;
+
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
@@ -663,7 +671,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
.sbi = F2FS_I_SB(inode),
.type = DATA,
.op = REQ_OP_WRITE,
- .op_flags = WRITE_SYNC,
+ .op_flags = REQ_SYNC,
.page = page,
.encrypted_page = NULL,
};
@@ -673,8 +681,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
retry:
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
- if (clear_page_dirty_for_io(page))
+ if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
+ }
set_cold_data(page);
@@ -683,8 +693,6 @@ retry:
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
-
- clear_cold_data(page);
}
out:
f2fs_put_page(page, 1);
@@ -794,9 +802,9 @@ next_step:
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
- move_encrypted_block(inode, start_bidx);
+ move_encrypted_block(inode, start_bidx, segno, off);
else
- move_data_page(inode, start_bidx, gc_type);
+ move_data_page(inode, start_bidx, gc_type, segno, off);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
@@ -899,7 +907,7 @@ next:
return sec_freed;
}
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
@@ -940,6 +948,9 @@ gc_more:
if (ret)
goto stop;
}
+ } else if (gc_type == BG_GC && !background) {
+ /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
+ goto stop;
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 5f1a67f..e32a9e5 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -111,7 +111,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
.op = REQ_OP_WRITE,
- .op_flags = WRITE_SYNC | REQ_PRIO,
+ .op_flags = REQ_SYNC | REQ_PRIO,
.page = page,
.encrypted_page = NULL,
};
@@ -137,8 +137,10 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
fio.old_blkaddr = dn->data_blkaddr;
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
- if (dirty)
+ if (dirty) {
inode_dec_dirty_pages(dn->inode);
+ remove_dirty_inode(dn->inode);
+ }
/* this converted inline_data should be recovered. */
set_inode_flag(dn->inode, FI_APPEND_WRITE);
@@ -419,7 +421,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
}
new_name.name = d.filename[bit_pos];
- new_name.len = de->name_len;
+ new_name.len = le16_to_cpu(de->name_len);
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
@@ -573,7 +575,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_put_page(page, 1);
dir->i_ctime = dir->i_mtime = current_time(dir);
- f2fs_mark_inode_dirty_sync(dir);
+ f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
@@ -610,6 +612,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct f2fs_inline_dentry *inline_dentry = NULL;
struct page *ipage = NULL;
struct f2fs_dentry_ptr d;
+ int err;
if (ctx->pos == NR_INLINE_DENTRY)
return 0;
@@ -622,11 +625,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
- if (!f2fs_fill_dentries(ctx, &d, 0, fstr))
+ err = f2fs_fill_dentries(ctx, &d, 0, fstr);
+ if (!err)
ctx->pos = NR_INLINE_DENTRY;
f2fs_put_page(ipage, 1);
- return 0;
+ return err < 0 ? err : 0;
}
int f2fs_inline_data_fiemap(struct inode *inode,
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index d736989..af06bda 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -19,10 +19,11 @@
#include <trace/events/f2fs.h>
-void f2fs_mark_inode_dirty_sync(struct inode *inode)
+void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
{
- if (f2fs_inode_dirtied(inode))
+ if (f2fs_inode_dirtied(inode, sync))
return;
+
mark_inode_dirty_sync(inode);
}
@@ -43,7 +44,7 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, false);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -252,6 +253,7 @@ retry:
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
+ struct extent_tree *et = F2FS_I(inode)->extent_tree;
f2fs_inode_synced(inode);
@@ -267,11 +269,13 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
- if (F2FS_I(inode)->extent_tree)
- set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
- &ri->i_ext);
- else
+ if (et) {
+ read_lock(&et->lock);
+ set_raw_extent(&et->largest, &ri->i_ext);
+ read_unlock(&et->lock);
+ } else {
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
+ }
set_raw_inline(inode, ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -335,7 +339,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
- if (update_inode_page(inode))
+ if (update_inode_page(inode) && wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
@@ -373,6 +377,9 @@ void f2fs_evict_inode(struct inode *inode)
goto no_delete;
#endif
+ remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
@@ -384,6 +391,8 @@ retry:
f2fs_lock_op(sbi);
err = remove_inode_page(inode);
f2fs_unlock_op(sbi);
+ if (err == -ENOENT)
+ err = 0;
}
/* give more chances, if ENOMEM case */
@@ -403,10 +412,12 @@ no_delete:
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
- if (is_inode_flag_set(inode, FI_APPEND_WRITE))
- add_ino_entry(sbi, inode->i_ino, APPEND_INO);
- if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
- add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ if (inode->i_nlink) {
+ if (is_inode_flag_set(inode, FI_APPEND_WRITE))
+ add_ino_entry(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
+ add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ }
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
@@ -424,6 +435,18 @@ void handle_failed_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ /*
+ * clear nlink of inode in order to release resource of inode
+ * immediately.
+ */
+ clear_nlink(inode);
+
+ /*
+ * we must call this to avoid inode being remained as dirty, resulting
+ * in a panic when flushing dirty inodes in gdirty_list.
+ */
+ update_inode_page(inode);
+
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 489fa0d..db33b56 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -778,7 +778,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
- f2fs_mark_inode_dirty_sync(old_inode);
+ f2fs_mark_inode_dirty_sync(old_inode, false);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
@@ -938,7 +938,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(old_dir, old_nlink > 0);
up_write(&F2FS_I(old_dir)->i_sem);
}
- f2fs_mark_inode_dirty_sync(old_dir);
+ f2fs_mark_inode_dirty_sync(old_dir, false);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
@@ -953,7 +953,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(new_dir, new_nlink > 0);
up_write(&F2FS_I(new_dir)->i_sem);
}
- f2fs_mark_inode_dirty_sync(new_dir);
+ f2fs_mark_inode_dirty_sync(new_dir, false);
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 01177ec..b9078fd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -45,8 +45,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively
*/
if (type == FREE_NIDS) {
- mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
- PAGE_SHIFT;
+ mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
+ sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
@@ -270,8 +270,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
} else {
- f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
- nat_get_blkaddr(e) != ne->block_addr ||
+ f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
+ nat_get_blkaddr(e) !=
+ le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
}
}
@@ -1134,7 +1135,7 @@ repeat:
if (!page)
return ERR_PTR(-ENOMEM);
- err = read_node_page(page, READ_SYNC);
+ err = read_node_page(page, 0);
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
@@ -1204,6 +1205,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
ret = f2fs_write_inline_data(inode, page);
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
if (ret)
set_page_dirty(page);
page_out:
@@ -1338,7 +1340,8 @@ retry:
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
pagevec_release(&pvec);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
if (!IS_DNODE(page) || !is_cold_node(page))
@@ -1407,11 +1410,12 @@ continue_unlock:
"Retry to write fsync mark: ino=%u, idx=%lx",
ino, last_page->index);
lock_page(last_page);
+ f2fs_wait_on_page_writeback(last_page, NODE, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
}
-
+out:
if (nwritten)
f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
return ret ? -EIO: 0;
@@ -1570,7 +1574,7 @@ static int f2fs_write_node_page(struct page *page,
.sbi = sbi,
.type = NODE,
.op = REQ_OP_WRITE,
- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+ .op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
};
@@ -1692,11 +1696,35 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
- struct free_nid *i)
+static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_list list, bool new)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ if (new) {
+ int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+ if (err)
+ return err;
+ }
+
+ f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
+ i->state != NID_ALLOC);
+ nm_i->nid_cnt[list]++;
+ list_add_tail(&i->list, &nm_i->nid_list[list]);
+ return 0;
+}
+
+static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_list list, bool reuse)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
+ i->state != NID_ALLOC);
+ nm_i->nid_cnt[list]--;
list_del(&i->list);
- radix_tree_delete(&nm_i->free_nid_root, i->nid);
+ if (!reuse)
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
@@ -1704,9 +1732,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
-
- if (!available_free_memory(sbi, FREE_NIDS))
- return -1;
+ int err;
/* 0 nid should not be used */
if (unlikely(nid == 0))
@@ -1729,33 +1755,30 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
return 0;
}
- spin_lock(&nm_i->free_nid_list_lock);
- if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
+ spin_lock(&nm_i->nid_list_lock);
+ err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
+ spin_unlock(&nm_i->nid_list_lock);
+ radix_tree_preload_end();
+ if (err) {
kmem_cache_free(free_nid_slab, i);
return 0;
}
- list_add_tail(&i->list, &nm_i->free_nid_list);
- nm_i->fcnt++;
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
return 1;
}
-static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
bool need_free = false;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
- __del_from_free_nid_list(nm_i, i);
- nm_i->fcnt--;
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
need_free = true;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
@@ -1778,14 +1801,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
- if (blk_addr == NULL_ADDR) {
- if (add_free_nid(sbi, start_nid, true) < 0)
- break;
- }
+ if (blk_addr == NULL_ADDR)
+ add_free_nid(sbi, start_nid, true);
}
}
-void build_free_nids(struct f2fs_sb_info *sbi)
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1794,7 +1815,10 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nid_t nid = nm_i->next_scan_nid;
/* Enough entries */
- if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
+ return;
+
+ if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
/* readahead nat pages to be scanned */
@@ -1830,7 +1854,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
- remove_free_nid(nm_i, nid);
+ remove_free_nid(sbi, nid);
}
up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
@@ -1839,6 +1863,13 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nm_i->ra_nid_pages, META_NAT, false);
}
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+{
+ mutex_lock(&NM_I(sbi)->build_lock);
+ __build_free_nids(sbi, sync);
+ mutex_unlock(&NM_I(sbi)->build_lock);
+}
+
/*
* If this function returns success, caller can obtain a new nid
* from second parameter of this function.
@@ -1853,31 +1884,31 @@ retry:
if (time_to_inject(sbi, FAULT_ALLOC_NID))
return false;
#endif
- if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
- return false;
+ spin_lock(&nm_i->nid_list_lock);
- spin_lock(&nm_i->free_nid_list_lock);
+ if (unlikely(nm_i->available_nids == 0)) {
+ spin_unlock(&nm_i->nid_list_lock);
+ return false;
+ }
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
- f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
- list_for_each_entry(i, &nm_i->free_nid_list, list)
- if (i->state == NID_NEW)
- break;
-
- f2fs_bug_on(sbi, i->state != NID_NEW);
+ if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
+ i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
+ struct free_nid, list);
*nid = i->nid;
+
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
i->state = NID_ALLOC;
- nm_i->fcnt--;
- spin_unlock(&nm_i->free_nid_list_lock);
+ __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
+ nm_i->available_nids--;
+ spin_unlock(&nm_i->nid_list_lock);
return true;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- mutex_lock(&nm_i->build_lock);
- build_free_nids(sbi);
- mutex_unlock(&nm_i->build_lock);
+ build_free_nids(sbi, true);
goto retry;
}
@@ -1889,11 +1920,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
- __del_from_free_nid_list(nm_i, i);
- spin_unlock(&nm_i->free_nid_list_lock);
+ f2fs_bug_on(sbi, !i);
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
}
@@ -1910,17 +1941,22 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
if (!nid)
return;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
+ f2fs_bug_on(sbi, !i);
+
if (!available_free_memory(sbi, FREE_NIDS)) {
- __del_from_free_nid_list(nm_i, i);
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
need_free = true;
} else {
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
i->state = NID_NEW;
- nm_i->fcnt++;
+ __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
}
- spin_unlock(&nm_i->free_nid_list_lock);
+
+ nm_i->available_nids++;
+
+ spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
@@ -1932,24 +1968,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
- if (nm_i->fcnt <= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
- spin_lock(&nm_i->free_nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
- if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
+ list) {
+ if (nr_shrink <= 0 ||
+ nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
break;
- if (i->state == NID_ALLOC)
- continue;
- __del_from_free_nid_list(nm_i, i);
+
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
kmem_cache_free(free_nid_slab, i);
- nm_i->fcnt--;
nr_shrink--;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
@@ -2005,7 +2041,7 @@ recover_xnid:
if (unlikely(!inc_valid_node_count(sbi, inode)))
f2fs_bug_on(sbi, 1);
- remove_free_nid(NM_I(sbi), new_xnid);
+ remove_free_nid(sbi, new_xnid);
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
@@ -2035,7 +2071,7 @@ retry:
}
/* Should not use this inode from free nid list */
- remove_free_nid(NM_I(sbi), ino);
+ remove_free_nid(sbi, ino);
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
@@ -2069,7 +2105,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
block_t addr;
- int bio_blocks = MAX_BIO_BLOCKS(sbi);
int i, idx, last_offset, nrpages;
/* scan the node segment */
@@ -2078,7 +2113,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
- nrpages = min(last_offset - i, bio_blocks);
+ nrpages = min(last_offset - i, BIO_MAX_PAGES);
/* readahead node pages */
ra_meta_pages(sbi, addr, nrpages, META_POR, true);
@@ -2120,6 +2155,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
+
+ /*
+ * if a free nat in journal has not been used after last
+ * checkpoint, we should remove it from available nids,
+ * since later we will add it again.
+ */
+ if (!get_nat_flag(ne, IS_DIRTY) &&
+ le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
+ spin_lock(&nm_i->nid_list_lock);
+ nm_i->available_nids--;
+ spin_unlock(&nm_i->nid_list_lock);
+ }
+
__set_nat_cache_dirty(nm_i, ne);
}
update_nats_in_cursum(journal, -i);
@@ -2192,8 +2240,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_nat_from_node_info(raw_ne, &ne->ni);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- if (nat_get_blkaddr(ne) == NULL_ADDR)
+ if (nat_get_blkaddr(ne) == NULL_ADDR) {
add_free_nid(sbi, nid, false);
+ spin_lock(&NM_I(sbi)->nid_list_lock);
+ NM_I(sbi)->available_nids++;
+ spin_unlock(&NM_I(sbi)->nid_list_lock);
+ }
}
if (to_journal)
@@ -2268,21 +2320,24 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
- nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
- nm_i->fcnt = 0;
+ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
+ F2FS_RESERVED_NODE_NUM;
+ nm_i->nid_cnt[FREE_NID_LIST] = 0;
+ nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
- INIT_LIST_HEAD(&nm_i->free_nid_list);
+ INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
+ INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
mutex_init(&nm_i->build_lock);
- spin_lock_init(&nm_i->free_nid_list_lock);
+ spin_lock_init(&nm_i->nid_list_lock);
init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
@@ -2310,7 +2365,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
- build_free_nids(sbi);
+ build_free_nids(sbi, true);
return 0;
}
@@ -2327,17 +2382,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
return;
/* destroy free nid list */
- spin_lock(&nm_i->free_nid_list_lock);
- list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
- f2fs_bug_on(sbi, i->state == NID_ALLOC);
- __del_from_free_nid_list(nm_i, i);
- nm_i->fcnt--;
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
+ list) {
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
}
- f2fs_bug_on(sbi, nm_i->fcnt);
- spin_unlock(&nm_i->free_nid_list_lock);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
+ f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
+ spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
down_write(&nm_i->nat_tree_lock);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 868bec6..e7997e2 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -169,14 +169,15 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *fnid;
- spin_lock(&nm_i->free_nid_list_lock);
- if (nm_i->fcnt <= 0) {
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
+ if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
+ spin_unlock(&nm_i->nid_list_lock);
return;
}
- fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
+ fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
+ struct free_nid, list);
*nid = fnid->nid;
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
}
/*
@@ -313,7 +314,7 @@ static inline bool is_recoverable_dnode(struct page *page)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
- return cpu_to_le64(cp_ver) == cpver_of_node(page);
+ return cp_ver == cpver_of_node(page);
}
/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2fc84a9..981a958 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -180,13 +180,15 @@ static void recover_inode(struct inode *inode, struct page *page)
inode->i_mode = le16_to_cpu(raw->i_mode);
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
- inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
+ inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+ inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+ F2FS_I(inode)->i_advise = raw->i_advise;
+
if (file_enc_name(inode))
name = "<encrypted>";
else
@@ -196,32 +198,6 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
-static bool is_same_inode(struct inode *inode, struct page *ipage)
-{
- struct f2fs_inode *ri = F2FS_INODE(ipage);
- struct timespec disk;
-
- if (!IS_INODE(ipage))
- return true;
-
- disk.tv_sec = le64_to_cpu(ri->i_ctime);
- disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
- if (timespec_compare(&inode->i_ctime, &disk) > 0)
- return false;
-
- disk.tv_sec = le64_to_cpu(ri->i_atime);
- disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
- if (timespec_compare(&inode->i_atime, &disk) > 0)
- return false;
-
- disk.tv_sec = le64_to_cpu(ri->i_mtime);
- disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
- if (timespec_compare(&inode->i_mtime, &disk) > 0)
- return false;
-
- return true;
-}
-
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
struct curseg_info *curseg;
@@ -248,10 +224,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
- if (entry) {
- if (!is_same_inode(entry->inode, page))
- goto next;
- } else {
+ if (!entry) {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
@@ -454,7 +427,8 @@ retry_dn:
continue;
}
- if ((start + 1) << PAGE_SHIFT > i_size_read(inode))
+ if (!file_keep_isize(inode) &&
+ (i_size_read(inode) <= (start << PAGE_SHIFT)))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
/*
@@ -507,8 +481,10 @@ err:
f2fs_put_dnode(&dn);
out:
f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx, recovered = %d blocks, err = %d",
- inode->i_ino, recovered, err);
+ "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
+ inode->i_ino,
+ file_keep_isize(inode) ? "keep" : "recover",
+ recovered, err);
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fc886f0..0738f48 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -259,7 +259,7 @@ static int __commit_inmem_pages(struct inode *inode,
.sbi = sbi,
.type = DATA,
.op = REQ_OP_WRITE,
- .op_flags = WRITE_SYNC | REQ_PRIO,
+ .op_flags = REQ_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
bool submit_bio = false;
@@ -274,8 +274,10 @@ static int __commit_inmem_pages(struct inode *inode,
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
- if (clear_page_dirty_for_io(page))
+ if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
+ }
fio.page = page;
err = do_write_data_page(&fio);
@@ -287,7 +289,6 @@ static int __commit_inmem_pages(struct inode *inode,
/* record old blkaddr for revoking */
cur->old_addr = fio.old_blkaddr;
- clear_cold_data(page);
submit_bio = true;
}
unlock_page(page);
@@ -363,7 +364,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
mutex_lock(&sbi->gc_mutex);
- f2fs_gc(sbi, false);
+ f2fs_gc(sbi, false, false);
}
}
@@ -380,14 +381,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, MAX_FREE_NIDS);
else
- build_free_nids(sbi);
+ build_free_nids(sbi, false);
+
+ if (!is_idle(sbi))
+ return;
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
!available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
- (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
+ f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
@@ -400,6 +404,33 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
}
}
+static int __submit_flush_wait(struct block_device *bdev)
+{
+ struct bio *bio = f2fs_bio_alloc(0);
+ int ret;
+
+ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ bio->bi_bdev = bdev;
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ return ret;
+}
+
+static int submit_flush_wait(struct f2fs_sb_info *sbi)
+{
+ int ret = __submit_flush_wait(sbi->sb->s_bdev);
+ int i;
+
+ if (sbi->s_ndevs && !ret) {
+ for (i = 1; i < sbi->s_ndevs; i++) {
+ ret = __submit_flush_wait(FDEV(i).bdev);
+ if (ret)
+ break;
+ }
+ }
+ return ret;
+}
+
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
@@ -410,25 +441,18 @@ repeat:
return 0;
if (!llist_empty(&fcc->issue_list)) {
- struct bio *bio;
struct flush_cmd *cmd, *next;
int ret;
- bio = f2fs_bio_alloc(0);
-
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
- bio->bi_bdev = sbi->sb->s_bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
- ret = submit_bio_wait(bio);
-
+ ret = submit_flush_wait(sbi);
llist_for_each_entry_safe(cmd, next,
fcc->dispatch_list, llnode) {
cmd->ret = ret;
complete(&cmd->wait);
}
- bio_put(bio);
fcc->dispatch_list = NULL;
}
@@ -449,15 +473,11 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
return 0;
if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
- struct bio *bio = f2fs_bio_alloc(0);
int ret;
atomic_inc(&fcc->submit_flush);
- bio->bi_bdev = sbi->sb->s_bdev;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
- ret = submit_bio_wait(bio);
+ ret = submit_flush_wait(sbi);
atomic_dec(&fcc->submit_flush);
- bio_put(bio);
return ret;
}
@@ -469,8 +489,13 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (!fcc->dispatch_list)
wake_up(&fcc->flush_wait_queue);
- wait_for_completion(&cmd.wait);
- atomic_dec(&fcc->submit_flush);
+ if (fcc->f2fs_issue_flush) {
+ wait_for_completion(&cmd.wait);
+ atomic_dec(&fcc->submit_flush);
+ } else {
+ llist_del_all(&fcc->issue_list);
+ atomic_set(&fcc->submit_flush, 0);
+ }
return cmd.ret;
}
@@ -481,6 +506,11 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc;
int err = 0;
+ if (SM_I(sbi)->cmd_control_info) {
+ fcc = SM_I(sbi)->cmd_control_info;
+ goto init_thread;
+ }
+
fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
if (!fcc)
return -ENOMEM;
@@ -488,6 +518,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
init_waitqueue_head(&fcc->flush_wait_queue);
init_llist_head(&fcc->issue_list);
SM_I(sbi)->cmd_control_info = fcc;
+init_thread:
fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
@@ -500,14 +531,20 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
return err;
}
-void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
- if (fcc && fcc->f2fs_issue_flush)
- kthread_stop(fcc->f2fs_issue_flush);
- kfree(fcc);
- SM_I(sbi)->cmd_control_info = NULL;
+ if (fcc && fcc->f2fs_issue_flush) {
+ struct task_struct *flush_thread = fcc->f2fs_issue_flush;
+
+ fcc->f2fs_issue_flush = NULL;
+ kthread_stop(flush_thread);
+ }
+ if (free) {
+ kfree(fcc);
+ SM_I(sbi)->cmd_control_info = NULL;
+ }
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -633,15 +670,23 @@ static void f2fs_submit_bio_wait_endio(struct bio *bio)
}
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t blkstart, block_t blklen)
{
- struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio = NULL;
int err;
- err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
- &bio);
+ trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+
+ if (sbi->s_ndevs) {
+ int devi = f2fs_target_device_index(sbi, blkstart);
+
+ blkstart -= FDEV(devi).start_blk;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(blkstart),
+ SECTOR_FROM_BLOCK(blklen),
+ GFP_NOFS, 0, &bio);
if (!err && bio) {
struct bio_entry *be = __add_bio_entry(sbi, bio);
@@ -654,24 +699,101 @@ int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector,
return err;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+ sector_t nr_sects = SECTOR_FROM_BLOCK(blklen);
+ sector_t sector;
+ int devi = 0;
+
+ if (sbi->s_ndevs) {
+ devi = f2fs_target_device_index(sbi, blkstart);
+ blkstart -= FDEV(devi).start_blk;
+ }
+ sector = SECTOR_FROM_BLOCK(blkstart);
+
+ if (sector & (bdev_zone_size(bdev) - 1) ||
+ nr_sects != bdev_zone_size(bdev)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "(%d) %s: Unaligned discard attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path: "",
+ blkstart, blklen);
+ return -EIO;
+ }
+
+ /*
+ * We need to know the type of the zone: for conventional zones,
+ * use regular discard if the drive supports it. For sequential
+ * zones, reset the zone write pointer.
+ */
+ switch (get_blkz_type(sbi, bdev, blkstart)) {
+
+ case BLK_ZONE_TYPE_CONVENTIONAL:
+ if (!blk_queue_discard(bdev_get_queue(bdev)))
+ return 0;
+ return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ trace_f2fs_issue_reset_zone(sbi->sb, blkstart);
+ return blkdev_reset_zones(bdev, sector,
+ nr_sects, GFP_NOFS);
+ default:
+ /* Unknown zone type: broken device ? */
+ return -EIO;
+ }
+}
+#endif
+
+static int __issue_discard_async(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t blkstart, block_t blklen)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
+ bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
+#endif
+ return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen);
+}
+
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
- sector_t start = SECTOR_FROM_BLOCK(blkstart);
- sector_t len = SECTOR_FROM_BLOCK(blklen);
+ sector_t start = blkstart, len = 0;
+ struct block_device *bdev;
struct seg_entry *se;
unsigned int offset;
block_t i;
+ int err = 0;
+
+ bdev = f2fs_target_device(sbi, blkstart, NULL);
+
+ for (i = blkstart; i < blkstart + blklen; i++, len++) {
+ if (i != start) {
+ struct block_device *bdev2 =
+ f2fs_target_device(sbi, i, NULL);
+
+ if (bdev2 != bdev) {
+ err = __issue_discard_async(sbi, bdev,
+ start, len);
+ if (err)
+ return err;
+ bdev = bdev2;
+ start = i;
+ len = 0;
+ }
+ }
- for (i = blkstart; i < blkstart + blklen; i++) {
se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
offset = GET_BLKOFF_FROM_SEG0(sbi, i);
if (!f2fs_test_and_set_bit(offset, se->discard_map))
sbi->discard_blks--;
}
- trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
- return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0);
+
+ if (len)
+ err = __issue_discard_async(sbi, bdev, start, len);
+ return err;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
@@ -1296,25 +1418,21 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
-static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
-{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- unsigned int old_segno;
-
- old_segno = curseg->segno;
- SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
- locate_dirty_segment(sbi, old_segno);
-}
-
void allocate_new_segments(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg;
+ unsigned int old_segno;
int i;
if (test_opt(sbi, LFS))
return;
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
- __allocate_new_segments(sbi, i);
+ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
+ curseg = CURSEG_I(sbi, i);
+ old_segno = curseg->segno;
+ SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
+ locate_dirty_segment(sbi, old_segno);
+ }
}
static const struct segment_allocation default_salloc_ops = {
@@ -1448,21 +1566,11 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
- struct curseg_info *curseg;
- bool direct_io = (type == CURSEG_DIRECT_IO);
-
- type = direct_io ? CURSEG_WARM_DATA : type;
-
- curseg = CURSEG_I(sbi, type);
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
- /* direct_io'ed data is aligned to the segment for better performance */
- if (direct_io && curseg->next_blkoff &&
- !has_not_enough_free_secs(sbi, 0, 0))
- __allocate_new_segments(sbi, type);
-
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/*
@@ -1515,7 +1623,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
.sbi = sbi,
.type = META,
.op = REQ_OP_WRITE,
- .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO,
+ .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = page->index,
.new_blkaddr = page->index,
.page = page,
@@ -2166,7 +2274,6 @@ out:
static int build_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
- struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap, *dst_bitmap;
@@ -2233,7 +2340,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
- sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
+ sit_i->written_valid_blocks = 0;
sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
@@ -2315,10 +2422,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
- int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
do {
- readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
+ readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+ META_SIT, true);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
@@ -2387,6 +2494,9 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
struct seg_entry *sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
+ else
+ SIT_I(sbi)->written_valid_blocks +=
+ sentry->valid_blocks;
}
/* set use the current segments */
@@ -2645,7 +2755,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
if (!sm_info)
return;
- destroy_flush_cmd_control(sbi);
+ destroy_flush_cmd_control(sbi, true);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index fecb856..9d44ce8 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -18,6 +18,8 @@
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
+#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
@@ -102,8 +104,6 @@
(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors) \
(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
-#define MAX_BIO_BLOCKS(sbi) \
- ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
/*
* indicate a block allocation direction: RIGHT and LEFT.
@@ -471,11 +471,12 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+ int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (test_opt(sbi, LFS))
return false;
- return free_sections(sbi) <= (node_secs + 2 * dent_secs +
+ return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + 1);
}
@@ -484,14 +485,14 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
-
- node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
+ int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
return (free_sections(sbi) + freed) <=
- (node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
+ (node_secs + 2 * dent_secs + imeta_secs +
+ reserved_sections(sbi) + needed);
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
@@ -695,13 +696,6 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
return false;
}
-static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
-{
- struct block_device *bdev = sbi->sb->s_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
- return SECTOR_TO_BLOCK(queue_max_sectors(q));
-}
-
/*
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
@@ -719,7 +713,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
else if (type == NODE)
return 8 * sbi->blocks_per_seg;
else if (type == META)
- return 8 * MAX_BIO_BLOCKS(sbi);
+ return 8 * BIO_MAX_PAGES;
else
return 0;
}
@@ -736,11 +730,9 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
return 0;
nr_to_write = wbc->nr_to_write;
-
+ desired = BIO_MAX_PAGES;
if (type == NODE)
- desired = 2 * max_hw_blocks(sbi);
- else
- desired = MAX_BIO_BLOCKS(sbi);
+ desired <<= 1;
wbc->nr_to_write = desired;
return desired - nr_to_write;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 46c9154..5c60fc2 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -21,14 +21,16 @@ static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
- return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
+ long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
+
+ return count > 0 ? count : 0;
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
- if (NM_I(sbi)->fcnt > MAX_FREE_NIDS)
- return NM_I(sbi)->fcnt - MAX_FREE_NIDS;
- return 0;
+ long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
+
+ return count > 0 ? count : 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6132b4c..702638e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -412,14 +412,20 @@ static int parse_options(struct super_block *sb, char *options)
q = bdev_get_queue(sb->s_bdev);
if (blk_queue_discard(q)) {
set_opt(sbi, DISCARD);
- } else {
+ } else if (!f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but "
"the device does not support discard");
}
break;
case Opt_nodiscard:
+ if (f2fs_sb_mounted_blkzoned(sb)) {
+ f2fs_msg(sb, KERN_WARNING,
+ "discard is required for zoned block devices");
+ return -EINVAL;
+ }
clear_opt(sbi, DISCARD);
+ break;
case Opt_noheap:
set_opt(sbi, NOHEAP);
break;
@@ -512,6 +518,13 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
+ if (f2fs_sb_mounted_blkzoned(sb)) {
+ f2fs_msg(sb, KERN_WARNING,
+ "adaptive mode is not allowed with "
+ "zoned block device feature");
+ kfree(name);
+ return -EINVAL;
+ }
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
} else if (strlen(name) == 3 &&
!strncmp(name, "lfs", 3)) {
@@ -558,13 +571,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
init_once((void *) fi);
- if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
- kmem_cache_free(f2fs_inode_cachep, fi);
- return NULL;
- }
-
/* Initialize f2fs-specific inode info */
fi->vfs_inode.i_version = 1;
+ atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
@@ -620,24 +629,25 @@ static int f2fs_drop_inode(struct inode *inode)
return generic_drop_inode(inode);
}
-int f2fs_inode_dirtied(struct inode *inode)
+int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int ret = 0;
spin_lock(&sbi->inode_lock[DIRTY_META]);
if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
- spin_unlock(&sbi->inode_lock[DIRTY_META]);
- return 1;
+ ret = 1;
+ } else {
+ set_inode_flag(inode, FI_DIRTY_INODE);
+ stat_inc_dirty_inode(sbi, DIRTY_META);
}
-
- set_inode_flag(inode, FI_DIRTY_INODE);
- list_add_tail(&F2FS_I(inode)->gdirty_list,
+ if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
+ list_add_tail(&F2FS_I(inode)->gdirty_list,
&sbi->inode_list[DIRTY_META]);
- inc_page_count(sbi, F2FS_DIRTY_IMETA);
- stat_inc_dirty_inode(sbi, DIRTY_META);
+ inc_page_count(sbi, F2FS_DIRTY_IMETA);
+ }
spin_unlock(&sbi->inode_lock[DIRTY_META]);
-
- return 0;
+ return ret;
}
void f2fs_inode_synced(struct inode *inode)
@@ -649,10 +659,12 @@ void f2fs_inode_synced(struct inode *inode)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return;
}
- list_del_init(&F2FS_I(inode)->gdirty_list);
+ if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
+ list_del_init(&F2FS_I(inode)->gdirty_list);
+ dec_page_count(sbi, F2FS_DIRTY_IMETA);
+ }
clear_inode_flag(inode, FI_DIRTY_INODE);
clear_inode_flag(inode, FI_AUTO_RECOVER);
- dec_page_count(sbi, F2FS_DIRTY_IMETA);
stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
}
@@ -676,7 +688,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
- f2fs_inode_dirtied(inode);
+ f2fs_inode_dirtied(inode, false);
}
static void f2fs_i_callback(struct rcu_head *head)
@@ -687,20 +699,28 @@ static void f2fs_i_callback(struct rcu_head *head)
static void f2fs_destroy_inode(struct inode *inode)
{
- percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
call_rcu(&inode->i_rcu, f2fs_i_callback);
}
static void destroy_percpu_info(struct f2fs_sb_info *sbi)
{
- int i;
-
- for (i = 0; i < NR_COUNT_TYPE; i++)
- percpu_counter_destroy(&sbi->nr_pages[i]);
percpu_counter_destroy(&sbi->alloc_valid_block_count);
percpu_counter_destroy(&sbi->total_valid_inode_count);
}
+static void destroy_device_list(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ blkdev_put(FDEV(i).bdev, FMODE_EXCL);
+#ifdef CONFIG_BLK_DEV_ZONED
+ kfree(FDEV(i).blkz_type);
+#endif
+ }
+ kfree(sbi->devs);
+}
+
static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -738,7 +758,6 @@ static void f2fs_put_super(struct super_block *sb)
* In addition, EIO will skip do checkpoint, we need this as well.
*/
release_ino_entry(sbi, true);
- release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
@@ -762,6 +781,8 @@ static void f2fs_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
+ destroy_device_list(sbi);
+
destroy_percpu_info(sbi);
kfree(sbi);
}
@@ -789,13 +810,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
static int f2fs_freeze(struct super_block *sb)
{
- int err;
-
if (f2fs_readonly(sb))
return 0;
- err = f2fs_sync_fs(sb, 1);
- return err;
+ /* IO error happened before */
+ if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
+ return -EIO;
+
+ /* must be clean, since sync_filesystem() was already called */
+ if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
+ return -EINVAL;
+ return 0;
}
static int f2fs_unfreeze(struct super_block *sb)
@@ -822,7 +847,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = user_block_count - valid_user_blocks(sbi);
buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
- buf->f_ffree = buf->f_files - valid_inode_count(sbi);
+ buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
+ buf->f_bavail);
buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
@@ -974,7 +1000,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, EXTENT_CACHE);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_mounted_hmsmr(sbi->sb)) {
+ if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
set_opt(sbi, DISCARD);
} else {
@@ -1076,8 +1102,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if flush_merge is not passed in mount option.
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
- destroy_flush_cmd_control(sbi);
- } else if (!SM_I(sbi)->cmd_control_info) {
+ clear_opt(sbi, FLUSH_MERGE);
+ destroy_flush_cmd_control(sbi, false);
+ } else {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -1238,7 +1265,7 @@ static int __f2fs_commit_super(struct buffer_head *bh,
unlock_buffer(bh);
/* it's rare case, we can do fua all the time */
- return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
+ return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
@@ -1426,6 +1453,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned int ovp_segments, reserved_segments;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1437,6 +1465,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
+ ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+ reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+
+ if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ ovp_segments == 0 || reserved_segments == 0)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong layout: check mkfs.f2fs version");
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -1447,6 +1485,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
static void init_sb_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = sbi->raw_super;
+ int i;
sbi->log_sectors_per_block =
le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1471,6 +1510,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
+ for (i = 0; i < NR_COUNT_TYPE; i++)
+ atomic_set(&sbi->nr_pages[i], 0);
+
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
@@ -1486,13 +1528,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
static int init_percpu_info(struct f2fs_sb_info *sbi)
{
- int i, err;
-
- for (i = 0; i < NR_COUNT_TYPE; i++) {
- err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
- if (err)
- return err;
- }
+ int err;
err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
if (err)
@@ -1502,6 +1538,71 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
GFP_KERNEL);
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
+{
+ struct block_device *bdev = FDEV(devi).bdev;
+ sector_t nr_sectors = bdev->bd_part->nr_sects;
+ sector_t sector = 0;
+ struct blk_zone *zones;
+ unsigned int i, nr_zones;
+ unsigned int n = 0;
+ int err = -EIO;
+
+ if (!f2fs_sb_mounted_blkzoned(sbi->sb))
+ return 0;
+
+ if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
+ SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+ return -EINVAL;
+ sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+ if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
+ __ilog2_u32(sbi->blocks_per_blkz))
+ return -EINVAL;
+ sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
+ FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
+ sbi->log_blocks_per_blkz;
+ if (nr_sectors & (bdev_zone_size(bdev) - 1))
+ FDEV(devi).nr_blkz++;
+
+ FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+ if (!FDEV(devi).blkz_type)
+ return -ENOMEM;
+
+#define F2FS_REPORT_NR_ZONES 4096
+
+ zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
+ GFP_KERNEL);
+ if (!zones)
+ return -ENOMEM;
+
+ /* Get block zones type */
+ while (zones && sector < nr_sectors) {
+
+ nr_zones = F2FS_REPORT_NR_ZONES;
+ err = blkdev_report_zones(bdev, sector,
+ zones, &nr_zones,
+ GFP_KERNEL);
+ if (err)
+ break;
+ if (!nr_zones) {
+ err = -EIO;
+ break;
+ }
+
+ for (i = 0; i < nr_zones; i++) {
+ FDEV(devi).blkz_type[n] = zones[i].type;
+ sector += zones[i].len;
+ n++;
+ }
+ }
+
+ kfree(zones);
+
+ return err;
+}
+#endif
+
/*
* Read f2fs raw super block.
* Because we have two copies of super block, so read both of them
@@ -1594,6 +1695,77 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}
+static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ int i;
+
+ for (i = 0; i < MAX_DEVICES; i++) {
+ if (!RDEV(i).path[0])
+ return 0;
+
+ if (i == 0) {
+ sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
+ MAX_DEVICES, GFP_KERNEL);
+ if (!sbi->devs)
+ return -ENOMEM;
+ }
+
+ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+ FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
+ if (i == 0) {
+ FDEV(i).start_blk = 0;
+ FDEV(i).end_blk = FDEV(i).start_blk +
+ (FDEV(i).total_segments <<
+ sbi->log_blocks_per_seg) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
+ } else {
+ FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+ FDEV(i).end_blk = FDEV(i).start_blk +
+ (FDEV(i).total_segments <<
+ sbi->log_blocks_per_seg) - 1;
+ }
+
+ FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+ sbi->sb->s_mode, sbi->sb->s_type);
+ if (IS_ERR(FDEV(i).bdev))
+ return PTR_ERR(FDEV(i).bdev);
+
+ /* to release errored devices */
+ sbi->s_ndevs = i + 1;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
+ !f2fs_sb_mounted_blkzoned(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Zoned block device feature not enabled\n");
+ return -EINVAL;
+ }
+ if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+ if (init_blkz_info(sbi, i)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Failed to initialize F2FS blkzone information");
+ return -EINVAL;
+ }
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+ "Host-aware" : "Host-managed");
+ continue;
+ }
+#endif
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Mount Device [%2d]: %20s, %8u, %8x - %8x",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk);
+ }
+ return 0;
+}
+
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
@@ -1641,6 +1813,18 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
+ /*
+ * The BLKZONED feature indicates that the drive was formatted with
+ * zone alignment optimization. This is optional for host-aware
+ * devices, but mandatory for host-managed zoned block devices.
+ */
+#ifndef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_mounted_blkzoned(sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Zoned block device support is not enabled\n");
+ goto free_sb_buf;
+ }
+#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
@@ -1710,6 +1894,13 @@ try_onemore:
goto free_meta_inode;
}
+ /* Initialize device list */
+ err = f2fs_scan_devices(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+ goto free_devices;
+ }
+
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
percpu_counter_set(&sbi->total_valid_inode_count,
@@ -1893,12 +2084,21 @@ free_node_inode:
mutex_lock(&sbi->umount_mutex);
release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
+ /*
+ * Some dirty meta pages can be produced by recover_orphan_inodes()
+ * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
+ * followed by write_checkpoint() through f2fs_write_node_pages(), which
+ * falls into an infinite loop in sync_meta_pages().
+ */
+ truncate_inode_pages_final(META_MAPPING(sbi));
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
free_nm:
destroy_node_manager(sbi);
free_sm:
destroy_segment_manager(sbi);
+free_devices:
+ destroy_device_list(sbi);
kfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
@@ -2044,3 +2244,4 @@ module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");
+
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 3e1c028..c47ce2f 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
@@ -554,7 +554,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
- f2fs_mark_inode_dirty_sync(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05713a5..ef60059 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1769,15 +1769,13 @@ static long wb_writeback(struct bdi_writeback *wb,
* become available for writeback. Otherwise
* we'll just busyloop.
*/
- if (!list_empty(&wb->b_more_io)) {
- trace_writeback_wait(wb, work);
- inode = wb_inode(wb->b_more_io.prev);
- spin_lock(&inode->i_lock);
- spin_unlock(&wb->list_lock);
- /* This function drops i_lock... */
- inode_sleep_on_writeback(inode);
- spin_lock(&wb->list_lock);
- }
+ trace_writeback_wait(wb, work);
+ inode = wb_inode(wb->b_more_io.prev);
+ spin_lock(&inode->i_lock);
+ spin_unlock(&wb->list_lock);
+ /* This function drops i_lock... */
+ inode_sleep_on_writeback(inode);
+ spin_lock(&wb->list_lock);
}
spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6a4d0e5..096f799 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = {
.d_release = fuse_dentry_release,
};
+const struct dentry_operations fuse_root_dentry_operations = {
+ .d_init = fuse_dentry_init,
+ .d_release = fuse_dentry_release,
+};
+
int fuse_valid_type(int m)
{
return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
@@ -1734,8 +1739,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
* This should be done on write(), truncate() and chown().
*/
if (!fc->handle_killpriv) {
- int kill;
-
/*
* ia_mode calculation may have used stale i_mode.
* Refresh and recalculate.
@@ -1745,12 +1748,11 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
return ret;
attr->ia_mode = inode->i_mode;
- kill = should_remove_suid(entry);
- if (kill & ATTR_KILL_SUID) {
+ if (inode->i_mode & S_ISUID) {
attr->ia_valid |= ATTR_MODE;
attr->ia_mode &= ~S_ISUID;
}
- if (kill & ATTR_KILL_SGID) {
+ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
attr->ia_valid |= ATTR_MODE;
attr->ia_mode &= ~S_ISGID;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index abc66a6..2401c5d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
{
struct inode *inode = page->mapping->host;
+ /* Haven't copied anything? Skip zeroing, size extending, dirtying. */
+ if (!copied)
+ goto unlock;
+
if (!PageUptodate(page)) {
/* Zero any unwritten bytes at the end of the page */
size_t endoff = (pos + copied) & ~PAGE_MASK;
@@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
fuse_write_update_size(inode, pos + copied);
set_page_dirty(page);
+
+unlock:
unlock_page(page);
put_page(page);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0dfbb13..9130794 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode)
extern const struct file_operations fuse_dev_operations;
extern const struct dentry_operations fuse_dentry_operations;
+extern const struct dentry_operations fuse_root_dentry_operations;
/**
* Inode to nodeid comparison.
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1714109..6fe6a88 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
err = -ENOMEM;
root = fuse_get_root_inode(sb, d.rootmode);
+ sb->s_d_op = &fuse_root_dentry_operations;
root_dentry = d_make_root(root);
if (!root_dentry)
goto err_dev_free;
- /* only now - we want root dentry with NULL ->d_op */
+ /* Root dentry doesn't have .d_revalidate */
sb->s_d_op = &fuse_dentry_operations;
init_req = fuse_request_alloc(0);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 3cdde5f..7911321 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -62,6 +62,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/vmalloc.h>
+#include <linux/bio.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e58ccef0..27c00a1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
- int op_flags = WRITE_FLUSH_FUA | REQ_META;
+ int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);
@@ -682,7 +682,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
log_flush_wait(sdp);
- op_flags = WRITE_SYNC | REQ_META | REQ_PRIO;
+ op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
}
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 49d5a1b..b1f9144 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -231,7 +231,7 @@ static void gfs2_end_log_write(struct bio *bio)
* gfs2_log_flush_bio - Submit any pending log bio
* @sdp: The superblock
* @op: REQ_OP
- * @op_flags: rq_flag_bits
+ * @op_flags: req_flag_bits
*
* Submit any pending part-built or full bio to the block device. If
* there is no pending bio, then this is a no-op.
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 373639a5..49db8ef 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
{
struct buffer_head *bh, *head;
int nr_underway = 0;
- int write_flags = REQ_META | REQ_PRIO |
- (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
@@ -285,7 +284,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
}
}
- gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
+ gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num);
if (!(flags & DIO_WAIT))
return 0;
@@ -453,7 +452,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
- ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh);
+ ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ff72ac6..a34308d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -246,7 +246,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
bio->bi_end_io = end_bio_io_page;
bio->bi_private = page;
- bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META);
+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_META);
submit_bio(bio);
wait_on_page_locked(page);
bio_put(bio);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 11854dd..67aedf4 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
error2 = hfsplus_submit_bio(sb,
sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
sbi->s_vhdr_buf, NULL, REQ_OP_WRITE,
- WRITE_SYNC);
+ REQ_SYNC);
if (!error)
error = error2;
if (!write_backup)
@@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
error2 = hfsplus_submit_bio(sb,
sbi->part_start + sbi->sect_count - 2,
sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE,
- WRITE_SYNC);
+ REQ_SYNC);
if (!error)
error2 = error;
out:
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 44af14b..9bb2fe3 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/bio.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 98b3eb7..0ec1373 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -377,9 +377,9 @@ repeat:
{
int p;
for (p = 0; p < rr->u.ER.len_id; p++)
- printk("%c", rr->u.ER.data[p]);
+ printk(KERN_CONT "%c", rr->u.ER.data[p]);
}
- printk("\n");
+ printk(KERN_CONT "\n");
break;
case SIG('P', 'X'):
inode->i_mode = isonum_733(rr->u.PX.mode);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 684996c..4055f51 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -186,7 +186,7 @@ __flush_batch(journal_t *journal, int *batch_count)
blk_start_plug(&plug);
for (i = 0; i < *batch_count; i++)
- write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
+ write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
blk_finish_plug(&plug);
for (i = 0; i < *batch_count; i++) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 31f8ca0..8c51436 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -155,9 +155,10 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_has_feature_async_commit(journal))
- ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh);
+ ret = submit_bh(REQ_OP_WRITE,
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh);
else
- ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+ ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
*cbh = bh;
return ret;
@@ -402,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
journal->j_tail,
- WRITE_SYNC);
+ REQ_SYNC);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
jbd_debug(3, "superblock not updated\n");
@@ -717,7 +718,7 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
+ submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
}
cond_resched();
stats.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 927da49..8ed971e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -913,7 +913,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
* space and if we lose sb update during power failure we'd replay
* old transaction with possibly newly overwritten data.
*/
- ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+ ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA);
if (ret)
goto out;
@@ -1306,7 +1306,7 @@ static int journal_reset(journal_t *journal)
/* Lock here to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex);
/*
- * Update log tail information. We use WRITE_FUA since new
+ * Update log tail information. We use REQ_FUA since new
* transaction will start reusing journal space and so we
* must make sure information about current log tail is on
* disk before that.
@@ -1314,7 +1314,7 @@ static int journal_reset(journal_t *journal)
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
journal->j_tail,
- WRITE_FUA);
+ REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
return jbd2_journal_start_thread(journal);
@@ -1454,7 +1454,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
sb->s_errno = cpu_to_be32(journal->j_errno);
read_unlock(&journal->j_state_lock);
- jbd2_write_superblock(journal, WRITE_FUA);
+ jbd2_write_superblock(journal, REQ_FUA);
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
@@ -1720,7 +1720,8 @@ int jbd2_journal_destroy(journal_t *journal)
++journal->j_transaction_sequence;
write_unlock(&journal->j_state_lock);
- jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
+ jbd2_mark_journal_empty(journal,
+ REQ_PREFLUSH | REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
} else
err = -EIO;
@@ -1979,7 +1980,7 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
- jbd2_mark_journal_empty(journal, WRITE_FUA);
+ jbd2_mark_journal_empty(journal, REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock);
J_ASSERT(!journal->j_running_transaction);
@@ -2025,7 +2026,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (write) {
/* Lock to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal, WRITE_FUA);
+ jbd2_mark_journal_empty(journal, REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 91171dc..cfc38b5 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -648,7 +648,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor);
- write_dirty_buffer(descriptor, WRITE_SYNC);
+ write_dirty_buffer(descriptor, REQ_SYNC);
}
#endif
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 8653cac..b6fd1ff 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -121,7 +121,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
jfs_set_inode_flags(inode);
inode_unlock(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
setflags_out:
mnt_drop_write_file(filp);
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a21ea8b..bb1da1f 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2002,7 +2002,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
+ bio->bi_opf = REQ_OP_READ;
/*check if journaling to disk has been disabled*/
if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
@@ -2146,7 +2146,7 @@ static void lbmStartIO(struct lbuf * bp)
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
/* check if journaling to disk has been disabled */
if (log->no_integrity) {
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index a198211..ac9e108 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -335,7 +335,7 @@ static int kernfs_xattr_set(const struct xattr_handler *handler,
return simple_xattr_set(&attrs->xattrs, name, value, size, flags);
}
-const struct xattr_handler kernfs_trusted_xattr_handler = {
+static const struct xattr_handler kernfs_trusted_xattr_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.get = kernfs_xattr_get,
.set = kernfs_xattr_set,
@@ -372,7 +372,7 @@ static int kernfs_security_xattr_set(const struct xattr_handler *handler,
return error;
}
-const struct xattr_handler kernfs_security_xattr_handler = {
+static const struct xattr_handler kernfs_security_xattr_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = kernfs_xattr_get,
.set = kernfs_security_xattr_set,
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 5426189..fb8cac8 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -15,6 +15,6 @@ struct lockd_net {
struct list_head nsm_handles;
};
-extern int lockd_net_id;
+extern unsigned int lockd_net_id;
#endif
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index fc4084e..1c13dd8 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -57,7 +57,7 @@ static struct task_struct *nlmsvc_task;
static struct svc_rqst *nlmsvc_rqst;
unsigned long nlmsvc_timeout;
-int lockd_net_id;
+unsigned int lockd_net_id;
/*
* These can be set at insmod time (useful for NFS as root filesystem),
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a8329cc..9bfa015 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -19,16 +19,10 @@ static int sync_request(struct page *page, struct block_device *bdev, int op)
struct bio bio;
struct bio_vec bio_vec;
- bio_init(&bio);
- bio.bi_max_vecs = 1;
- bio.bi_io_vec = &bio_vec;
- bio_vec.bv_page = page;
- bio_vec.bv_len = PAGE_SIZE;
- bio_vec.bv_offset = 0;
- bio.bi_vcnt = 1;
+ bio_init(&bio, &bio_vec, 1);
bio.bi_bdev = bdev;
+ bio_add_page(&bio, page, PAGE_SIZE, 0);
bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
- bio.bi_iter.bi_size = PAGE_SIZE;
bio_set_op_attrs(&bio, op, 0);
return submit_bio_wait(&bio);
@@ -77,56 +71,45 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
{
struct logfs_super *super = logfs_super(sb);
struct address_space *mapping = super->s_mapping_inode->i_mapping;
- struct bio *bio;
+ struct bio *bio = NULL;
struct page *page;
unsigned int max_pages;
- int i;
+ int i, ret;
max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);
- bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio);
-
for (i = 0; i < nr_pages; i++) {
- if (i >= max_pages) {
- /* Block layer cannot split bios :( */
- bio->bi_vcnt = i;
- bio->bi_iter.bi_size = i * PAGE_SIZE;
+ if (!bio) {
+ bio = bio_alloc(GFP_NOFS, max_pages);
+ BUG_ON(!bio);
+
bio->bi_bdev = super->s_bdev;
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = writeseg_end_io;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- atomic_inc(&super->s_pending_writes);
- submit_bio(bio);
-
- ofs += i * PAGE_SIZE;
- index += i;
- nr_pages -= i;
- i = 0;
-
- bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio);
}
page = find_lock_page(mapping, index + i);
BUG_ON(!page);
- bio->bi_io_vec[i].bv_page = page;
- bio->bi_io_vec[i].bv_len = PAGE_SIZE;
- bio->bi_io_vec[i].bv_offset = 0;
+ ret = bio_add_page(bio, page, PAGE_SIZE, 0);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
+
+ if (!ret) {
+ /* Block layer cannot split bios :( */
+ ofs += bio->bi_iter.bi_size;
+ atomic_inc(&super->s_pending_writes);
+ submit_bio(bio);
+ bio = NULL;
+ }
+ }
+
+ if (bio) {
+ atomic_inc(&super->s_pending_writes);
+ submit_bio(bio);
}
- bio->bi_vcnt = nr_pages;
- bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
- bio->bi_bdev = super->s_bdev;
- bio->bi_iter.bi_sector = ofs >> 9;
- bio->bi_private = sb;
- bio->bi_end_io = writeseg_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- atomic_inc(&super->s_pending_writes);
- submit_bio(bio);
return 0;
}
@@ -160,7 +143,6 @@ static void erase_end_io(struct bio *bio)
struct logfs_super *super = logfs_super(sb);
BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */
- BUG_ON(bio->bi_vcnt == 0);
bio_put(bio);
if (atomic_dec_and_test(&super->s_pending_writes))
wake_up(&wq);
@@ -170,49 +152,35 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
size_t nr_pages)
{
struct logfs_super *super = logfs_super(sb);
- struct bio *bio;
+ struct bio *bio = NULL;
unsigned int max_pages;
- int i;
+ int i, ret;
max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES);
- bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio);
-
for (i = 0; i < nr_pages; i++) {
- if (i >= max_pages) {
- /* Block layer cannot split bios :( */
- bio->bi_vcnt = i;
- bio->bi_iter.bi_size = i * PAGE_SIZE;
+ if (!bio) {
+ bio = bio_alloc(GFP_NOFS, max_pages);
+ BUG_ON(!bio);
+
bio->bi_bdev = super->s_bdev;
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = erase_end_io;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ }
+ ret = bio_add_page(bio, super->s_erase_page, PAGE_SIZE, 0);
+ if (!ret) {
+ /* Block layer cannot split bios :( */
+ ofs += bio->bi_iter.bi_size;
atomic_inc(&super->s_pending_writes);
submit_bio(bio);
-
- ofs += i * PAGE_SIZE;
- index += i;
- nr_pages -= i;
- i = 0;
-
- bio = bio_alloc(GFP_NOFS, max_pages);
- BUG_ON(!bio);
}
- bio->bi_io_vec[i].bv_page = super->s_erase_page;
- bio->bi_io_vec[i].bv_len = PAGE_SIZE;
- bio->bi_io_vec[i].bv_offset = 0;
}
- bio->bi_vcnt = nr_pages;
- bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
- bio->bi_bdev = super->s_bdev;
- bio->bi_iter.bi_sector = ofs >> 9;
- bio->bi_private = sb;
- bio->bi_end_io = erase_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- atomic_inc(&super->s_pending_writes);
- submit_bio(bio);
+ if (bio) {
+ atomic_inc(&super->s_pending_writes);
+ submit_bio(bio);
+ }
return 0;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index d2413af..98fc11a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
- int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int op_flags = wbc_to_write_flags(wbc);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -705,7 +705,7 @@ mpage_writepages(struct address_space *mapping,
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
if (mpd.bio) {
int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : 0);
+ REQ_SYNC : 0);
mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
}
@@ -726,7 +726,7 @@ int mpage_writepage(struct page *page, get_block_t get_block,
int ret = __mpage_writepage(page, wbc, &mpd);
if (mpd.bio) {
int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : 0);
+ REQ_SYNC : 0);
mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
return ret;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 532d8e24..484bebc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
}
ret = -EPROTONOSUPPORT;
- if (minorversion == 0)
+ if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
else if (xprt->ops->bc_up)
ret = xprt->ops->bc_up(serv, net);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7555ba8..ebecfb8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -314,7 +314,8 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
/* Match the full socket address */
if (!rpc_cmp_addr_port(sap, clap))
/* Match all xprt_switch full socket addresses */
- if (!rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient,
+ if (IS_ERR(clp->cl_rpcclient) ||
+ !rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient,
sap))
continue;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bf4ec5e..ce42dd0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2015,7 +2015,7 @@ static void nfsiod_stop(void)
destroy_workqueue(wq);
}
-int nfs_net_id;
+unsigned int nfs_net_id;
EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index c8162c6..5551e8e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -98,7 +98,7 @@ rename_retry:
return end;
}
namelen = strlen(base);
- if (flags & NFS_PATH_CANONICAL) {
+ if (*end == '/') {
/* Strip off excess slashes in base string */
while (namelen > 0 && base[namelen - 1] == '/')
namelen--;
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index fbce0d8..5fbd2bd 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -35,6 +35,6 @@ struct nfs_net {
#endif
};
-extern int nfs_net_id;
+extern unsigned int nfs_net_id;
#endif
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9b3a82a..1452177 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
}
+static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
+ const nfs4_stateid *stateid)
+{
+ return test_bit(NFS_OPEN_STATE, &state->flags) &&
+ nfs4_stateid_match_other(&state->open_stateid, stateid);
+}
+
#else
#define nfs4_close_state(a, b) do { } while (0)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7897826..241da19 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
}
static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
- nfs4_stateid *arg_stateid,
nfs4_stateid *stateid, fmode_t fmode)
{
clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
}
if (stateid == NULL)
return;
- /* Handle races with OPEN */
- if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
- (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
- !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
+ /* Handle OPEN+OPEN_DOWNGRADE races */
+ if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+ !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
nfs_resync_open_stateid_locked(state);
return;
}
@@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
nfs4_stateid *stateid, fmode_t fmode)
{
write_seqlock(&state->seqlock);
- nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
+ /* Ignore, if the CLOSE argment doesn't match the current stateid */
+ if (nfs4_state_match_open_stateid_other(state, arg_stateid))
+ nfs_clear_open_stateid_locked(state, stateid, fmode);
write_sequnlock(&state->seqlock);
if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
@@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
static int nfs41_check_expired_locks(struct nfs4_state *state)
{
int status, ret = NFS_OK;
- struct nfs4_lock_state *lsp;
+ struct nfs4_lock_state *lsp, *prev = NULL;
struct nfs_server *server = NFS_SERVER(state->inode);
if (!test_bit(LK_STATE_IN_USE, &state->flags))
goto out;
+
+ spin_lock(&state->state_lock);
list_for_each_entry(lsp, &state->lock_states, ls_locks) {
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
+ atomic_inc(&lsp->ls_count);
+ spin_unlock(&state->state_lock);
+
+ nfs4_put_lock_state(prev);
+ prev = lsp;
+
status = nfs41_test_and_free_expired_stateid(server,
&lsp->ls_stateid,
cred);
@@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
} else if (status != NFS_OK) {
ret = status;
- break;
+ nfs4_put_lock_state(prev);
+ goto out;
}
+ spin_lock(&state->state_lock);
}
- };
+ }
+ spin_unlock(&state->state_lock);
+ nfs4_put_lock_state(prev);
out:
return ret;
}
@@ -3122,7 +3134,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
} else if (is_rdwr)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
- if (!nfs4_valid_open_stateid(state))
+ if (!nfs4_valid_open_stateid(state) ||
+ test_bit(NFS_OPEN_STATE, &state->flags) == 0)
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -5569,6 +5582,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
+ break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_EXPIRED:
@@ -5579,8 +5593,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
task->tk_status = 0;
- if (data->roc)
- pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break;
default:
if (nfs4_async_handle_error(task, data->res.server,
@@ -5590,6 +5602,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
}
}
data->rpc_status = task->tk_status;
+ if (data->roc && data->rpc_status == 0)
+ pnfs_roc_set_barrier(data->inode, data->roc_barrier);
}
static void nfs4_delegreturn_release(void *calldata)
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index b629730..a61350f 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -178,12 +178,14 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
__must_hold(&tbl->slot_tbl_lock)
{
struct nfs4_slot *slot;
+ int ret;
slot = nfs4_lookup_slot(tbl, slotid);
- if (IS_ERR(slot))
- return PTR_ERR(slot);
- *seq_nr = slot->seq_nr;
- return 0;
+ ret = PTR_ERR_OR_ZERO(slot);
+ if (!ret)
+ *seq_nr = slot->seq_nr;
+
+ return ret;
}
/*
@@ -196,7 +198,7 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table *tbl, u32 slotid,
static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
u32 slotid, u32 seq_nr)
{
- u32 cur_seq;
+ u32 cur_seq = 0;
bool ret = false;
spin_lock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5f4281e..0959c96 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1547,6 +1547,7 @@ restart:
ssleep(1);
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_RECLAIM_BAD:
case -NFS4ERR_RECLAIM_CONFLICT:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 56b2d96..259ef85 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -146,6 +146,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
u32 id;
int i;
+ if (fsinfo->nlayouttypes == 0)
+ goto out_no_driver;
if (!(server->nfs_client->cl_exchange_flags &
(EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index fd8c9a5..420d3a0 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -9,7 +9,7 @@
#include <net/netns/generic.h>
#include <linux/fs.h>
-static int grace_net_id;
+static unsigned int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
/**
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ee36efd..3714231 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -124,5 +124,5 @@ struct nfsd_net {
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
-extern int nfsd_net_id;
+extern unsigned int nfsd_net_id;
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 36b2af9..2857e46 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1201,7 +1201,7 @@ static int create_proc_exports_entry(void)
}
#endif
-int nfsd_net_id;
+unsigned int nfsd_net_id;
static __net_init int nfsd_init_net(struct net *net)
{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c95d369..12eeae6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,7 +189,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag)
set_buffer_dirty(nilfs->ns_sbh[0]);
if (nilfs_test_opt(nilfs, BARRIER)) {
err = __sync_dirty_buffer(nilfs->ns_sbh[0],
- WRITE_SYNC | WRITE_FLUSH_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
} else {
err = sync_dirty_buffer(nilfs->ns_sbh[0]);
}
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8718af8..8c9fb29 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -118,7 +118,7 @@ again:
return ret;
}
-static int open_related_ns(struct ns_common *ns,
+int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns))
{
struct path path = {};
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index fe251f1..d0cf6fe 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -29,6 +29,7 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
+#include <linux/bio.h>
#include "aops.h"
#include "attrib.h"
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index a186135..0ee19ec 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1544,8 +1544,6 @@ const struct file_operations ntfs_dir_ops = {
.iterate = ntfs_readdir, /* Read directory contents. */
#ifdef NTFS_RW
.fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
- /*.aio_fsync = ,*/ /* Sync all outstanding async
- i/o operations on a kiocb. */
#endif /* NTFS_RW */
/*.ioctl = ,*/ /* Perform function on the
mounted filesystem. */
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 761f12f..353379f 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -27,6 +27,7 @@
#include <linux/buffer_head.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <linux/bio.h>
#include "attrib.h"
#include "aops.h"
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index d3c0096..b6f4021 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/bio.h>
#include "attrib.h"
#include "aops.h"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c5c5b97..9a88984 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1950,8 +1950,7 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
}
int ocfs2_write_end_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ loff_t pos, unsigned len, unsigned copied, void *fsdata)
{
int i, ret;
unsigned from, to, start = pos & (PAGE_SIZE - 1);
@@ -2064,7 +2063,7 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
int ret;
struct inode *inode = mapping->host;
- ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+ ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
ocfs2_inode_unlock(inode, 1);
@@ -2241,7 +2240,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
dwc->dw_zero_count++;
}
- ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+ ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
BUG_ON(ret != len);
ret = 0;
unlock:
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index b1c9f28..8614ff0 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -44,8 +44,7 @@ int walk_page_buffers( handle_t *handle,
struct buffer_head *bh));
int ocfs2_write_end_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
+ loff_t pos, unsigned len, unsigned copied, void *fsdata);
typedef enum {
OCFS2_WRITE_BUFFER = 0,
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 8f040f8..d9ebe11 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/highmem.h>
+#include <linux/bio.h>
#include <cluster/masklog.h>
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 636abcb..96a155a 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -627,7 +627,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg,
slot = o2nm_this_node();
bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE,
- WRITE_SYNC);
+ REQ_SYNC);
if (IS_ERR(bio)) {
status = PTR_ERR(bio);
mlog_errno(status);
@@ -741,7 +741,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block;
memset(hb_block, 0, reg->hr_block_bytes);
/* TODO: time stuff */
- cputime = CURRENT_TIME.tv_sec;
+ cputime = ktime_get_real_seconds();
if (!cputime)
cputime = 1;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e7054e2..3ecb9f3 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3699,7 +3699,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
struct ocfs2_dx_root_block *dx_root)
{
- int credits = ocfs2_clusters_to_blocks(osb->sb, 2);
+ int credits = ocfs2_clusters_to_blocks(osb->sb, 3);
credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
credits += ocfs2_quota_trans_credits(osb->sb);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3f828a1..a464c80 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1609,8 +1609,6 @@ way_up_top:
__dlm_insert_mle(dlm, mle);
response = DLM_MASTER_RESP_NO;
} else {
- // mlog(0, "mle was found\n");
- set_maybe = 1;
spin_lock(&tmpmle->spinlock);
if (tmpmle->master == dlm->node_num) {
mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
@@ -1625,8 +1623,7 @@ way_up_top:
response = DLM_MASTER_RESP_NO;
} else
response = DLM_MASTER_RESP_MAYBE;
- if (set_maybe)
- set_bit(request->node_idx, tmpmle->maybe_map);
+ set_bit(request->node_idx, tmpmle->maybe_map);
spin_unlock(&tmpmle->spinlock);
}
spin_unlock(&dlm->master_lock);
@@ -1644,12 +1641,6 @@ send_response:
* dlm_assert_master_worker() isn't called, we drop it here.
*/
if (dispatch_assert) {
- if (response != DLM_MASTER_RESP_YES)
- mlog(ML_ERROR, "invalid response %d\n", response);
- if (!res) {
- mlog(ML_ERROR, "bad lockres while trying to assert!\n");
- BUG();
- }
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
spin_lock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index dd5cb8b..74407c6 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2966,8 +2966,6 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->spinlock);
dlm_kick_recovery_thread(dlm);
break;
- default:
- BUG();
}
mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c56a767..382401d 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -703,7 +703,7 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail_commit;
}
- di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
+ di->i_dtime = cpu_to_le64(ktime_get_real_seconds());
di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a244f14..d5e5fa7 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1947,7 +1947,7 @@ static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
*/
seqno++;
os->os_count++;
- os->os_scantime = CURRENT_TIME;
+ os->os_scantime = ktime_get_seconds();
unlock:
ocfs2_orphan_scan_unlock(osb, seqno);
out:
@@ -2004,7 +2004,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
struct ocfs2_orphan_scan *os;
os = &osb->osb_orphan_scan;
- os->os_scantime = CURRENT_TIME;
+ os->os_scantime = ktime_get_seconds();
if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 71545ad..4290887 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -120,8 +120,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
ret = VM_FAULT_NOPAGE;
goto out;
}
- ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
- fsdata);
+ ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
BUG_ON(ret != len);
ret = VM_FAULT_LOCKED;
out:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8d887c7..3b0a10d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -516,6 +516,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
struct ocfs2_extent_list *fel;
u16 feat;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct timespec64 ts;
*new_fe_bh = NULL;
@@ -564,10 +565,11 @@ static int __ocfs2_mknod_locked(struct inode *dir,
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
+ ktime_get_real_ts64(&ts);
fe->i_atime = fe->i_ctime = fe->i_mtime =
- cpu_to_le64(CURRENT_TIME.tv_sec);
+ cpu_to_le64(ts.tv_sec);
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
- cpu_to_le32(CURRENT_TIME.tv_nsec);
+ cpu_to_le32(ts.tv_nsec);
fe->i_dtime = 0;
/*
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e63af7d..7e5958b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,7 +224,7 @@ struct ocfs2_orphan_scan {
struct ocfs2_super *os_osb;
struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */
struct delayed_work os_orphan_scan_work;
- struct timespec os_scantime; /* time this node ran the scan */
+ time64_t os_scantime; /* time this node ran the scan */
u32 os_count; /* tracks node specific scans */
u32 os_seqno; /* tracks cluster wide scans */
atomic_t os_state; /* ACTIVE or INACTIVE */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 1923851..738b4ea 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -478,7 +478,6 @@ again:
if (ret) {
mlog_errno(ret);
ocfs2_unlock_refcount_tree(osb, tree, rw);
- ocfs2_refcount_tree_put(tree);
goto out;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f56fe39..c894d94 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -337,7 +337,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
out += snprintf(buf + out, len - out, "Disabled\n");
else
out += snprintf(buf + out, len - out, "%lu seconds ago\n",
- (get_seconds() - os->os_scantime.tv_sec));
+ (unsigned long)(ktime_get_seconds() - os->os_scantime));
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
"Slots", "Num", "RecoGen");
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index ef3b4eb..551bc74 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -8,6 +8,7 @@
* Linux VFS inode operations.
*/
+#include <linux/bvec.h>
#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index eb09aa0..38887cc 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = {
};
const struct file_operations debug_help_fops = {
+ .owner = THIS_MODULE,
.open = orangefs_debug_help_open,
.read = seq_read,
.release = seq_release,
@@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = {
};
static const struct file_operations kernel_debug_fops = {
+ .owner = THIS_MODULE,
.open = orangefs_debug_open,
.read = orangefs_debug_read,
.write = orangefs_debug_write,
@@ -141,6 +143,9 @@ static struct client_debug_mask client_debug_mask;
*/
static DEFINE_MUTEX(orangefs_debug_lock);
+/* Used to protect data in ORANGEFS_KMOD_DEBUG_HELP_FILE */
+static DEFINE_MUTEX(orangefs_help_file_lock);
+
/*
* initialize kmod debug operations, create orangefs debugfs dir and
* ORANGEFS_KMOD_DEBUG_HELP_FILE.
@@ -289,6 +294,8 @@ static void *help_start(struct seq_file *m, loff_t *pos)
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");
+ mutex_lock(&orangefs_help_file_lock);
+
if (*pos == 0)
payload = m->private;
@@ -305,6 +312,7 @@ static void *help_next(struct seq_file *m, void *v, loff_t *pos)
static void help_stop(struct seq_file *m, void *p)
{
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
+ mutex_unlock(&orangefs_help_file_lock);
}
static int help_show(struct seq_file *m, void *v)
@@ -610,32 +618,54 @@ out:
* /sys/kernel/debug/orangefs/debug-help can be catted to
* see all the available kernel and client debug keywords.
*
- * When the kernel boots, we have no idea what keywords the
+ * When orangefs.ko initializes, we have no idea what keywords the
* client supports, nor their associated masks.
*
- * We pass through this function once at boot and stamp a
+ * We pass through this function once at module-load and stamp a
* boilerplate "we don't know" message for the client in the
* debug-help file. We pass through here again when the client
* starts and then we can fill out the debug-help file fully.
*
* The client might be restarted any number of times between
- * reboots, we only build the debug-help file the first time.
+ * module reloads, we only build the debug-help file the first time.
*/
int orangefs_prepare_debugfs_help_string(int at_boot)
{
- int rc = -EINVAL;
- int i;
- int byte_count = 0;
char *client_title = "Client Debug Keywords:\n";
char *kernel_title = "Kernel Debug Keywords:\n";
+ size_t string_size = DEBUG_HELP_STRING_SIZE;
+ size_t result_size;
+ size_t i;
+ char *new;
+ int rc = -EINVAL;
gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
- if (at_boot) {
- byte_count += strlen(HELP_STRING_UNINITIALIZED);
+ if (at_boot)
client_title = HELP_STRING_UNINITIALIZED;
- } else {
- /*
+
+ /* build a new debug_help_string. */
+ new = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
+ if (!new) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * strlcat(dst, src, size) will append at most
+ * "size - strlen(dst) - 1" bytes of src onto dst,
+ * null terminating the result, and return the total
+ * length of the string it tried to create.
+ *
+ * We'll just plow through here building our new debug
+ * help string and let strlcat take care of assuring that
+ * dst doesn't overflow.
+ */
+ strlcat(new, client_title, string_size);
+
+ if (!at_boot) {
+
+ /*
* fill the client keyword/mask array and remember
* how many elements there were.
*/
@@ -644,64 +674,40 @@ int orangefs_prepare_debugfs_help_string(int at_boot)
if (cdm_element_count <= 0)
goto out;
- /* Count the bytes destined for debug_help_string. */
- byte_count += strlen(client_title);
-
for (i = 0; i < cdm_element_count; i++) {
- byte_count += strlen(cdm_array[i].keyword + 2);
- if (byte_count >= DEBUG_HELP_STRING_SIZE) {
- pr_info("%s: overflow 1!\n", __func__);
- goto out;
- }
+ strlcat(new, "\t", string_size);
+ strlcat(new, cdm_array[i].keyword, string_size);
+ strlcat(new, "\n", string_size);
}
-
- gossip_debug(GOSSIP_UTILS_DEBUG,
- "%s: cdm_element_count:%d:\n",
- __func__,
- cdm_element_count);
}
- byte_count += strlen(kernel_title);
+ strlcat(new, "\n", string_size);
+ strlcat(new, kernel_title, string_size);
+
for (i = 0; i < num_kmod_keyword_mask_map; i++) {
- byte_count +=
- strlen(s_kmod_keyword_mask_map[i].keyword + 2);
- if (byte_count >= DEBUG_HELP_STRING_SIZE) {
- pr_info("%s: overflow 2!\n", __func__);
- goto out;
- }
+ strlcat(new, "\t", string_size);
+ strlcat(new, s_kmod_keyword_mask_map[i].keyword, string_size);
+ result_size = strlcat(new, "\n", string_size);
}
- /* build debug_help_string. */
- debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
- if (!debug_help_string) {
- rc = -ENOMEM;
+ /* See if we tried to put too many bytes into "new"... */
+ if (result_size >= string_size) {
+ kfree(new);
goto out;
}
- strcat(debug_help_string, client_title);
-
- if (!at_boot) {
- for (i = 0; i < cdm_element_count; i++) {
- strcat(debug_help_string, "\t");
- strcat(debug_help_string, cdm_array[i].keyword);
- strcat(debug_help_string, "\n");
- }
- }
-
- strcat(debug_help_string, "\n");
- strcat(debug_help_string, kernel_title);
-
- for (i = 0; i < num_kmod_keyword_mask_map; i++) {
- strcat(debug_help_string, "\t");
- strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
- strcat(debug_help_string, "\n");
+ if (at_boot) {
+ debug_help_string = new;
+ } else {
+ mutex_lock(&orangefs_help_file_lock);
+ memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE);
+ strlcat(debug_help_string, new, string_size);
+ mutex_unlock(&orangefs_help_file_lock);
}
rc = 0;
-out:
-
- return rc;
+out: return rc;
}
@@ -959,8 +965,12 @@ int orangefs_debugfs_new_client_string(void __user *arg)
ret = copy_from_user(&client_debug_array_string,
(void __user *)arg,
ORANGEFS_MAX_DEBUG_STRING_LEN);
- if (ret != 0)
+
+ if (ret != 0) {
+ pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
+ __func__);
return -EIO;
+ }
/*
* The real client-core makes an effort to ensure
@@ -975,45 +985,18 @@ int orangefs_debugfs_new_client_string(void __user *arg)
client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
'\0';
- if (ret != 0) {
- pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
- __func__);
- return -EIO;
- }
-
pr_info("%s: client debug array string has been received.\n",
__func__);
if (!help_string_initialized) {
- /* Free the "we don't know yet" default string... */
- kfree(debug_help_string);
-
- /* build a proper debug help string */
+ /* Build a proper debug help string. */
if (orangefs_prepare_debugfs_help_string(0)) {
gossip_err("%s: no debug help string \n",
__func__);
return -EIO;
}
- /* Replace the boilerplate boot-time debug-help file. */
- debugfs_remove(help_file_dentry);
-
- help_file_dentry =
- debugfs_create_file(
- ORANGEFS_KMOD_DEBUG_HELP_FILE,
- 0444,
- debug_dir,
- debug_help_string,
- &debug_help_fops);
-
- if (!help_file_dentry) {
- gossip_err("%s: debugfs_create_file failed for"
- " :%s:!\n",
- __func__,
- ORANGEFS_KMOD_DEBUG_HELP_FILE);
- return -EIO;
- }
}
debug_mask_to_string(&client_debug_mask, 1);
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 2e5b030..4113eb0 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -124,7 +124,7 @@ static int __init orangefs_init(void)
* unknown at boot time.
*
* orangefs_prepare_debugfs_help_string will be used again
- * later to rebuild the debug-help file after the client starts
+ * later to rebuild the debug-help-string after the client starts
* and passes along the needed info. The argument signifies
* which time orangefs_prepare_debugfs_help_string is being
* called.
@@ -152,7 +152,9 @@ static int __init orangefs_init(void)
ret = register_filesystem(&orangefs_fs_type);
if (ret == 0) {
- pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION);
+ pr_info("%s: module version %s loaded\n",
+ __func__,
+ ORANGEFS_VERSION);
ret = 0;
goto out;
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index edd46a0..0e10085 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -328,11 +328,11 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
if (!real)
goto bug;
+ /* Handle recursion */
+ real = d_real(real, inode, open_flags);
+
if (!inode || inode == d_inode(real))
return real;
-
- /* Handle recursion */
- return d_real(real, inode, open_flags);
bug:
WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 81818ad..51a4213 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -245,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header,
if (sigismember(set, i+2)) x |= 2;
if (sigismember(set, i+3)) x |= 4;
if (sigismember(set, i+4)) x |= 8;
- seq_printf(m, "%x", x);
+ seq_putc(m, hex_asc[x]);
} while (i >= 4);
seq_putc(m, '\n');
@@ -342,10 +342,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
{
+ seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
#ifdef CONFIG_SECCOMP
- seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
- seq_putc(m, '\n');
+ seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ seq_putc(m, '\n');
}
static inline void task_context_switch_counts(struct seq_file *m,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ca651ac..9b99df4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,9 +104,12 @@
* in /proc for a task before it execs a suid executable.
*/
+static u8 nlink_tid;
+static u8 nlink_tgid;
+
struct pid_entry {
const char *name;
- int len;
+ unsigned int len;
umode_t mode;
const struct inode_operations *iop;
const struct file_operations *fop;
@@ -139,13 +142,13 @@ struct pid_entry {
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
*/
-static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
+static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
unsigned int n)
{
unsigned int i;
unsigned int count;
- count = 0;
+ count = 2;
for (i = 0; i < n; ++i) {
if (S_ISDIR(entries[i].mode))
++count;
@@ -1967,7 +1970,7 @@ out:
struct map_files_info {
fmode_t mode;
- unsigned long len;
+ unsigned int len;
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
@@ -2412,14 +2415,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
* Yes, it does not scale. And it should not. Don't add
* new entries into /proc/<tgid>/ without very good reasons.
*/
- last = &ents[nents - 1];
- for (p = ents; p <= last; p++) {
+ last = &ents[nents];
+ for (p = ents; p < last; p++) {
if (p->len != dentry->d_name.len)
continue;
if (!memcmp(dentry->d_name.name, p->name, p->len))
break;
}
- if (p > last)
+ if (p >= last)
goto out;
error = proc_pident_instantiate(dir, dentry, task, p);
@@ -2444,7 +2447,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
if (ctx->pos >= nents + 2)
goto out;
- for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
+ for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
if (!proc_fill_cache(file, ctx, p->name, p->len,
proc_pident_instantiate, task, p))
break;
@@ -3068,8 +3071,7 @@ static int proc_pid_instantiate(struct inode *dir,
inode->i_fop = &proc_tgid_base_operations;
inode->i_flags|=S_IMMUTABLE;
- set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
- ARRAY_SIZE(tgid_base_stuff)));
+ set_nlink(inode, nlink_tgid);
d_set_d_op(dentry, &pid_dentry_operations);
@@ -3361,8 +3363,7 @@ static int proc_task_instantiate(struct inode *dir,
inode->i_fop = &proc_tid_base_operations;
inode->i_flags|=S_IMMUTABLE;
- set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
- ARRAY_SIZE(tid_base_stuff)));
+ set_nlink(inode, nlink_tid);
d_set_d_op(dentry, &pid_dentry_operations);
@@ -3552,3 +3553,9 @@ static const struct file_operations proc_task_operations = {
.iterate_shared = proc_task_readdir,
.llseek = generic_file_llseek,
};
+
+void __init set_proc_pid_nlink(void)
+{
+ nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+ nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
+}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5f2dc20..7eb3cef 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -479,6 +479,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
}
return ent;
}
+EXPORT_SYMBOL(proc_create_mount_point);
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e69ebe6..783bc19 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -138,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde)
/* pde is locked */
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
{
+ /*
+ * close() (proc_reg_release()) can't delete an entry and proceed:
+ * ->release hook needs to be available at the right moment.
+ *
+ * rmmod (remove_proc_entry() et al) can't delete an entry and proceed:
+ * "struct file" needs to be available at the right moment.
+ *
+ * Therefore, first process to enter this function does ->release() and
+ * signals its completion to the other process which does nothing.
+ */
if (pdeo->closing) {
/* somebody else is doing that, just wait */
DECLARE_COMPLETION_ONSTACK(c);
@@ -147,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
spin_lock(&pde->pde_unload_lock);
} else {
struct file *file;
- pdeo->closing = 1;
+ pdeo->closing = true;
spin_unlock(&pde->pde_unload_lock);
file = pdeo->file;
pde->proc_fops->release(file_inode(file), file);
spin_lock(&pde->pde_unload_lock);
- list_del_init(&pdeo->lh);
+ /* After ->release. */
+ list_del(&pdeo->lh);
if (pdeo->c)
complete(pdeo->c);
kfree(pdeo);
@@ -167,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de)
if (atomic_add_return(BIAS, &de->in_use) != BIAS)
wait_for_completion(&c);
+ /* ->pde_openers list can't grow from now on. */
+
spin_lock(&de->pde_unload_lock);
while (!list_empty(&de->pde_openers)) {
struct pde_opener *pdeo;
@@ -312,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file)
struct pde_opener *pdeo;
/*
- * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
- * sequence. ->release won't be called because ->proc_fops will be
- * cleared. Depending on complexity of ->release, consequences vary.
+ * Ensure that
+ * 1) PDE's ->release hook will be called no matter what
+ * either normally by close()/->release, or forcefully by
+ * rmmod/remove_proc_entry.
+ *
+ * 2) rmmod isn't blocked by opening file in /proc and sitting on
+ * the descriptor (including "rmmod foo </proc/foo" scenario).
*
- * We can't wait for mercy when close will be done for real, it's
- * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
- * by hand in remove_proc_entry(). For this, save opener's credentials
- * for later.
+ * Save every "struct file" with custom ->release hook.
*/
- pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
+ pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
if (!pdeo)
return -ENOMEM;
@@ -338,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
if (rv == 0 && release) {
/* To know what to release. */
pdeo->file = file;
- /* Strictly for "too late" ->release in proc_reg_release(). */
+ pdeo->closing = false;
+ pdeo->c = NULL;
spin_lock(&pde->pde_unload_lock);
list_add(&pdeo->lh, &pde->pde_openers);
spin_unlock(&pde->pde_unload_lock);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5378441..109876a2 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -195,7 +195,6 @@ static inline bool is_empty_pde(const struct proc_dir_entry *pde)
{
return S_ISDIR(pde->mode) && !pde->proc_iops;
}
-struct proc_dir_entry *proc_create_mount_point(const char *name);
/*
* inode.c
@@ -203,7 +202,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name);
struct pde_opener {
struct file *file;
struct list_head lh;
- int closing;
+ bool closing;
struct completion *c;
};
extern const struct inode_operations proc_link_inode_operations;
@@ -211,6 +210,7 @@ extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
extern void proc_init_inodecache(void);
+void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8d3e484..4bd0373 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -122,6 +122,7 @@ void __init proc_root_init(void)
int err;
proc_init_inodecache();
+ set_proc_pid_nlink();
err = register_filesystem(&proc_fs_type);
if (err)
return;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 35b92d8..958f325 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1588,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(orig_pte, ptl);
+ cond_resched();
return 0;
}
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index be40813..b42e5bd 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -86,4 +86,4 @@ config PSTORE_RAM
Note that for historical reasons, the module will be named
"ramoops.ko".
- For more information, see Documentation/ramoops.txt.
+ For more information, see Documentation/admin-guide/ramoops.rst.
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index d488770..899d0ba 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -27,6 +27,9 @@
#include <asm/barrier.h>
#include "internal.h"
+/* This doesn't need to be atomic: speed is chosen over correctness here. */
+static u64 pstore_ftrace_stamp;
+
static void notrace pstore_ftrace_call(unsigned long ip,
unsigned long parent_ip,
struct ftrace_ops *op,
@@ -42,6 +45,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
rec.ip = ip;
rec.parent_ip = parent_ip;
+ pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++);
pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
0, sizeof(rec), psinfo);
@@ -71,10 +75,13 @@ static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf,
if (!on ^ pstore_ftrace_enabled)
goto out;
- if (on)
+ if (on) {
+ ftrace_ops_set_global_filter(&pstore_ftrace_ops);
ret = register_ftrace_function(&pstore_ftrace_ops);
- else
+ } else {
ret = unregister_ftrace_function(&pstore_ftrace_ops);
+ }
+
if (ret) {
pr_err("%s: unable to %sregister ftrace ops: %zd\n",
__func__, on ? "" : "un", ret);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 1781dc5..57c0646 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -107,9 +107,11 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec = (void *)(ps->data + data->off);
- seq_printf(s, "%d %08lx %08lx %pf <- %pF\n",
- pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip,
- (void *)rec->ip, (void *)rec->parent_ip);
+ seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n",
+ pstore_ftrace_decode_cpu(rec),
+ pstore_ftrace_read_timestamp(rec),
+ rec->ip, rec->parent_ip, (void *)rec->ip,
+ (void *)rec->parent_ip);
return 0;
}
@@ -197,11 +199,14 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
if (err)
return err;
- if (p->psi->erase)
+ if (p->psi->erase) {
+ mutex_lock(&p->psi->read_mutex);
p->psi->erase(p->type, p->id, p->count,
d_inode(dentry)->i_ctime, p->psi);
- else
+ mutex_unlock(&p->psi->read_mutex);
+ } else {
return -EPERM;
+ }
return simple_unlink(dir, dentry);
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index e38a22b..da416e6 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -5,40 +5,6 @@
#include <linux/time.h>
#include <linux/pstore.h>
-#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
-#define PSTORE_CPU_IN_IP 0x1
-#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
-#define PSTORE_CPU_IN_IP 0x3
-#endif
-
-struct pstore_ftrace_record {
- unsigned long ip;
- unsigned long parent_ip;
-#ifndef PSTORE_CPU_IN_IP
- unsigned int cpu;
-#endif
-};
-
-static inline void
-pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
-{
-#ifndef PSTORE_CPU_IN_IP
- rec->cpu = cpu;
-#else
- rec->ip |= cpu;
-#endif
-}
-
-static inline unsigned int
-pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
-{
-#ifndef PSTORE_CPU_IN_IP
- return rec->cpu;
-#else
- return rec->ip & PSTORE_CPU_IN_IP;
-#endif
-}
-
#ifdef CONFIG_PSTORE_FTRACE
extern void pstore_register_ftrace(void);
extern void pstore_unregister_ftrace(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 14984d9..729677e 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -493,6 +493,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
if (!is_locked) {
pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
, in_nmi() ? "NMI" : why);
+ return;
}
} else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
@@ -584,8 +585,8 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
} else {
spin_lock_irqsave(&psinfo->buf_lock, flags);
}
- memcpy(psinfo->buf, s, c);
- psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo);
+ psinfo->write_buf(PSTORE_TYPE_CONSOLE, 0, &id, 0,
+ s, 0, c, psinfo);
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
s += c;
c = e - s;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 6ad831b..27c059e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -85,10 +85,10 @@ MODULE_PARM_DESC(ramoops_ecc,
"bytes ECC)");
struct ramoops_context {
- struct persistent_ram_zone **przs;
- struct persistent_ram_zone *cprz;
- struct persistent_ram_zone *fprz;
- struct persistent_ram_zone *mprz;
+ struct persistent_ram_zone **dprzs; /* Oops dump zones */
+ struct persistent_ram_zone *cprz; /* Console zone */
+ struct persistent_ram_zone **fprzs; /* Ftrace zones */
+ struct persistent_ram_zone *mprz; /* PMSG zone */
phys_addr_t phys_addr;
unsigned long size;
unsigned int memtype;
@@ -97,12 +97,14 @@ struct ramoops_context {
size_t ftrace_size;
size_t pmsg_size;
int dump_oops;
+ u32 flags;
struct persistent_ram_ecc_info ecc_info;
unsigned int max_dump_cnt;
unsigned int dump_write_cnt;
/* _read_cnt need clear on ramoops_pstore_open */
unsigned int dump_read_cnt;
unsigned int console_read_cnt;
+ unsigned int max_ftrace_cnt;
unsigned int ftrace_read_cnt;
unsigned int pmsg_read_cnt;
struct pstore_info pstore;
@@ -180,16 +182,69 @@ static bool prz_ok(struct persistent_ram_zone *prz)
persistent_ram_ecc_string(prz, NULL, 0));
}
+static ssize_t ftrace_log_combine(struct persistent_ram_zone *dest,
+ struct persistent_ram_zone *src)
+{
+ size_t dest_size, src_size, total, dest_off, src_off;
+ size_t dest_idx = 0, src_idx = 0, merged_idx = 0;
+ void *merged_buf;
+ struct pstore_ftrace_record *drec, *srec, *mrec;
+ size_t record_size = sizeof(struct pstore_ftrace_record);
+
+ dest_off = dest->old_log_size % record_size;
+ dest_size = dest->old_log_size - dest_off;
+
+ src_off = src->old_log_size % record_size;
+ src_size = src->old_log_size - src_off;
+
+ total = dest_size + src_size;
+ merged_buf = kmalloc(total, GFP_KERNEL);
+ if (!merged_buf)
+ return -ENOMEM;
+
+ drec = (struct pstore_ftrace_record *)(dest->old_log + dest_off);
+ srec = (struct pstore_ftrace_record *)(src->old_log + src_off);
+ mrec = (struct pstore_ftrace_record *)(merged_buf);
+
+ while (dest_size > 0 && src_size > 0) {
+ if (pstore_ftrace_read_timestamp(&drec[dest_idx]) <
+ pstore_ftrace_read_timestamp(&srec[src_idx])) {
+ mrec[merged_idx++] = drec[dest_idx++];
+ dest_size -= record_size;
+ } else {
+ mrec[merged_idx++] = srec[src_idx++];
+ src_size -= record_size;
+ }
+ }
+
+ while (dest_size > 0) {
+ mrec[merged_idx++] = drec[dest_idx++];
+ dest_size -= record_size;
+ }
+
+ while (src_size > 0) {
+ mrec[merged_idx++] = srec[src_idx++];
+ src_size -= record_size;
+ }
+
+ kfree(dest->old_log);
+ dest->old_log = merged_buf;
+ dest->old_log_size = total;
+
+ return 0;
+}
+
static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *time,
char **buf, bool *compressed,
ssize_t *ecc_notice_size,
struct pstore_info *psi)
{
- ssize_t size;
+ ssize_t size = 0;
struct ramoops_context *cxt = psi->data;
struct persistent_ram_zone *prz = NULL;
int header_length = 0;
+ bool free_prz = false;
/* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but
* PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have
@@ -201,7 +256,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
/* Find the next valid persistent_ram_zone for DMESG */
while (cxt->dump_read_cnt < cxt->max_dump_cnt && !prz) {
- prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt,
+ prz = ramoops_get_next_prz(cxt->dprzs, &cxt->dump_read_cnt,
cxt->max_dump_cnt, id, type,
PSTORE_TYPE_DMESG, 1);
if (!prz_ok(prz))
@@ -219,14 +274,56 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
1, id, type, PSTORE_TYPE_CONSOLE, 0);
- if (!prz_ok(prz))
- prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
- 1, id, type, PSTORE_TYPE_FTRACE, 0);
+
if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
1, id, type, PSTORE_TYPE_PMSG, 0);
- if (!prz_ok(prz))
- return 0;
+
+ /* ftrace is last since it may want to dynamically allocate memory. */
+ if (!prz_ok(prz)) {
+ if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) {
+ prz = ramoops_get_next_prz(cxt->fprzs,
+ &cxt->ftrace_read_cnt, 1, id, type,
+ PSTORE_TYPE_FTRACE, 0);
+ } else {
+ /*
+ * Build a new dummy record which combines all the
+ * per-cpu records including metadata and ecc info.
+ */
+ struct persistent_ram_zone *tmp_prz, *prz_next;
+
+ tmp_prz = kzalloc(sizeof(struct persistent_ram_zone),
+ GFP_KERNEL);
+ if (!tmp_prz)
+ return -ENOMEM;
+ free_prz = true;
+
+ while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) {
+ prz_next = ramoops_get_next_prz(cxt->fprzs,
+ &cxt->ftrace_read_cnt,
+ cxt->max_ftrace_cnt, id,
+ type, PSTORE_TYPE_FTRACE, 0);
+
+ if (!prz_ok(prz_next))
+ continue;
+
+ tmp_prz->ecc_info = prz_next->ecc_info;
+ tmp_prz->corrected_bytes +=
+ prz_next->corrected_bytes;
+ tmp_prz->bad_blocks += prz_next->bad_blocks;
+ size = ftrace_log_combine(tmp_prz, prz_next);
+ if (size)
+ goto out;
+ }
+ *id = 0;
+ prz = tmp_prz;
+ }
+ }
+
+ if (!prz_ok(prz)) {
+ size = 0;
+ goto out;
+ }
size = persistent_ram_old_size(prz) - header_length;
@@ -234,12 +331,21 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
*ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
*buf = kmalloc(size + *ecc_notice_size + 1, GFP_KERNEL);
- if (*buf == NULL)
- return -ENOMEM;
+ if (*buf == NULL) {
+ size = -ENOMEM;
+ goto out;
+ }
memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size);
+
persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1);
+out:
+ if (free_prz) {
+ kfree(prz->old_log);
+ kfree(prz);
+ }
+
return size;
}
@@ -283,15 +389,23 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
persistent_ram_write(cxt->cprz, buf, size);
return 0;
} else if (type == PSTORE_TYPE_FTRACE) {
- if (!cxt->fprz)
+ int zonenum;
+
+ if (!cxt->fprzs)
return -ENOMEM;
- persistent_ram_write(cxt->fprz, buf, size);
+ /*
+ * Choose zone by if we're using per-cpu buffers.
+ */
+ if (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+ zonenum = smp_processor_id();
+ else
+ zonenum = 0;
+
+ persistent_ram_write(cxt->fprzs[zonenum], buf, size);
return 0;
} else if (type == PSTORE_TYPE_PMSG) {
- if (!cxt->mprz)
- return -ENOMEM;
- persistent_ram_write(cxt->mprz, buf, size);
- return 0;
+ pr_warn_ratelimited("PMSG shouldn't call %s\n", __func__);
+ return -EINVAL;
}
if (type != PSTORE_TYPE_DMESG)
@@ -316,10 +430,10 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
if (part != 1)
return -ENOSPC;
- if (!cxt->przs)
+ if (!cxt->dprzs)
return -ENOSPC;
- prz = cxt->przs[cxt->dump_write_cnt];
+ prz = cxt->dprzs[cxt->dump_write_cnt];
hlen = ramoops_write_kmsg_hdr(prz, compressed);
if (size + hlen > prz->buffer_size)
@@ -359,13 +473,15 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
case PSTORE_TYPE_DMESG:
if (id >= cxt->max_dump_cnt)
return -EINVAL;
- prz = cxt->przs[id];
+ prz = cxt->dprzs[id];
break;
case PSTORE_TYPE_CONSOLE:
prz = cxt->cprz;
break;
case PSTORE_TYPE_FTRACE:
- prz = cxt->fprz;
+ if (id >= cxt->max_ftrace_cnt)
+ return -EINVAL;
+ prz = cxt->fprzs[id];
break;
case PSTORE_TYPE_PMSG:
prz = cxt->mprz;
@@ -396,68 +512,113 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
- if (!cxt->przs)
- return;
+ /* Free dump PRZs */
+ if (cxt->dprzs) {
+ for (i = 0; i < cxt->max_dump_cnt; i++)
+ persistent_ram_free(cxt->dprzs[i]);
- for (i = 0; i < cxt->max_dump_cnt; i++)
- persistent_ram_free(cxt->przs[i]);
+ kfree(cxt->dprzs);
+ cxt->max_dump_cnt = 0;
+ }
- kfree(cxt->przs);
- cxt->max_dump_cnt = 0;
+ /* Free ftrace PRZs */
+ if (cxt->fprzs) {
+ for (i = 0; i < cxt->max_ftrace_cnt; i++)
+ persistent_ram_free(cxt->fprzs[i]);
+ kfree(cxt->fprzs);
+ cxt->max_ftrace_cnt = 0;
+ }
}
-static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
- phys_addr_t *paddr, size_t dump_mem_sz)
+static int ramoops_init_przs(const char *name,
+ struct device *dev, struct ramoops_context *cxt,
+ struct persistent_ram_zone ***przs,
+ phys_addr_t *paddr, size_t mem_sz,
+ ssize_t record_size,
+ unsigned int *cnt, u32 sig, u32 flags)
{
int err = -ENOMEM;
int i;
+ size_t zone_sz;
+ struct persistent_ram_zone **prz_ar;
- if (!cxt->record_size)
+ /* Allocate nothing for 0 mem_sz or 0 record_size. */
+ if (mem_sz == 0 || record_size == 0) {
+ *cnt = 0;
return 0;
+ }
- if (*paddr + dump_mem_sz - cxt->phys_addr > cxt->size) {
- dev_err(dev, "no room for dumps\n");
- return -ENOMEM;
+ /*
+ * If we have a negative record size, calculate it based on
+ * mem_sz / *cnt. If we have a positive record size, calculate
+ * cnt from mem_sz / record_size.
+ */
+ if (record_size < 0) {
+ if (*cnt == 0)
+ return 0;
+ record_size = mem_sz / *cnt;
+ if (record_size == 0) {
+ dev_err(dev, "%s record size == 0 (%zu / %u)\n",
+ name, mem_sz, *cnt);
+ goto fail;
+ }
+ } else {
+ *cnt = mem_sz / record_size;
+ if (*cnt == 0) {
+ dev_err(dev, "%s record count == 0 (%zu / %zu)\n",
+ name, mem_sz, record_size);
+ goto fail;
+ }
}
- cxt->max_dump_cnt = dump_mem_sz / cxt->record_size;
- if (!cxt->max_dump_cnt)
- return -ENOMEM;
+ if (*paddr + mem_sz - cxt->phys_addr > cxt->size) {
+ dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n",
+ name,
+ mem_sz, (unsigned long long)*paddr,
+ cxt->size, (unsigned long long)cxt->phys_addr);
+ goto fail;
+ }
- cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt,
- GFP_KERNEL);
- if (!cxt->przs) {
- dev_err(dev, "failed to initialize a prz array for dumps\n");
- goto fail_mem;
+ zone_sz = mem_sz / *cnt;
+ if (!zone_sz) {
+ dev_err(dev, "%s zone size == 0\n", name);
+ goto fail;
}
- for (i = 0; i < cxt->max_dump_cnt; i++) {
- cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
+ prz_ar = kcalloc(*cnt, sizeof(**przs), GFP_KERNEL);
+ if (!prz_ar)
+ goto fail;
+
+ for (i = 0; i < *cnt; i++) {
+ prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
&cxt->ecc_info,
- cxt->memtype);
- if (IS_ERR(cxt->przs[i])) {
- err = PTR_ERR(cxt->przs[i]);
- dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
- cxt->record_size, (unsigned long long)*paddr, err);
+ cxt->memtype, flags);
+ if (IS_ERR(prz_ar[i])) {
+ err = PTR_ERR(prz_ar[i]);
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, record_size,
+ (unsigned long long)*paddr, err);
while (i > 0) {
i--;
- persistent_ram_free(cxt->przs[i]);
+ persistent_ram_free(prz_ar[i]);
}
- goto fail_prz;
+ kfree(prz_ar);
+ goto fail;
}
- *paddr += cxt->record_size;
+ *paddr += zone_sz;
}
+ *przs = prz_ar;
return 0;
-fail_prz:
- kfree(cxt->przs);
-fail_mem:
- cxt->max_dump_cnt = 0;
+
+fail:
+ *cnt = 0;
return err;
}
-static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
+static int ramoops_init_prz(const char *name,
+ struct device *dev, struct ramoops_context *cxt,
struct persistent_ram_zone **prz,
phys_addr_t *paddr, size_t sz, u32 sig)
{
@@ -465,18 +626,19 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return 0;
if (*paddr + sz - cxt->phys_addr > cxt->size) {
- dev_err(dev, "no room for mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n",
- sz, (unsigned long long)*paddr,
+ dev_err(dev, "no room for %s mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n",
+ name, sz, (unsigned long long)*paddr,
cxt->size, (unsigned long long)cxt->phys_addr);
return -ENOMEM;
}
- *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype);
+ *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
+ cxt->memtype, 0);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
- dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
- sz, (unsigned long long)*paddr, err);
+ dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
+ name, sz, (unsigned long long)*paddr, err);
return err;
}
@@ -543,6 +705,7 @@ static int ramoops_parse_dt(struct platform_device *pdev,
parse_size("ftrace-size", pdata->ftrace_size);
parse_size("pmsg-size", pdata->pmsg_size);
parse_size("ecc-size", pdata->ecc_info.ecc_size);
+ parse_size("flags", pdata->flags);
#undef parse_size
@@ -561,6 +724,7 @@ static int ramoops_probe(struct platform_device *pdev)
if (dev_of_node(dev) && !pdata) {
pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata) {
+ pr_err("cannot allocate platform data buffer\n");
err = -ENOMEM;
goto fail_out;
}
@@ -570,11 +734,20 @@ static int ramoops_probe(struct platform_device *pdev)
goto fail_out;
}
- /* Only a single ramoops area allowed at a time, so fail extra
+ /*
+ * Only a single ramoops area allowed at a time, so fail extra
* probes.
*/
- if (cxt->max_dump_cnt)
+ if (cxt->max_dump_cnt) {
+ pr_err("already initialized\n");
goto fail_out;
+ }
+
+ /* Make sure we didn't get bogus platform data pointer. */
+ if (!pdata) {
+ pr_err("NULL platform data\n");
+ goto fail_out;
+ }
if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size &&
!pdata->ftrace_size && !pdata->pmsg_size)) {
@@ -600,27 +773,37 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->ftrace_size = pdata->ftrace_size;
cxt->pmsg_size = pdata->pmsg_size;
cxt->dump_oops = pdata->dump_oops;
+ cxt->flags = pdata->flags;
cxt->ecc_info = pdata->ecc_info;
paddr = cxt->phys_addr;
dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
- cxt->pmsg_size;
- err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz);
+ err = ramoops_init_przs("dump", dev, cxt, &cxt->dprzs, &paddr,
+ dump_mem_sz, cxt->record_size,
+ &cxt->max_dump_cnt, 0, 0);
if (err)
goto fail_out;
- err = ramoops_init_prz(dev, cxt, &cxt->cprz, &paddr,
+ err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr,
cxt->console_size, 0);
if (err)
goto fail_init_cprz;
- err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size,
- LINUX_VERSION_CODE);
+ cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+ ? nr_cpu_ids
+ : 1;
+ err = ramoops_init_przs("ftrace", dev, cxt, &cxt->fprzs, &paddr,
+ cxt->ftrace_size, -1,
+ &cxt->max_ftrace_cnt, LINUX_VERSION_CODE,
+ (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
+ ? PRZ_FLAG_NO_LOCK : 0);
if (err)
goto fail_init_fprz;
- err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0);
+ err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
+ cxt->pmsg_size, 0);
if (err)
goto fail_init_mprz;
@@ -680,7 +863,6 @@ fail_clear:
cxt->pstore.bufsize = 0;
persistent_ram_free(cxt->mprz);
fail_init_mprz:
- persistent_ram_free(cxt->fprz);
fail_init_fprz:
persistent_ram_free(cxt->cprz);
fail_init_cprz:
@@ -699,7 +881,6 @@ static int ramoops_remove(struct platform_device *pdev)
cxt->pstore.bufsize = 0;
persistent_ram_free(cxt->mprz);
- persistent_ram_free(cxt->fprz);
persistent_ram_free(cxt->cprz);
ramoops_free_przs(cxt);
@@ -741,6 +922,8 @@ static void ramoops_register_dummy(void)
dummy_data->ftrace_size = ramoops_ftrace_size;
dummy_data->pmsg_size = ramoops_pmsg_size;
dummy_data->dump_oops = dump_oops;
+ dummy_data->flags = RAMOOPS_FLAG_FTRACE_PER_CPU;
+
/*
* For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
* (using 1 byte for ECC isn't much of use anyway).
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3975dee..a857338 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -48,16 +48,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
return atomic_read(&prz->buffer->start);
}
-static DEFINE_RAW_SPINLOCK(buffer_lock);
-
/* increase and wrap the start pointer, returning the old value */
static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
{
int old;
int new;
- unsigned long flags;
+ unsigned long flags = 0;
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->start);
new = old + a;
@@ -65,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
new -= prz->buffer_size;
atomic_set(&prz->buffer->start, new);
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
return old;
}
@@ -75,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
{
size_t old;
size_t new;
- unsigned long flags;
+ unsigned long flags = 0;
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->size);
if (old == prz->buffer_size)
@@ -89,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
atomic_set(&prz->buffer->size, new);
exit:
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
}
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -465,7 +467,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
}
static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
- struct persistent_ram_ecc_info *ecc_info)
+ struct persistent_ram_ecc_info *ecc_info,
+ unsigned long flags)
{
int ret;
@@ -493,6 +496,8 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
prz->buffer->sig = sig;
persistent_ram_zap(prz);
+ prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
+ prz->flags = flags;
return 0;
}
@@ -517,7 +522,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
u32 sig, struct persistent_ram_ecc_info *ecc_info,
- unsigned int memtype)
+ unsigned int memtype, u32 flags)
{
struct persistent_ram_zone *prz;
int ret = -ENOMEM;
@@ -532,7 +537,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
if (ret)
goto err;
- ret = persistent_ram_post_init(prz, sig, ecc_info);
+ ret = persistent_ram_post_init(prz, sig, ecc_info, flags);
if (ret)
goto err;
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 8b25267..e99b1a7 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -12,14 +12,8 @@ static const struct genl_multicast_group quota_mcgrps[] = {
};
/* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
- /*
- * Needed due to multicast group ID abuse - old code assumed
- * the family ID was also a valid multicast group ID (which
- * isn't true) and userspace might thus rely on it. Assign a
- * static ID for this group to make dealing with that easier.
- */
- .id = GENL_ID_VFS_DQUOT,
+static struct genl_family quota_genl_family __ro_after_init = {
+ .module = THIS_MODULE,
.hdrsize = 0,
.name = "VFS_DQUOT",
.version = 1,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 58b2ded..cfeae9b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -19,6 +19,7 @@
#include <linux/quotaops.h>
#include <linux/swap.h>
#include <linux/uio.h>
+#include <linux/bio.h>
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index bc2dde2..aa40c24 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1111,7 +1111,8 @@ static int flush_commit_list(struct super_block *s,
mark_buffer_dirty(jl->j_commit_bh) ;
depth = reiserfs_write_unlock_nested(s);
if (reiserfs_barrier_flush(s))
- __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
+ __sync_dirty_buffer(jl->j_commit_bh,
+ REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(jl->j_commit_bh);
reiserfs_write_lock_nested(s, depth);
@@ -1269,7 +1270,8 @@ static int _update_journal_header_block(struct super_block *sb,
depth = reiserfs_write_unlock_nested(sb);
if (reiserfs_barrier_flush(sb))
- __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
+ __sync_dirty_buffer(journal->j_header_bh,
+ REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(journal->j_header_bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index a97e352..0037aea 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -11,6 +11,7 @@
#include <linux/time.h>
#include <linux/string.h>
#include <linux/pagemap.h>
+#include <linux/bio.h>
#include "reiserfs.h"
#include <linux/buffer_head.h>
#include <linux/quotaops.h>
diff --git a/fs/splice.c b/fs/splice.c
index 153d4f3..8ed7c9d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -17,6 +17,7 @@
* Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
*
*/
+#include <linux/bvec.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
@@ -299,13 +300,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
{
struct iov_iter to;
struct kiocb kiocb;
- loff_t isize;
int idx, ret;
- isize = i_size_read(in->f_mapping->host);
- if (unlikely(*ppos >= isize))
- return 0;
-
iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
idx = to.idx;
init_sync_kiocb(&kiocb, in);
@@ -413,7 +409,8 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
if (res <= 0)
return -ENOMEM;
- nr_pages = res / PAGE_SIZE;
+ BUG_ON(dummy);
+ nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
vec = __vec;
if (nr_pages > PIPE_DEF_BUFFERS) {
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index ce62a38..2751476 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/bio.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index aaec13c..2d0e028 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,6 +30,7 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/bio.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 988d535..7aa48bd 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/string.h>
+#include <linux/bio.h>
struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
struct udf_fileident_bh *fibh,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index aad4640..0f3db71 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,6 +38,7 @@
#include <linux/crc-itu-t.h>
#include <linux/mpage.h>
#include <linux/uio.h>
+#include <linux/bio.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 67e085d..b035af5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/bitops.h>
+#include <linux/bio.h>
#include <asm/byteorder.h>
#include "ufs_fs.h"
diff --git a/fs/xattr.c b/fs/xattr.c
index 3368659..2d13b4e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
- int error = -EOPNOTSUPP;
+ int error = -EAGAIN;
int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN);
@@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
security_inode_post_setxattr(dentry, name, value,
size, flags);
}
- } else if (issec) {
- const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-
+ } else {
if (unlikely(is_bad_inode(inode)))
return -EIO;
- error = security_inode_setsecurity(inode, suffix, value,
- size, flags);
- if (!error)
- fsnotify_xattr(dentry);
+ }
+ if (error == -EAGAIN) {
+ error = -EOPNOTSUPP;
+
+ if (issec) {
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+
+ error = security_inode_setsecurity(inode, suffix, value,
+ size, flags);
+ if (!error)
+ fsnotify_xattr(dentry);
+ }
}
return error;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 613c5cf1..5c2929f 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -199,9 +199,9 @@ xfs_defer_intake_work(
struct xfs_defer_pending *dfp;
list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
- trace_xfs_defer_intake_work(tp->t_mountp, dfp);
dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
dfp->dfp_count);
+ trace_xfs_defer_intake_work(tp->t_mountp, dfp);
list_sort(tp->t_mountp, &dfp->dfp_work,
dfp->dfp_type->diff_items);
list_for_each(li, &dfp->dfp_work)
@@ -221,21 +221,14 @@ xfs_defer_trans_abort(
struct xfs_defer_pending *dfp;
trace_xfs_defer_trans_abort(tp->t_mountp, dop);
- /*
- * If the transaction was committed, drop the intent reference
- * since we're bailing out of here. The other reference is
- * dropped when the intent hits the AIL. If the transaction
- * was not committed, the intent is freed by the intent item
- * unlock handler on abort.
- */
- if (!dop->dop_committed)
- return;
- /* Abort intent items. */
+ /* Abort intent items that don't have a done item. */
list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
- if (!dfp->dfp_done)
+ if (dfp->dfp_intent && !dfp->dfp_done) {
dfp->dfp_type->abort_intent(dfp->dfp_intent);
+ dfp->dfp_intent = NULL;
+ }
}
/* Shut down FS. */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 561cf14..38755ca 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -495,8 +495,8 @@ xfs_submit_ioend(
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+
/*
* If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
@@ -567,8 +567,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b5b9bff..33c435f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1304,7 +1304,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_WRITE) {
op = REQ_OP_WRITE;
if (bp->b_flags & XBF_SYNCIO)
- op_flags = WRITE_SYNC;
+ op_flags = REQ_SYNC;
if (bp->b_flags & XBF_FUA)
op_flags |= REQ_FUA;
if (bp->b_flags & XBF_FLUSH)
OpenPOWER on IntegriCloud