summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c18
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ext4/extents.c2
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/inode.h14
-rw-r--r--fs/gfs2/ops_file.c8
-rw-r--r--fs/gfs2/ops_fstype.c5
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/inode.c36
-rw-r--r--fs/ocfs2/file.c94
-rw-r--r--fs/pipe.c42
-rw-r--r--fs/splice.c371
15 files changed, 349 insertions, 277 deletions
diff --git a/fs/bio.c b/fs/bio.c
index e0c9e54..cd42bb8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -348,6 +348,24 @@ err:
return NULL;
}
+/**
+ * bio_alloc - allocate a bio for I/O
+ * @gfp_mask: the GFP_ mask given to the slab allocator
+ * @nr_iovecs: number of iovecs to pre-allocate
+ *
+ * Description:
+ * bio_alloc will allocate a bio and associated bio_vec array that can hold
+ * at least @nr_iovecs entries. Allocations will be done from the
+ * fs_bio_set. Also see @bio_alloc_bioset.
+ *
+ * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
+ * a bio. This is due to the mempool guarantees. To make this work, callers
+ * must never allocate more than 1 bio at the time from this pool. Callers
+ * that need to allocate more than 1 bio must always submit the previously
+ * allocate bio for IO before attempting to allocate a new one. Failure to
+ * do so can cause livelocks under memory pressure.
+ *
+ **/
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
{
struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
diff --git a/fs/buffer.c b/fs/buffer.c
index 13edf7a..ff8bb1f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -547,7 +547,7 @@ repeat:
return err;
}
-void do_thaw_all(unsigned long unused)
+void do_thaw_all(struct work_struct *work)
{
struct super_block *sb;
char b[BDEVNAME_SIZE];
@@ -567,6 +567,7 @@ restart:
goto restart;
}
spin_unlock(&sb_lock);
+ kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
@@ -577,7 +578,13 @@ restart:
*/
void emergency_thaw_all(void)
{
- pdflush_operation(do_thaw_all, 0);
+ struct work_struct *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(work, do_thaw_all);
+ schedule_work(work);
+ }
}
/**
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da258e7..05763bb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
struct bio *bio;
bio = bio_alloc(GFP_KERNEL, nr_vecs);
- if (bio == NULL)
- return -ENOMEM;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 6132353..2a1cb09 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
len = ee_len;
bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
bio->bi_sector = ee_pblock;
bio->bi_bdev = inode->i_sb->s_bdev;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3984e47..1afd9f2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -597,7 +597,6 @@ __acquires(&gl->gl_spin)
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
- down_read(&gfs2_umount_flush_sem);
if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
gl->gl_demote_state != gl->gl_state) {
if (find_first_holder(gl))
@@ -614,15 +613,14 @@ __acquires(&gl->gl_spin)
if (ret == 0)
goto out_unlock;
if (ret == 2)
- goto out_sem;
+ goto out;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
do_error(gl, 0); /* Fail queued try locks */
}
do_xmote(gl, gh, gl->gl_target);
-out_sem:
- up_read(&gfs2_umount_flush_sem);
+out:
return;
out_sched:
@@ -631,7 +629,7 @@ out_sched:
gfs2_glock_put(gl);
out_unlock:
clear_bit(GLF_LOCK, &gl->gl_flags);
- goto out_sem;
+ goto out;
}
static void glock_work_func(struct work_struct *work)
@@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work)
if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags))
finish_xmote(gl, gl->gl_reply);
+ down_read(&gfs2_umount_flush_sem);
spin_lock(&gl->gl_spin);
if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
gl->gl_state != LM_ST_UNLOCKED &&
@@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work)
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
+ up_read(&gfs2_umount_flush_sem);
if (!delay ||
queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7b277d44..5a31d42 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode)
if (S_ISREG(mode)) {
inode->i_op = &gfs2_file_iops;
if (gfs2_localflocks(sdp))
- inode->i_fop = gfs2_file_fops_nolock;
+ inode->i_fop = &gfs2_file_fops_nolock;
else
- inode->i_fop = gfs2_file_fops;
+ inode->i_fop = &gfs2_file_fops;
} else if (S_ISDIR(mode)) {
inode->i_op = &gfs2_dir_iops;
if (gfs2_localflocks(sdp))
- inode->i_fop = gfs2_dir_fops_nolock;
+ inode->i_fop = &gfs2_dir_fops_nolock;
else
- inode->i_fop = gfs2_dir_fops;
+ inode->i_fop = &gfs2_dir_fops;
} else if (S_ISLNK(mode)) {
inode->i_op = &gfs2_symlink_iops;
} else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index dca4fee..c30be2b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations *gfs2_file_fops_nolock;
-extern const struct file_operations *gfs2_dir_fops_nolock;
+extern const struct file_operations gfs2_file_fops_nolock;
+extern const struct file_operations gfs2_dir_fops_nolock;
extern void gfs2_set_inode_flags(struct inode *inode);
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
-extern const struct file_operations *gfs2_file_fops;
-extern const struct file_operations *gfs2_dir_fops;
+extern const struct file_operations gfs2_file_fops;
+extern const struct file_operations gfs2_dir_fops;
+
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return sdp->sd_args.ar_localflocks;
}
#else /* Single node only */
-#define gfs2_file_fops NULL
-#define gfs2_dir_fops NULL
+#define gfs2_file_fops gfs2_file_fops_nolock
+#define gfs2_dir_fops gfs2_dir_fops_nolock
+
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
return 1;
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 70b9b85..101caf3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -705,7 +705,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
}
}
-const struct file_operations *gfs2_file_fops = &(const struct file_operations){
+const struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -723,7 +723,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){
.setlease = gfs2_setlease,
};
-const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
+const struct file_operations gfs2_dir_fops = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
@@ -735,7 +735,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
-const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
+const struct file_operations gfs2_file_fops_nolock = {
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -751,7 +751,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat
.setlease = generic_setlease,
};
-const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
+const struct file_operations gfs2_dir_fops_nolock = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 51883b3..650a730 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
lock_page(page);
bio = bio_alloc(GFP_NOFS, 1);
- if (unlikely(!bio)) {
- __free_page(page);
- return -ENOBUFS;
- }
-
bio->bi_sector = sector * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio_add_page(bio, page, PAGE_SIZE, 0);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index abd5429..1c70fa5 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
ip = ghs[1].gh_gl->gl_object;
ip->i_disksize = size;
+ i_size_write(inode, size);
error = gfs2_meta_inode_buffer(ip, &dibh);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8d53f66..152e6c4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -81,7 +81,7 @@ struct gfs2_quota_change_host {
static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
-static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(qd_lru_lock);
int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
{
@@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data)
refrigerator();
t = min(quotad_timeo, statfs_timeo);
- prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
spin_lock(&sdp->sd_trunc_lock);
empty = list_empty(&sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
diff --git a/fs/inode.c b/fs/inode.c
index d06d6d2..6ad14a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode)
spin_lock(&inode_lock);
}
-/*
- * We rarely want to lock two inodes that do not have a parent/child
- * relationship (such as directory, child inode) simultaneously. The
- * vast majority of file systems should be able to get along fine
- * without this. Do not use these functions except as a last resort.
- */
-void inode_double_lock(struct inode *inode1, struct inode *inode2)
-{
- if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
- if (inode1)
- mutex_lock(&inode1->i_mutex);
- else if (inode2)
- mutex_lock(&inode2->i_mutex);
- return;
- }
-
- if (inode1 < inode2) {
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
- }
-}
-EXPORT_SYMBOL(inode_double_lock);
-
-void inode_double_unlock(struct inode *inode1, struct inode *inode2)
-{
- if (inode1)
- mutex_unlock(&inode1->i_mutex);
-
- if (inode2 && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
-}
-EXPORT_SYMBOL(inode_double_unlock);
-
static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8672b95..c2a87c8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1912,6 +1912,22 @@ out_sems:
return written ? written : ret;
}
+static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
+ struct file *out,
+ struct splice_desc *sd)
+{
+ int ret;
+
+ ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
+ sd->total_len, 0, NULL);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ return splice_from_pipe_feed(pipe, sd, pipe_to_file);
+}
+
static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out,
loff_t *ppos,
@@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
unsigned int flags)
{
int ret;
- struct inode *inode = out->f_path.dentry->d_inode;
+ struct address_space *mapping = out->f_mapping;
+ struct inode *inode = mapping->host;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
(unsigned int)len,
out->f_path.dentry->d_name.len,
out->f_path.dentry->d_name.name);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ if (pipe->inode)
+ mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
- ret = ocfs2_rw_lock(inode, 1);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
+ splice_from_pipe_begin(&sd);
+ do {
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
- ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0,
- NULL);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_unlock;
- }
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ ret = ocfs2_rw_lock(inode, 1);
+ if (ret < 0)
+ mlog_errno(ret);
+ else {
+ ret = ocfs2_splice_to_file(pipe, out, &sd);
+ ocfs2_rw_unlock(inode, 1);
+ }
+ mutex_unlock(&inode->i_mutex);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, &sd);
if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
- ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
- if (pipe->inode)
mutex_unlock(&pipe->inode->i_mutex);
-out_unlock:
- ocfs2_rw_unlock(inode, 1);
-out:
- mutex_unlock(&inode->i_mutex);
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
+
+ if (ret > 0) {
+ unsigned long nr_pages;
+
+ *ppos += ret;
+ nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ /*
+ * If file or inode is SYNC and we actually wrote some data,
+ * sync it.
+ */
+ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ int err;
+
+ mutex_lock(&inode->i_mutex);
+ err = ocfs2_rw_lock(inode, 1);
+ if (err < 0) {
+ mlog_errno(err);
+ } else {
+ err = generic_osync_inode(inode, mapping,
+ OSYNC_METADATA|OSYNC_DATA);
+ ocfs2_rw_unlock(inode, 1);
+ }
+ mutex_unlock(&inode->i_mutex);
+
+ if (err)
+ ret = err;
+ }
+ balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
+ }
mlog_exit(ret);
return ret;
diff --git a/fs/pipe.c b/fs/pipe.c
index 4af7aa5..13414ec 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -37,6 +37,42 @@
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
+static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+{
+ if (pipe->inode)
+ mutex_lock_nested(&pipe->inode->i_mutex, subclass);
+}
+
+void pipe_lock(struct pipe_inode_info *pipe)
+{
+ /*
+ * pipe_lock() nests non-pipe inode locks (for writing to a file)
+ */
+ pipe_lock_nested(pipe, I_MUTEX_PARENT);
+}
+EXPORT_SYMBOL(pipe_lock);
+
+void pipe_unlock(struct pipe_inode_info *pipe)
+{
+ if (pipe->inode)
+ mutex_unlock(&pipe->inode->i_mutex);
+}
+EXPORT_SYMBOL(pipe_unlock);
+
+void pipe_double_lock(struct pipe_inode_info *pipe1,
+ struct pipe_inode_info *pipe2)
+{
+ BUG_ON(pipe1 == pipe2);
+
+ if (pipe1 < pipe2) {
+ pipe_lock_nested(pipe1, I_MUTEX_PARENT);
+ pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+ } else {
+ pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+ pipe_lock_nested(pipe1, I_MUTEX_PARENT);
+ }
+}
+
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
@@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
* is considered a noninteractive wait:
*/
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
schedule();
finish_wait(&pipe->wait, &wait);
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
}
static int
diff --git a/fs/splice.c b/fs/splice.c
index c18aa7e..5384a90 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
do_wakeup = 0;
page_nr = 0;
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
for (;;) {
if (!pipe->readers) {
@@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
pipe->waiting_writers--;
}
- if (pipe->inode) {
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
while (page_nr < spd_pages)
@@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
* SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
* a new page in the output file page cache and fill/dirty that.
*/
-static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
- struct splice_desc *sd)
+int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
{
struct file *file = sd->u.file;
struct address_space *mapping = file->f_mapping;
@@ -600,108 +597,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
out:
return ret;
}
+EXPORT_SYMBOL(pipe_to_file);
+
+static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
+{
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
+}
/**
- * __splice_from_pipe - splice data from a pipe to given actor
+ * splice_from_pipe_feed - feed available data from a pipe to a file
* @pipe: pipe to splice from
* @sd: information to @actor
* @actor: handler that splices the data
*
* Description:
- * This function does little more than loop over the pipe and call
- * @actor to do the actual moving of a single struct pipe_buffer to
- * the desired destination. See pipe_to_file, pipe_to_sendpage, or
- * pipe_to_user.
+
+ * This function loops over the pipe and calls @actor to do the
+ * actual moving of a single struct pipe_buffer to the desired
+ * destination. It returns when there's no more buffers left in
+ * the pipe or if the requested number of bytes (@sd->total_len)
+ * have been copied. It returns a positive number (one) if the
+ * pipe needs to be filled with more data, zero if the required
+ * number of bytes have been copied and -errno on error.
*
+ * This, together with splice_from_pipe_{begin,end,next}, may be
+ * used to implement the functionality of __splice_from_pipe() when
+ * locking is required around copying the pipe buffers to the
+ * destination.
*/
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
- splice_actor *actor)
+int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
{
- int ret, do_wakeup, err;
-
- ret = 0;
- do_wakeup = 0;
-
- for (;;) {
- if (pipe->nrbufs) {
- struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
- const struct pipe_buf_operations *ops = buf->ops;
+ int ret;
- sd->len = buf->len;
- if (sd->len > sd->total_len)
- sd->len = sd->total_len;
+ while (pipe->nrbufs) {
+ struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+ const struct pipe_buf_operations *ops = buf->ops;
- err = actor(pipe, buf, sd);
- if (err <= 0) {
- if (!ret && err != -ENODATA)
- ret = err;
+ sd->len = buf->len;
+ if (sd->len > sd->total_len)
+ sd->len = sd->total_len;
- break;
- }
+ ret = actor(pipe, buf, sd);
+ if (ret <= 0) {
+ if (ret == -ENODATA)
+ ret = 0;
+ return ret;
+ }
+ buf->offset += ret;
+ buf->len -= ret;
- ret += err;
- buf->offset += err;
- buf->len -= err;
+ sd->num_spliced += ret;
+ sd->len -= ret;
+ sd->pos += ret;
+ sd->total_len -= ret;
- sd->len -= err;
- sd->pos += err;
- sd->total_len -= err;
- if (sd->len)
- continue;
+ if (!buf->len) {
+ buf->ops = NULL;
+ ops->release(pipe, buf);
+ pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
+ pipe->nrbufs--;
+ if (pipe->inode)
+ sd->need_wakeup = true;
+ }
- if (!buf->len) {
- buf->ops = NULL;
- ops->release(pipe, buf);
- pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
- pipe->nrbufs--;
- if (pipe->inode)
- do_wakeup = 1;
- }
+ if (!sd->total_len)
+ return 0;
+ }
- if (!sd->total_len)
- break;
- }
+ return 1;
+}
+EXPORT_SYMBOL(splice_from_pipe_feed);
- if (pipe->nrbufs)
- continue;
+/**
+ * splice_from_pipe_next - wait for some data to splice from
+ * @pipe: pipe to splice from
+ * @sd: information about the splice operation
+ *
+ * Description:
+ * This function will wait for some data and return a positive
+ * value (one) if pipe buffers are available. It will return zero
+ * or -errno if no more data needs to be spliced.
+ */
+int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ while (!pipe->nrbufs) {
if (!pipe->writers)
- break;
- if (!pipe->waiting_writers) {
- if (ret)
- break;
- }
+ return 0;
- if (sd->flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ if (!pipe->waiting_writers && sd->num_spliced)
+ return 0;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
- break;
- }
+ if (sd->flags & SPLICE_F_NONBLOCK)
+ return -EAGAIN;
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- do_wakeup = 0;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (sd->need_wakeup) {
+ wakeup_pipe_writers(pipe);
+ sd->need_wakeup = false;
}
pipe_wait(pipe);
}
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
- }
+ return 1;
+}
+EXPORT_SYMBOL(splice_from_pipe_next);
- return ret;
+/**
+ * splice_from_pipe_begin - start splicing from pipe
+ * @pipe: pipe to splice from
+ *
+ * Description:
+ * This function should be called before a loop containing
+ * splice_from_pipe_next() and splice_from_pipe_feed() to
+ * initialize the necessary fields of @sd.
+ */
+void splice_from_pipe_begin(struct splice_desc *sd)
+{
+ sd->num_spliced = 0;
+ sd->need_wakeup = false;
+}
+EXPORT_SYMBOL(splice_from_pipe_begin);
+
+/**
+ * splice_from_pipe_end - finish splicing from pipe
+ * @pipe: pipe to splice from
+ * @sd: information about the splice operation
+ *
+ * Description:
+ * This function will wake up pipe writers if necessary. It should
+ * be called after a loop containing splice_from_pipe_next() and
+ * splice_from_pipe_feed().
+ */
+void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ if (sd->need_wakeup)
+ wakeup_pipe_writers(pipe);
+}
+EXPORT_SYMBOL(splice_from_pipe_end);
+
+/**
+ * __splice_from_pipe - splice data from a pipe to given actor
+ * @pipe: pipe to splice from
+ * @sd: information to @actor
+ * @actor: handler that splices the data
+ *
+ * Description:
+ * This function does little more than loop over the pipe and call
+ * @actor to do the actual moving of a single struct pipe_buffer to
+ * the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ * pipe_to_user.
+ *
+ */
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
+{
+ int ret;
+
+ splice_from_pipe_begin(sd);
+ do {
+ ret = splice_from_pipe_next(pipe, sd);
+ if (ret > 0)
+ ret = splice_from_pipe_feed(pipe, sd, actor);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, sd);
+
+ return sd->num_spliced ? sd->num_spliced : ret;
}
EXPORT_SYMBOL(__splice_from_pipe);
@@ -715,7 +782,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
* @actor: handler that splices the data
*
* Description:
- * See __splice_from_pipe. This function locks the input and output inodes,
+ * See __splice_from_pipe. This function locks the pipe inode,
* otherwise it's identical to __splice_from_pipe().
*
*/
@@ -724,7 +791,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
splice_actor *actor)
{
ssize_t ret;
- struct inode *inode = out->f_mapping->host;
struct splice_desc sd = {
.total_len = len,
.flags = flags,
@@ -732,30 +798,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
- /*
- * The actor worker might be calling ->write_begin and
- * ->write_end. Most of the time, these expect i_mutex to
- * be held. Since this may result in an ABBA deadlock with
- * pipe->inode, we have to order lock acquiry here.
- *
- * Outer lock must be inode->i_mutex, as pipe_wait() will
- * release and reacquire pipe->inode->i_mutex, AND inode must
- * never be a pipe.
- */
- WARN_ON(S_ISFIFO(inode->i_mode));
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
+ pipe_lock(pipe);
ret = __splice_from_pipe(pipe, &sd, actor);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
- mutex_unlock(&inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
/**
- * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
+ * generic_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info
* @out: file to write to
* @ppos: position in @out
@@ -764,13 +815,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
*
* Description:
* Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file. The caller is responsible
- * for acquiring i_mutex on both inodes.
+ * the given pipe inode to the given file.
*
*/
ssize_t
-generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
+generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
@@ -781,76 +831,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
ssize_t ret;
- int err;
-
- err = file_remove_suid(out);
- if (unlikely(err))
- return err;
-
- ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
- if (ret > 0) {
- unsigned long nr_pages;
- *ppos += ret;
- nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ pipe_lock(pipe);
- /*
- * If file or inode is SYNC and we actually wrote some data,
- * sync it.
- */
- if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- err = generic_osync_inode(inode, mapping,
- OSYNC_METADATA|OSYNC_DATA);
-
- if (err)
- ret = err;
- }
- balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
- }
+ splice_from_pipe_begin(&sd);
+ do {
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
- return ret;
-}
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ ret = file_remove_suid(out);
+ if (!ret)
+ ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+ mutex_unlock(&inode->i_mutex);
+ } while (ret > 0);
+ splice_from_pipe_end(pipe, &sd);
-EXPORT_SYMBOL(generic_file_splice_write_nolock);
+ pipe_unlock(pipe);
-/**
- * generic_file_splice_write - splice data from a pipe to a file
- * @pipe: pipe info
- * @out: file to write to
- * @ppos: position in @out
- * @len: number of bytes to splice
- * @flags: splice modifier flags
- *
- * Description:
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file.
- *
- */
-ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
-{
- struct address_space *mapping = out->f_mapping;
- struct inode *inode = mapping->host;
- struct splice_desc sd = {
- .total_len = len,
- .flags = flags,
- .pos = *ppos,
- .u.file = out,
- };
- ssize_t ret;
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
- WARN_ON(S_ISFIFO(inode->i_mode));
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- ret = file_remove_suid(out);
- if (likely(!ret)) {
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
- ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
- }
- mutex_unlock(&inode->i_mutex);
if (ret > 0) {
unsigned long nr_pages;
@@ -1339,8 +1341,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
if (!pipe)
return -EBADF;
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
error = ret = 0;
while (nr_segs) {
@@ -1395,8 +1396,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
iov++;
}
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
if (!ret)
ret = error;
@@ -1524,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
return 0;
ret = 0;
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
while (!pipe->nrbufs) {
if (signal_pending(current)) {
@@ -1542,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
pipe_wait(pipe);
}
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
@@ -1562,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
return 0;
ret = 0;
- mutex_lock(&pipe->inode->i_mutex);
+ pipe_lock(pipe);
while (pipe->nrbufs >= PIPE_BUFFERS) {
if (!pipe->readers) {
@@ -1583,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
pipe->waiting_writers--;
}
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
return ret;
}
@@ -1599,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe,
/*
* Potential ABBA deadlock, work around it by ordering lock
- * grabbing by inode address. Otherwise two different processes
+ * grabbing by pipe info address. Otherwise two different processes
* could deadlock (one doing tee from A -> B, the other from B -> A).
*/
- inode_double_lock(ipipe->inode, opipe->inode);
+ pipe_double_lock(ipipe, opipe);
do {
if (!opipe->readers) {
@@ -1653,7 +1653,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
ret = -EAGAIN;
- inode_double_unlock(ipipe->inode, opipe->inode);
+ pipe_unlock(ipipe);
+ pipe_unlock(opipe);
/*
* If we put data in the output pipe, wakeup any potential readers.
OpenPOWER on IntegriCloud