diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-04-20 17:27:45 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-04-20 17:27:45 -0400 |
commit | 9707b27100a48950f1e15e08a7c5028786e47f55 (patch) | |
tree | 5745b1e7497ae1499a2e2e9e0a567996419ab34f /fs | |
parent | 8fc65162a8f25929be80c8d6321a3479e92b5aae (diff) | |
parent | 402a26f0c040077ed6f941eefac5a6971f0d5f40 (diff) | |
download | op-kernel-dev-9707b27100a48950f1e15e08a7c5028786e47f55.zip op-kernel-dev-9707b27100a48950f1e15e08a7c5028786e47f55.tar.gz |
Merge branch 'master'
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig | 6 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/ext3/resize.c | 1 | ||||
-rw-r--r-- | fs/lockd/svclock.c | 2 | ||||
-rw-r--r-- | fs/locks.c | 9 | ||||
-rw-r--r-- | fs/nfs/dir.c | 5 | ||||
-rw-r--r-- | fs/nfs/direct.c | 8 | ||||
-rw-r--r-- | fs/nfs/file.c | 5 | ||||
-rw-r--r-- | fs/nfs/inode.c | 5 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 10 | ||||
-rw-r--r-- | fs/open.c | 24 | ||||
-rw-r--r-- | fs/partitions/check.c | 5 | ||||
-rw-r--r-- | fs/proc/base.c | 21 | ||||
-rw-r--r-- | fs/splice.c | 196 |
14 files changed, 208 insertions, 91 deletions
@@ -842,6 +842,12 @@ config TMPFS config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN + help + hugetlbfs is a filesystem backing for HugeTLB pages, based on + ramfs. For architectures that support it, say Y here and read + <file:Documentation/vm/hugetlbpage.txt> for details. + + If unsure, say N. config HUGETLB_PAGE def_bool HUGETLBFS @@ -712,7 +712,7 @@ static int de_thread(struct task_struct *tsk) attach_pid(current, PIDTYPE_PID, current->pid); attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); attach_pid(current, PIDTYPE_SID, current->signal->session); - list_add_tail(&current->tasks, &init_task.tasks); + list_add_tail_rcu(&current->tasks, &init_task.tasks); current->group_leader = current; leader->group_leader = current; diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 14f5f6e..c5ffa85 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -767,6 +767,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) if (input->group != sbi->s_groups_count) { ext3_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); + unlock_super(sb); err = -EBUSY; goto exit_journal; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d2b66ba..3ef7391 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -650,7 +650,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) svc_wake_up(block->b_daemon); } -void nlmsvc_grant_release(void *data) +static void nlmsvc_grant_release(void *data) { struct nlm_rqst *call = data; @@ -2230,7 +2230,12 @@ void steal_locks(fl_owner_t from) lock_kernel(); j = 0; - rcu_read_lock(); + + /* + * We are not taking a ref to the file structures, so + * we need to acquire ->file_lock. 
+ */ + spin_lock(&files->file_lock); fdt = files_fdtable(files); for (;;) { unsigned long set; @@ -2248,7 +2253,7 @@ void steal_locks(fl_owner_t from) set >>= 1; } } - rcu_read_unlock(); + spin_unlock(&files->file_lock); unlock_kernel(); } EXPORT_SYMBOL(steal_locks); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a23f348..cae74dd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -128,15 +128,14 @@ struct inode_operations nfs4_dir_inode_operations = { static int nfs_opendir(struct inode *inode, struct file *filp) { - int res = 0; + int res; dfprintk(VFS, "NFS: opendir(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); lock_kernel(); /* Call generic open code in order to cache credentials */ - if (!res) - res = nfs_open(inode, filp); + res = nfs_open(inode, filp); unlock_kernel(); return res; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0f583cb..3c72b0c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -112,10 +112,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { - struct dentry *dentry = iocb->ki_filp->f_dentry; - dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", - dentry->d_name.name, (long long) pos, nr_segs); + iocb->ki_filp->f_dentry->d_name.name, + (long long) pos, nr_segs); return -EINVAL; } @@ -468,7 +467,6 @@ static const struct rpc_call_ops nfs_commit_direct_ops = { static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { struct nfs_write_data *data = dreq->commit_data; - struct rpc_task *task = &data->task; data->inode = dreq->inode; data->cred = dreq->ctx->cred; @@ -489,7 +487,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ dreq->commit_data = NULL; - dprintk("NFS: %5u initiated commit call\n", task->tk_pid); + dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); lock_kernel(); 
rpc_execute(&data->task); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f1df2c8..fade02c 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -534,10 +534,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) */ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) { - struct inode * inode = filp->f_mapping->host; - dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", - inode->i_sb->s_id, inode->i_ino, + filp->f_dentry->d_inode->i_sb->s_id, + filp->f_dentry->d_inode->i_ino, fl->fl_type, fl->fl_flags); /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2f7656b..d0b991a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -700,12 +700,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) /* * Display superblock I/O counters */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_possible_cpu(cpu) { struct nfs_iostats *stats; - if (!cpu_possible(cpu)) - continue; - preempt_disable(); stats = per_cpu_ptr(nfss->io_stats, cpu); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47ece1d..d86c0db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1218,7 +1218,7 @@ out: return status; } -static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) { struct file *filp; @@ -1227,8 +1227,10 @@ static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, st struct nfs_open_context *ctx; ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; - } else - nfs4_close_state(state, nd->intent.open.flags); + return 0; + } + nfs4_close_state(state, nd->intent.open.flags); + return PTR_ERR(filp); } struct dentry * @@ -1835,7 +1837,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, nfs_setattr_update_inode(state->inode, sattr); } if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) - 
nfs4_intent_set_file(nd, dentry, state); + status = nfs4_intent_set_file(nd, dentry, state); else nfs4_close_state(state, flags); out: @@ -331,7 +331,10 @@ out: asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) { - return do_sys_ftruncate(fd, length, 1); + long ret = do_sys_ftruncate(fd, length, 1); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } /* LFS versions of truncate are only needed on 32 bit machines */ @@ -343,7 +346,10 @@ asmlinkage long sys_truncate64(const char __user * path, loff_t length) asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) { - return do_sys_ftruncate(fd, length, 0); + long ret = do_sys_ftruncate(fd, length, 0); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } #endif @@ -1093,20 +1099,30 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) asmlinkage long sys_open(const char __user *filename, int flags, int mode) { + long ret; + if (force_o_largefile()) flags |= O_LARGEFILE; - return do_sys_open(AT_FDCWD, filename, flags, mode); + ret = do_sys_open(AT_FDCWD, filename, flags, mode); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } EXPORT_SYMBOL_GPL(sys_open); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, int mode) { + long ret; + if (force_o_largefile()) flags |= O_LARGEFILE; - return do_sys_open(dfd, filename, flags, mode); + ret = do_sys_open(dfd, filename, flags, mode); + /* avoid REGPARM breakage on x86: */ + prevent_tail_call(ret); + return ret; } EXPORT_SYMBOL_GPL(sys_openat); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f3b6af0..45ae7dd 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -372,6 +372,7 @@ static char *make_block_name(struct gendisk *disk) char *name; static char *block_str = "block:"; int size; + char *s; size = strlen(block_str) + strlen(disk->disk_name) + 1; name = kmalloc(size, GFP_KERNEL); @@ 
-379,6 +380,10 @@ static char *make_block_name(struct gendisk *disk) return NULL; strcpy(name, block_str); strcat(name, disk->disk_name); + /* ewww... some of these buggers have / in name... */ + s = strchr(name, '/'); + if (s) + *s = '!'; return name; } diff --git a/fs/proc/base.c b/fs/proc/base.c index a3a3eec..6cc77dc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -297,16 +297,20 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm files = get_files_struct(task); if (files) { - rcu_read_lock(); + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. + */ + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - rcu_read_unlock(); + spin_unlock(&files->file_lock); put_files_struct(files); return 0; } - rcu_read_unlock(); + spin_unlock(&files->file_lock); put_files_struct(files); } return -ENOENT; @@ -1523,7 +1527,12 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - rcu_read_lock(); + + /* + * We are not taking a ref to the file structure, so we must + * hold ->file_lock. 
+ */ + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -1531,7 +1540,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - rcu_read_unlock(); + spin_unlock(&files->file_lock); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1541,7 +1550,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, return NULL; out_unlock2: - rcu_read_unlock(); + spin_unlock(&files->file_lock); put_files_struct(files); out_unlock: iput(inode); diff --git a/fs/splice.c b/fs/splice.c index 8d57e89..0559e75 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -50,7 +50,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, struct page *page = buf->page; struct address_space *mapping = page_mapping(page); - WARN_ON(!PageLocked(page)); + lock_page(page); + WARN_ON(!PageUptodate(page)); /* @@ -65,8 +66,10 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, if (PagePrivate(page)) try_to_release_page(page, mapping_gfp_mask(mapping)); - if (!remove_mapping(mapping, page)) + if (!remove_mapping(mapping, page)) { + unlock_page(page); return 1; + } buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; return 0; @@ -145,8 +148,8 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 
*/ static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages, - int nr_pages, unsigned long offset, - unsigned long len, unsigned int flags) + int nr_pages, unsigned long len, + unsigned int offset, unsigned int flags) { int ret, do_wakeup, i; @@ -243,14 +246,16 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, unsigned int flags) { struct address_space *mapping = in->f_mapping; - unsigned int offset, nr_pages; + unsigned int loff, offset, nr_pages; struct page *pages[PIPE_BUFFERS]; struct page *page; - pgoff_t index; + pgoff_t index, end_index; + loff_t isize; + size_t bytes; int i, error; index = *ppos >> PAGE_CACHE_SHIFT; - offset = *ppos & ~PAGE_CACHE_MASK; + loff = offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (nr_pages > PIPE_BUFFERS) @@ -268,7 +273,17 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * Now fill in the holes: */ error = 0; + bytes = 0; for (i = 0; i < nr_pages; i++, index++) { + unsigned int this_len; + + if (!len) + break; + + /* + * this_len is the max we'll use from this page + */ + this_len = min(len, PAGE_CACHE_SIZE - loff); find_page: /* * lookup the page for this index @@ -276,14 +291,6 @@ find_page: page = find_get_page(mapping, index); if (!page) { /* - * If in nonblock mode then dont block on - * readpage (we've kicked readahead so there - * will be asynchronous progress): - */ - if (flags & SPLICE_F_NONBLOCK) - break; - - /* * page didn't exist, allocate one */ page = page_cache_alloc_cold(mapping); @@ -304,6 +311,13 @@ find_page: * If the page isn't uptodate, we may need to start io on it */ if (!PageUptodate(page)) { + /* + * If in nonblock mode then dont block on waiting + * for an in-flight io page + */ + if (flags & SPLICE_F_NONBLOCK) + break; + lock_page(page); /* @@ -336,13 +350,43 @@ readpage: goto find_page; break; } + + /* + * i_size must be checked after ->readpage(). 
+ */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) { + page_cache_release(page); + break; + } + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); + if (bytes + loff > isize) { + page_cache_release(page); + break; + } + /* + * force quit after adding this page + */ + nr_pages = i; + this_len = min(this_len, loff); + } } fill_it: pages[i] = page; + bytes += this_len; + len -= this_len; + loff = 0; } if (i) - return move_to_pipe(pipe, pages, i, offset, len, flags); + return move_to_pipe(pipe, pages, i, bytes, offset, flags); return error; } @@ -369,17 +413,20 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, while (len) { ret = __generic_file_splice_read(in, ppos, pipe, len, flags); - if (ret <= 0) + if (ret < 0) break; + else if (!ret) { + if (spliced) + break; + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } + } *ppos += ret; len -= ret; spliced += ret; - - if (!(flags & SPLICE_F_NONBLOCK)) - continue; - ret = -EAGAIN; - break; } if (spliced) @@ -474,14 +521,12 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, if (sd->flags & SPLICE_F_MOVE) { /* * If steal succeeds, buf->page is now pruned from the vm - * side (LRU and page cache) and we can reuse it. + * side (LRU and page cache) and we can reuse it. The page + * will also be looked on successful return. 
*/ if (buf->ops->steal(info, buf)) goto find_page; - /* - * this will also set the page locked - */ page = buf->page; if (add_to_page_cache(page, mapping, index, gfp_mask)) goto find_page; @@ -490,15 +535,27 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, lru_cache_add(page); } else { find_page: - ret = -ENOMEM; - page = find_or_create_page(mapping, index, gfp_mask); - if (!page) - goto out_nomem; + page = find_lock_page(mapping, index); + if (!page) { + ret = -ENOMEM; + page = page_cache_alloc_cold(mapping); + if (unlikely(!page)) + goto out_nomem; + + /* + * This will also lock the page + */ + ret = add_to_page_cache_lru(page, mapping, index, + gfp_mask); + if (unlikely(ret)) + goto out; + } /* - * If the page is uptodate, it is also locked. If it isn't - * uptodate, we can mark it uptodate if we are filling the - * full page. Otherwise we need to read it in first... + * We get here with the page locked. If the page is also + * uptodate, we don't need to do more. If it isn't, we + * may need to bring it in if we are not going to overwrite + * the full page. */ if (!PageUptodate(page)) { if (sd->len < PAGE_CACHE_SIZE) { @@ -520,10 +577,8 @@ find_page: ret = -EIO; goto out; } - } else { - WARN_ON(!PageLocked(page)); + } else SetPageUptodate(page); - } } } @@ -552,10 +607,10 @@ find_page: mark_page_accessed(page); balance_dirty_pages_ratelimited(mapping); out: - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { + if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) page_cache_release(page); - unlock_page(page); - } + + unlock_page(page); out_nomem: buf->ops->unmap(info, buf); return ret; @@ -687,22 +742,26 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ssize_t ret; ret = move_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); - - /* - * If file or inode is SYNC and we actually wrote some data, sync it. 
- */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) - && ret > 0) { + if (ret > 0) { struct inode *inode = mapping->host; - int err; - mutex_lock(&inode->i_mutex); - err = generic_osync_inode(mapping->host, mapping, - OSYNC_METADATA|OSYNC_DATA); - mutex_unlock(&inode->i_mutex); + *ppos += ret; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + mutex_unlock(&inode->i_mutex); - if (err) - ret = err; + if (err) + ret = err; + } } return ret; @@ -904,6 +963,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, { struct pipe_inode_info *pipe; loff_t offset, *off; + long ret; pipe = in->f_dentry->d_inode->i_pipe; if (pipe) { @@ -918,7 +978,12 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &out->f_pos; - return do_splice_from(pipe, out, off, len, flags); + ret = do_splice_from(pipe, out, off, len, flags); + + if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) + ret = -EFAULT; + + return ret; } pipe = out->f_dentry->d_inode->i_pipe; @@ -934,7 +999,12 @@ static long do_splice(struct file *in, loff_t __user *off_in, } else off = &in->f_pos; - return do_splice_to(in, off, pipe, len, flags); + ret = do_splice_to(in, off, pipe, len, flags); + + if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) + ret = -EFAULT; + + return ret; } return -EINVAL; @@ -979,7 +1049,9 @@ static int link_pipe(struct pipe_inode_info *ipipe, size_t len, unsigned int flags) { struct pipe_buffer *ibuf, *obuf; - int ret = 0, do_wakeup = 0, i; + int ret, do_wakeup, i, ipipe_first; + + ret = do_wakeup = ipipe_first = 0; /* * Potential ABBA deadlock, work around it by ordering lock @@ -987,6 +1059,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * could deadlock (one doing tee from A -> B, the other from B -> A). 
*/ if (ipipe->inode < opipe->inode) { + ipipe_first = 1; mutex_lock(&ipipe->inode->i_mutex); mutex_lock(&opipe->inode->i_mutex); } else { @@ -1035,9 +1108,11 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * We have input available, but no output room. - * If we already copied data, return that. + * If we already copied data, return that. If we + * need to drop the opipe lock, it must be ordered + * last to avoid deadlocks. */ - if (flags & SPLICE_F_NONBLOCK) { + if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { if (!ret) ret = -EAGAIN; break; @@ -1071,7 +1146,12 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (ret) break; } - if (flags & SPLICE_F_NONBLOCK) { + /* + * pipe_wait() drops the ipipe mutex. To avoid deadlocks + * with another process, we can only safely do that if + * the ipipe lock is ordered last. + */ + if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { if (!ret) ret = -EAGAIN; break; |