diff options
author | David Woodhouse <dwmw2@infradead.org> | 2007-04-26 09:31:28 +0100 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2007-04-26 09:31:28 +0100 |
commit | ef2e58ea6b9931c3a4816c66593da49bb20e3b24 (patch) | |
tree | ce7432add3becbe78de4ea06425cd2d9e91f4ada /fs | |
parent | 06d63cc51d47f572009138a7f3ac34d95773405d (diff) | |
parent | de46c33745f5e2ad594c72f2cf5f490861b16ce1 (diff) | |
download | op-kernel-dev-ef2e58ea6b9931c3a4816c66593da49bb20e3b24.zip op-kernel-dev-ef2e58ea6b9931c3a4816c66593da49bb20e3b24.tar.gz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
58 files changed, 768 insertions, 680 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 8ada4c5..6a82d39 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -40,7 +40,6 @@ extern struct file_system_type v9fs_fs_type; extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; -extern const struct file_operations v9fs_cached_file_operations; extern const struct file_operations v9fs_dir_operations; extern struct dentry_operations v9fs_dentry_operations; extern struct dentry_operations v9fs_cached_dentry_operations; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 653dfa5..c7b6772 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -42,6 +42,8 @@ #include "v9fs_vfs.h" #include "fid.h" +static const struct file_operations v9fs_cached_file_operations; + /** * v9fs_file_open - open a file (or directory) * @inode: inode to be opened @@ -245,7 +247,7 @@ v9fs_file_write(struct file *filp, const char __user * data, return total; } -const struct file_operations v9fs_cached_file_operations = { +static const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 124a085..b01b0a4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -415,7 +415,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) file_inode = file->d_inode; sb = file_inode->i_sb; v9ses = v9fs_inode2v9ses(file_inode); - v9fid = v9fs_fid_lookup(file); + v9fid = v9fs_fid_clone(file); if(IS_ERR(v9fid)) return PTR_ERR(v9fid); @@ -136,7 +136,6 @@ static int aio_setup_ring(struct kioctx *ctx) 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); - printk("mmap err: %ld\n", -info->mmap_base); info->mmap_size = 0; aio_free_ring(ctx); return -EAGAIN; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index b463104..d0e9b3a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -470,9 +470,6 @@ void autofs4_dentry_release(struct dentry *de) if (inf) { struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); - inf->dentry = NULL; - inf->inode = NULL; - if (sbi) { spin_lock(&sbi->rehash_lock); if (!list_empty(&inf->rehash)) @@ -480,6 +477,9 @@ void autofs4_dentry_release(struct dentry *de) spin_unlock(&sbi->rehash_lock); } + inf->dentry = NULL; + inf->inode = NULL; + autofs4_free_ino(inf); } } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 51db118..9cc4f0a 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -507,7 +507,7 @@ out: #define INTERPRETER_ELF 2 #ifndef STACK_RND_MASK -#define STACK_RND_MASK 0x7ff /* with 4K pages 8MB of VA */ +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */ #endif static unsigned long randomize_stack_top(unsigned long stack_top) @@ -1704,7 +1704,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - DUMP_SEEK(PAGE_SIZE); + if (!dump_seek(file, PAGE_SIZE)) { + page_cache_release(page); + goto end_coredump; + } } else { void *kaddr; flush_cache_page(vma, addr, diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5810aa1..f3ddca4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -179,6 +179,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, int executable_stack; int retval, i; + kdebug("____ LOAD %d ____", current->pid); + memset(&exec_params, 0, sizeof(exec_params)); memset(&interp_params, 0, sizeof(interp_params)); @@ -941,8 +943,11 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( if (mm) { if (phdr->p_flags & PF_X) { - mm->start_code = seg->addr; - mm->end_code = seg->addr + phdr->p_memsz; + if (!mm->start_code) { + mm->start_code = seg->addr; + mm->end_code = seg->addr + + phdr->p_memsz; + } } else if (!mm->start_data) { mm->start_data = seg->addr; #ifndef CONFIG_MMU @@ -1123,8 +1128,10 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, if (mm) { if (phdr->p_flags & PF_X) { - mm->start_code = maddr; - mm->end_code = maddr + phdr->p_memsz; + if (!mm->start_code) { + mm->start_code = maddr; + mm->end_code = maddr + phdr->p_memsz; + } } else if (!mm->start_data) { mm->start_data = maddr; mm->end_data = maddr + phdr->p_memsz; @@ -1473,8 +1480,8 @@ static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, DUMP_SEEK(file->f_pos + PAGE_SIZE); } else if (page == ZERO_PAGE(addr)) { - DUMP_SEEK(file->f_pos + PAGE_SIZE); page_cache_release(page); + DUMP_SEEK(file->f_pos + PAGE_SIZE); } else { void *kaddr; diff --git a/fs/char_dev.c b/fs/char_dev.c index 78ced72..164a45c 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -109,8 +109,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, /* temporary */ if (major == 0) { for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) { - if (is_lanana_major(i)) - continue; if (chrdevs[i] == NULL) break; } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 6247628..5d1f487 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -4,6 +4,12 @@ Fix mtime bouncing around from local idea of last write times to remote time. Fix hang (in i_size_read) when simultaneous size update of same remote file on smp system corrupts sequence number. Do not reread unnecessarily partial page (which we are about to overwrite anyway) when writing out file opened rw. +When DOS attribute of file on non-Unix server's file changes on the server side +from read-only back to read-write, reflect this change in default file mode +(we had been leaving a file's mode read-only until the inode were reloaded). +Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute +when archive dos attribute not set and we are changing mode back to writeable +on server which does not support the Unix Extensions). Version 1.47 ------------ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 0efdf35..4d8948e 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -220,7 +220,7 @@ */ #define CIFS_NO_HANDLE 0xFFFF -#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL +#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) #define NO_CHANGE_32 0xFFFFFFFFUL /* IPC$ in ASCII */ @@ -1887,7 +1887,13 @@ typedef struct { calls including posix open and posix unlink */ #ifdef CONFIG_CIFS_POSIX -#define CIFS_UNIX_CAP_MASK 0x0000003b +/* Can not set pathnames cap yet until we send new posix create SMB since + otherwise server can treat such handles opened with older ntcreatex + (by a new client which knows how to send posix path ops) + as non-posix handles (can affect write behavior with byte range locks. + We can add back in POSIX_PATH_OPS cap when Posix Create/Mkdir finished */ +/* #define CIFS_UNIX_CAP_MASK 0x0000003b */ +#define CIFS_UNIX_CAP_MASK 0x0000001b #else #define CIFS_UNIX_CAP_MASK 0x00000013 #endif /* CONFIG_CIFS_POSIX */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 86b9dbb..f414526 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -494,6 +494,12 @@ int cifs_get_inode_info(struct inode **pinode, mode e.g. 555 */ if (cifsInfo->cifsAttrs & ATTR_READONLY) inode->i_mode &= ~(S_IWUGO); + else if ((inode->i_mode & S_IWUGO) == 0) + /* the ATTR_READONLY flag may have been */ + /* changed on server -- set any w bits */ + /* allowed by mnt_file_mode */ + inode->i_mode |= (S_IWUGO & + cifs_sb->mnt_file_mode); /* BB add code here - validate if device or weird share or device type? */ } @@ -1190,6 +1196,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; int set_time = FALSE; + int set_dosattr = FALSE; __u64 mode = 0xFFFFFFFFFFFFFFFFULL; __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; @@ -1326,15 +1333,23 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) else if (attrs->ia_valid & ATTR_MODE) { rc = 0; if ((mode & S_IWUGO) == 0) /* not writeable */ { - if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) + if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { + set_dosattr = TRUE; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | ATTR_READONLY); + } } else if ((mode & S_IWUGO) == S_IWUGO) { - if (cifsInode->cifsAttrs & ATTR_READONLY) + if (cifsInode->cifsAttrs & ATTR_READONLY) { + set_dosattr = TRUE; time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & (~ATTR_READONLY)); + /* Windows ignores set to zero */ + if(time_buf.Attributes == 0) + time_buf.Attributes |= + cpu_to_le32(ATTR_NORMAL); + } } /* BB to be implemented - via Windows security descriptors or streams */ @@ -1372,7 +1387,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } else time_buf.ChangeTime = 0; - if (set_time || time_buf.Attributes) { + if (set_time || set_dosattr) { time_buf.CreationTime = 0; /* do not change */ /* In the future we should experiment - try setting timestamps via Handle (SetFileInfo) instead of by path */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 44cfb52..2a374d52 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -219,6 +219,10 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, tmp_inode->i_mode |= S_IFREG; if (attr & ATTR_READONLY) tmp_inode->i_mode &= ~(S_IWUGO); + else if ((tmp_inode->i_mode & S_IWUGO) == 0) + /* the ATTR_READONLY flag may have been changed on */ + /* server -- set any w bits allowed by mnt_file_mode */ + tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); } /* could add code here - to validate if device or weird share type? */ /* can not fill in nlink here as in qpathinfo version and Unx search */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c81c958..8b1c5d8 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2553,11 +2553,15 @@ HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl) HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl) /* wireless */ HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl) @@ -2565,6 +2569,11 @@ HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl) HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl) +HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl) +HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl) HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 34750d5..5e6e37e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1141,25 +1141,22 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = -ENOMEM; dentry = d_alloc(configfs_sb->s_root, &name); - if (!dentry) - goto out_release; - - d_add(dentry, NULL); + if (dentry) { + d_add(dentry, NULL); - err = configfs_attach_group(sd->s_element, &group->cg_item, - dentry); - if (!err) - dentry = NULL; - else - d_delete(dentry); + err = configfs_attach_group(sd->s_element, &group->cg_item, + dentry); + if (err) { + d_delete(dentry); + dput(dentry); + } + } mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); - if (dentry) { - dput(dentry); -out_release: - unlink_group(group); - configfs_release_fs(); + if (err) { + unlink_group(group); + configfs_release_fs(); } return err; diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 329efcd..cb20b96 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -78,18 +78,13 @@ struct kmem_cache *ecryptfs_dentry_info_cache; */ static void ecryptfs_d_release(struct dentry *dentry) { - struct dentry *lower_dentry; - - lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (ecryptfs_dentry_to_private(dentry)) + if (ecryptfs_dentry_to_private(dentry)) { + if (ecryptfs_dentry_to_lower(dentry)) { + mntput(ecryptfs_dentry_to_lower_mnt(dentry)); + dput(ecryptfs_dentry_to_lower(dentry)); + } kmem_cache_free(ecryptfs_dentry_info_cache, ecryptfs_dentry_to_private(dentry)); - if (lower_dentry) { - struct vfsmount *lower_mnt = - ecryptfs_dentry_to_lower_mnt(dentry); - - mntput(lower_mnt); - dput(lower_dentry); } return; } @@ -1244,13 +1244,17 @@ EXPORT_SYMBOL(set_binfmt); * name into corename, which must have space for at least * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. */ -static void format_corename(char *corename, const char *pattern, long signr) +static int format_corename(char *corename, const char *pattern, long signr) { const char *pat_ptr = pattern; char *out_ptr = corename; char *const out_end = corename + CORENAME_MAX_SIZE; int rc; int pid_in_pattern = 0; + int ispipe = 0; + + if (*pattern == '|') + ispipe = 1; /* Repeat as long as we have more pattern to process and more output space */ @@ -1341,8 +1345,8 @@ static void format_corename(char *corename, const char *pattern, long signr) * * If core_pattern does not include a %p (as is the default) * and core_uses_pid is set, then .%pid will be appended to - * the filename */ - if (!pid_in_pattern + * the filename. Do not do this for piped commands. */ + if (!ispipe && !pid_in_pattern && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { rc = snprintf(out_ptr, out_end - out_ptr, ".%d", current->tgid); @@ -1350,8 +1354,9 @@ static void format_corename(char *corename, const char *pattern, long signr) goto out; out_ptr += rc; } - out: +out: *out_ptr = 0; + return ispipe; } static void zap_process(struct task_struct *start) @@ -1502,16 +1507,15 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * uses lock_kernel() */ lock_kernel(); - format_corename(corename, core_pattern, signr); + ispipe = format_corename(corename, core_pattern, signr); unlock_kernel(); - if (corename[0] == '|') { + if (ispipe) { /* SIGPIPE can happen, but it's just never processed */ if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); goto fail_unlock; } - ispipe = 1; } else file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 8a824f4..a5b150f 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1148,102 +1148,37 @@ static int do_journal_get_write_access(handle_t *handle, return ext3_journal_get_write_access(handle, bh); } -/* - * The idea of this helper function is following: - * if prepare_write has allocated some blocks, but not all of them, the - * transaction must include the content of the newly allocated blocks. - * This content is expected to be set to zeroes by block_prepare_write(). - * 2006/10/14 SAW - */ -static int ext3_prepare_failure(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct address_space *mapping; - struct buffer_head *bh, *head, *next; - unsigned block_start, block_end; - unsigned blocksize; - int ret; - handle_t *handle = ext3_journal_current_handle(); - - mapping = page->mapping; - if (ext3_should_writeback_data(mapping->host)) { - /* optimization: no constraints about data */ -skip: - return ext3_journal_stop(handle); - } - - head = page_buffers(page); - blocksize = head->b_size; - for ( bh = head, block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from) - continue; - if (block_start >= to) { - block_start = to; - break; - } - if (!buffer_mapped(bh)) - /* prepare_write failed on this bh */ - break; - if (ext3_should_journal_data(mapping->host)) { - ret = do_journal_get_write_access(handle, bh); - if (ret) { - ext3_journal_stop(handle); - return ret; - } - } - /* - * block_start here becomes the first block where the current iteration - * of prepare_write failed. - */ - } - if (block_start <= from) - goto skip; - - /* commit allocated and zeroed buffers */ - return mapping->a_ops->commit_write(file, page, from, block_start); -} - static int ext3_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, ret2; - int needed_blocks = ext3_writepage_trans_blocks(inode); + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext3_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext3_get_block); else ret = block_prepare_write(page, from, to, ext3_get_block); if (ret) - goto failure; + goto prepare_write_failed; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); - if (ret) - /* fatal error, just put the handle and return */ - journal_stop(handle); } - return ret; - -failure: - ret2 = ext3_prepare_failure(file, page, from, to); - if (ret2 < 0) - return ret2; +prepare_write_failed: + if (ret) + ext3_journal_stop(handle); if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; - /* retry number exceeded, or other error like -EDQUOT */ +out: return ret; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 12f7dda..f58cbb2 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -495,7 +495,8 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, BHDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(BHDR(bh)->h_refcount) - 1); error = ext3_journal_dirty_metadata(handle, bh); - handle->h_sync = 1; + if (IS_SYNC(inode)) + handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fbff4b9..810b6d6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1147,102 +1147,37 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } -/* - * The idea of this helper function is following: - * if prepare_write has allocated some blocks, but not all of them, the - * transaction must include the content of the newly allocated blocks. - * This content is expected to be set to zeroes by block_prepare_write(). - * 2006/10/14 SAW - */ -static int ext4_prepare_failure(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct address_space *mapping; - struct buffer_head *bh, *head, *next; - unsigned block_start, block_end; - unsigned blocksize; - int ret; - handle_t *handle = ext4_journal_current_handle(); - - mapping = page->mapping; - if (ext4_should_writeback_data(mapping->host)) { - /* optimization: no constraints about data */ -skip: - return ext4_journal_stop(handle); - } - - head = page_buffers(page); - blocksize = head->b_size; - for ( bh = head, block_start = 0; - bh != head || !block_start; - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from) - continue; - if (block_start >= to) { - block_start = to; - break; - } - if (!buffer_mapped(bh)) - /* prepare_write failed on this bh */ - break; - if (ext4_should_journal_data(mapping->host)) { - ret = do_journal_get_write_access(handle, bh); - if (ret) { - ext4_journal_stop(handle); - return ret; - } - } - /* - * block_start here becomes the first block where the current iteration - * of prepare_write failed. - */ - } - if (block_start <= from) - goto skip; - - /* commit allocated and zeroed buffers */ - return mapping->a_ops->commit_write(file, page, from, block_start); -} - static int ext4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, ret2; - int needed_blocks = ext4_writepage_trans_blocks(inode); + int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_prepare_write(page, from, to, ext4_get_block); else ret = block_prepare_write(page, from, to, ext4_get_block); if (ret) - goto failure; + goto prepare_write_failed; if (ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); - if (ret) - /* fatal error, just put the handle and return */ - ext4_journal_stop(handle); } - return ret; - -failure: - ret2 = ext4_prepare_failure(file, page, from, to); - if (ret2 < 0) - return ret2; +prepare_write_failed: + if (ret) + ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - /* retry number exceeded, or other error like -EDQUOT */ +out: return ret; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 406bf61..8890eba 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -195,7 +195,7 @@ static struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; -static int valid_mode(int m) +int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); @@ -248,7 +248,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, fuse_put_request(fc, req); /* Zero nodeid is same as -ENOENT, but with valid timeout */ if (!err && outarg.nodeid && - (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode))) + (invalid_nodeid(outarg.nodeid) || + !fuse_valid_type(outarg.attr.mode))) err = -EIO; if (!err && outarg.nodeid) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b98b20d..68ae87c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -552,3 +552,8 @@ int fuse_ctl_add_conn(struct fuse_conn *fc); * Remove connection from control filesystem */ void fuse_ctl_remove_conn(struct fuse_conn *fc); + +/** + * Is file type valid? + */ +int fuse_valid_type(int m); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5ab8e50..608db81 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -330,6 +330,8 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) case OPT_ROOTMODE: if (match_octal(&args[0], &value)) return 0; + if (!fuse_valid_type(value)) + return 0; d->rootmode = value; d->rootmode_present = 1; break; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 9baf697..fd301a9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -20,7 +20,6 @@ #include "hostfs.h" #include "kern_util.h" #include "kern.h" -#include "user_util.h" #include "init.h" struct hostfs_inode_info { @@ -939,7 +938,7 @@ static const struct address_space_operations hostfs_link_aops = { static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) { struct inode *root_inode; - char *name, *data = d; + char *host_root_path, *req_root = d; int err; sb->s_blocksize = 1024; @@ -948,16 +947,16 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_op = &hostfs_sbops; /* NULL is printed as <NULL> by sprintf: avoid that. */ - if (data == NULL) - data = ""; + if (req_root == NULL) + req_root = ""; err = -ENOMEM; - name = kmalloc(strlen(root_ino) + 1 - + strlen(data) + 1, GFP_KERNEL); - if(name == NULL) + host_root_path = kmalloc(strlen(root_ino) + 1 + + strlen(req_root) + 1, GFP_KERNEL); + if(host_root_path == NULL) goto out; - sprintf(name, "%s/%s", root_ino, data); + sprintf(host_root_path, "%s/%s", root_ino, req_root); root_inode = iget(sb, 0); if(root_inode == NULL) @@ -967,10 +966,10 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) if(err) goto out_put; - HOSTFS_I(root_inode)->host_filename = name; - /* Avoid that in the error path, iput(root_inode) frees again name through - * hostfs_destroy_inode! */ - name = NULL; + HOSTFS_I(root_inode)->host_filename = host_root_path; + /* Avoid that in the error path, iput(root_inode) frees again + * host_root_path through hostfs_destroy_inode! */ + host_root_path = NULL; err = -ENOMEM; sb->s_root = d_alloc_root(root_inode); @@ -990,7 +989,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) out_put: iput(root_inode); out_free: - kfree(name); + kfree(host_root_path); out: return(err); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 92d8ec8..cd34697 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1684,7 +1684,8 @@ go_ahead: * ... prune child dentries and writebacks if needed. */ if (atomic_read(&old_dentry->d_count) > 1) { - nfs_wb_all(old_inode); + if (S_ISREG(old_inode->i_mode)) + nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); } nfs_inode_return_delegation(old_inode); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b1c98ea..2877744c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -432,10 +432,10 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) return; if (unlikely(task->tk_status < 0)) { - dreq->error = task->tk_status; + dprintk("NFS: %5u commit failed with error %d.\n", + task->tk_pid, task->tk_status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { dprintk("NFS: %5u commit verify failed\n", task->tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } @@ -531,9 +531,12 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) spin_lock(&dreq->lock); + if (unlikely(dreq->error != 0)) + goto out_unlock; if (unlikely(status < 0)) { + /* An error has occured, so we should not commit */ + dreq->flags = 0; dreq->error = status; - goto out_unlock; } dreq->count += data->res.count; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index af53c02..44aa9b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -341,8 +341,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) lock_kernel(); nfs_begin_data_update(inode); /* Write all dirty data */ - filemap_write_and_wait(inode->i_mapping); - nfs_wb_all(inode); + if (S_ISREG(inode->i_mode)) { + filemap_write_and_wait(inode->i_mapping); + nfs_wb_all(inode); + } /* * Return any delegations if we're going to change ACLs */ @@ -429,7 +431,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int err; /* Flush out writes to the server in order to update c/mtime */ - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + if (S_ISREG(inode->i_mode)) + nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bb516a2..f1eae44 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -151,10 +151,10 @@ int __init register_nfs_fs(void) if (ret < 0) goto error_0; -#ifdef CONFIG_NFS_V4 ret = nfs_register_sysctl(); if (ret < 0) goto error_1; +#ifdef CONFIG_NFS_V4 ret = register_filesystem(&nfs4_fs_type); if (ret < 0) goto error_2; @@ -165,9 +165,9 @@ int __init register_nfs_fs(void) #ifdef CONFIG_NFS_V4 error_2: nfs_unregister_sysctl(); +#endif error_1: unregister_filesystem(&nfs_fs_type); -#endif error_0: return ret; } diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index fcdcafb..b62481da 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -50,6 +50,14 @@ static ctl_table nfs_cb_sysctls[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_congestion_kb", + .data = &nfs_congestion_kb, + .maxlen = sizeof(nfs_congestion_kb), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index febdade..7975589 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -12,6 +12,7 @@ #include <linux/pagemap.h> #include <linux/file.h> #include <linux/writeback.h> +#include <linux/swap.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> @@ -37,8 +38,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, struct page *, unsigned int, unsigned int); -static void nfs_mark_request_dirty(struct nfs_page *req); -static int nfs_wait_on_write_congestion(struct address_space *, int); static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; @@ -48,8 +47,6 @@ static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); - struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); @@ -211,6 +208,43 @@ static int wb_priority(struct writeback_control *wbc) } /* + * NFS congestion control + */ + +int nfs_congestion_kb; + +#define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) +#define NFS_CONGESTION_OFF_THRESH \ + (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) + +static int nfs_set_page_writeback(struct page *page) +{ + int ret = test_set_page_writeback(page); + + if (!ret) { + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + if (atomic_inc_return(&nfss->writeback) > + NFS_CONGESTION_ON_THRESH) + set_bdi_congested(&nfss->backing_dev_info, WRITE); + } + return ret; +} + +static void nfs_end_page_writeback(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct nfs_server *nfss = NFS_SERVER(inode); + + end_page_writeback(page); + if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { + clear_bdi_congested(&nfss->backing_dev_info, WRITE); + congestion_end(WRITE); + } +} + +/* * Find an associated nfs write request, and prepare to flush it out * Returns 1 if there was no write request, or if the request was * already tagged by nfs_set_page_dirty.Returns 0 if the request @@ -220,7 +254,8 @@ static int wb_priority(struct writeback_control *wbc) static int nfs_page_mark_flush(struct page *page) { struct nfs_page *req; - spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + spinlock_t *req_lock = &nfsi->req_lock; int ret; spin_lock(req_lock); @@ -244,11 +279,23 @@ static int nfs_page_mark_flush(struct page *page) return ret; spin_lock(req_lock); } - spin_unlock(req_lock); - if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { - nfs_mark_request_dirty(req); - set_page_writeback(page); + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + /* This request is marked for commit */ + spin_unlock(req_lock); + nfs_unlock_request(req); + return 1; } + if (nfs_set_page_writeback(page) == 0) { + nfs_list_remove_request(req); + /* add the request to the inode's dirty list. */ + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_add_request(req, &nfsi->dirty); + nfsi->ndirty++; + spin_unlock(req_lock); + __mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES); + } else + spin_unlock(req_lock); ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); nfs_unlock_request(req); return ret; @@ -302,13 +349,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) return err; } -/* - * Note: causes nfs_update_request() to block on the assumption - * that the writeback is generated due to memory pressure. - */ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; struct inode *inode = mapping->host; int err; @@ -317,20 +359,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = generic_writepages(mapping, wbc); if (err) return err; - while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { - if (wbc->nonblocking) - return 0; - nfs_wait_on_write_congestion(mapping, 0); - } err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); if (err < 0) goto out; nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); err = 0; out: - clear_bit(BDI_write_congested, &bdi->state); - wake_up_all(&nfs_write_congestion); - congestion_end(WRITE); return err; } @@ -354,13 +388,15 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); + if (PageDirty(req->wb_page)) + set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; atomic_inc(&req->wb_count); return 0; } /* - * Insert a write request into an inode + * Remove a write request from an inode */ static void nfs_inode_remove_request(struct nfs_page *req) { @@ -373,6 +409,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) + __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); @@ -384,28 +422,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfs_release_request(req); } -/* - * Add a request to the inode's dirty list. - */ -static void -nfs_mark_request_dirty(struct nfs_page *req) -{ - struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - - spin_lock(&nfsi->req_lock); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_DIRTY); - nfs_list_add_request(req, &nfsi->dirty); - nfsi->ndirty++; - spin_unlock(&nfsi->req_lock); - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - static void nfs_redirty_request(struct nfs_page *req) { - clear_bit(PG_FLUSHING, &req->wb_flags); __set_page_dirty_nobuffers(req->wb_page); } @@ -415,7 +434,11 @@ nfs_redirty_request(struct nfs_page *req) static inline int nfs_dirty_request(struct nfs_page *req) { - return test_bit(PG_FLUSHING, &req->wb_flags) == 0; + struct page *page = req->wb_page; + + if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags)) + return 0; + return !PageWriteback(req->wb_page); } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -431,10 +454,48 @@ nfs_mark_request_commit(struct nfs_page *req) spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; + set_bit(PG_NEED_COMMIT, &(req)->wb_flags); spin_unlock(&nfsi->req_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return data->verf.committed != NFS_FILE_SYNC; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_mark_request_commit(req); + return 1; + } + if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { + nfs_redirty_request(req); + return 1; + } + return 0; +} +#else +static inline void +nfs_mark_request_commit(struct nfs_page *req) +{ +} + +static inline +int nfs_write_need_commit(struct nfs_write_data *data) +{ + return 0; +} + +static inline +int nfs_reschedule_unstable_write(struct nfs_page *req) +{ + return 0; +} #endif /* @@ -481,6 +542,7 @@ static void nfs_cancel_dirty_list(struct list_head *head) while(!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); + nfs_end_page_writeback(req->wb_page); nfs_inode_remove_request(req); nfs_clear_page_writeback(req); } @@ -494,6 +556,7 @@ static void nfs_cancel_commit_list(struct list_head *head) req = nfs_list_entry(head->next); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); nfs_inode_remove_request(req); nfs_unlock_request(req); } @@ -531,10 +594,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un } #endif -static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) +static int nfs_wait_on_write_congestion(struct address_space *mapping) { + struct inode *inode = mapping->host; struct backing_dev_info *bdi = mapping->backing_dev_info; - DEFINE_WAIT(wait); int ret = 0; might_sleep(); @@ -542,31 +605,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) if (!bdi_write_congested(bdi)) return 0; - nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); + nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); - if (intr) { - struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); + do { + struct rpc_clnt *clnt = NFS_CLIENT(inode); sigset_t oldset; rpc_clnt_sigmask(clnt, &oldset); - prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); - if (bdi_write_congested(bdi)) { - if (signalled()) - ret = -ERESTARTSYS; - else - schedule(); - } + ret = congestion_wait_interruptible(WRITE, HZ/10); rpc_clnt_sigunmask(clnt, &oldset); - } else { - prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); - if (bdi_write_congested(bdi)) - schedule(); - } - finish_wait(&nfs_write_congestion, &wait); + if (ret == -ERESTARTSYS) + break; + ret = 0; + } while (bdi_write_congested(bdi)); + return ret; } - /* * Try to update any existing write request, or create one if there is none. * In order to match, the request's credentials must match those of @@ -577,14 +632,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, struct page *page, unsigned int offset, unsigned int bytes) { - struct inode *inode = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req, *new = NULL; unsigned long rqend, end; end = offset + bytes; - if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) + if (nfs_wait_on_write_congestion(mapping)) return ERR_PTR(-ERESTARTSYS); for (;;) { /* Loop over all inode entries and see if we find @@ -727,26 +783,12 @@ int nfs_updatepage(struct file *file, struct page *page, static void nfs_writepage_release(struct nfs_page *req) { - end_page_writeback(req->wb_page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (!PageError(req->wb_page)) { - if (NFS_NEED_RESCHED(req)) { - nfs_redirty_request(req); - goto out; - } else if (NFS_NEED_COMMIT(req)) { - nfs_mark_request_commit(req); - goto out; - } - } - nfs_inode_remove_request(req); - -out: - nfs_clear_commit(req); - nfs_clear_reschedule(req); -#else - nfs_inode_remove_request(req); -#endif + if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } @@ -879,6 +921,7 @@ out_bad: nfs_writedata_release(data); } nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); return -ENOMEM; } @@ -924,6 +967,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return -ENOMEM; @@ -959,6 +1003,7 @@ out_err: req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); nfs_clear_page_writeback(req); } return error; @@ -986,22 +1031,28 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) nfs_set_pageerror(page); req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); - } else { -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->verf.committed < NFS_FILE_SYNC) { - if (!NFS_NEED_COMMIT(req)) { - nfs_defer_commit(req); - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - dprintk(" defer commit\n"); - } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { - nfs_defer_reschedule(req); - dprintk(" server reboot detected\n"); - } - } else -#endif - dprintk(" OK\n"); + goto out; } + if (nfs_write_need_commit(data)) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; + + spin_lock(req_lock); + if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { + /* Do nothing we need to resend the writes */ + } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + dprintk(" defer commit\n"); + } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { + set_bit(PG_NEED_RESCHED, &req->wb_flags); + clear_bit(PG_NEED_COMMIT, &req->wb_flags); + dprintk(" server reboot detected\n"); + } + spin_unlock(req_lock); + } else + dprintk(" OK\n"); + +out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req); } @@ -1042,25 +1093,21 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); req->wb_context->error = task->tk_status; - end_page_writeback(page); - nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); - goto next; + goto remove_request; } - end_page_writeback(page); -#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { - nfs_inode_remove_request(req); - dprintk(" OK\n"); + if (nfs_write_need_commit(data)) { + memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); + nfs_mark_request_commit(req); + nfs_end_page_writeback(page); + dprintk(" marked for commit\n"); goto next; } - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req); - dprintk(" marked for commit\n"); -#else + dprintk(" OK\n"); +remove_request: + nfs_end_page_writeback(page); nfs_inode_remove_request(req); -#endif next: nfs_clear_page_writeback(req); } @@ -1248,6 +1295,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); + clear_bit(PG_NEED_COMMIT, &(req)->wb_flags); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dprintk("NFS: commit (%s/%Ld %d@%Ld)", @@ -1483,15 +1531,22 @@ int nfs_wb_page(struct inode *inode, struct page* page) int nfs_set_page_dirty(struct page *page) { + spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock; struct nfs_page *req; + int ret; - req = nfs_page_find_request(page); + spin_lock(req_lock); + req = nfs_page_find_request_locked(page); if (req != NULL) { /* Mark any existing write requests for flushing */ - set_bit(PG_NEED_FLUSH, &req->wb_flags); + ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); + spin_unlock(req_lock); nfs_release_request(req); + return ret; } - return __set_page_dirty_nobuffers(page); + ret = __set_page_dirty_nobuffers(page); + spin_unlock(req_lock); + return ret; } @@ -1514,6 +1569,26 @@ int __init nfs_init_writepagecache(void) if (nfs_commit_mempool == NULL) return -ENOMEM; + /* + * NFS congestion size, scale with available memory. + * + * 64MB: 8192k + * 128MB: 11585k + * 256MB: 16384k + * 512MB: 23170k + * 1GB: 32768k + * 2GB: 46340k + * 4GB: 65536k + * 8GB: 92681k + * 16GB: 131072k + * + * This allows larger machines to have larger/more transfers. + * Limit the default to 256M + */ + nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); + if (nfs_congestion_kb > 256*1024) + nfs_congestion_kb = 256*1024; + return 0; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 6f67798..7e4bb0a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -859,8 +859,8 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, #define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) #define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) static int -encode_entry(struct readdir_cd *ccd, const char *name, - int namlen, off_t offset, ino_t ino, unsigned int d_type, int plus) +encode_entry(struct readdir_cd *ccd, const char *name, int namlen, + loff_t offset, ino_t ino, unsigned int d_type, int plus) { struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, common); @@ -880,7 +880,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, *cd->offset1 = htonl(offset64 & 0xffffffff); cd->offset1 = NULL; } else { - xdr_encode_hyper(cd->offset, (u64) offset); + xdr_encode_hyper(cd->offset, offset64); } } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 832673b..673a53c 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -228,7 +228,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, struct posix_acl_summary pas; unsigned short deny; int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? - NFS4_INHERITANCE_FLAGS : 0); + NFS4_INHERITANCE_FLAGS | NFS4_ACE_INHERIT_ONLY_ACE : 0); BUG_ON(pacl->a_count < 3); summarize_posix_acl(pacl, &pas); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e40679..af36070 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -750,9 +750,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_clid_inuse; if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred) || conf->cl_addr != sin->sin_addr.s_addr) { - printk("NFSD: setclientid: string in use by client" - "(clientid %08x/%08x)\n", - conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id); + dprintk("NFSD: setclientid: string in use by client" + "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr)); goto out; } } @@ -3261,7 +3260,6 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - cancel_delayed_work(&laundromat_work); nfsd4_shutdown_recdir(); nfs4_init = 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c2660cb..8d995bc 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -17,7 +17,6 @@ #include <linux/stat.h> #include <linux/dcache.h> #include <linux/mount.h> -#include <asm/pgtable.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/svc.h> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 93628b0..875c114 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -614,6 +614,27 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, ocfs2_rw_unlock(inode, 0); } +/* + * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen + * from ext3. PageChecked() bits have been removed as OCFS2 does not + * do journalled data. + */ +static void ocfs2_invalidatepage(struct page *page, unsigned long offset) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + journal_invalidatepage(journal, page, offset); +} + +static int ocfs2_releasepage(struct page *page, gfp_t wait) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + if (!page_has_buffers(page)) + return 0; + return journal_try_to_free_buffers(journal, page, wait); +} + static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, @@ -661,5 +682,8 @@ const struct address_space_operations ocfs2_aops = { .commit_write = ocfs2_commit_write, .bmap = ocfs2_bmap, .sync_page = block_sync_page, - .direct_IO = ocfs2_direct_IO + .direct_IO = ocfs2_direct_IO, + .invalidatepage = ocfs2_invalidatepage, + .releasepage = ocfs2_releasepage, + .migratepage = buffer_migrate_page, }; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5a9779b..eba282d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1234,6 +1234,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, const char *page, size_t count) { + struct task_struct *hb_task; long fd; int sectsize; char *p = (char *)page; @@ -1319,20 +1320,28 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, */ atomic_set(®->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); - reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s", - reg->hr_item.ci_name); - if (IS_ERR(reg->hr_task)) { - ret = PTR_ERR(reg->hr_task); + hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", + reg->hr_item.ci_name); + if (IS_ERR(hb_task)) { + ret = PTR_ERR(hb_task); mlog_errno(ret); - reg->hr_task = NULL; goto out; } + spin_lock(&o2hb_live_lock); + reg->hr_task = hb_task; + spin_unlock(&o2hb_live_lock); + ret = wait_event_interruptible(o2hb_steady_queue, atomic_read(®->hr_steady_iterations) == 0); if (ret) { - kthread_stop(reg->hr_task); + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); goto out; } @@ -1354,10 +1363,17 @@ out: static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, char *page) { - if (!reg->hr_task) + pid_t pid = 0; + + spin_lock(&o2hb_live_lock); + if (reg->hr_task) + pid = reg->hr_task->pid; + spin_unlock(&o2hb_live_lock); + + if (!pid) return 0; - return sprintf(page, "%u\n", reg->hr_task->pid); + return sprintf(page, "%u\n", pid); } struct o2hb_region_attribute { @@ -1495,13 +1511,17 @@ out: static void o2hb_heartbeat_group_drop_item(struct config_group *group, struct config_item *item) { + struct task_struct *hb_task; struct o2hb_region *reg = to_o2hb_region(item); /* stop the thread when the user removes the region dir */ - if (reg->hr_task) { - kthread_stop(reg->hr_task); - reg->hr_task = NULL; - } + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); config_item_put(item); } @@ -1682,7 +1702,7 @@ out: } EXPORT_SYMBOL_GPL(o2hb_register_callback); -int o2hb_unregister_callback(struct o2hb_callback_func *hc) +void o2hb_unregister_callback(struct o2hb_callback_func *hc) { BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); @@ -1690,15 +1710,13 @@ int o2hb_unregister_callback(struct o2hb_callback_func *hc) __builtin_return_address(0), hc); if (list_empty(&hc->hc_item)) - return 0; + return; down_write(&o2hb_callback_sem); list_del_init(&hc->hc_item); up_write(&o2hb_callback_sem); - - return 0; } EXPORT_SYMBOL_GPL(o2hb_unregister_callback); diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index cac6223..cc6d40b 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -70,7 +70,7 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc, void *data, int priority); int o2hb_register_callback(struct o2hb_callback_func *hc); -int o2hb_unregister_callback(struct o2hb_callback_func *hc); +void o2hb_unregister_callback(struct o2hb_callback_func *hc); void o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_init(void); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1718215..69caf3e 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1638,17 +1638,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, void o2net_unregister_hb_callbacks(void) { - int ret; - - ret = o2hb_unregister_callback(&o2net_hb_up); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat up " - "callback!\n", ret); - - ret = o2hb_unregister_callback(&o2net_hb_down); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat down " - "callback!\n", ret); + o2hb_unregister_callback(&o2net_hb_up); + o2hb_unregister_callback(&o2net_hb_down); } int o2net_register_hb_callbacks(void) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6087c47..c558442 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -138,8 +138,10 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) { - hlist_del_init(&lockres->hash_node); - dlm_lockres_put(lockres); + if (!hlist_unhashed(&lockres->hash_node)) { + hlist_del_init(&lockres->hash_node); + dlm_lockres_put(lockres); + } } void __dlm_insert_lockres(struct dlm_ctxt *dlm, @@ -655,6 +657,8 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_kick_thread(dlm, NULL); while (dlm_migrate_all_locks(dlm)) { + /* Give dlm_thread time to purge the lockres' */ + msleep(500); mlog(0, "%s: more migration to do\n", dlm->name); } dlm_mark_domain_leaving(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 77e4e61..6edffca 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2424,6 +2424,57 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) dlm_lockres_put(res); } +/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 + * if not. If 0, numlocks is set to the number of locks in the lockres. + */ +static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + int *numlocks) +{ + int ret; + int i; + int count = 0; + struct list_head *queue, *iter; + struct dlm_lock *lock; + + assert_spin_locked(&res->spinlock); + + ret = -EINVAL; + if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + mlog(0, "cannot migrate lockres with unknown owner!\n"); + goto leave; + } + + if (res->owner != dlm->node_num) { + mlog(0, "cannot migrate lockres this node doesn't own!\n"); + goto leave; + } + + ret = 0; + queue = &res->granted; + for (i = 0; i < 3; i++) { + list_for_each(iter, queue) { + lock = list_entry(iter, struct dlm_lock, list); + ++count; + if (lock->ml.node == dlm->node_num) { + mlog(0, "found a lock owned by this node still " + "on the %s queue! will not migrate this " + "lockres\n", (i == 0 ? "granted" : + (i == 1 ? "converting" : + "blocked"))); + ret = -ENOTEMPTY; + goto leave; + } + } + queue++; + } + + *numlocks = count; + mlog(0, "migrateable lockres having %d locks\n", *numlocks); + +leave: + return ret; +} /* * DLM_MIGRATE_LOCKRES @@ -2437,14 +2488,12 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle = NULL; struct dlm_master_list_entry *oldmle = NULL; struct dlm_migratable_lockres *mres = NULL; - int ret = -EINVAL; + int ret = 0; const char *name; unsigned int namelen; int mle_added = 0; - struct list_head *queue, *iter; - int i; - struct dlm_lock *lock; - int empty = 1, wake = 0; + int numlocks; + int wake = 0; if (!dlm_grab(dlm)) return -EINVAL; @@ -2458,42 +2507,16 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, * ensure this lockres is a proper candidate for migration */ spin_lock(&res->spinlock); - if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { - mlog(0, "cannot migrate lockres with unknown owner!\n"); - spin_unlock(&res->spinlock); - goto leave; - } - if (res->owner != dlm->node_num) { - mlog(0, "cannot migrate lockres this node doesn't own!\n"); + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); + if (ret < 0) { spin_unlock(&res->spinlock); goto leave; } - mlog(0, "checking queues...\n"); - queue = &res->granted; - for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry (iter, struct dlm_lock, list); - empty = 0; - if (lock->ml.node == dlm->node_num) { - mlog(0, "found a lock owned by this node " - "still on the %s queue! will not " - "migrate this lockres\n", - i==0 ? "granted" : - (i==1 ? "converting" : "blocked")); - spin_unlock(&res->spinlock); - ret = -ENOTEMPTY; - goto leave; - } - } - queue++; - } - mlog(0, "all locks on this lockres are nonlocal. continuing\n"); spin_unlock(&res->spinlock); /* no work to do */ - if (empty) { + if (numlocks == 0) { mlog(0, "no locks were found on this lockres! done!\n"); - ret = 0; goto leave; } @@ -2729,15 +2752,26 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int ret; int lock_dropped = 0; + int numlocks; + spin_lock(&res->spinlock); if (res->owner != dlm->node_num) { if (!__dlm_lockres_unused(res)) { mlog(ML_ERROR, "%s:%.*s: this node is not master, " "trying to free this but locks remain\n", dlm->name, res->lockname.len, res->lockname.name); } + spin_unlock(&res->spinlock); + goto leave; + } + + /* No need to migrate a lockres having no locks */ + ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); + if (ret >= 0 && numlocks == 0) { + spin_unlock(&res->spinlock); goto leave; } + spin_unlock(&res->spinlock); /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ spin_unlock(&dlm->spinlock); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 8ffa091..2b264c6 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -256,18 +256,14 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, break; } - mlog(0, "removing lockres %.*s:%p from purgelist\n", - lockres->lockname.len, lockres->lockname.name, lockres); - list_del_init(&lockres->purge); - dlm_lockres_put(lockres); - dlm->purge_count--; + dlm_lockres_get(lockres); /* This may drop and reacquire the dlm spinlock if it * has to do migration. */ - mlog(0, "calling dlm_purge_lockres!\n"); if (dlm_purge_lockres(dlm, lockres)) BUG(); - mlog(0, "DONE calling dlm_purge_lockres!\n"); + + dlm_lockres_put(lockres); /* Avoid adding any scheduling latencies */ cond_resched_lock(&dlm->spinlock); diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 8fc52d6..b25ef63 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -164,8 +164,10 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb) } status = o2hb_register_callback(&osb->osb_hb_up); - if (status < 0) + if (status < 0) { mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + } bail: return status; @@ -173,18 +175,11 @@ bail: void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) { - int status; - if (ocfs2_mount_local(osb)) return; - status = o2hb_unregister_callback(&osb->osb_hb_down); - if (status < 0) - mlog_errno(status); - - status = o2hb_unregister_callback(&osb->osb_hb_up); - if (status < 0) - mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + o2hb_unregister_callback(&osb->osb_hb_up); } void ocfs2_stop_heartbeat(struct ocfs2_super *osb) diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 74552c6..6e8bb66 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -235,5 +235,4 @@ config EFI_PARTITION select CRC32 help Say Y here if you would like to use hard disks under Linux which - were partitioned using EFI GPT. Presently only useful on the - IA-64 platform. + were partitioned using EFI GPT. diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e46d237..8a7d003 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -541,7 +541,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) /* I/O error reading the partition table */ - return PTR_ERR(state); + return -EIO; for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; diff --git a/fs/proc/Makefile b/fs/proc/Makefile index a6b3a8f..bce38e3 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,8 +8,9 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o proc_misc.o proc_sysctl.o + proc_tty.o proc_misc.o +proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff --git a/fs/proc/base.c b/fs/proc/base.c index 01f7769..989af5e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1558,29 +1558,20 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_path.dentry->d_inode; - unsigned long page; + char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); - length = -ESRCH; if (!task) - goto out_no_task; - - if (count > PAGE_SIZE) - count = PAGE_SIZE; - length = -ENOMEM; - if (!(page = __get_free_page(GFP_KERNEL))) - goto out; + return -ESRCH; length = security_getprocattr(task, (char*)file->f_path.dentry->d_name.name, - (void*)page, count); - if (length >= 0) - length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); - free_page(page); -out: + &p); put_task_struct(task); -out_no_task: + if (length > 0) + length = simple_read_from_buffer(buf, count, ppos, p, length); + kfree(p); return length; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c932aa6..f771889 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,7 +11,11 @@ #include <linux/proc_fs.h> +#ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); +#else +static inline void proc_sys_init(void) { } +#endif struct vmalloc_info { unsigned long used; diff --git a/fs/proc/root.c b/fs/proc/root.c index 5834a744..41f1703 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -79,9 +79,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", NULL); -#ifdef CONFIG_SYSCTL proc_sys_init(); -#endif } static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index b9b423b..9475557 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c @@ -23,7 +23,7 @@ static void sd_decrement_key(struct cpu_key *key) { key->on_disk_key.k_objectid--; set_cpu_key_k_type(key, TYPE_ANY); - set_cpu_key_k_offset(key, (loff_t) (-1)); + set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1)); } static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize) diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f01389f..c8178b7 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -54,82 +54,48 @@ static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char *prefix); -static struct dentry *create_xa_root(struct super_block *sb) +/* Returns the dentry referring to the root of the extended attribute + * directory tree. If it has already been retrieved, it is used. If it + * hasn't been created and the flags indicate creation is allowed, we + * attempt to create it. On error, we return a pointer-encoded error. + */ +static struct dentry *get_xa_root(struct super_block *sb, int flags) { struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root); struct dentry *xaroot; /* This needs to be created at mount-time */ if (!privroot) - return ERR_PTR(-EOPNOTSUPP); + return ERR_PTR(-ENODATA); - xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); - if (IS_ERR(xaroot)) { + mutex_lock(&privroot->d_inode->i_mutex); + if (REISERFS_SB(sb)->xattr_root) { + xaroot = dget(REISERFS_SB(sb)->xattr_root); goto out; - } else if (!xaroot->d_inode) { - int err; - mutex_lock(&privroot->d_inode->i_mutex); - err = - privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, - 0700); - mutex_unlock(&privroot->d_inode->i_mutex); - - if (err) { - dput(xaroot); - dput(privroot); - return ERR_PTR(err); - } - REISERFS_SB(sb)->xattr_root = dget(xaroot); } - out: - dput(privroot); - return xaroot; -} - -/* This will return a dentry, or error, refering to the xa root directory. - * If the xa root doesn't exist yet, the dentry will be returned without - * an associated inode. This dentry can be used with ->mkdir to create - * the xa directory. */ -static struct dentry *__get_xa_root(struct super_block *s) -{ - struct dentry *privroot = dget(REISERFS_SB(s)->priv_root); - struct dentry *xaroot = NULL; - - if (IS_ERR(privroot) || !privroot) - return privroot; - xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME)); if (IS_ERR(xaroot)) { goto out; } else if (!xaroot->d_inode) { - dput(xaroot); - xaroot = NULL; - goto out; + int err = -ENODATA; + if (flags == 0 || flags & XATTR_CREATE) + err = privroot->d_inode->i_op->mkdir(privroot->d_inode, + xaroot, 0700); + if (err) { + dput(xaroot); + xaroot = ERR_PTR(err); + goto out; + } } - - REISERFS_SB(s)->xattr_root = dget(xaroot); + REISERFS_SB(sb)->xattr_root = dget(xaroot); out: + mutex_unlock(&privroot->d_inode->i_mutex); dput(privroot); return xaroot; } -/* Returns the dentry (or NULL) referring to the root of the extended - * attribute directory tree. If it has already been retrieved, it is used. - * Otherwise, we attempt to retrieve it from disk. It may also return - * a pointer-encoded error. - */ -static inline struct dentry *get_xa_root(struct super_block *s) -{ - struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root); - - if (!dentry) - dentry = __get_xa_root(s); - - return dentry; -} - /* Opens the directory corresponding to the inode's extended attribute store. * If flags allow, the tree to the directory may be created. If creation is * prohibited, -ENODATA is returned. */ @@ -138,21 +104,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) struct dentry *xaroot, *xadir; char namebuf[17]; - xaroot = get_xa_root(inode->i_sb); - if (IS_ERR(xaroot)) { + xaroot = get_xa_root(inode->i_sb, flags); + if (IS_ERR(xaroot)) return xaroot; - } else if (!xaroot) { - if (flags == 0 || flags & XATTR_CREATE) { - xaroot = create_xa_root(inode->i_sb); - if (IS_ERR(xaroot)) - return xaroot; - } - if (!xaroot) - return ERR_PTR(-ENODATA); - } /* ok, we have xaroot open */ - snprintf(namebuf, sizeof(namebuf), "%X.%X", le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); @@ -821,7 +777,7 @@ int reiserfs_delete_xattrs(struct inode *inode) /* Leftovers besides . and .. -- that's not good. */ if (dir->d_inode->i_nlink <= 2) { - root = get_xa_root(inode->i_sb); + root = get_xa_root(inode->i_sb, XATTR_REPLACE); reiserfs_write_lock_xattrs(inode->i_sb); err = vfs_rmdir(root->d_inode, dir); reiserfs_write_unlock_xattrs(inode->i_sb); diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 42261db..723f7c6 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -181,6 +181,7 @@ static int smb_setup_request(struct smb_request *req) req->rq_errno = 0; req->rq_fragment = 0; kfree(req->rq_trans2buffer); + req->rq_trans2buffer = NULL; return 0; } diff --git a/fs/splice.c b/fs/splice.c index 2fca6eb..5428b0f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -576,76 +576,21 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; - /* - * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full - * page. - */ - if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { - /* - * If steal succeeds, buf->page is now pruned from the - * pagecache and we can reuse it. The page will also be - * locked on successful return. - */ - if (buf->ops->steal(pipe, buf)) - goto find_page; - - page = buf->page; - if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { - unlock_page(page); - goto find_page; - } - - page_cache_get(page); - - if (!(buf->flags & PIPE_BUF_FLAG_LRU)) - lru_cache_add(page); - } else { find_page: - page = find_lock_page(mapping, index); - if (!page) { - ret = -ENOMEM; - page = page_cache_alloc_cold(mapping); - if (unlikely(!page)) - goto out_ret; - - /* - * This will also lock the page - */ - ret = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); - if (unlikely(ret)) - goto out; - } + page = find_lock_page(mapping, index); + if (!page) { + ret = -ENOMEM; + page = page_cache_alloc_cold(mapping); + if (unlikely(!page)) + goto out_ret; /* - * We get here with the page locked. If the page is also - * uptodate, we don't need to do more. If it isn't, we - * may need to bring it in if we are not going to overwrite - * the full page. + * This will also lock the page */ - if (!PageUptodate(page)) { - if (this_len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * Page got invalidated, repeat. - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; - } - ret = -EIO; - goto out; - } - } else - SetPageUptodate(page); - } + ret = add_to_page_cache_lru(page, mapping, index, + GFP_KERNEL); + if (unlikely(ret)) + goto out; } ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); @@ -682,18 +627,25 @@ find_page: } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (!ret) { + if (ret) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } + if (ret < 0) + goto out; /* - * Return the number of bytes written and mark page as - * accessed, we are now done! + * Partial write has happened, so 'ret' already initialized by + * number of bytes written, Where is nothing we have to do here. */ + } else ret = this_len; - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); - } else if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto find_page; - } + /* + * Return the number of bytes written and mark page as + * accessed, we are now done! + */ + mark_page_accessed(page); + balance_dirty_pages_ratelimited(mapping); out: page_cache_release(page); unlock_page(page); @@ -706,9 +658,9 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, size_t len, - unsigned int flags, splice_actor *actor) +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -802,6 +754,7 @@ static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, return ret; } +EXPORT_SYMBOL(__splice_from_pipe); ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags, diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 8d4d839..fc46333 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -168,12 +168,12 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) ssize_t retval = 0; down(&buffer->sem); - if (buffer->orphaned) { - retval = -ENODEV; - goto out; - } if (buffer->needs_read_fill) { - if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + if (buffer->orphaned) + retval = -ENODEV; + else + retval = fill_read_buffer(file->f_path.dentry,buffer); + if (retval) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", @@ -629,6 +629,60 @@ void sysfs_remove_file_from_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); +struct sysfs_schedule_callback_struct { + struct kobject *kobj; + void (*func)(void *); + void *data; + struct work_struct work; +}; + +static void sysfs_schedule_callback_work(struct work_struct *work) +{ + struct sysfs_schedule_callback_struct *ss = container_of(work, + struct sysfs_schedule_callback_struct, work); + + (ss->func)(ss->data); + kobject_put(ss->kobj); + kfree(ss); +} + +/** + * sysfs_schedule_callback - helper to schedule a callback for a kobject + * @kobj: object we're acting for. + * @func: callback function to invoke later. + * @data: argument to pass to @func. + * + * sysfs attribute methods must not unregister themselves or their parent + * kobject (which would amount to the same thing). Attempts to do so will + * deadlock, since unregistration is mutually exclusive with driver + * callbacks. + * + * Instead methods can call this routine, which will attempt to allocate + * and schedule a workqueue request to call back @func with @data as its + * argument in the workqueue's process context. @kobj will be pinned + * until @func returns. + * + * Returns 0 if the request was submitted, -ENOMEM if storage could not + * be allocated. + */ +int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), + void *data) +{ + struct sysfs_schedule_callback_struct *ss; + + ss = kmalloc(sizeof(*ss), GFP_KERNEL); + if (!ss) + return -ENOMEM; + kobject_get(kobj); + ss->kobj = kobj; + ss->func = func; + ss->data = data; + INIT_WORK(&ss->work, sysfs_schedule_callback_work); + schedule_work(&ss->work); + return 0; +} +EXPORT_SYMBOL_GPL(sysfs_schedule_callback); + EXPORT_SYMBOL_GPL(sysfs_create_file); EXPORT_SYMBOL_GPL(sysfs_remove_file); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index ccb7d72..4de5c6b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -222,13 +222,17 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) static inline void orphan_all_buffers(struct inode *node) { - struct sysfs_buffer_collection *set = node->i_private; + struct sysfs_buffer_collection *set; struct sysfs_buffer *buf; mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); - if (node->i_private) { - list_for_each_entry(buf, &set->associates, associates) + set = node->i_private; + if (set) { + list_for_each_entry(buf, &set->associates, associates) { + down(&buf->sem); buf->orphaned = 1; + up(&buf->sem); + } } mutex_unlock(&node->i_mutex); } diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index bcc4408..841ac25 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -244,62 +244,87 @@ failed: * We can come here from ufs_writepage or ufs_prepare_write, * locked_page is argument of these functions, so we already lock it. */ -static void ufs_change_blocknr(struct inode *inode, unsigned int beg, - unsigned int count, unsigned int oldb, - unsigned int newb, struct page *locked_page) +static void ufs_change_blocknr(struct inode *inode, sector_t beg, + unsigned int count, sector_t oldb, + sector_t newb, struct page *locked_page) { - const unsigned mask = (1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1; + const unsigned blks_per_page = + 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + const unsigned mask = blks_per_page - 1; struct address_space * const mapping = inode->i_mapping; - pgoff_t index, cur_index; - unsigned end, pos, j; + pgoff_t index, cur_index, last_index; + unsigned pos, j, lblock; + sector_t end, i; struct page *page; struct buffer_head *head, *bh; - UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n", - inode->i_ino, count, oldb, newb); + UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n", + inode->i_ino, count, + (unsigned long long)oldb, (unsigned long long)newb); BUG_ON(!locked_page); BUG_ON(!PageLocked(locked_page)); cur_index = locked_page->index; - - for (end = count + beg; beg < end; beg = (beg | mask) + 1) { - index = beg >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + end = count + beg; + last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + for (i = beg; i < end; i = (i | mask) + 1) { + index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits); if (likely(cur_index != index)) { page = ufs_get_locked_page(mapping, index); - if (!page || IS_ERR(page)) /* it was truncated or EIO */ + if (!page)/* it was truncated */ + continue; + if (IS_ERR(page)) {/* or EIO */ + ufs_error(inode->i_sb, __FUNCTION__, + "read of page %llu failed\n", + (unsigned long long)index); continue; + } } else page = locked_page; head = page_buffers(page); bh = head; - pos = beg & mask; + pos = i & mask; for (j = 0; j < pos; ++j) bh = bh->b_this_page; - j = 0; + + + if (unlikely(index == last_index)) + lblock = end & mask; + else + lblock = blks_per_page; + do { - if (buffer_mapped(bh)) { - pos = bh->b_blocknr - oldb; - if (pos < count) { - UFSD(" change from %llu to %llu\n", - (unsigned long long)pos + oldb, - (unsigned long long)pos + newb); - bh->b_blocknr = newb + pos; - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - mark_buffer_dirty(bh); - ++j; + if (j >= lblock) + break; + pos = (i - beg) + j; + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, oldb + pos); + if (!buffer_uptodate(bh)) { + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + ufs_error(inode->i_sb, __FUNCTION__, + "read of block failed\n"); + break; } } + UFSD(" change from %llu to %llu, pos %u\n", + (unsigned long long)pos + oldb, + (unsigned long long)pos + newb, pos); + + bh->b_blocknr = newb + pos; + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + mark_buffer_dirty(bh); + ++j; bh = bh->b_this_page; } while (bh != head); - if (j) - set_page_dirty(page); - if (likely(cur_index != index)) ufs_put_locked_page(page); } @@ -457,8 +482,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (result) { ufs_clear_frags(inode, result + oldcount, newcount - oldcount, locked_page != NULL); - ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp, - result, locked_page); + ufs_change_blocknr(inode, fragment - oldcount, oldcount, + uspi->s_sbbase + tmp, + uspi->s_sbbase + result, locked_page); ufs_cpu_to_data_ptr(sb, p, result); *err = 0; UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index b868878..c28a8b6 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -343,9 +343,8 @@ cg_found: lock_buffer(bh); ufs2_inode = (struct ufs2_inode *)bh->b_data; ufs2_inode += ufs_inotofsbo(inode->i_ino); - ufs2_inode->ui_birthtime.tv_sec = - cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec); - ufs2_inode->ui_birthtime.tv_usec = 0; + ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec); + ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec); mark_buffer_dirty(bh); unlock_buffer(bh); if (sb->s_flags & MS_SYNCHRONOUS) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index fb34ad0..f18b791 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -212,7 +212,7 @@ repeat: brelse (result); goto repeat; } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; return NULL; } } @@ -282,9 +282,9 @@ repeat: } if (!phys) { - result = sb_getblk(sb, tmp + blockoff); + result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; result = NULL; *err = 0; *new = 1; @@ -368,7 +368,7 @@ repeat: brelse (result); goto repeat; } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; goto out; } } @@ -389,9 +389,9 @@ repeat: if (!phys) { - result = sb_getblk(sb, tmp + blockoff); + result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff); } else { - *phys = tmp + blockoff; + *phys = uspi->s_sbbase + tmp + blockoff; *new = 1; } @@ -601,7 +601,7 @@ static void ufs_set_inode_ops(struct inode *inode) ufs_get_inode_dev(inode->i_sb, UFS_I(inode))); } -static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) +static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -613,8 +613,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); + return -1; + } /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -643,9 +645,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i]; } + return 0; } -static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) +static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -658,8 +661,10 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) */ inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); + return -1; + } /* * Linux now has 32-bit uid and gid, so we can support EFT. @@ -668,12 +673,12 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid); inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); - inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_atime.tv_sec); - inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_ctime.tv_sec); - inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_mtime.tv_sec); - inode->i_mtime.tv_nsec = 0; - inode->i_atime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; + inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); + inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime); + inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime); + inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec); + inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec); + inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec); inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks); inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen); ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags); @@ -690,6 +695,7 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++) ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i]; } + return 0; } void ufs_read_inode(struct inode * inode) @@ -698,6 +704,7 @@ void ufs_read_inode(struct inode * inode) struct super_block * sb; struct ufs_sb_private_info * uspi; struct buffer_head * bh; + int err; UFSD("ENTER, ino %lu\n", inode->i_ino); @@ -720,14 +727,17 @@ void ufs_read_inode(struct inode * inode) if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data; - ufs2_read_inode(inode, - ufs2_inode + ufs_inotofsbo(inode->i_ino)); + err = ufs2_read_inode(inode, + ufs2_inode + ufs_inotofsbo(inode->i_ino)); } else { struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data; - ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); + err = ufs1_read_inode(inode, + ufs_inode + ufs_inotofsbo(inode->i_ino)); } + if (err) + goto bad_inode; inode->i_version++; ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -803,12 +813,12 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode) ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid); ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); - ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); - ufs_inode->ui_atime.tv_usec = 0; - ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec); - ufs_inode->ui_ctime.tv_usec = 0; - ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec); - ufs_inode->ui_mtime.tv_usec = 0; + ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); + ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec); + ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec); + ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec); + ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec); + ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec); ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks); ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags); @@ -888,6 +898,8 @@ void ufs_delete_inode (struct inode * inode) loff_t old_i_size; truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) + goto no_delete; /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ lock_kernel(); mark_inode_dirty(inode); @@ -898,4 +910,7 @@ void ufs_delete_inode (struct inode * inode) ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n"); ufs_free_inode (inode); unlock_kernel(); + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... */ } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 749581f..79c54c85 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -74,7 +74,7 @@ static int ufs_trunc_direct(struct inode *inode) unsigned i, tmp; int retry; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -96,8 +96,8 @@ static int ufs_trunc_direct(struct inode *inode) block2 = ufs_fragstoblks (frag3); } - UFSD("frag1 %llu, frag2 %llu, block1 %llu, block2 %llu, frag3 %llu," - " frag4 %llu\n", + UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu," + " frag3 %llu, frag4 %llu\n", inode->i_ino, (unsigned long long)frag1, (unsigned long long)frag2, (unsigned long long)block1, (unsigned long long)block2, (unsigned long long)frag3, (unsigned long long)frag4); @@ -163,7 +163,7 @@ next1: mark_inode_dirty(inode); next3: - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -248,7 +248,7 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) } ubh_brelse (ind_ubh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -262,7 +262,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) void *dind; int retry = 0; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); sb = inode->i_sb; uspi = UFS_SB(sb)->s_uspi; @@ -312,7 +312,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) } ubh_brelse (dind_bh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } @@ -327,7 +327,7 @@ static int ufs_trunc_tindirect(struct inode *inode) void *tind, *p; int retry; - UFSD("ENTER\n"); + UFSD("ENTER: ino %lu\n", inode->i_ino); retry = 0; @@ -348,7 +348,7 @@ static int ufs_trunc_tindirect(struct inode *inode) } for (i = tindirect_block ; i < uspi->s_apb ; i++) { - tind = ubh_get_addr32 (tind_bh, i); + tind = ubh_get_data_ptr(uspi, tind_bh, i); retry |= ufs_trunc_dindirect(inode, UFS_NDADDR + uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); ubh_mark_buffer_dirty(tind_bh); @@ -372,19 +372,21 @@ static int ufs_trunc_tindirect(struct inode *inode) } ubh_brelse (tind_bh); - UFSD("EXIT\n"); + UFSD("EXIT: ino %lu\n", inode->i_ino); return retry; } static int ufs_alloc_lastblock(struct inode *inode) { int err = 0; + struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; - struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; unsigned i, end; sector_t lastfrag; struct page *lastpage; struct buffer_head *bh; + u64 phys64; lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -424,6 +426,20 @@ static int ufs_alloc_lastblock(struct inode *inode) set_page_dirty(lastpage); } + if (lastfrag >= UFS_IND_FRAGMENT) { + end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1; + phys64 = bh->b_blocknr + 1; + for (i = 0; i < end; ++i) { + bh = sb_getblk(sb, i + phys64); + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + sync_dirty_buffer(bh); + brelse(bh); + } + } out_unlock: ufs_put_locked_page(lastpage); out: diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e2bea6a..69e9e80 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1829,11 +1829,11 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out_free_trace_buf; - xfslogd_workqueue = create_freezeable_workqueue("xfslogd"); + xfslogd_workqueue = create_workqueue("xfslogd"); if (!xfslogd_workqueue) goto out_free_buf_zone; - xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad"); + xfsdatad_workqueue = create_workqueue("xfsdatad"); if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; |