diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 16:08:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 16:08:54 -0700 |
commit | c2d95729e3094ecdd8c54e856bbe971adbbd7f48 (patch) | |
tree | 76cc5b551227d3d55d68a93105c1fe8080dfb812 /fs | |
parent | bbda1baeeb2f4aff3addac3d086a1e56c3f2503e (diff) | |
parent | b34081f1cd59585451efaa69e1dff1b9507e6c89 (diff) | |
download | op-kernel-dev-c2d95729e3094ecdd8c54e856bbe971adbbd7f48.zip op-kernel-dev-c2d95729e3094ecdd8c54e856bbe971adbbd7f48.tar.gz |
Merge branch 'akpm' (patches from Andrew Morton)
Merge first patch-bomb from Andrew Morton:
- Some pidns/fork/exec tweaks
- OCFS2 updates
- Most of MM - there remain quite a few memcg parts which depend on
pending core cgroups changes. Which might have been already merged -
I'll check tomorrow...
- Various misc stuff all over the place
- A few block bits which I never got around to sending to Jens -
relatively minor things.
- MAINTAINERS maintenance
- A small number of lib/ updates
- checkpatch updates
- epoll
- firmware/dmi-scan
- Some kprobes work for S390
- drivers/rtc updates
- hfsplus feature work
- vmcore feature work
- rbtree upgrades
- AOE updates
- pktcdvd cleanups
- PPS
- memstick
- w1
- New "inittmpfs" feature, which does the obvious
- More IPC work from Davidlohr.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (303 commits)
lz4: fix compression/decompression signedness mismatch
ipc: drop ipc_lock_check
ipc, shm: drop shm_lock_check
ipc: drop ipc_lock_by_ptr
ipc, shm: guard against non-existant vma in shmdt(2)
ipc: document general ipc locking scheme
ipc,msg: drop msg_unlock
ipc: rename ids->rw_mutex
ipc,shm: shorten critical region for shmat
ipc,shm: cleanup do_shmat pasta
ipc,shm: shorten critical region for shmctl
ipc,shm: make shmctl_nolock lockless
ipc,shm: introduce shmctl_nolock
ipc: drop ipcctl_pre_down
ipc,shm: shorten critical region in shmctl_down
ipc,shm: introduce lockless functions to obtain the ipc object
initmpfs: use initramfs if rootfstype= or root= specified
initmpfs: make rootfs use tmpfs when CONFIG_TMPFS enabled
initmpfs: move rootfs code from fs/ramfs/ to init/
initmpfs: move bdi setup from init_rootfs to init_ramfs
...
Diffstat (limited to 'fs')
51 files changed, 874 insertions, 333 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index af3261b..776e393 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -836,7 +836,7 @@ affs_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; int res; res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916..6025084 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -716,13 +716,14 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size) return 0; bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab); - - bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); - if (!bs->bvec_integrity_pool) + if (!bs->bio_integrity_pool) return -1; - if (!bs->bio_integrity_pool) + bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); + if (!bs->bvec_integrity_pool) { + mempool_destroy(bs->bio_integrity_pool); return -1; + } return 0; } diff --git a/fs/coredump.c b/fs/coredump.c index 72f816d..9bdeca1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -190,6 +190,11 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) err = cn_printf(cn, "%d", task_tgid_vnr(current)); break; + /* global pid */ + case 'P': + err = cn_printf(cn, "%d", + task_tgid_nr(current)); + break; /* uid */ case 'u': err = cn_printf(cn, "%d", cred->uid); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 293f867..473e09d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -740,6 +740,7 @@ static void ep_free(struct eventpoll *ep) epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); + cond_resched(); } /* @@ -754,6 +755,7 @@ static void ep_free(struct eventpoll *ep) while ((rbp = rb_first(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); + cond_resched(); } mutex_unlock(&ep->mtx); @@ -74,6 +74,8 @@ static DEFINE_RWLOCK(binfmt_lock); void __register_binfmt(struct linux_binfmt * fmt, int insert) { BUG_ON(!fmt); + if (WARN_ON(!fmt->load_binary)) + return; write_lock(&binfmt_lock); insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); @@ -266,7 +268,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; + vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); @@ -1365,18 +1367,18 @@ out: } EXPORT_SYMBOL(remove_arg_zero); +#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) /* * cycle the list of binary formats handler, until one recognizes the image */ int search_binary_handler(struct linux_binprm *bprm) { - unsigned int depth = bprm->recursion_depth; - int try,retval; + bool need_retry = IS_ENABLED(CONFIG_MODULES); struct linux_binfmt *fmt; - pid_t old_pid, old_vpid; + int retval; /* This allows 4 levels of binfmt rewrites before failing hard. */ - if (depth > 5) + if (bprm->recursion_depth > 5) return -ELOOP; retval = security_bprm_check(bprm); @@ -1387,71 +1389,67 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; + retval = -ENOENT; + retry: + read_lock(&binfmt_lock); + list_for_each_entry(fmt, &formats, lh) { + if (!try_module_get(fmt->module)) + continue; + read_unlock(&binfmt_lock); + bprm->recursion_depth++; + retval = fmt->load_binary(bprm); + bprm->recursion_depth--; + if (retval >= 0 || retval != -ENOEXEC || + bprm->mm == NULL || bprm->file == NULL) { + put_binfmt(fmt); + return retval; + } + read_lock(&binfmt_lock); + put_binfmt(fmt); + } + read_unlock(&binfmt_lock); + + if (need_retry && retval == -ENOEXEC) { + if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && + printable(bprm->buf[2]) && printable(bprm->buf[3])) + return retval; + if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0) + return retval; + need_retry = false; + goto retry; + } + + return retval; +} +EXPORT_SYMBOL(search_binary_handler); + +static int exec_binprm(struct linux_binprm *bprm) +{ + pid_t old_pid, old_vpid; + int ret; + /* Need to fetch pid before load_binary changes it */ old_pid = current->pid; rcu_read_lock(); old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); - retval = -ENOENT; - for (try=0; try<2; try++) { - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - int (*fn)(struct linux_binprm *) = fmt->load_binary; - if (!fn) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - bprm->recursion_depth = depth + 1; - retval = fn(bprm); - bprm->recursion_depth = depth; - if (retval >= 0) { - if (depth == 0) { - trace_sched_process_exec(current, old_pid, bprm); - ptrace_event(PTRACE_EVENT_EXEC, old_vpid); - } - put_binfmt(fmt); - allow_write_access(bprm->file); - if (bprm->file) - fput(bprm->file); - bprm->file = NULL; - current->did_exec = 1; - proc_exec_connector(current); - return retval; - } - read_lock(&binfmt_lock); - put_binfmt(fmt); - if (retval != -ENOEXEC || bprm->mm == NULL) - break; - if (!bprm->file) { - read_unlock(&binfmt_lock); - return retval; - } + ret = search_binary_handler(bprm); + if (ret >= 0) { + trace_sched_process_exec(current, old_pid, bprm); + ptrace_event(PTRACE_EVENT_EXEC, old_vpid); + current->did_exec = 1; + proc_exec_connector(current); + + if (bprm->file) { + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; /* to catch use-after-free */ } - read_unlock(&binfmt_lock); -#ifdef CONFIG_MODULES - if (retval != -ENOEXEC || bprm->mm == NULL) { - break; - } else { -#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) - if (printable(bprm->buf[0]) && - printable(bprm->buf[1]) && - printable(bprm->buf[2]) && - printable(bprm->buf[3])) - break; /* -ENOEXEC */ - if (try) - break; /* -ENOEXEC */ - request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); - } -#else - break; -#endif } - return retval; -} -EXPORT_SYMBOL(search_binary_handler); + return ret; +} /* * sys_execve() executes a new program. @@ -1541,7 +1539,7 @@ static int do_execve_common(const char *filename, if (retval < 0) goto out; - retval = search_binary_handler(bprm); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/fs/file_table.c b/fs/file_table.c index 322cd37..abdd15a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -311,8 +311,7 @@ void fput(struct file *file) return; /* * After this task has run exit_task_work(), - * task_work_add() will fail. free_ipc_ns()-> - * shm_destroy() can do this. Fall through to delayed + * task_work_add() will fail. Fall through to delayed * fput to avoid leaking *file. */ } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68851ff..30f6f27 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -723,7 +723,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb, return wrote; } -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, +static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason) { struct wb_writeback_work work = { @@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) { struct backing_dev_info *bdi; - if (!nr_pages) { - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - } + if (!nr_pages) + nr_pages = get_nr_dirty_pages(); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { @@ -1173,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) bool wakeup_bdi = false; bdi = inode_to_bdi(inode); + spin_unlock(&inode->i_lock); + spin_lock(&bdi->wb.list_lock); if (bdi_cap_writeback_dirty(bdi)) { WARN(!test_bit(BDI_registered, &bdi->state), "bdi-%s not registered\n", bdi->name); @@ -1187,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = true; } - spin_unlock(&inode->i_lock); - spin_lock(&bdi->wb.list_lock); inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &bdi->wb.b_dirty); spin_unlock(&bdi->wb.list_lock); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 8702b73..73899c1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -913,7 +913,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, (1 << FSCACHE_OP_WAITING) | (1 << FSCACHE_OP_UNUSE_COOKIE); - ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); + ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM); if (ret < 0) goto nomem_free; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e0fe703..8443459 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -930,7 +930,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) fc->bdi.name = "fuse"; fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; err = bdi_init(&fc->bdi); if (err) diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig index a633718..24bc20f 100644 --- a/fs/hfsplus/Kconfig +++ b/fs/hfsplus/Kconfig @@ -11,3 +11,21 @@ config HFSPLUS_FS MacOS 8. It includes all Mac specific filesystem data such as data forks and creator codes, but it also has several UNIX style features such as file ownership and permissions. + +config HFSPLUS_FS_POSIX_ACL + bool "HFS+ POSIX Access Control Lists" + depends on HFSPLUS_FS + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + It needs to understand that POSIX ACLs are treated only under + Linux. POSIX ACLs doesn't mean something under Mac OS X. + Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs, + which are part of the NFSv4 standard. + + If you don't know what Access Control Lists are, say N diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile index 09d278b..683fca2 100644 --- a/fs/hfsplus/Makefile +++ b/fs/hfsplus/Makefile @@ -7,3 +7,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \ attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o + +hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h new file mode 100644 index 0000000..07c0d49 --- /dev/null +++ b/fs/hfsplus/acl.h @@ -0,0 +1,30 @@ +/* + * linux/fs/hfsplus/acl.h + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for Posix Access Control Lists (ACLs) support. + */ + +#include <linux/posix_acl_xattr.h> + +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + +/* posix_acl.c */ +struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type); +extern int hfsplus_posix_acl_chmod(struct inode *); +extern int hfsplus_init_posix_acl(struct inode *, struct inode *); + +#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */ +#define hfsplus_get_posix_acl NULL + +static inline int hfsplus_posix_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index d8ce4bd..4a4fea0 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -16,6 +16,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" #include "xattr.h" +#include "acl.h" static inline void hfsplus_instantiate(struct dentry *dentry, struct inode *inode, u32 cnid) @@ -529,6 +530,9 @@ const struct inode_operations hfsplus_dir_inode_operations = { .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, .removexattr = hfsplus_removexattr, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + .get_acl = hfsplus_get_posix_acl, +#endif }; const struct file_operations hfsplus_dir_operations = { diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index ede7931..2b9cd016 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -30,6 +30,7 @@ #define DBG_EXTENT 0x00000020 #define DBG_BITMAP 0x00000040 #define DBG_ATTR_MOD 0x00000080 +#define DBG_ACL_MOD 0x00000100 #if 0 #define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index f833d35..4d2edae 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -19,6 +19,7 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" #include "xattr.h" +#include "acl.h" static int hfsplus_readpage(struct file *file, struct page *page) { @@ -316,6 +317,13 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) setattr_copy(inode, attr); mark_inode_dirty(inode); + + if (attr->ia_valid & ATTR_MODE) { + error = hfsplus_posix_acl_chmod(inode); + if (unlikely(error)) + return error; + } + return 0; } @@ -383,6 +391,9 @@ static const struct inode_operations hfsplus_file_inode_operations = { .getxattr = generic_getxattr, .listxattr = hfsplus_listxattr, .removexattr = hfsplus_removexattr, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + .get_acl = hfsplus_get_posix_acl, +#endif }; static const struct file_operations hfsplus_file_operations = { diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c new file mode 100644 index 0000000..b609cc1 --- /dev/null +++ b/fs/hfsplus/posix_acl.c @@ -0,0 +1,274 @@ +/* + * linux/fs/hfsplus/posix_acl.c + * + * Vyacheslav Dubeyko <slava@dubeyko.com> + * + * Handler for Posix Access Control Lists (ACLs) support. + */ + +#include "hfsplus_fs.h" +#include "xattr.h" +#include "acl.h" + +struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + char *xattr_name; + char *value = NULL; + ssize_t size; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + xattr_name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + xattr_name = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + size = __hfsplus_getxattr(inode, xattr_name, NULL, 0); + + if (size > 0) { + value = (char *)hfsplus_alloc_attr_entry(); + if (unlikely(!value)) + return ERR_PTR(-ENOMEM); + size = __hfsplus_getxattr(inode, xattr_name, value, size); + } + + if (size > 0) + acl = posix_acl_from_xattr(&init_user_ns, value, size); + else if (size == -ENODATA) + acl = NULL; + else + acl = ERR_PTR(size); + + hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + return acl; +} + +static int hfsplus_set_posix_acl(struct inode *inode, + int type, + struct posix_acl *acl) +{ + int err; + char *xattr_name; + size_t size = 0; + char *value = NULL; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + xattr_name = POSIX_ACL_XATTR_ACCESS; + if (acl) { + err = posix_acl_equiv_mode(acl, &inode->i_mode); + if (err < 0) + return err; + } + err = 0; + break; + + case ACL_TYPE_DEFAULT: + xattr_name = POSIX_ACL_XATTR_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE)) + return -ENOMEM; + value = (char *)hfsplus_alloc_attr_entry(); + if (unlikely(!value)) + return -ENOMEM; + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (unlikely(err < 0)) + goto end_set_acl; + } + + err = __hfsplus_setxattr(inode, xattr_name, value, size, 0); + +end_set_acl: + hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); + + if (!err) + set_cached_acl(inode, type, acl); + + return err; +} + +int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) +{ + int err = 0; + struct posix_acl *acl = NULL; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, dir->ino %lu\n", + __func__, inode->i_ino, dir->i_ino); + + if (S_ISLNK(inode->i_mode)) + return 0; + + acl = hfsplus_get_posix_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + if (S_ISDIR(inode->i_mode)) { + err = hfsplus_set_posix_acl(inode, + ACL_TYPE_DEFAULT, + acl); + if (unlikely(err)) + goto init_acl_cleanup; + } + + err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (unlikely(err < 0)) + return err; + + if (err > 0) + err = hfsplus_set_posix_acl(inode, + ACL_TYPE_ACCESS, + acl); + } else + inode->i_mode &= ~current_umask(); + +init_acl_cleanup: + posix_acl_release(acl); + return err; +} + +int hfsplus_posix_acl_chmod(struct inode *inode) +{ + int err; + struct posix_acl *acl; + + hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino); + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = hfsplus_get_posix_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + err = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (unlikely(err)) + return err; + + err = hfsplus_set_posix_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return err; +} + +static int hfsplus_xattr_get_posix_acl(struct dentry *dentry, + const char *name, + void *buffer, + size_t size, + int type) +{ + int err = 0; + struct posix_acl *acl; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, buffer %p, size %zu, type %#x\n", + __func__, dentry->d_inode->i_ino, buffer, size, type); + + if (strcmp(name, "") != 0) + return -EINVAL; + + acl = hfsplus_get_posix_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + posix_acl_release(acl); + + return err; +} + +static int hfsplus_xattr_set_posix_acl(struct dentry *dentry, + const char *name, + const void *value, + size_t size, + int flags, + int type) +{ + int err = 0; + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + + hfs_dbg(ACL_MOD, + "[%s]: ino %lu, value %p, size %zu, flags %#x, type %#x\n", + __func__, inode->i_ino, value, size, flags, type); + + if (strcmp(name, "") != 0) + return -EINVAL; + + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + err = posix_acl_valid(acl); + if (err) + goto end_xattr_set_acl; + } + } + + err = hfsplus_set_posix_acl(inode, type, acl); + +end_xattr_set_acl: + posix_acl_release(acl); + return err; +} + +static size_t hfsplus_xattr_list_posix_acl(struct dentry *dentry, + char *list, + size_t list_size, + const char *name, + size_t name_len, + int type) +{ + /* + * This method is not used. + * It is used hfsplus_listxattr() instead of generic_listxattr(). + */ + return -EOPNOTSUPP; +} + +const struct xattr_handler hfsplus_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .list = hfsplus_xattr_list_posix_acl, + .get = hfsplus_xattr_get_posix_acl, + .set = hfsplus_xattr_set_posix_acl, +}; + +const struct xattr_handler hfsplus_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .list = hfsplus_xattr_list_posix_acl, + .get = hfsplus_xattr_get_posix_acl, + .set = hfsplus_xattr_set_posix_acl, +}; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index f6634615..bd8471f 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -8,11 +8,16 @@ #include "hfsplus_fs.h" #include "xattr.h" +#include "acl.h" const struct xattr_handler *hfsplus_xattr_handlers[] = { &hfsplus_xattr_osx_handler, &hfsplus_xattr_user_handler, &hfsplus_xattr_trusted_handler, +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + &hfsplus_xattr_acl_access_handler, + &hfsplus_xattr_acl_default_handler, +#endif &hfsplus_xattr_security_handler, NULL }; @@ -46,11 +51,58 @@ static inline int is_known_namespace(const char *name) return true; } +static int can_set_system_xattr(struct inode *inode, const char *name, + const void *value, size_t size) +{ +#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL + struct posix_acl *acl; + int err; + + if (!inode_owner_or_capable(inode)) + return -EPERM; + + /* + * POSIX_ACL_XATTR_ACCESS is tied to i_mode + */ + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + err = posix_acl_equiv_mode(acl, &inode->i_mode); + posix_acl_release(acl); + if (err < 0) + return err; + mark_inode_dirty(inode); + } + /* + * We're changing the ACL. Get rid of the cached one + */ + forget_cached_acl(inode, ACL_TYPE_ACCESS); + + return 0; + } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + posix_acl_release(acl); + + /* + * We're changing the default ACL. Get rid of the cached one + */ + forget_cached_acl(inode, ACL_TYPE_DEFAULT); + + return 0; + } +#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */ + return -EOPNOTSUPP; +} + static int can_set_xattr(struct inode *inode, const char *name, const void *value, size_t value_len) { if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return -EOPNOTSUPP; /* TODO: implement ACL support */ + return can_set_system_xattr(inode, name, value, value_len); if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) { /* @@ -253,11 +305,10 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len) return len; } -static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry, +static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, void *value, size_t size) { ssize_t res = 0; - struct inode *inode = dentry->d_inode; struct hfs_find_data fd; u16 entry_type; u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo); @@ -304,10 +355,9 @@ end_getxattr_finder_info: return res; } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct inode *inode = dentry->d_inode; struct hfs_find_data fd; hfsplus_attr_entry *entry; __be32 xattr_record_type; @@ -333,7 +383,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, } if (!strcmp_xattr_finder_info(name)) - return hfsplus_getxattr_finder_info(dentry, value, size); + return hfsplus_getxattr_finder_info(inode, value, size); if (!HFSPLUS_SB(inode->i_sb)->attr_tree) return -EOPNOTSUPP; diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index 847b695..841b569 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h @@ -14,8 +14,8 @@ extern const struct xattr_handler hfsplus_xattr_osx_handler; extern const struct xattr_handler hfsplus_xattr_user_handler; extern const struct xattr_handler hfsplus_xattr_trusted_handler; -/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/ -/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/ +extern const struct xattr_handler hfsplus_xattr_acl_access_handler; +extern const struct xattr_handler hfsplus_xattr_acl_default_handler; extern const struct xattr_handler hfsplus_xattr_security_handler; extern const struct xattr_handler *hfsplus_xattr_handlers[]; @@ -29,9 +29,17 @@ static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags); } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size); +static inline ssize_t hfsplus_getxattr(struct dentry *dentry, + const char *name, + void *value, + size_t size) +{ + return __hfsplus_getxattr(dentry->d_inode, name, value, size); +} + ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); int hfsplus_removexattr(struct dentry *dentry, const char *name); @@ -39,22 +47,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name); int hfsplus_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr); -static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir) -{ - /*TODO: implement*/ - return 0; -} - -static inline int hfsplus_init_inode_security(struct inode *inode, - struct inode *dir, - const struct qstr *qstr) -{ - int err; - - err = hfsplus_init_acl(inode, dir); - if (!err) - err = hfsplus_init_security(inode, dir, qstr); - return err; -} +int hfsplus_init_inode_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); #endif diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 83b842f..0072276 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -9,6 +9,7 @@ #include <linux/security.h> #include "hfsplus_fs.h" #include "xattr.h" +#include "acl.h" static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size, int type) @@ -96,6 +97,18 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir, &hfsplus_initxattrs, NULL); } +int hfsplus_init_inode_security(struct inode *inode, + struct inode *dir, + const struct qstr *qstr) +{ + int err; + + err = hfsplus_init_posix_acl(inode, dir); + if (!err) + err = hfsplus_init_security(inode, dir, qstr); + return err; +} + const struct xattr_handler hfsplus_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = hfsplus_security_listxattr, diff --git a/fs/namespace.c b/fs/namespace.c index 25845d1..da5c494 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -17,7 +17,7 @@ #include <linux/security.h> #include <linux/idr.h> #include <linux/acct.h> /* acct_auto_close_mnt */ -#include <linux/ramfs.h> /* init_rootfs */ +#include <linux/init.h> /* init_rootfs */ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 8a40457..b4f788e 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) return ERR_PTR(-EINVAL); count = size / sizeof(struct posix_acl_entry); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 94417a8..f37d3c0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, out_write_size: pos += copied; - if (pos > inode->i_size) { + if (pos > i_size_read(inode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5c1c864..363f0dc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, struct o2nm_node *node, int idx) { - struct list_head *iter; struct o2hb_callback_func *f; - list_for_each(iter, &hbcall->list) { - f = list_entry(iter, struct o2hb_callback_func, hc_item); + list_for_each_entry(f, &hbcall->list, hc_item) { mlog(ML_HEARTBEAT, "calling funcs %p\n", f); (f->hc_func)(node, idx, f->hc_data); } @@ -641,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, /* Will run the list in order until we process the passed event */ static void o2hb_run_event_list(struct o2hb_node_event *queued_event) { - int empty; struct o2hb_callback *hbcall; struct o2hb_node_event *event; - spin_lock(&o2hb_live_lock); - empty = list_empty(&queued_event->hn_item); - spin_unlock(&o2hb_live_lock); - if (empty) - return; - /* Holding callback sem assures we don't alter the callback * lists when doing this, and serializes ourselves with other * processes wanting callbacks. */ @@ -709,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) struct o2hb_node_event event = { .hn_item = LIST_HEAD_INIT(event.hn_item), }; struct o2nm_node *node; + int queued = 0; node = o2nm_get_node_by_num(slot->ds_node_num); if (!node) @@ -726,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, slot->ds_node_num); + queued = 1; } } spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); o2nm_node_put(node); } @@ -790,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; unsigned int slot_dead_ms; int tmp; + int queued = 0; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); @@ -883,6 +878,7 @@ fire_callbacks: slot->ds_node_num); changed = 1; + queued = 1; } list_add_tail(&slot->ds_live_item, @@ -934,6 +930,7 @@ fire_callbacks: node, slot->ds_node_num); changed = 1; + queued = 1; } /* We don't clear this because the node is still @@ -949,7 +946,8 @@ fire_callbacks: out: spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); if (node) o2nm_node_put(node); @@ -2516,8 +2514,7 @@ unlock: int o2hb_register_callback(const char *region_uuid, struct o2hb_callback_func *hc) { - struct o2hb_callback_func *tmp; - struct list_head *iter; + struct o2hb_callback_func *f; struct o2hb_callback *hbcall; int ret; @@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid, down_write(&o2hb_callback_sem); - list_for_each(iter, &hbcall->list) { - tmp = list_entry(iter, struct o2hb_callback_func, hc_item); - if (hc->hc_priority < tmp->hc_priority) { - list_add_tail(&hc->hc_item, iter); + list_for_each_entry(f, &hbcall->list, hc_item) { + if (hc->hc_priority < f->hc_priority) { + list_add_tail(&hc->hc_item, &f->hc_item); break; } } diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d644dc6..2cd2406 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_NOTICE "o2net: No longer connected to " - SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); + if (old_sc) + printk(KERN_NOTICE "o2net: No longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -765,32 +766,32 @@ static struct o2net_msg_handler * o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, struct rb_node **ret_parent) { - struct rb_node **p = &o2net_handler_tree.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p = &o2net_handler_tree.rb_node; + struct rb_node *parent = NULL; struct o2net_msg_handler *nmh, *ret = NULL; int cmp; - while (*p) { - parent = *p; - nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); + while (*p) { + parent = *p; + nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); cmp = o2net_handler_cmp(nmh, msg_type, key); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else { + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { ret = nmh; - break; + break; } - } + } - if (ret_p != NULL) - *ret_p = p; - if (ret_parent != NULL) - *ret_parent = parent; + if (ret_p != NULL) + *ret_p = p; + if (ret_parent != NULL) + *ret_parent = parent; - return ret; + return ret; } static void o2net_handler_kref_release(struct kref *kref) @@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work) ret = 0; out: - if (ret) { + if (ret && sc) { printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); /* 0 err so that another will be queued and attempted * from set_nn_state */ - if (sc) - o2net_ensure_shutdown(nn, sc, 0); + o2net_ensure_shutdown(nn, sc, 0); } if (sc) sc_put(sc); @@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock) if (o2nm_this_node() >= node->nd_num) { local_node = o2nm_get_node_by_num(o2nm_this_node()); - printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " - "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " - "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, - &(local_node->nd_ipv4_address), - ntohs(local_node->nd_ipv4_port), node->nd_name, - node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + if (local_node) + printk(KERN_NOTICE "o2net: Unexpected connect attempt " + "seen at node '%s' (%u, %pI4:%d) from " + "node '%s' (%u, %pI4:%d)\n", + local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), + node->nd_name, + node->nd_num, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fbec0be..b46278f 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_lock *lock = NULL; struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; - struct list_head *iter, *head=NULL; + struct list_head *head = NULL; __be64 cookie; u32 flags; u8 node; @@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, /* try convert queue for both ast/bast */ head = &res->converting; lock = NULL; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } @@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, else head = &res->granted; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index de854cc..e051776 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request) static inline int dlm_lock_on_list(struct list_head *head, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, head) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, head, list) { if (tmplock == lock) return 1; } diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 29a886d..e36d63f 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, int *kick_thread) { enum dlm_status status = DLM_NORMAL; - struct list_head *iter; struct dlm_lock *tmplock=NULL; assert_spin_locked(&res->spinlock); @@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, /* upconvert from here on */ status = DLM_NORMAL; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, &res->granted, list) { if (tmplock == lock) continue; if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; /* existing conversion requests take precedence */ @@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; + struct dlm_lock *tmp_lock; struct dlm_lockstatus *lksb; enum dlm_status status = DLM_NORMAL; u32 flags; @@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, dlm_error(status); goto leave; } - list_for_each(iter, &res->granted) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock->ml.cookie == cnv->cookie && - lock->ml.node == cnv->node_idx) { + list_for_each_entry(tmp_lock, &res->granted, list) { + if (tmp_lock->ml.cookie == cnv->cookie && + tmp_lock->ml.node == cnv->node_idx) { + lock = tmp_lock; dlm_lock_get(lock); break; } - lock = NULL; } spin_unlock(&res->spinlock); if (!lock) { diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0e28e24..e33cd7a 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock) void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) { - struct list_head *iter2; struct dlm_lock *lock; char buf[DLM_LOCKID_NAME_MAX]; @@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) res->inflight_locks, atomic_read(&res->asts_reserved)); dlm_print_lockres_refmap(res); printk(" granted queue:\n"); - list_for_each(iter2, &res->granted) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { __dlm_print_lock(lock); } printk(" converting queue:\n"); - list_for_each(iter2, &res->converting) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->converting, list) { __dlm_print_lock(lock); } printk(" blocked queue:\n"); - list_for_each(iter2, &res->blocked) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->blocked, list) { __dlm_print_lock(lock); } } @@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) { struct dlm_master_list_entry *mle; struct hlist_head *bucket; - struct hlist_node *list; int i, out = 0; unsigned long total = 0, longest = 0, bucket_count = 0; @@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(mle, bucket, master_hash_node) { ++total; ++bucket_count; if (len - out < 200) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index dbb17c0..8b3382a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, unsigned int hash) { struct hlist_head *bucket; - struct hlist_node *list; + struct dlm_lock_resource *res; mlog(0, "%.*s\n", len, name); @@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, bucket = dlm_lockres_hash(dlm, hash); - hlist_for_each(list, bucket) { - struct dlm_lock_resource *res = hlist_entry(list, - struct dlm_lock_resource, hash_node); + hlist_for_each_entry(res, bucket, hash_node) { if (res->lockname.name[0] != name[0]) continue; if (unlikely(res->lockname.len != len)) @@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) { - struct dlm_ctxt *tmp = NULL; - struct list_head *iter; + struct dlm_ctxt *tmp; assert_spin_locked(&dlm_domain_lock); /* tmp->name here is always NULL terminated, * but domain may not be! */ - list_for_each(iter, &dlm_domains) { - tmp = list_entry (iter, struct dlm_ctxt, list); + list_for_each_entry(tmp, &dlm_domains, list) { if (strlen(tmp->name) == len && memcmp(tmp->name, domain, len)==0) - break; - tmp = NULL; + return tmp; } - return tmp; + return NULL; } /* For null terminated domain strings ONLY */ @@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm) * you shouldn't trust your pointer. */ struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) { - struct list_head *iter; - struct dlm_ctxt *target = NULL; + struct dlm_ctxt *target; + struct dlm_ctxt *ret = NULL; spin_lock(&dlm_domain_lock); - list_for_each(iter, &dlm_domains) { - target = list_entry (iter, struct dlm_ctxt, list); - + list_for_each_entry(target, &dlm_domains, list) { if (target == dlm) { __dlm_get(target); + ret = target; break; } - - target = NULL; } spin_unlock(&dlm_domain_lock); - return target; + return ret; } int dlm_domain_fully_joined(struct dlm_ctxt *dlm) @@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem); void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, int node_num) { - struct list_head *iter; struct dlm_eviction_cb *cb; down_read(&dlm_callback_sem); - list_for_each(iter, &dlm->dlm_eviction_callbacks) { - cb = list_entry(iter, struct dlm_eviction_cb, ec_item); - + list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { cb->ec_func(node_num, cb->ec_data); } up_read(&dlm_callback_sem); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 47e67c2..5d32f75 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void) static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); - + list_for_each_entry(tmplock, &res->granted, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); - + list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; if (!dlm_lock_compatible(tmplock->ml.convert_type, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 33ecbe0..cf0f103 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, { struct dlm_master_list_entry *tmpmle; struct hlist_head *bucket; - struct hlist_node *list; unsigned int hash; assert_spin_locked(&dlm->master_lock); hash = dlm_lockid_hash(name, namelen); bucket = dlm_master_hash(dlm, hash); - hlist_for_each(list, bucket) { - tmpmle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(tmpmle, bucket, master_hash_node) { if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; struct hlist_head *bucket; - struct hlist_node *list; + struct hlist_node *tmp; unsigned int i; mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); @@ -3194,10 +3191,7 @@ top: spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); @@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) int i; struct hlist_head *bucket; struct dlm_master_list_entry *mle; - struct hlist_node *tmp, *list; + struct hlist_node *tmp; /* * We notified all other nodes that we are exiting the domain and @@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each_safe(list, tmp, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { if (mle->type != DLM_MLE_BLOCK) { mlog(ML_ERROR, "bad mle: %p\n", mle); dlm_print_one_mle(mle); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 773bd32..0b5adca 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, { struct dlm_lock_request lr; int ret; + int status; mlog(0, "\n"); @@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, // send message ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, - &lr, sizeof(lr), request_from, NULL); + &lr, sizeof(lr), request_from, &status); /* negative status is handled by caller */ if (ret < 0) mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " "to recover dead node %u\n", dlm->name, ret, request_from, dead_node); + else + ret = status; // return from here, then // sleep until all received or error return ret; @@ -2328,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); + } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + if (test_bit(dead_node, res->refmap)) { + mlog(0, "%s:%.*s: dead node %u had a ref, but had " + "no locks and had not purged before dying\n", + dlm->name, res->lockname.len, + res->lockname.name, dead_node); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); + } } spin_unlock(&res->spinlock); } diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index e73c833..9db869d 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { struct dlm_lock *lock, *target; - struct list_head *iter; - struct list_head *head; int can_grant = 1; /* @@ -314,9 +312,7 @@ converting: dlm->name, res->lockname.len, res->lockname.name); BUG(); } - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -333,9 +329,8 @@ converting: target->ml.convert_type; } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + + list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -384,9 +379,7 @@ blocked: goto leave; target = list_entry(res->blocked.next, struct dlm_lock, list); - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { @@ -400,9 +393,7 @@ blocked: } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 850aa7e..5698b52 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; enum dlm_status status = DLM_NORMAL; int found = 0, i; @@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, } for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, queue, list) { if (lock->ml.cookie == unlock->cookie && lock->ml.node == unlock->node_idx) { dlm_lock_get(lock); diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 12bafb7..efa2b3d 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); umode_t mode = S_IFDIR | 0755; - struct dlmfs_inode_private *ip; if (inode) { - ip = DLMFS_I(inode); - inode->i_ino = get_next_ino(); inode_init_owner(inode, NULL, mode); inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116..767370b 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; @@ -852,20 +851,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) down_read(&OCFS2_I(inode)->ip_alloc_sem); - if (*offset >= inode->i_size) { + if (*offset >= i_size_read(inode)) { ret = -ENXIO; goto out_unlock; } if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { if (whence == SEEK_HOLE) - *offset = inode->i_size; + *offset = i_size_read(inode); goto out_unlock; } clen = 0; cpos = *offset >> cs_bits; - cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); + cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); while (cpos < cend && !is_last) { ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, @@ -904,8 +903,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) extlen = clen; extlen <<= cs_bits; - if ((extoff + extlen) > inode->i_size) - extlen = inode->i_size - extoff; + if ((extoff + extlen) > i_size_read(inode)) + extlen = i_size_read(inode) - extoff; extoff += extlen; if (extoff > *offset) *offset = extoff; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3261d71..4f8197c 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -671,11 +671,7 @@ restarted_transaction: } else { BUG_ON(why != RESTART_TRANS); - /* TODO: This can be more intelligent. */ - credits = ocfs2_calc_extend_credits(osb->sb, - &fe->id2.i_list, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); + status = ocfs2_allocate_extend_trans(handle, 1); if (status < 0) { /* handle still has to be committed at * this point. */ @@ -1800,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); out: + ocfs2_free_path(path); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 0c60ef2..fa32ce9 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, if (o2info_from_user(oij, req)) goto bail; - oij.ij_journal_size = osb->journal->j_inode->i_size; + oij.ij_journal_size = i_size_read(osb->journal->j_inode); o2info_set_request_filled(&oij.ij_req); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 242170d..44fc3e5 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -455,6 +455,41 @@ bail: return status; } +/* + * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. + * If that fails, restart the transaction & regain write access for the + * buffer head which is used for metadata modifications. + * Taken from Ext4: extend_or_restart_transaction() + */ +int ocfs2_allocate_extend_trans(handle_t *handle, int thresh) +{ + int status, old_nblks; + + BUG_ON(!handle); + + old_nblks = handle->h_buffer_credits; + trace_ocfs2_allocate_extend_trans(old_nblks, thresh); + + if (old_nblks < thresh) + return 0; + + status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + if (status > 0) { + status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) + mlog_errno(status); + } + +bail: + return status; +} + + struct ocfs2_triggers { struct jbd2_buffer_trigger_type ot_triggers; int ot_offset; @@ -801,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) inode_lock = 1; di = (struct ocfs2_dinode *)bh->b_data; - if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { + if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", - inode->i_size); + i_size_read(inode)); status = -EINVAL; goto done; } - trace_ocfs2_journal_init(inode->i_size, + trace_ocfs2_journal_init(i_size_read(inode), (unsigned long long)inode->i_blocks, OCFS2_I(inode)->ip_clusters); @@ -1096,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode) memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); - num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); + num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0a99273..0b479ba 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -258,6 +258,17 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle); int ocfs2_extend_trans(handle_t *handle, int nblocks); +int ocfs2_allocate_extend_trans(handle_t *handle, + int thresh); + +/* + * Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * fallocate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. + */ +#define OCFS2_MAX_TRANS_DATA 64U /* * Create access is for when we get a newly created buffer and we're diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index aebeacd..cd5496b 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, } retry_enospc: - (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; + (*ac)->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == @@ -1154,7 +1154,7 @@ retry_enospc: OCFS2_LA_DISABLED) goto bail; - ac->ac_bits_wanted = osb->local_alloc_default_bits; + ac->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, &cluster_off, diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 452068b..3d3f3c8 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle, } out: + ocfs2_free_path(path); return ret; } @@ -845,7 +846,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, struct ocfs2_move_extents *range = context->range; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if ((inode->i_size == 0) || (range->me_len == 0)) + if ((i_size_read(inode) == 0) || (range->me_len == 0)) return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 3b481f4..1b60c62 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); +DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); + DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 332a281..aaa5061 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; } - if (gqinode->i_size < off + len) { + if (i_size_read(gqinode) < off + len) { loff_t rounded_end = ocfs2_align_bytes_to_blocks(sb, off + len); @@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) */ WARN_ON(journal_current_handle()); status = ocfs2_extend_no_holes(gqinode, NULL, - gqinode->i_size + (need_alloc << sb->s_blocksize_bits), - gqinode->i_size); + i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits), + i_size_read(gqinode)); if (status < 0) goto out_dq; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee..2e4344b 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + 2 * sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + 2 * sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + 2 * sb->s_blocksize); + i_size_read(lqinode) + 2 * sb->s_blocksize); if (status < 0) { mlog_errno(status); goto out; @@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + sb->s_blocksize); + i_size_read(lqinode) + sb->s_blocksize); if (status < 0) { mlog_errno(status); goto out; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a70d604..bf4dfc1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, while (cpos < clusters) { ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto unlock; + } if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { ret = ocfs2_add_refcount_flag(inode, &di_et, &ref_tree->rf_ci, @@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode, while (cpos < clusters) { ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto out; + } if (p_cluster) { ret = ocfs2_add_refcounted_extent(t_inode, &et, ref_ci, ref_root_bh, diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 317ef0a..6ce0686 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode, int ret, credits, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; - struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; struct ocfs2_xattr_info xi = { @@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode, if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); - goto cleanup; + goto out_free_ac; } ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); ocfs2_commit_trans(osb, ctxt.handle); +out_free_ac: if (ctxt.data_ac) ocfs2_free_alloc_context(ctxt.data_ac); if (ctxt.meta_ac) @@ -5881,6 +5882,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode, while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, el, &ext_flags); + if (ret) { + mlog_errno(ret); + break; + } cpos += num_clusters; if ((ext_flags & OCFS2_EXT_REFCOUNTED)) @@ -6797,7 +6802,7 @@ out: if (ret) { if (*meta_ac) { ocfs2_free_alloc_context(*meta_ac); - meta_ac = NULL; + *meta_ac = NULL; } } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 0ff80f9..985ea88 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -286,7 +286,7 @@ int proc_fd_permission(struct inode *inode, int mask) int rv = generic_permission(inode, mask); if (rv == 0) return 0; - if (task_pid(current) == proc_pid(inode)) + if (task_tgid(current) == proc_pid(inode)) rv = 0; return rv; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 107d026..7366e9d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_file_clear_soft_dirty(ptent); } + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma->vm_flags & VM_SOFTDIRTY) + flags2 |= __PM_SOFT_DIRTY; + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; - if (pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); @@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2)); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, @@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { int pmd_flags2; - pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); + if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) + pmd_flags2 = __PM_SOFT_DIRTY; + else + pmd_flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; @@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; for (; addr != end; addr += PAGE_SIZE) { + int flags2; /* check to see if we've left 'vma' behind * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); } /* check that 'vma' actually covers this address, @@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, #ifdef CONFIG_HUGETLB_PAGE static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, - pte_t pte, int offset) + pte_t pte, int offset, int flags2) { if (pte_present(pte)) - *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_STATUS2(pm->v2, 0) | PM_PRESENT); + *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | + PM_STATUS2(pm->v2, flags2) | + PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | + PM_STATUS2(pm->v2, flags2)); } /* This function walks within one hugetlb entry in the single call */ @@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { struct pagemapread *pm = walk->private; + struct vm_area_struct *vma; int err = 0; + int flags2; pagemap_entry_t pme; + vma = find_vma(walk->mm, addr); + WARN_ON_ONCE(!vma); + + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1376,8 +1402,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); + n = mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); + if (n < 0) + return n; seq_printf(m, "%08lx %s", vma->vm_start, buffer); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a1a16eb..9100d69 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -21,6 +21,7 @@ #include <linux/crash_dump.h> #include <linux/list.h> #include <linux/vmalloc.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <asm/io.h> #include "internal.h" @@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count, return read; } +/* + * Architectures may override this function to allocate ELF header in 2nd kernel + */ +int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + return 0; +} + +/* + * Architectures may override this function to free header + */ +void __weak elfcorehdr_free(unsigned long long addr) +{} + +/* + * Architectures may override this function to read from ELF header + */ +ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to read from notes sections + */ +ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to map oldmem + */ +int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Copy to either kernel or user space + */ +static int copy_to(void *target, void *src, size_t size, int userbuf) +{ + if (userbuf) { + if (copy_to_user((char __user *) target, src, size)) + return -EFAULT; + } else { + memcpy(target, src, size); + } + return 0; +} + /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ -static ssize_t read_vmcore(struct file *file, char __user *buffer, - size_t buflen, loff_t *fpos) +static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, + int userbuf) { ssize_t acc = 0, tmp; size_t tsz; @@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); - if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) + if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; - if (copy_to_user(buffer, kaddr, tsz)) + if (copy_to(buffer, kaddr, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, if (*fpos < m->offset + m->size) { tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); start = m->paddr + *fpos - m->offset; - tmp = read_from_oldmem(buffer, tsz, &start, 1); + tmp = read_from_oldmem(buffer, tsz, &start, userbuf); if (tmp < 0) return tmp; buflen -= tsz; @@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } +static ssize_t read_vmcore(struct file *file, char __user *buffer, + size_t buflen, loff_t *fpos) +{ + return __read_vmcore((__force char *) buffer, buflen, fpos, 1); +} + +/* + * The vmcore fault handler uses the page cache and fills data using the + * standard __vmcore_read() function. + * + * On s390 the fault handler is used for memory regions that can't be mapped + * directly with remap_pfn_range(). + */ +static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#ifdef CONFIG_S390 + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t index = vmf->pgoff; + struct page *page; + loff_t offset; + char *buf; + int rc; + + page = find_or_create_page(mapping, index, GFP_KERNEL); + if (!page) + return VM_FAULT_OOM; + if (!PageUptodate(page)) { + offset = (loff_t) index << PAGE_CACHE_SHIFT; + buf = __va((page_to_pfn(page) << PAGE_SHIFT)); + rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); + if (rc < 0) { + unlock_page(page); + page_cache_release(page); + return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + } + SetPageUptodate(page); + } + unlock_page(page); + vmf->page = page; + return 0; +#else + return VM_FAULT_SIGBUS; +#endif +} + +static const struct vm_operations_struct vmcore_mmap_ops = { + .fault = mmap_vmcore_fault, +}; + /** * alloc_elfnotes_buf - allocate buffer for ELF note segment in * vmalloc memory @@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) * regions in the 1st kernel pointed to by PT_LOAD entries) into * virtually contiguous user-space in ELF layout. */ -#if defined(CONFIG_MMU) && !defined(CONFIG_S390) +#ifdef CONFIG_MMU static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; @@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); vma->vm_flags |= VM_MIXEDMAP; + vma->vm_ops = &vmcore_mmap_ops; len = 0; @@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min_t(size_t, m->offset + m->size - start, size); paddr = m->paddr + start - m->offset; - if (remap_pfn_range(vma, vma->vm_start + len, - paddr >> PAGE_SHIFT, tsz, - vma->vm_page_prot)) + if (remap_oldmem_pfn_range(vma, vma->vm_start + len, + paddr >> PAGE_SHIFT, tsz, + vma->vm_page_prot)) goto fail; size -= tsz; start += tsz; @@ -357,7 +462,7 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; @@ -444,7 +549,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -536,7 +642,7 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; @@ -623,7 +729,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -810,7 +917,7 @@ static int __init parse_crash_elf64_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr); if (rc < 0) return rc; @@ -837,7 +944,7 @@ static int __init parse_crash_elf64_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -866,7 +973,7 @@ static int __init parse_crash_elf32_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr); if (rc < 0) return rc; @@ -892,7 +999,7 @@ static int __init parse_crash_elf32_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -919,7 +1026,7 @@ static int __init parse_crash_elf_headers(void) int rc=0; addr = elfcorehdr_addr; - rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); + rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr); if (rc < 0) return rc; if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { @@ -952,7 +1059,14 @@ static int __init vmcore_init(void) { int rc = 0; - /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ + /* Allow architectures to allocate ELF header in 2nd kernel */ + rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size); + if (rc) + return rc; + /* + * If elfcorehdr= has been passed in cmdline or created in 2nd kernel, + * then capture the dump. + */ if (!(is_vmcore_usable())) return rc; rc = parse_crash_elf_headers(); @@ -960,6 +1074,8 @@ static int __init vmcore_init(void) pr_warn("Kdump: vmcore not initialized\n"); return rc; } + elfcorehdr_free(elfcorehdr_addr); + elfcorehdr_addr = ELFCORE_ADDR_ERR; proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); if (proc_vmcore) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index c24f1e1..39d1465 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -244,12 +244,6 @@ struct dentry *ramfs_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, data, ramfs_fill_super); } -static struct dentry *rootfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); -} - static void ramfs_kill_sb(struct super_block *sb) { kfree(sb->s_fs_info); @@ -262,29 +256,23 @@ static struct file_system_type ramfs_fs_type = { .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; -static struct file_system_type rootfs_fs_type = { - .name = "rootfs", - .mount = rootfs_mount, - .kill_sb = kill_litter_super, -}; -static int __init init_ramfs_fs(void) -{ - return register_filesystem(&ramfs_fs_type); -} -module_init(init_ramfs_fs) - -int __init init_rootfs(void) +int __init init_ramfs_fs(void) { + static unsigned long once; int err; + if (test_and_set_bit(0, &once)) + return 0; + err = bdi_init(&ramfs_backing_dev_info); if (err) return err; - err = register_filesystem(&rootfs_fs_type); + err = register_filesystem(&ramfs_fs_type); if (err) bdi_destroy(&ramfs_backing_dev_info); return err; } +module_init(init_ramfs_fs) |