From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Sat, 13 Dec 2008 21:20:25 +1030
Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse,
 and cpulist_scnprintf to take pointers.

Impact: change calling convention of existing cpumask APIs

Most cpumask functions started with cpus_: these have been replaced by
cpumask_ ones which take struct cpumask pointers as expected.

These four functions don't have good replacement names; fortunately
they're rarely used, so we just change them over.

Signed-off-by: Rusty Russell
Signed-off-by: Mike Travis
Acked-by: Ingo Molnar
Cc: paulus@samba.org
Cc: mingo@redhat.com
Cc: tony.luck@intel.com
Cc: ralf@linux-mips.org
Cc: Greg Kroah-Hartman
Cc: cl@linux-foundation.org
Cc: srostedt@redhat.com
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index a2cd47d..8e516e2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3626,7 +3626,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					l->cpus);
+					&l->cpus);
 		}

 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
-- cgit v1.1

From 3e597945384dee1457240158eb81e3afb90b68c2 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 1 Jan 2009 10:12:24 +1030
Subject: cpumask: remove any_online_cpu() users: mm/

Impact: Remove obsolete API usage

any_online_cpu() is a good name, but it takes a cpumask_t, not a pointer.

There are several places where any_online_cpu() doesn't really want a
mask arg at all. Replace all callers with cpumask_any() and
cpumask_any_and().

Signed-off-by: Rusty Russell
Signed-off-by: Mike Travis
---
 mm/vmscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 62e7f62..240f062 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2141,7 +2141,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 			pg_data_t *pgdat = NODE_DATA(nid);
 			node_to_cpumask_ptr(mask, pgdat->node_id);

-			if (any_online_cpu(*mask) < nr_cpu_ids)
+			if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
 				/* One of our CPUs online: restore mask */
 				set_cpus_allowed_ptr(pgdat->kswapd, mask);
 		}
-- cgit v1.1
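
Editor's note: the replacement calls follow the pattern visible in the vmscan hunk above: pick an arbitrary CPU from the intersection of a node's mask and the online mask, and treat a result of nr_cpu_ids (or larger) as "none found". A minimal, illustrative kernel-style sketch; the helper name and the node_cpus parameter are placeholders, not taken from the patch:

#include <linux/cpumask.h>

/* Illustrative only: pick one online CPU from @node_cpus, if any. */
static int pick_online_cpu(const struct cpumask *node_cpus)
{
	int cpu;

	/* cpumask_any_and() returns >= nr_cpu_ids when the intersection is empty. */
	cpu = cpumask_any_and(cpu_online_mask, node_cpus);
	if (cpu < nr_cpu_ids)
		return cpu;	/* one of this node's CPUs is online */

	/* Fall back to any online CPU at all. */
	return cpumask_any(cpu_online_mask);
}
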
From 174596a0b9f21e8844d70566a6bb29bf48a87750 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 1 Jan 2009 10:12:29 +1030
Subject: cpumask: convert mm/

Impact: Use new API

Convert kernel mm functions to use struct cpumask.

We skip include/linux/percpu.h and mm/allocpercpu.c, which are in flux.

Signed-off-by: Rusty Russell
Signed-off-by: Mike Travis
Reviewed-by: Christoph Lameter
---
 mm/pdflush.c | 16 +++++++++++++---
 mm/slab.c    |  2 +-
 mm/slub.c    | 20 +++++++++++---------
 mm/vmscan.c  |  2 +-
 mm/vmstat.c  |  4 ++--
 5 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'mm')

diff --git a/mm/pdflush.c b/mm/pdflush.c
index a0a14c4..15de509 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -172,7 +172,16 @@ static int __pdflush(struct pdflush_work *my_work)
 static int pdflush(void *dummy)
 {
 	struct pdflush_work my_work;
-	cpumask_t cpus_allowed;
+	cpumask_var_t cpus_allowed;
+
+	/*
+	 * Since the caller doesn't even check kthread_run() worked, let's not
+	 * freak out too much if this fails.
+	 */
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+		printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
+		return 0;
+	}

 	/*
 	 * pdflush can spend a lot of time doing encryption via dm-crypt.  We
@@ -187,8 +196,9 @@ static int pdflush(void *dummy)
 	 * This is needed as pdflush's are dynamically created and destroyed.
 	 * The boottime pdflush's are easily placed w/o these 2 lines.
 	 */
-	cpuset_cpus_allowed(current, &cpus_allowed);
-	set_cpus_allowed_ptr(current, &cpus_allowed);
+	cpuset_cpus_allowed(current, cpus_allowed);
+	set_cpus_allowed_ptr(current, cpus_allowed);
+	free_cpumask_var(cpus_allowed);

 	return __pdflush(&my_work);
 }
diff --git a/mm/slab.c b/mm/slab.c
index f97e564..ddc41f3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2157,7 +2157,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,

 	/*
 	 * We use cache_chain_mutex to ensure a consistent view of
-	 * cpu_online_map as well. Please see cpuup_callback
+	 * cpu_online_mask as well. Please see cpuup_callback
 	 */
 	get_online_cpus();
 	mutex_lock(&cache_chain_mutex);
diff --git a/mm/slub.c b/mm/slub.c
index 0d861c3..f0e2892 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1970,7 +1970,7 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu,
 				kmem_cache_cpu)[NR_KMEM_CACHE_CPU];

 static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);

 static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
 						    int cpu, gfp_t flags)
@@ -2045,13 +2045,13 @@ static void init_alloc_cpu_cpu(int cpu)
 {
 	int i;

-	if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
 		return;

 	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
 		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);

-	cpu_set(cpu, kmem_cach_cpu_free_init_once);
+	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
 }

 static void __init init_alloc_cpu(void)
@@ -3451,7 +3451,7 @@ struct location {
 	long max_time;
 	long min_pid;
 	long max_pid;
-	cpumask_t cpus;
+	DECLARE_BITMAP(cpus, NR_CPUS);
 	nodemask_t nodes;
 };

@@ -3526,7 +3526,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 				if (track->pid > l->max_pid)
 					l->max_pid = track->pid;

-				cpu_set(track->cpu, l->cpus);
+				cpumask_set_cpu(track->cpu,
+						to_cpumask(l->cpus));
 			}
 			node_set(page_to_nid(virt_to_page(track)), l->nodes);
 			return 1;
@@ -3556,8 +3557,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 	l->max_time = age;
 	l->min_pid = track->pid;
 	l->max_pid = track->pid;
-	cpus_clear(l->cpus);
-	cpu_set(track->cpu, l->cpus);
+	cpumask_clear(to_cpumask(l->cpus));
+	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
 	nodes_clear(l->nodes);
 	node_set(page_to_nid(virt_to_page(track)), l->nodes);
 	return 1;
@@ -3638,11 +3639,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
 			len += sprintf(buf + len, " pid=%ld",
 				l->min_pid);

-		if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
+		if (num_online_cpus() > 1 &&
+				!cpumask_empty(to_cpumask(l->cpus)) &&
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					&l->cpus);
+					to_cpumask(l->cpus));
 		}

 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 240f062..d196f46 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1902,7 +1902,7 @@ static int kswapd(void *p)
 	};
 	node_to_cpumask_ptr(cpumask, pgdat->node_id);

-	if (!cpus_empty(*cpumask))
+	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
 	current->reclaim_state = &reclaim_state;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c3ccfda..9114974 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -20,7 +20,7 @@
 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
 EXPORT_PER_CPU_SYMBOL(vm_event_states);

-static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
 {
 	int cpu;
 	int i;
@@ -43,7 +43,7 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
 void all_vm_events(unsigned long *ret)
 {
 	get_online_cpus();
-	sum_vm_events(ret, &cpu_online_map);
+	sum_vm_events(ret, cpu_online_mask);
 	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(all_vm_events);
-- cgit v1.1
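
Editor's note: the pdflush hunk above is an instance of the standard cpumask_var_t pattern introduced by this API: allocate the mask, use it, free it, so that it lives off-stack when CONFIG_CPUMASK_OFFSTACK=y. A hedged sketch of that pattern, not taken from the patch itself (the function name is hypothetical):

#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/slab.h>

/* Illustrative only: the cpumask_var_t allocate/use/free pattern. */
static int restrict_to_online_cpus(struct task_struct *tsk)
{
	cpumask_var_t allowed;
	int ret;

	if (!alloc_cpumask_var(&allowed, GFP_KERNEL))
		return -ENOMEM;

	/* Build the mask; here we simply copy the online mask. */
	cpumask_copy(allowed, cpu_online_mask);

	ret = set_cpus_allowed_ptr(tsk, allowed);

	free_cpumask_var(allowed);	/* always freed, whether or not it was applied */
	return ret;
}
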
From 2e4e27c7d082b2198b63041310609d7191185a9d Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski
Date: Sun, 4 Jan 2009 12:00:46 -0800
Subject: vmalloc.c: fix flushing in vmap_page_range()

The flush_cache_vmap() in vmap_page_range() is called with the end of the
range as both arguments.  The following patch fixes this for me.

Signed-off-by: Adam Lackorzynski
Cc: Nick Piggin
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1ddb77b..7465f22 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -151,11 +151,12 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
 *
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
 */
-static int vmap_page_range(unsigned long addr, unsigned long end,
+static int vmap_page_range(unsigned long start, unsigned long end,
 				pgprot_t prot, struct page **pages)
 {
 	pgd_t *pgd;
 	unsigned long next;
+	unsigned long addr = start;
 	int err = 0;
 	int nr = 0;

@@ -167,7 +168,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	flush_cache_vmap(addr, end);
+	flush_cache_vmap(start, end);

 	if (unlikely(err))
 		return err;
-- cgit v1.1
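
Editor's note: the bug is easy to see in miniature: the do/while loop advances addr until it equals end, so flushing (addr, end) afterwards flushes an empty range. A small illustrative sketch of the corrected shape; map_one() and the function name are hypothetical helpers, only flush_cache_vmap() is the real kernel interface:

/* Illustrative only: keep the original start around for the final flush. */
static int map_range(unsigned long start, unsigned long end)
{
	unsigned long addr = start;	/* cursor; will equal end after the loop */
	unsigned long next;
	int err = 0;

	do {
		next = addr + PAGE_SIZE;	/* step one unit of the range */
		if (next > end)
			next = end;
		err = map_one(addr, next);	/* hypothetical per-step helper */
		if (err)
			break;
	} while (addr = next, addr != end);

	/* addr == end here, so the flush must use start, not addr. */
	flush_cache_vmap(start, end);
	return err;
}
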
From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sun, 4 Jan 2009 12:00:53 -0800
Subject: fs: symlink write_begin allocation context fix

With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened.  They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim.  This bug could
cause filesystem deadlocks.

The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can
be called.  It couldn't ever be GFP_ATOMIC, for example, because it needs
to take the page lock.  The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.

Add a new flag for write_begin, AOP_FLAG_NOFS.  Filesystems can now act on
this flag in their write_begin function.  Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).

This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in its write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).

[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin
Reviewed-by: KOSAKI Motohiro
Cc: [2.6.28.x]
Signed-off-by: KOSAKI Motohiro
Signed-off-by: Andrew Morton
[ Cleaned up the calling convention: just pass in the AOP flags
  untouched to the grab_cache_page_write_begin() function.  That
  just simplifies everybody, and may even allow future expansion of
  the logic.   - Linus ]
Signed-off-by: Linus Torvalds
---
 mm/filemap.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'mm')

diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f89..f8c6927 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
 * Find or create a page at the given pagecache position. Return the locked
 * page. This function is specifically for buffered writes.
 */
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+					pgoff_t index, unsigned flags)
 {
 	int status;
 	struct page *page;
+	gfp_t gfp_notmask = 0;
+
+	if (flags & AOP_FLAG_NOFS)
+		gfp_notmask = __GFP_FS;
 repeat:
 	page = find_lock_page(mapping, index);
 	if (likely(page))
 		return page;

-	page = page_cache_alloc(mapping);
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
 	if (!page)
 		return NULL;
-	status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	status = add_to_page_cache_lru(page, mapping, index,
+						GFP_KERNEL & ~gfp_notmask);
 	if (unlikely(status)) {
 		page_cache_release(page);
 		if (status == -EEXIST)
@@ -2161,7 +2166,7 @@ repeat:
 	}
 	return page;
 }
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);

 static ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
-- cgit v1.1
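
Editor's note: to illustrate how a filesystem is expected to consume the new flag, here is a hedged sketch of a write_begin method that honours AOP_FLAG_NOFS both for the pagecache page and for its own private allocation. The filesystem name, function, and private context are hypothetical, not taken from any in-tree filesystem:

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/slab.h>

static int examplefs_write_begin(struct file *file, struct address_space *mapping,
				 loff_t pos, unsigned len, unsigned flags,
				 struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	/* AOP_FLAG_NOFS means: do not re-enter the filesystem from reclaim. */
	gfp_t gfp = (flags & AOP_FLAG_NOFS) ? GFP_NOFS : GFP_KERNEL;
	struct page *page;
	void *ctx;

	/* The pagecache allocation respects AOP_FLAG_NOFS internally. */
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	/* Private allocations must use the matching gfp mask as well. */
	ctx = kzalloc(sizeof(unsigned long), gfp);	/* placeholder private context */
	if (!ctx) {
		unlock_page(page);
		page_cache_release(page);
		return -ENOMEM;
	}

	*pagep = page;
	*fsdata = ctx;
	return 0;
}
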
From 7f5ff766a7babd72fc192125e12ef5570effff4c Mon Sep 17 00:00:00 2001
From: Dmitri Monakhov
Date: Mon, 1 Dec 2008 14:34:56 -0800
Subject: kill suid bit only for regular files

We don't have to do it because it is useless for non-regular files.

In fact, a block device may trigger this path without dentry->d_inode->i_mutex.

(akpm: concerns were expressed (by me) about S_ISDIR inodes)

Signed-off-by: Dmitri Monakhov
Signed-off-by: Andrew Morton
Signed-off-by: Al Viro
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f89..ed53ce8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1766,7 +1766,7 @@ int should_remove_suid(struct dentry *dentry)
 	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
 		kill |= ATTR_KILL_SGID;

-	if (unlikely(kill && !capable(CAP_FSETID)))
+	if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
 		return kill;

 	return 0;
-- cgit v1.1

From acfa4380efe77e290d3a96b11cd4c9f24f4fbb18 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Thu, 4 Dec 2008 10:06:33 -0500
Subject: inode->i_op is never NULL

We used to have a rather schizophrenic set of checks for NULL ->i_op even
though it had been eliminated years ago.  You'd need to go out of your way
to set it to NULL explicitly _and_ a bunch of code would die on such
inodes anyway.  After killing two remaining places that still did that
bogosity, all that crap can go away.

Signed-off-by: Al Viro
---
 mm/memory.c | 4 ++--
 mm/nommu.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/memory.c b/mm/memory.c
index 0a2010a..7b9db65 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2266,7 +2266,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	}

-	if (inode->i_op && inode->i_op->truncate)
+	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;

@@ -2286,7 +2286,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 	 * a way to truncate a range of blocks (punch a hole) -
 	 * we should return failure right now.
 	 */
-	if (!inode->i_op || !inode->i_op->truncate_range)
+	if (!inode->i_op->truncate_range)
 		return -ENOSYS;

 	mutex_lock(&inode->i_mutex);
diff --git a/mm/nommu.c b/mm/nommu.c
index 7695dc8..1c28ea3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -86,7 +86,7 @@ do_expand:
 	i_size_write(inode, offset);

 out_truncate:
-	if (inode->i_op && inode->i_op->truncate)
+	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 out_sig:
-- cgit v1.1

From 4c728ef583b3d82266584da5cb068294c09df31e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 22 Dec 2008 21:11:15 +0100
Subject: add a vfs_fsync helper

Fsync currently has a fdatawrite/fdatawait pair around the method call,
and a mutex_lock/unlock of the inode mutex.  All callers of fsync have to
duplicate this, but we have a few and most of them don't quite get it
right.  This patch adds a new vfs_fsync that takes care of this.

It's a little more complicated than usual as ->fsync might get a NULL file
pointer and just a dentry from nfsd, but otherwise gets a file, and we want
to take the mapping and file operations from it when it is there.

Notes on the fsync callers:

 - ecryptfs wasn't calling filemap_fdatawrite / filemap_fdatawait on the
   lower file
 - coda wasn't calling filemap_fdatawrite / filemap_fdatawait on the host
   file, and returning 0 when ->fsync was missing
 - shm wasn't calling either filemap_fdatawrite / filemap_fdatawait nor
   taking i_mutex.  Now given that shared memory doesn't have disk backing,
   not doing anything in fsync seems fine and I left it out of the
   vfs_fsync conversion for now, but in that case we might just not pass it
   through to the lower file at all but just call the no-op
   simple_sync_file directly.

[and now actually export vfs_fsync]

Signed-off-by: Christoph Hellwig
Signed-off-by: Al Viro
---
 mm/msync.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/msync.c b/mm/msync.c
index 144a757..07dae08 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 				(vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			error = do_fsync(file, 0);
+			error = vfs_fsync(file, file->f_path.dentry, 0);
 			fput(file);
 			if (error || start >= end)
 				goto out;
-- cgit v1.1
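
Editor's note: for callers, the conversion mirrors the msync hunk above: instead of open-coding the fdatawrite/fdatawait pair and the i_mutex handling around ->fsync, pass the file and its dentry to the new helper. A hedged sketch of a caller; the wrapper name is hypothetical:

#include <linux/fs.h>

/* Illustrative only: flush a file's data via the new helper. */
static int example_flush_file(struct file *file, int datasync)
{
	/*
	 * Per the description above, vfs_fsync() wraps the ->fsync() call
	 * with the fdatawrite/fdatawait pair and the inode mutex, so the
	 * caller no longer has to duplicate that sequence.
	 */
	return vfs_fsync(file, file->f_path.dentry, datasync);
}
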
From 046c68842bce6b77509cf56e94a561029124b0ce Mon Sep 17 00:00:00 2001
From: Alan Cox
Date: Mon, 5 Jan 2009 14:06:29 +0000
Subject: mm: update my address

Signed-off-by: Alan Cox
Signed-off-by: Linus Torvalds
---
 mm/mmap.c     | 2 +-
 mm/mprotect.c | 2 +-
 mm/mremap.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'mm')

diff --git a/mm/mmap.c b/mm/mmap.c
index d4855a68..2c778fc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3,7 +3,7 @@
 *
 * Written by obz.
 *
- * Address space accounting code
+ * Address space accounting code
 */

 #include
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fded06f..cfb4c48 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -4,7 +4,7 @@
 *  (C) Copyright 1994  Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
- * Address space accounting code
+ * Address space accounting code
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */
diff --git a/mm/mremap.c b/mm/mremap.c
index 58a2908..646de95 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -3,7 +3,7 @@
 *
 *  (C) Copyright 1996  Linus Torvalds
 *
- * Address space accounting code
+ * Address space accounting code
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */
-- cgit v1.1