From 2bc4657c15e4a33d9a192579c7627a397dbcbebc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 3 Apr 2009 02:51:10 -0700 Subject: sysctl ipc: Remove dead binary sysctl support code. Now that sys_sysctl is a generic wrapper around /proc/sys .ctl_name and .strategy members of sysctl tables are dead code. Remove them. Signed-off-by: Eric W. Biederman --- ipc/ipc_sysctl.c | 77 -------------------------------------------------------- ipc/mq_sysctl.c | 7 +++--- 2 files changed, 3 insertions(+), 81 deletions(-) (limited to 'ipc') diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 7d37047..56410fa 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -129,136 +129,60 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipcauto_dointvec_minmax NULL #endif -#ifdef CONFIG_SYSCTL_SYSCALL -/* The generic sysctl ipc data routine. */ -static int sysctl_ipc_data(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - void *data; - - /* Get out of I don't have a variable */ - if (!table->data || !table->maxlen) - return -ENOTDIR; - - data = get_ipc(table); - if (!data) - return -ENOTDIR; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, data, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - if (newval && newlen) { - if (newlen > table->maxlen) - newlen = table->maxlen; - - if (copy_from_user(data, newval, newlen)) - return -EFAULT; - } - return 1; -} - -static int sysctl_ipc_registered_data(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int rc; - - rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen); - - if (newval && newlen && rc > 0) - /* - * Tunable has successfully been changed from userland - */ - unregister_ipcns_notifier(current->nsproxy->ipc_ns); - - return rc; -} -#else -#define sysctl_ipc_data NULL -#define sysctl_ipc_registered_data NULL -#endif - static int zero; static int one = 1; static struct ctl_table ipc_kern_table[] = { { - .ctl_name = KERN_SHMMAX, .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, .maxlen = sizeof (init_ipc_ns.shm_ctlmax), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SHMALL, .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, .maxlen = sizeof (init_ipc_ns.shm_ctlall), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SHMMNI, .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, .maxlen = sizeof (init_ipc_ns.shm_ctlmni), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_MSGMAX, .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_MSGMNI, .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, .proc_handler = proc_ipc_callback_dointvec, - .strategy = sysctl_ipc_registered_data, }, { - .ctl_name = KERN_MSGMNB, .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = KERN_SEM, .procname = "sem", .data = &init_ipc_ns.sem_ctls, .maxlen = 4*sizeof (int), .mode = 0644, .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "auto_msgmni", .data = &init_ipc_ns.auto_msgmni, .maxlen = sizeof(int), @@ -272,7 +196,6 @@ static struct ctl_table ipc_kern_table[] = { static struct ctl_table ipc_root_table[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = ipc_kern_table, diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 8a05871..0c09366 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -88,7 +88,7 @@ static ctl_table mq_sysctls[] = { .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, - { .ctl_name = 0 } + {} }; static ctl_table mq_sysctl_dir[] = { @@ -97,17 +97,16 @@ static ctl_table mq_sysctl_dir[] = { .mode = 0555, .child = mq_sysctls, }, - { .ctl_name = 0 } + {} }; static ctl_table mq_sysctl_root[] = { { - .ctl_name = CTL_FS, .procname = "fs", .mode = 0555, .child = mq_sysctl_dir, }, - { .ctl_name = 0 } + {} }; struct ctl_table_header *mq_register_sysctl_table(void) -- cgit v1.1 From f1970c48ef06ece4e23765501976507ab52b0edd Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 19 Oct 2009 01:54:29 +0300 Subject: ipc: fix unused variable warning Commit a0d092f introduced the following warning: ipc/msg.c: In function ?msgctl_down?: ipc/msg.c:415: warning: ?msqid64? may be used uninitialized in this function The gcc warning in this case is actually bogus, as msqid64 is touched only iff cmd == IPC_SET, and in such case, copy_msqid_from_user() initializes it properly. Signed-off-by: Felipe Contreras Signed-off-by: Jiri Kosina --- ipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index 2ceab7f..085bd58 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -412,7 +412,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, struct msqid_ds __user *buf, int version) { struct kern_ipc_perm *ipcp; - struct msqid64_ds msqid64; + struct msqid64_ds uninitialized_var(msqid64); struct msg_queue *msq; int err; -- cgit v1.1 From c4caa778157dbbf04116f0ac2111e389b5cd7a29 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Nov 2009 08:38:43 -0500 Subject: file ->get_unmapped_area() shouldn't duplicate work of get_unmapped_area() ... we should call mm ->get_unmapped_area() instead and let our caller do the final checks. Acked-by: David S. Miller Signed-off-by: Al Viro --- ipc/shm.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 464694e..11bec62 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -290,28 +290,28 @@ static unsigned long shm_get_unmapped_area(struct file *file, unsigned long flags) { struct shm_file_data *sfd = shm_file_data(file); - return get_unmapped_area(sfd->file, addr, len, pgoff, flags); -} - -int is_file_shm_hugepages(struct file *file) -{ - int ret = 0; - - if (file->f_op == &shm_file_operations) { - struct shm_file_data *sfd; - sfd = shm_file_data(file); - ret = is_file_hugepages(sfd->file); - } - return ret; + return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, + pgoff, flags); } static const struct file_operations shm_file_operations = { .mmap = shm_mmap, .fsync = shm_fsync, .release = shm_release, +}; + +static const struct file_operations shm_file_operations_huge = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, }; +int is_file_shm_hugepages(struct file *file) +{ + return file->f_op == &shm_file_operations_huge; +} + static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ @@ -889,7 +889,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (!sfd) goto out_put_dentry; - file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); + file = alloc_file(path.mnt, path.dentry, f_mode, + is_file_hugepages(shp->shm_file) ? + &shm_file_operations_huge : + &shm_file_operations); if (!file) goto out_free; ima_counts_get(file); -- cgit v1.1 From 7d6feeb287c61aafa88f06345387b1188edf4b86 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 15 Dec 2009 16:47:27 -0800 Subject: ipc ns: fix memory leak (idr) We have apparently had a memory leak since 7ca7e564e049d8b350ec9d958ff25eaa24226352 "ipc: store ipcs into IDRs" in 2007. The idr of which 3 exist for each ipc namespace is never freed. This patch simply frees them when the ipcns is freed. I don't believe any idr_remove() are done from rcu (and could therefore be delayed until after this idr_destroy()), so the patch should be safe. Some quick testing showed no harm, and the memory leak fixed. Caught by kmemleak. Signed-off-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msg.c | 1 + ipc/sem.c | 1 + ipc/shm.c | 1 + 3 files changed, 3 insertions(+) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index 085bd58..af42ef8 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -125,6 +125,7 @@ void msg_init_ns(struct ipc_namespace *ns) void msg_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &msg_ids(ns), freeque); + idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); } #endif diff --git a/ipc/sem.c b/ipc/sem.c index 87c2b64..2f2a479 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -129,6 +129,7 @@ void sem_init_ns(struct ipc_namespace *ns) void sem_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &sem_ids(ns), freeary); + idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); } #endif diff --git a/ipc/shm.c b/ipc/shm.c index 11bec62..e9b039f 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -101,6 +101,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) void shm_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &shm_ids(ns), do_shm_rmid); + idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); } #endif -- cgit v1.1 From bf17bb717759d50a2733a7a8157a7c4a25d93abc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 15 Dec 2009 16:47:28 -0800 Subject: ipc/sem.c: sem optimise undo list search Around a month ago, there was some discussion about an improvement of the sysv sem algorithm: Most (at least: some important) users only use simple semaphore operations, therefore it's worthwile to optimize this use case. This patch: Move last looked up sem_undo struct to the head of the task's undo list. Attempt to move common entries to the front of the list so search time is reduced. This reduces lookup_undo on oprofile of problematic SAP workload by 30% (see patch 4 for a description of SAP workload). Signed-off-by: Nick Piggin Signed-off-by: Manfred Spraul Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 2f2a479..cb0070e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -962,17 +962,31 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) return 0; } -static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) +static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo *walk; + struct sem_undo *un; - list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) { - if (walk->semid == semid) - return walk; + list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { + if (un->semid == semid) + return un; } return NULL; } +static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) +{ + struct sem_undo *un; + + assert_spin_locked(&ulp->lock); + + un = __lookup_undo(ulp, semid); + if (un) { + list_del_rcu(&un->list_proc); + list_add_rcu(&un->list_proc, &ulp->list_proc); + } + return un; +} + /** * find_alloc_undo - Lookup (and if not present create) undo array * @ns: namespace @@ -1308,7 +1322,7 @@ void exit_sem(struct task_struct *tsk) if (IS_ERR(sma)) continue; - un = lookup_undo(ulp, semid); + un = __lookup_undo(ulp, semid); if (un == NULL) { /* exit_sem raced with IPC_RMID+semget() that created * exactly the same semid. Nothing to do. -- cgit v1.1 From 9cad200c7686708b326520a45dd680a4147568a6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 15 Dec 2009 16:47:29 -0800 Subject: ipc/sem.c: sem use list operations Replace the handcoded list operations in update_queue() with the standard list_for_each_entry macros. list_for_each_entry_safe() must be used, because list entries can disappear immediately uppon the wakeup event. Signed-off-by: Nick Piggin Signed-off-by: Manfred Spraul Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 75 ++++++++++++++++++++++++++------------------------------------- 1 file changed, 31 insertions(+), 44 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index cb0070e..d377b3a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -403,58 +403,45 @@ undo: */ static void update_queue (struct sem_array * sma) { - int error; - struct sem_queue * q; + struct sem_queue *q, *tq; + +again: + list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + int error; + int alter; - q = list_entry(sma->sem_pending.next, struct sem_queue, list); - while (&q->list != &sma->sem_pending) { error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ - if (error <= 0) { - struct sem_queue *n; + if (error > 0) + continue; - /* - * Continue scanning. The next operation - * that must be checked depends on the type of the - * completed operation: - * - if the operation modified the array, then - * restart from the head of the queue and - * check for threads that might be waiting - * for semaphore values to become 0. - * - if the operation didn't modify the array, - * then just continue. - * The order of list_del() and reading ->next - * is crucial: In the former case, the list_del() - * must be done first [because we might be the - * first entry in ->sem_pending], in the latter - * case the list_del() must be done last - * [because the list is invalid after the list_del()] - */ - if (q->alter) { - list_del(&q->list); - n = list_entry(sma->sem_pending.next, - struct sem_queue, list); - } else { - n = list_entry(q->list.next, struct sem_queue, - list); - list_del(&q->list); - } + list_del(&q->list); - /* wake up the waiting thread */ - q->status = IN_WAKEUP; + /* + * The next operation that must be checked depends on the type + * of the completed operation: + * - if the operation modified the array, then restart from the + * head of the queue and check for threads that might be + * waiting for semaphore values to become 0. + * - if the operation didn't modify the array, then just + * continue. + */ + alter = q->alter; + + /* wake up the waiting thread */ + q->status = IN_WAKEUP; - wake_up_process(q->sleeper); - /* hands-off: q will disappear immediately after - * writing q->status. - */ - smp_wmb(); - q->status = error; - q = n; - } else { - q = list_entry(q->list.next, struct sem_queue, list); - } + wake_up_process(q->sleeper); + /* hands-off: q will disappear immediately after + * writing q->status. + */ + smp_wmb(); + q->status = error; + + if (alter) + goto again; } } -- cgit v1.1 From d4212093dca95c1f52197017d969cc66d5d962aa Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 15 Dec 2009 16:47:30 -0800 Subject: ipc/sem.c: sem preempt improve The strange sysv semaphore wakeup scheme has a kind of busy-wait lock involved, which could deadlock if preemption is enabled during the "lock". It is an implementation detail (due to a spinlock being held) that this is actually the case. However if "spinlocks" are made preemptible, or if the sem lock is changed to a sleeping lock for example, then the wakeup would become buggy. So this might be a bugfix for -rt kernels. Imagine waker being preempted by wakee and never clearing IN_WAKEUP -- if wakee has higher RT priority then there is a priority inversion deadlock. Even if there is not a priority inversion to cause a deadlock, then there is still time wasted spinning. Signed-off-by: Nick Piggin Signed-off-by: Manfred Spraul Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index d377b3a..2705fbb 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -398,6 +398,27 @@ undo: return result; } +/* + * Wake up a process waiting on the sem queue with a given error. + * The queue is invalid (may not be accessed) after the function returns. + */ +static void wake_up_sem_queue(struct sem_queue *q, int error) +{ + /* + * Hold preempt off so that we don't get preempted and have the + * wakee busy-wait until we're scheduled back on. We're holding + * locks here so it may not strictly be needed, however if the + * locks become preemptible then this prevents such a problem. + */ + preempt_disable(); + q->status = IN_WAKEUP; + wake_up_process(q->sleeper); + /* hands-off: q can disappear immediately after writing q->status. */ + smp_wmb(); + q->status = error; + preempt_enable(); +} + /* Go through the pending queue for the indicated semaphore * looking for tasks that can be completed. */ @@ -429,17 +450,7 @@ again: * continue. */ alter = q->alter; - - /* wake up the waiting thread */ - q->status = IN_WAKEUP; - - wake_up_process(q->sleeper); - /* hands-off: q will disappear immediately after - * writing q->status. - */ - smp_wmb(); - q->status = error; - + wake_up_sem_queue(q, error); if (alter) goto again; } @@ -523,10 +534,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { list_del(&q->list); - q->status = IN_WAKEUP; - wake_up_process(q->sleeper); /* doesn't sleep */ - smp_wmb(); - q->status = -EIDRM; /* hands-off q */ + wake_up_sem_queue(q, -EIDRM); } /* Remove the semaphore set from the IDR */ -- cgit v1.1 From b6e90822e77cd476c18410f7003197d466e36ac6 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 15 Dec 2009 16:47:31 -0800 Subject: ipc/sem.c: optimize if semops fail Reduce the amount of scanning of the list of pending semaphore operations: If try_atomic_semop failed, then no changes were applied. Thus no need to restart. Additionally, this patch correct an incorrect comment: It's possible to wait for arbitrary semaphore values (do a dec by , wait-for-zero, inc by in one atomic operation) Both changes are from Nick Piggin, the patch is the result of a different split of the individual changes. Signed-off-by: Manfred Spraul Cc: Nick Piggin Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 2705fbb..eac3f46 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -445,13 +445,13 @@ again: * of the completed operation: * - if the operation modified the array, then restart from the * head of the queue and check for threads that might be - * waiting for semaphore values to become 0. + * waiting for the new semaphore values. * - if the operation didn't modify the array, then just * continue. */ alter = q->alter; wake_up_sem_queue(q, error); - if (alter) + if (alter && !error) goto again; } } -- cgit v1.1 From b97e820ffffbf49e94ed60c9c26f1a54bccae924 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 15 Dec 2009 16:47:32 -0800 Subject: ipc/sem.c: add a per-semaphore pending list Based on Nick's findings: sysv sem has the concept of semaphore arrays that consist out of multiple semaphores. Atomic operations that affect multiple semaphores are supported. The patch is the first step for optimizing simple, single semaphore operations: In addition to the global list of all pending operations, a 2nd, per-semaphore list with the simple operations is added. Note: this patch does not make sense by itself, the new list is used nowhere. Signed-off-by: Manfred Spraul Cc: Nick Piggin Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index eac3f46..4252a8e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -241,6 +241,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) key_t key = params->key; int nsems = params->u.nsems; int semflg = params->flg; + int i; if (!nsems) return -EINVAL; @@ -273,6 +274,11 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) ns->used_sems += nsems; sma->sem_base = (struct sem *) &sma[1]; + + for (i = 0; i < nsems; i++) + INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); + + sma->complex_count = 0; INIT_LIST_HEAD(&sma->sem_pending); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; @@ -419,6 +425,15 @@ static void wake_up_sem_queue(struct sem_queue *q, int error) preempt_enable(); } +static void unlink_queue(struct sem_array *sma, struct sem_queue *q) +{ + list_del(&q->list); + if (q->nsops == 1) + list_del(&q->simple_list); + else + sma->complex_count--; +} + /* Go through the pending queue for the indicated semaphore * looking for tasks that can be completed. */ @@ -438,7 +453,7 @@ again: if (error > 0) continue; - list_del(&q->list); + unlink_queue(sma, q); /* * The next operation that must be checked depends on the type @@ -532,8 +547,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Wake up all pending processes and let them fail with EIDRM. */ list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { - list_del(&q->list); - + unlink_queue(sma, q); wake_up_sem_queue(q, -EIDRM); } @@ -1191,6 +1205,19 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, else list_add(&queue.list, &sma->sem_pending); + if (nsops == 1) { + struct sem *curr; + curr = &sma->sem_base[sops->sem_num]; + + if (alter) + list_add_tail(&queue.simple_list, &curr->sem_pending); + else + list_add(&queue.simple_list, &curr->sem_pending); + } else { + INIT_LIST_HEAD(&queue.simple_list); + sma->complex_count++; + } + queue.status = -EINTR; queue.sleeper = current; current->state = TASK_INTERRUPTIBLE; @@ -1232,7 +1259,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, */ if (timeout && jiffies_left == 0) error = -EAGAIN; - list_del(&queue.list); + unlink_queue(sma, &queue); out_unlock_free: sem_unlock(sma); @@ -1375,7 +1402,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) struct sem_array *sma = it; return seq_printf(s, - "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", + "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, sma->sem_perm.id, sma->sem_perm.mode, -- cgit v1.1 From 636c6be823870d829b37fc96655bb8820a6a9be9 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 15 Dec 2009 16:47:33 -0800 Subject: ipc/sem.c: optimize single semop operations sysv sem has the concept of semaphore arrays that consist out of multiple semaphores. Atomic operations that affect multiple semaphores are supported. The patch optimizes single semaphore operation calls that affect only one semaphore: It's not necessary to scan all pending operations, it is sufficient to scan the per-semaphore list. The idea is from Nick Piggin version of an ipc sem improvement, the implementation is different: The code tries to keep as much common code as possible. As the result, the patch is simpler, but optimizes fewer cases. Signed-off-by: Manfred Spraul Cc: Nick Piggin Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 51 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 11 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 4252a8e..7eb6f04 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -434,17 +434,45 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) sma->complex_count--; } -/* Go through the pending queue for the indicated semaphore - * looking for tasks that can be completed. + +/** + * update_queue(sma, semnum): Look for tasks that can be completed. + * @sma: semaphore array. + * @semnum: semaphore that was modified. + * + * update_queue must be called after a semaphore in a semaphore array + * was modified. If multiple semaphore were modified, then @semnum + * must be set to -1. */ -static void update_queue (struct sem_array * sma) +static void update_queue(struct sem_array *sma, int semnum) { - struct sem_queue *q, *tq; + struct sem_queue *q; + struct list_head *walk; + struct list_head *pending_list; + int offset; + + /* if there are complex operations around, then knowing the semaphore + * that was modified doesn't help us. Assume that multiple semaphores + * were modified. + */ + if (sma->complex_count) + semnum = -1; + + if (semnum == -1) { + pending_list = &sma->sem_pending; + offset = offsetof(struct sem_queue, list); + } else { + pending_list = &sma->sem_base[semnum].sem_pending; + offset = offsetof(struct sem_queue, simple_list); + } again: - list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { - int error; - int alter; + walk = pending_list->next; + while (walk != pending_list) { + int error, alter; + + q = (struct sem_queue *)((char *)walk - offset); + walk = walk->next; error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); @@ -769,7 +797,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, -1); err = 0; goto out_unlock; } @@ -811,7 +839,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, semnum); err = 0; goto out_unlock; } @@ -1187,7 +1215,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); if (error <= 0) { if (alter && error == 0) - update_queue (sma); + update_queue(sma, (nsops == 1) ? sops[0].sem_num : -1); + goto out_unlock_free; } @@ -1388,7 +1417,7 @@ void exit_sem(struct task_struct *tsk) } sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ - update_queue(sma); + update_queue(sma, -1); sem_unlock(sma); call_rcu(&un->rcu, free_un); -- cgit v1.1 From d987f8b213f2cdcc52b2ca9ee67161516e4d256a Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Tue, 15 Dec 2009 16:47:34 -0800 Subject: ipc/sem.c: optimize single sops when semval is zero If multiple simple decrements on the same semaphore are pending, then the current code scans all decrement operations, even if the semaphore value is already 0. The patch optimizes that: if the semaphore value is 0, then there is no need to scan the q->alter entries. Note that this is a common case: It happens if 100 decrements by one are pending and now an increment by one increases the semaphore value from 0 to 1. Without this patch, all 100 entries are scanned. With the patch, only one entry is scanned, then woken up. Then the new rule triggers and the scanning is aborted, without looking at the remaining 99 tasks. With this patch, single sop increment/decrement by 1 are now O(1). (same as with Nick's patch) Signed-off-by: Manfred Spraul Cc: Nick Piggin Cc: Pierre Peiffer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 7eb6f04..82518d6 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -474,6 +474,17 @@ again: q = (struct sem_queue *)((char *)walk - offset); walk = walk->next; + /* If we are scanning the single sop, per-semaphore list of + * one semaphore and that semaphore is 0, then it is not + * necessary to scan the "alter" entries: simple increments + * that affect only one entry succeed immediately and cannot + * be in the per semaphore pending queue, and decrements + * cannot be successful if the value is already 0. + */ + if (semnum != -1 && sma->sem_base[semnum].semval == 0 && + q->alter) + break; + error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); -- cgit v1.1 From e5cc9c7b1a3e7ae4d700d9fce168fb597bcfe9b6 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 15 Dec 2009 16:47:35 -0800 Subject: ipc: remove unreachable code in sem.c This line is unreachable, remove it. [akpm@linux-foundation.org: remove unneeded initialisation of `err'] Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 82518d6..dbef95b 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -624,7 +624,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, static int semctl_nolock(struct ipc_namespace *ns, int semid, int cmd, int version, union semun arg) { - int err = -EINVAL; + int err; struct sem_array *sma; switch(cmd) { @@ -701,7 +701,6 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, default: return -EINVAL; } - return err; out_unlock: sem_unlock(sma); return err; -- cgit v1.1 From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Aug 2009 00:52:35 +0400 Subject: switch alloc_file() to passing struct path ... and have the caller grab both mnt and dentry; kill leak in infiniband, while we are at it. Signed-off-by: Al Viro --- ipc/shm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 11bec62..16e3923 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -878,8 +878,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (err) goto out_unlock; - path.dentry = dget(shp->shm_file->f_path.dentry); - path.mnt = shp->shm_file->f_path.mnt; + path = shp->shm_file->f_path; + path_get(&path); shp->shm_nattch++; size = i_size_read(path.dentry->d_inode); shm_unlock(shp); @@ -889,8 +889,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) if (!sfd) goto out_put_dentry; - file = alloc_file(path.mnt, path.dentry, f_mode, - is_file_hugepages(shp->shm_file) ? + file = alloc_file(&path, f_mode, + is_file_hugepages(shp->shm_file) ? &shm_file_operations_huge : &shm_file_operations); if (!file) @@ -950,7 +950,7 @@ out_unlock: out_free: kfree(sfd); out_put_dentry: - dput(path.dentry); + path_put(&path); goto out_nattch; } -- cgit v1.1 From 0552f879d45cecc35d8e372a591fc5ed863bca58 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Dec 2009 04:53:03 -0500 Subject: Untangling ima mess, part 1: alloc_file() There are 2 groups of alloc_file() callers: * ones that are followed by ima_counts_get * ones giving non-regular files So let's pull that ima_counts_get() into alloc_file(); it's a no-op in case of non-regular files. Signed-off-by: Al Viro --- ipc/shm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 16e3923..02620fa 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -39,7 +39,6 @@ #include #include #include -#include #include @@ -895,7 +894,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) &shm_file_operations); if (!file) goto out_free; - ima_counts_get(file); file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; -- cgit v1.1 From b65a9cfc2c38eebc33533280b8ad5841caee8b6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Dec 2009 06:27:40 -0500 Subject: Untangling ima mess, part 2: deal with counters * do ima_get_count() in __dentry_open() * stop doing that in followups * move ima_path_check() to right after nameidata_to_filp() * don't bump counters on it Signed-off-by: Al Viro --- ipc/mqueue.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'ipc') diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ee9d697..c79bd57 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include "util.h" @@ -734,7 +733,6 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, error = PTR_ERR(filp); goto out_putfd; } - ima_counts_get(filp); fd_install(fd, filp); goto out_upsem; -- cgit v1.1 From ed5e5894b234ce4793d78078c026915b853e0678 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:32 -0800 Subject: nommu: fix SYSV SHM for NOMMU Commit c4caa778157dbbf04116f0ac2111e389b5cd7a29 ("file ->get_unmapped_area() shouldn't duplicate work of get_unmapped_area()") broke SYSV SHM for NOMMU by taking away the pointer to shm_get_unmapped_area() from shm_file_operations. Put it back conditionally on CONFIG_MMU=n. file->f_ops->get_unmapped_area() is used to find out the base address for a mapping of a mappable chardev device or mappable memory-based file (such as a ramfs file). It needs to be called prior to file->f_ops->mmap() being called. Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 92fe923..23256b8 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -298,6 +298,9 @@ static const struct file_operations shm_file_operations = { .mmap = shm_mmap, .fsync = shm_fsync, .release = shm_release, +#ifndef CONFIG_MMU + .get_unmapped_area = shm_get_unmapped_area, +#endif }; static const struct file_operations shm_file_operations_huge = { -- cgit v1.1