From c022a0acad534fd5f5d5f17280f6d4d135e74e81 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 4 May 2010 18:03:50 +0200 Subject: rlimits: implement prlimit64 syscall This patch adds the code to support the sys_prlimit64 syscall which modifies-and-returns the rlim values of a selected process atomically. The first parameter, pid, being 0 means current process. Unlike the current implementation, it is a generic interface, architecture indepentent so that we needn't handle compat stuff anymore. In the future, after glibc start to use this we can deprecate sys_setrlimit and sys_getrlimit in favor to clean up the code finally. It also adds a possibility of changing limits of other processes. We check the user's permissions to do that and if it succeeds, the new limits are propagated online. This is good for large scale applications such as SAP or databases where administrators need to change limits time by time (e.g. on crashes increase core size). And it is unacceptable to restart the service. For safety, all rlim users now either use accessors or doesn't need them due to - locking - the fact a process was just forked and nobody else knows about it yet (and nobody can't thus read/write limits) hence it is safe to modify limits now. The limitation is that we currently stay at ulong internal representation. So the rlim64_is_infinity check is used where value is compared against ULONG_MAX on 32-bit which is the maximum value there. And since internally the limits are held in struct rlimit, converters which are used before and after do_prlimit call in sys_prlimit64 are introduced. Signed-off-by: Jiri Slaby --- include/linux/syscalls.h | 4 +++ kernel/sys.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7f614ce..a60943b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -35,6 +35,7 @@ struct oldold_utsname; struct old_utsname; struct pollfd; struct rlimit; +struct rlimit64; struct rusage; struct sched_param; struct sel_arg_struct; @@ -644,6 +645,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r #endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim); +asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, + const struct rlimit64 __user *new_rlim, + struct rlimit64 __user *old_rlim); asmlinkage long sys_getrusage(int who, struct rusage __user *ru); asmlinkage long sys_umask(int mask); diff --git a/kernel/sys.c b/kernel/sys.c index 9da98dd..e9ad444 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1271,6 +1271,39 @@ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, #endif +static inline bool rlim64_is_infinity(__u64 rlim64) +{ +#if BITS_PER_LONG < 64 + return rlim64 >= ULONG_MAX; +#else + return rlim64 == RLIM64_INFINITY; +#endif +} + +static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) +{ + if (rlim->rlim_cur == RLIM_INFINITY) + rlim64->rlim_cur = RLIM64_INFINITY; + else + rlim64->rlim_cur = rlim->rlim_cur; + if (rlim->rlim_max == RLIM_INFINITY) + rlim64->rlim_max = RLIM64_INFINITY; + else + rlim64->rlim_max = rlim->rlim_max; +} + +static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) +{ + if (rlim64_is_infinity(rlim64->rlim_cur)) + rlim->rlim_cur = RLIM_INFINITY; + else + rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; + if (rlim64_is_infinity(rlim64->rlim_max)) + rlim->rlim_max = RLIM_INFINITY; + else + rlim->rlim_max = (unsigned long)rlim64->rlim_max; +} + /* make sure you are allowed to change @tsk limits before calling this */ int do_prlimit(struct task_struct *tsk, unsigned int resource, struct rlimit *new_rlim, struct rlimit *old_rlim) @@ -1336,6 +1369,67 @@ out: return retval; } +/* rcu lock must be held */ +static int check_prlimit_permission(struct task_struct *task) +{ + const struct cred *cred = current_cred(), *tcred; + + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_RESOURCE)) { + return -EPERM; + } + + return 0; +} + +SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, + const struct rlimit64 __user *, new_rlim, + struct rlimit64 __user *, old_rlim) +{ + struct rlimit64 old64, new64; + struct rlimit old, new; + struct task_struct *tsk; + int ret; + + if (new_rlim) { + if (copy_from_user(&new64, new_rlim, sizeof(new64))) + return -EFAULT; + rlim64_to_rlim(&new64, &new); + } + + rcu_read_lock(); + tsk = pid ? find_task_by_vpid(pid) : current; + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + ret = check_prlimit_permission(tsk); + if (ret) { + rcu_read_unlock(); + return ret; + } + get_task_struct(tsk); + rcu_read_unlock(); + + ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, + old_rlim ? &old : NULL); + + if (!ret && old_rlim) { + rlim_to_rlim64(&old, &old64); + if (copy_to_user(old_rlim, &old64, sizeof(old64))) + ret = -EFAULT; + } + + put_task_struct(tsk); + return ret; +} + SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim; -- cgit v1.1