From b667f507a0d1bcaa8341f4a46d61715783cde521 Mon Sep 17 00:00:00 2001 From: jhb Date: Mon, 14 May 2007 22:40:04 +0000 Subject: Rework the support for ABIs to override resource limits (used by 32-bit processes under 64-bit kernels). Previously, each 32-bit process overwrote its resource limits at exec() time. The problem with this approach is that the new limits affect all child processes of the 32-bit process, including if the child process forks and execs a 64-bit process. To fix this, don't overwrite the resource limits during exec(). Instead, sv_fixlimits() is now replaced with a different function sv_fixlimit() which asks the ABI to sanitize a single resource limit. We then use this when querying and setting resource limits. Thus, if a 32-bit process sets a limit, then that new limit will be inherited by future children. However, if the 32-bit process doesn't change a limit, then a future 64-bit child will see the "full" 64-bit limit rather than the 32-bit limit. MFC is tentative since it will break the ABI of old linux.ko modules (no other modules are affected). 
MFC after: 1 week --- sys/amd64/linux32/linux32_sysvec.c | 62 +++++++++++++++++--------------------- sys/compat/ia32/ia32_sysvec.c | 60 ++++++++++++++++++------------------ sys/kern/imgact_elf.c | 2 +- sys/kern/kern_exec.c | 9 ------ sys/kern/kern_resource.c | 10 +++--- sys/sys/sysent.h | 4 +-- 6 files changed, 64 insertions(+), 83 deletions(-) (limited to 'sys') diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index d3c4f2e..89fa72a 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -121,7 +121,7 @@ static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings); -static void linux32_fixlimits(struct proc *p); +static void linux32_fixlimit(struct rlimit *rl, int which); extern LIST_HEAD(futex_list, futex) futex_list; extern struct sx futex_sx; @@ -965,42 +965,36 @@ static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); -/* - * XXX copied from ia32_sysvec.c. 
- */ static void -linux32_fixlimits(struct proc *p) +linux32_fixlimit(struct rlimit *rl, int which) { - struct plimit *oldlim, *newlim; - if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && - linux32_maxvmem == 0) - return; - newlim = lim_alloc(); - PROC_LOCK(p); - oldlim = p->p_limit; - lim_copy(newlim, oldlim); - if (linux32_maxdsiz != 0) { - if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) - newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; - if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) - newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; - } - if (linux32_maxssiz != 0) { - if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) - newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; - if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) - newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; - } - if (linux32_maxvmem != 0) { - if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) - newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; - if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) - newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; + switch (which) { + case RLIMIT_DATA: + if (linux32_maxdsiz != 0) { + if (rl->rlim_cur > linux32_maxdsiz) + rl->rlim_cur = linux32_maxdsiz; + if (rl->rlim_max > linux32_maxdsiz) + rl->rlim_max = linux32_maxdsiz; + } + break; + case RLIMIT_STACK: + if (linux32_maxssiz != 0) { + if (rl->rlim_cur > linux32_maxssiz) + rl->rlim_cur = linux32_maxssiz; + if (rl->rlim_max > linux32_maxssiz) + rl->rlim_max = linux32_maxssiz; + } + break; + case RLIMIT_VMEM: + if (linux32_maxvmem != 0) { + if (rl->rlim_cur > linux32_maxvmem) + rl->rlim_cur = linux32_maxvmem; + if (rl->rlim_max > linux32_maxvmem) + rl->rlim_max = linux32_maxvmem; + } + break; } - p->p_limit = newlim; - PROC_UNLOCK(p); - lim_free(oldlim); } struct sysentvec elf_linux_sysvec = { @@ -1029,7 +1023,7 @@ struct sysentvec elf_linux_sysvec = { VM_PROT_ALL, 
linux_copyout_strings, exec_linux_setregs, - linux32_fixlimits + linux32_fixlimit }; static Elf32_Brandinfo linux_brand = { diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index b31400c..436fda3 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -94,7 +94,7 @@ CTASSERT(sizeof(struct ia32_sigframe4) == 408); #endif static register_t *ia32_copyout_strings(struct image_params *imgp); -static void ia32_fixlimits(struct proc *p); +static void ia32_fixlimit(struct rlimit *rl, int which); extern struct sysent freebsd32_sysent[]; @@ -126,7 +126,7 @@ struct sysentvec ia32_freebsd_sysvec = { VM_PROT_ALL, ia32_copyout_strings, ia32_setregs, - ia32_fixlimits + ia32_fixlimit }; @@ -281,35 +281,33 @@ static u_long ia32_maxvmem = IA32_MAXVMEM; SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RW, &ia32_maxvmem, 0, ""); static void -ia32_fixlimits(struct proc *p) +ia32_fixlimit(struct rlimit *rl, int which) { - struct plimit *oldlim, *newlim; - - if (ia32_maxdsiz == 0 && ia32_maxssiz == 0 && ia32_maxvmem == 0) - return; - newlim = lim_alloc(); - PROC_LOCK(p); - oldlim = p->p_limit; - lim_copy(newlim, oldlim); - if (ia32_maxdsiz != 0) { - if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > ia32_maxdsiz) - newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = ia32_maxdsiz; - if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > ia32_maxdsiz) - newlim->pl_rlimit[RLIMIT_DATA].rlim_max = ia32_maxdsiz; - } - if (ia32_maxssiz != 0) { - if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > ia32_maxssiz) - newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = ia32_maxssiz; - if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > ia32_maxssiz) - newlim->pl_rlimit[RLIMIT_STACK].rlim_max = ia32_maxssiz; - } - if (ia32_maxvmem != 0) { - if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > ia32_maxvmem) - newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = ia32_maxvmem; - if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > ia32_maxvmem) - newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = ia32_maxvmem; + + switch 
(which) { + case RLIMIT_DATA: + if (ia32_maxdsiz != 0) { + if (rl->rlim_cur > ia32_maxdsiz) + rl->rlim_cur = ia32_maxdsiz; + if (rl->rlim_max > ia32_maxdsiz) + rl->rlim_max = ia32_maxdsiz; + } + break; + case RLIMIT_STACK: + if (ia32_maxssiz != 0) { + if (rl->rlim_cur > ia32_maxssiz) + rl->rlim_cur = ia32_maxssiz; + if (rl->rlim_max > ia32_maxssiz) + rl->rlim_max = ia32_maxssiz; + } + break; + case RLIMIT_VMEM: + if (ia32_maxvmem != 0) { + if (rl->rlim_cur > ia32_maxvmem) + rl->rlim_cur = ia32_maxvmem; + if (rl->rlim_max > ia32_maxvmem) + rl->rlim_max = ia32_maxvmem; + } + break; } - p->p_limit = newlim; - PROC_UNLOCK(p); - lim_free(oldlim); } diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 09d7d5f..b992f2b 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -667,6 +667,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) VOP_UNLOCK(imgp->vp, 0, td); exec_new_vmspace(imgp, sv); + imgp->proc->p_sysent = sv; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td); @@ -784,7 +785,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) imgp->entry_addr = entry; - imgp->proc->p_sysent = sv; if (interp != NULL) { VOP_UNLOCK(imgp->vp, 0, td); if (brand_info->emul_path != NULL && diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index bbe12bf..65ee108b 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -898,15 +898,6 @@ exec_new_vmspace(imgp, sv) EVENTHANDLER_INVOKE(process_exec, p, imgp); /* - * Here is as good a place as any to do any resource limit cleanups. - * This is needed if a 64 bit binary exec's a 32 bit binary - the - * data size limit may need to be changed to a value that makes - * sense for the 32 bit binary. 
- */ - if (sv->sv_fixlimits != NULL) - sv->sv_fixlimits(p); - - /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 32c27ed..647cee8 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -701,6 +701,8 @@ kern_setrlimit(td, which, limp) limp->rlim_max = 1; break; } + if (td->td_proc->p_sysent->sv_fixlimit != NULL) + td->td_proc->p_sysent->sv_fixlimit(limp, which); *alimp = *limp; p->p_limit = newlim; PROC_UNLOCK(p); @@ -734,12 +736,6 @@ kern_setrlimit(td, which, limp) } } - /* - * The data size limit may need to be changed to a value - * that makes sense for the 32 bit binary. - */ - if (p->p_sysent->sv_fixlimits != NULL) - p->p_sysent->sv_fixlimits(p); return (0); } @@ -1062,6 +1058,8 @@ lim_rlimit(struct proc *p, int which, struct rlimit *rlp) KASSERT(which >= 0 && which < RLIM_NLIMITS, ("request for invalid resource limit")); *rlp = p->p_limit->pl_rlimit[which]; + if (p->p_sysent->sv_fixlimit != NULL) + p->p_sysent->sv_fixlimit(rlp, which); } /* diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index edf1717..ba8fc4b 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -34,7 +34,7 @@ #include -struct proc; +struct rlimit; struct sysent; struct thread; struct ksiginfo; @@ -98,7 +98,7 @@ struct sysentvec { int sv_stackprot; /* vm protection for stack */ register_t *(*sv_copyout_strings)(struct image_params *); void (*sv_setregs)(struct thread *, u_long, u_long, u_long); - void (*sv_fixlimits)(struct proc *); + void (*sv_fixlimit)(struct rlimit *, int); }; #ifdef _KERNEL -- cgit v1.1