summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2001-09-08 20:02:33 +0000
committerdillon <dillon@FreeBSD.org>2001-09-08 20:02:33 +0000
commitd73b3c59f0f82580650e5b2965bdd3dd4cac7bd5 (patch)
tree5f36b4fb47a0f9f3d486afb708c5f95a6d1f5472 /sys/kern
parentdf61d9eb64550a7afc1b41ccc9e0261af45c98c1 (diff)
downloadFreeBSD-src-d73b3c59f0f82580650e5b2965bdd3dd4cac7bd5.zip
FreeBSD-src-d73b3c59f0f82580650e5b2965bdd3dd4cac7bd5.tar.gz
This brings in a Yahoo coredump patch from Paul, with additional mods by
me (addition of vn_rdwr_inchunks). The problem Yahoo is solving is that if you have large process images dumping core, or you have a large number of forked processes all dumping core at the same time, the original coredump code would leave the vnode locked throughout. This can cause the directory vnode to get locked up, which can cause the parent directory vnode to get locked up, and so on all the way to the root node, locking the entire machine up for extremely long periods of time. This patch solves the problem in two ways. First, it uses an advisory non-blocking lock to abort multiple processes trying to dump core to the same file. Second (my contribution), it chunks up the writes and uses bwillwrite() to avoid holding the vnode locked while blocking in the buffer cache. Submitted by: ps Reviewed by: dillon MFC after: 2 weeks
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/imgact_aout.c8
-rw-r--r--sys/kern/imgact_elf.c9
-rw-r--r--sys/kern/kern_sig.c23
-rw-r--r--sys/kern/vfs_vnops.c39
4 files changed, 67 insertions, 12 deletions
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9994ad5..9aa8b3d 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -264,15 +264,15 @@ aout_coredump(p, vp, limit)
fill_kinfo_proc(p, &p->p_addr->u_kproc);
error = cpu_coredump(p, vp, cred);
if (error == 0)
- error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
+ error = vn_rdwr_inchunks(UIO_WRITE, vp, vm->vm_daddr,
(int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
- IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+ IO_UNIT, cred, (int *) NULL, p);
if (error == 0)
- error = vn_rdwr(UIO_WRITE, vp,
+ error = vn_rdwr_inchunks(UIO_WRITE, vp,
(caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
round_page(ctob(vm->vm_ssize)),
(off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
- IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
+ IO_UNIT, cred, (int *) NULL, p);
return (error);
}
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 6157650..63999d1 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -794,9 +794,10 @@ elf_coredump(p, vp, limit)
php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
offset = hdrsize;
for (i = 0; i < seginfo.count; i++) {
- error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
+ error = vn_rdwr_inchunks(UIO_WRITE, vp,
+ (caddr_t)php->p_vaddr,
php->p_filesz, offset, UIO_USERSPACE,
- IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
+ IO_UNIT, cred, (int *)NULL, p);
if (error != 0)
break;
offset += php->p_filesz;
@@ -958,8 +959,8 @@ elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
free(tempdata, M_TEMP);
/* Write it to the core file. */
- return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
- UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
+ return vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
+ UIO_SYSSPACE, IO_UNIT, cred, NULL, p);
}
static void
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index c64af8f..607c78c 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -68,6 +68,7 @@
#include <sys/sysent.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
+#include <sys/unistd.h>
#include <machine/cpu.h>
@@ -1853,6 +1854,7 @@ coredump(p)
{
register struct vnode *vp;
register struct ucred *cred = p->p_ucred;
+ struct flock lf;
struct nameidata nd;
struct vattr vattr;
int error, error1, flags;
@@ -1895,8 +1897,19 @@ restart:
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
+
+ VOP_UNLOCK(vp, 0, p);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_WRLCK;
+ error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK);
+ if (error)
+ goto out2;
+
if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
- VOP_UNLOCK(vp, 0, p);
+ lf.l_type = F_UNLCK;
+ VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
if ((error = vn_close(vp, FWRITE, cred, p)) != 0)
return (error);
if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
@@ -1908,7 +1921,7 @@ restart:
if (vp->v_type != VREG ||
VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
error = EFAULT;
- goto out;
+ goto out1;
}
VATTR_NULL(&vattr);
vattr.va_size = 0;
@@ -1922,9 +1935,11 @@ restart:
p->p_sysent->sv_coredump(p, vp, limit) :
ENOSYS;
-out:
- VOP_UNLOCK(vp, 0, p);
+out1:
+ lf.l_type = F_UNLCK;
+ VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
vn_finished_write(mp);
+out2:
error1 = vn_close(vp, FWRITE, cred, p);
if (error == 0)
error = error1;
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 1cd6c99..9a6f87a 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -335,6 +335,45 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
}
/*
+ * Package up an I/O request on a vnode into a uio and do it.  The request
+ * is issued through vn_rdwr() in chunks of at most MAXBSIZE bytes; to avoid
+ * saturating the buffer cache while potentially holding a vnode locked,
+ * bwillwrite() is called before each non-read chunk on a VREG vnode.
+ */
+int
+vn_rdwr_inchunks(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+ enum uio_rw rw; /* compared against UIO_READ to decide on bwillwrite() */
+ struct vnode *vp; /* target vnode; its v_type is tested for VREG */
+ caddr_t base; /* buffer address, advanced by each completed chunk */
+ int len; /* total number of bytes requested */
+ off_t offset; /* starting offset, advanced by each completed chunk */
+ enum uio_seg segflg; /* passed through to vn_rdwr() unchanged */
+ int ioflg; /* passed through to vn_rdwr() unchanged */
+ struct ucred *cred; /* passed through to vn_rdwr() unchanged */
+ int *aresid; /* may be NULL; otherwise adjusted after the loop */
+ struct proc *p; /* passed through to vn_rdwr() unchanged */
+{
+ int error = 0;
+
+ do {
+ int chunk = (len > MAXBSIZE) ? MAXBSIZE : len; /* cap at MAXBSIZE */
+
+ if (rw != UIO_READ && vp->v_type == VREG)
+ bwillwrite();
+ error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
+ ioflg, cred, aresid, p);
+ len -= chunk; /* this chunk is already reflected in vn_rdwr()'s aresid */
+ if (error)
+ break; /* offset and base are not advanced past the failed chunk */
+ offset += chunk;
+ base += chunk;
+ } while (len);
+ if (aresid)
+ *aresid += len; /* add the bytes that were never handed to vn_rdwr() */
+ return (error); /* 0, or the error from the chunk that stopped the loop */
+}
+
+/*
* File table vnode read routine.
*/
static int
OpenPOWER on IntegriCloud