summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/conf/NOTES9
-rw-r--r--sys/conf/options1
-rw-r--r--sys/kern/kern_ktrace.c673
-rw-r--r--sys/sys/ktrace.h28
4 files changed, 472 insertions, 239 deletions
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 9b945a5..2cada40 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -216,9 +216,16 @@ options DDB_UNATTENDED
options GDB_REMOTE_CHAT
#
-# KTRACE enables the system-call tracing facility ktrace(2).
+# KTRACE enables the system-call tracing facility ktrace(2). To be more
+# SMP-friendly, KTRACE uses a worker thread to process most trace events
+# asynchronously to the thread generating the event. This requires a
+# pre-allocated store of objects representing trace events. The
+# KTRACE_REQUEST_POOL option specifies the initial size of this store.
+# The size of the pool can be adjusted both at boottime and runtime via
+# the kern.ktrace_request_pool tunable and sysctl.
#
options KTRACE #kernel tracing
+options KTRACE_REQUEST_POOL=101
#
# KTR is a kernel tracing mechanism imported from BSD/OS. Currently it
diff --git a/sys/conf/options b/sys/conf/options
index cc09da6..b339496 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -77,6 +77,7 @@ GDB_REMOTE_CHAT opt_ddb.h
NODEVFS opt_devfs.h
HW_WDOG
KTRACE
+KTRACE_REQUEST_POOL opt_ktrace.h # Size of ktrace request pool
LIBICONV
MD_ROOT opt_md.h
MD_ROOT_SIZE opt_md.h
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index 92560fc..b71f695 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -38,232 +38,420 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
-#include <sys/sysproto.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/fcntl.h>
+#include <sys/malloc.h>
#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
-#include <sys/malloc.h>
+#include <sys/sema.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <sys/jail.h>
+#include <sys/sysproto.h>
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
#ifdef KTRACE
-static struct ktr_header *ktrgetheader(int type);
-static void ktrwrite(struct vnode *, struct ktr_header *, struct uio *);
-static int ktrcanset(struct thread *, struct proc *);
-static int ktrsetchildren(struct thread *, struct proc *, int, int, struct vnode *);
-static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);
+#ifndef KTRACE_REQUEST_POOL
+#define KTRACE_REQUEST_POOL 100
+#endif
+
+struct ktr_request {
+ struct ktr_header ktr_header;
+ struct ucred *ktr_cred;
+ struct vnode *ktr_vp;
+ union {
+ struct ktr_syscall ktr_syscall;
+ struct ktr_sysret ktr_sysret;
+ struct ktr_genio ktr_genio;
+ struct ktr_psig ktr_psig;
+ struct ktr_csw ktr_csw;
+ } ktr_data;
+ int ktr_synchronous;
+ STAILQ_ENTRY(ktr_request) ktr_list;
+};
+
+static int data_lengths[] = {
+ 0, /* none */
+ offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
+ sizeof(struct ktr_sysret), /* KTR_SYSRET */
+ 0, /* KTR_NAMEI */
+ sizeof(struct ktr_genio), /* KTR_GENIO */
+ sizeof(struct ktr_psig), /* KTR_PSIG */
+ sizeof(struct ktr_csw), /* KTR_CSW */
+ 0 /* KTR_USER */
+};
+
+static STAILQ_HEAD(, ktr_request) ktr_todo;
+static STAILQ_HEAD(, ktr_request) ktr_free;
+
+static uint ktr_requestpool = KTRACE_REQUEST_POOL;
+TUNABLE_INT("kern.ktrace_request_pool", &ktr_requestpool);
+
+static int print_message = 1;
+struct mtx ktrace_mtx;
+static struct sema ktrace_sema;
+
+static void ktrace_init(void *dummy);
+static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
+static uint ktrace_resize_pool(uint newsize);
+static struct ktr_request *ktr_getrequest(int type);
+static void ktr_submitrequest(struct ktr_request *req);
+static void ktr_freerequest(struct ktr_request *req);
+static void ktr_loop(void *dummy);
+static void ktr_writerequest(struct ktr_request *req);
+static int ktrcanset(struct thread *,struct proc *);
+static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
+static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
+
+static void
+ktrace_init(void *dummy)
+{
+ struct ktr_request *req;
+ int i;
+
+ mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
+ sema_init(&ktrace_sema, 0, "ktrace");
+ STAILQ_INIT(&ktr_todo);
+ STAILQ_INIT(&ktr_free);
+ for (i = 0; i < ktr_requestpool; i++) {
+ req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
+ STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
+ }
+ kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, "ktrace");
+}
+SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
+
+static int
+sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
+{
+ struct thread *td;
+ uint newsize, oldsize, wantsize;
+ int error;
+
+ /* Handle easy read-only case first to avoid warnings from GCC. */
+ if (!req->newptr) {
+ mtx_lock(&ktrace_mtx);
+ oldsize = ktr_requestpool;
+ mtx_unlock(&ktrace_mtx);
+ return (SYSCTL_OUT(req, &oldsize, sizeof(uint)));
+ }
+
+ error = SYSCTL_IN(req, &wantsize, sizeof(uint));
+ if (error)
+ return (error);
+ td = curthread;
+ td->td_inktrace = 1;
+ mtx_lock(&ktrace_mtx);
+ oldsize = ktr_requestpool;
+ newsize = ktrace_resize_pool(wantsize);
+ mtx_unlock(&ktrace_mtx);
+ td->td_inktrace = 0;
+ error = SYSCTL_OUT(req, &oldsize, sizeof(uint));
+ if (error)
+ return (error);
+ if (newsize != wantsize)
+ return (ENOSPC);
+ return (0);
+}
+SYSCTL_PROC(_kern, OID_AUTO, ktrace_request_pool, CTLTYPE_UINT|CTLFLAG_RW,
+ &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");
+
+static uint
+ktrace_resize_pool(uint newsize)
+{
+ struct ktr_request *req;
+
+ mtx_assert(&ktrace_mtx, MA_OWNED);
+ print_message = 1;
+ if (newsize == ktr_requestpool)
+ return (newsize);
+ if (newsize < ktr_requestpool)
+ /* Shrink pool down to newsize if possible. */
+ while (ktr_requestpool > newsize) {
+ req = STAILQ_FIRST(&ktr_free);
+ if (req == NULL)
+ return (ktr_requestpool);
+ STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
+ ktr_requestpool--;
+ mtx_unlock(&ktrace_mtx);
+ free(req, M_KTRACE);
+ mtx_lock(&ktrace_mtx);
+ }
+ else
+ /* Grow pool up to newsize. */
+ while (ktr_requestpool < newsize) {
+ mtx_unlock(&ktrace_mtx);
+ req = malloc(sizeof(struct ktr_request), M_KTRACE,
+ M_WAITOK);
+ mtx_lock(&ktrace_mtx);
+ STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
+ ktr_requestpool++;
+ }
+ return (ktr_requestpool);
+}
+
+static struct ktr_request *
+ktr_getrequest(int type)
+{
+ struct ktr_request *req;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ int pm;
+
+ td->td_inktrace = 1;
+ mtx_lock(&ktrace_mtx);
+ if (!KTRCHECK(td, type)) {
+ mtx_unlock(&ktrace_mtx);
+ td->td_inktrace = 0;
+ return (NULL);
+ }
+ req = STAILQ_FIRST(&ktr_free);
+ if (req != NULL) {
+ STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
+ req->ktr_header.ktr_type = type;
+ KASSERT(p->p_tracep != NULL, ("ktrace: no trace vnode"));
+ req->ktr_vp = p->p_tracep;
+ VREF(p->p_tracep);
+ mtx_unlock(&ktrace_mtx);
+ microtime(&req->ktr_header.ktr_time);
+ req->ktr_header.ktr_pid = p->p_pid;
+ bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
+ req->ktr_cred = crhold(td->td_ucred);
+ req->ktr_header.ktr_buffer = NULL;
+ req->ktr_header.ktr_len = 0;
+ req->ktr_synchronous = 0;
+ } else {
+ pm = print_message;
+ print_message = 0;
+ mtx_unlock(&ktrace_mtx);
+ if (pm)
+ printf("Out of ktrace request objects.\n");
+ td->td_inktrace = 0;
+ }
+ return (req);
+}
-static struct ktr_header *
-ktrgetheader(type)
- int type;
+static void
+ktr_submitrequest(struct ktr_request *req)
{
- register struct ktr_header *kth;
- struct proc *p = curproc; /* XXX */
-
- MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header),
- M_KTRACE, M_WAITOK);
- kth->ktr_type = type;
- microtime(&kth->ktr_time);
- kth->ktr_pid = p->p_pid;
- bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN + 1);
- return (kth);
+
+ mtx_lock(&ktrace_mtx);
+ STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
+ sema_post(&ktrace_sema);
+ if (req->ktr_synchronous) {
+ /*
+ * For a synchronous request, we wait for the ktrace thread
+ * to get to our item in the todo list and wake us up. Then
+ * we write the request out ourselves and wake the ktrace
+ * thread back up.
+ */
+ msleep(req, &ktrace_mtx, curthread->td_priority, "ktrsync", 0);
+ mtx_unlock(&ktrace_mtx);
+ ktr_writerequest(req);
+ mtx_lock(&ktrace_mtx);
+ wakeup(req);
+ }
+ mtx_unlock(&ktrace_mtx);
+ curthread->td_inktrace = 0;
+}
+
+static void
+ktr_freerequest(struct ktr_request *req)
+{
+
+ crfree(req->ktr_cred);
+ mtx_lock(&Giant);
+ vrele(req->ktr_vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&ktrace_mtx);
+ STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
+ mtx_unlock(&ktrace_mtx);
+}
+
+static void
+ktr_loop(void *dummy)
+{
+ struct ktr_request *req;
+ struct thread *td;
+ struct ucred *cred;
+
+ /* Only cache these values once. */
+ td = curthread;
+ cred = td->td_ucred;
+ for (;;) {
+ sema_wait(&ktrace_sema);
+ mtx_lock(&ktrace_mtx);
+ req = STAILQ_FIRST(&ktr_todo);
+ STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
+ KASSERT(req != NULL, ("got a NULL request"));
+ if (req->ktr_synchronous) {
+ wakeup(req);
+ msleep(req, &ktrace_mtx, curthread->td_priority,
+ "ktrwait", 0);
+ mtx_unlock(&ktrace_mtx);
+ } else {
+ mtx_unlock(&ktrace_mtx);
+ /*
+ * It is not enough just to pass the cached cred
+ * to the VOP's in ktr_writerequest(). Some VFS
+ * operations use curthread->td_ucred, so we need
+ * to modify our thread's credentials as well.
+ * Evil.
+ */
+ td->td_ucred = req->ktr_cred;
+ ktr_writerequest(req);
+ td->td_ucred = cred;
+ }
+ ktr_freerequest(req);
+ }
}
/*
* MPSAFE
*/
void
-ktrsyscall(vp, code, narg, args)
- struct vnode *vp;
+ktrsyscall(code, narg, args)
int code, narg;
register_t args[];
{
- struct ktr_header *kth;
- struct ktr_syscall *ktp;
- register int len = offsetof(struct ktr_syscall, ktr_args) +
- (narg * sizeof(register_t));
- struct proc *p = curproc; /* XXX */
- register_t *argp;
- int i;
+ struct ktr_request *req;
+ struct ktr_syscall *ktp;
+ size_t buflen;
- mtx_lock(&Giant);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_SYSCALL);
- MALLOC(ktp, struct ktr_syscall *, len, M_KTRACE, M_WAITOK);
+ req = ktr_getrequest(KTR_SYSCALL);
+ if (req == NULL)
+ return;
+ ktp = &req->ktr_data.ktr_syscall;
ktp->ktr_code = code;
ktp->ktr_narg = narg;
- argp = &ktp->ktr_args[0];
- for (i = 0; i < narg; i++)
- *argp++ = args[i];
- kth->ktr_buffer = (caddr_t)ktp;
- kth->ktr_len = len;
- ktrwrite(vp, kth, NULL);
- FREE(ktp, M_KTRACE);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
- mtx_unlock(&Giant);
+ buflen = sizeof(register_t) * narg;
+ if (buflen > 0) {
+ req->ktr_header.ktr_buffer = malloc(buflen, M_KTRACE, M_WAITOK);
+ bcopy(args, req->ktr_header.ktr_buffer, buflen);
+ req->ktr_header.ktr_len = buflen;
+ }
+ ktr_submitrequest(req);
}
/*
* MPSAFE
*/
void
-ktrsysret(vp, code, error, retval)
- struct vnode *vp;
+ktrsysret(code, error, retval)
int code, error;
register_t retval;
{
- struct ktr_header *kth;
- struct ktr_sysret ktp;
- struct proc *p = curproc; /* XXX */
+ struct ktr_request *req;
+ struct ktr_sysret *ktp;
- mtx_lock(&Giant);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_SYSRET);
- ktp.ktr_code = code;
- ktp.ktr_error = error;
- ktp.ktr_retval = retval; /* what about val2 ? */
-
- kth->ktr_buffer = (caddr_t)&ktp;
- kth->ktr_len = sizeof(struct ktr_sysret);
-
- ktrwrite(vp, kth, NULL);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
- mtx_unlock(&Giant);
+ req = ktr_getrequest(KTR_SYSRET);
+ if (req == NULL)
+ return;
+ ktp = &req->ktr_data.ktr_sysret;
+ ktp->ktr_code = code;
+ ktp->ktr_error = error;
+ ktp->ktr_retval = retval; /* what about val2 ? */
+ ktr_submitrequest(req);
}
void
-ktrnamei(vp, path)
- struct vnode *vp;
+ktrnamei(path)
char *path;
{
- struct ktr_header *kth;
- struct proc *p = curproc; /* XXX */
+ struct ktr_request *req;
+ int namelen;
- /*
- * don't let p_tracep get ripped out from under us
- */
- if (vp)
- VREF(vp);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_NAMEI);
- kth->ktr_len = strlen(path);
- kth->ktr_buffer = path;
-
- ktrwrite(vp, kth, NULL);
- if (vp)
- vrele(vp);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
+ req = ktr_getrequest(KTR_NAMEI);
+ if (req == NULL)
+ return;
+ namelen = strlen(path);
+ if (namelen > 0) {
+ req->ktr_header.ktr_len = namelen;
+ req->ktr_header.ktr_buffer = malloc(namelen, M_KTRACE,
+ M_WAITOK);
+ bcopy(path, req->ktr_header.ktr_buffer, namelen);
+ }
+ ktr_submitrequest(req);
}
+/*
+ * Since the uio may not stay valid, we can not hand off this request to
+ * the thread and need to process it synchronously. However, we wish to
+ * keep the relative order of records in a trace file correct, so we
+ * do put this request on the queue (if it isn't empty) and then block.
+ * The ktrace thread waks us back up when it is time for this event to
+ * be posted and blocks until we have completed writing out the event
+ * and woken it back up.
+ */
void
-ktrgenio(vp, fd, rw, uio, error)
- struct vnode *vp;
+ktrgenio(fd, rw, uio, error)
int fd;
enum uio_rw rw;
struct uio *uio;
int error;
{
- struct ktr_header *kth;
- struct ktr_genio ktg;
- struct proc *p = curproc; /* XXX */
+ struct ktr_request *req;
+ struct ktr_genio *ktg;
if (error)
return;
-
- mtx_lock(&Giant);
- /*
- * don't let p_tracep get ripped out from under us
- */
- if (vp)
- VREF(vp);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_GENIO);
- ktg.ktr_fd = fd;
- ktg.ktr_rw = rw;
- kth->ktr_buffer = (caddr_t)&ktg;
- kth->ktr_len = sizeof(struct ktr_genio);
+ req = ktr_getrequest(KTR_GENIO);
+ if (req == NULL)
+ return;
+ ktg = &req->ktr_data.ktr_genio;
+ ktg->ktr_fd = fd;
+ ktg->ktr_rw = rw;
+ req->ktr_header.ktr_buffer = uio;
uio->uio_offset = 0;
uio->uio_rw = UIO_WRITE;
-
- ktrwrite(vp, kth, uio);
- if (vp)
- vrele(vp);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
- mtx_unlock(&Giant);
+ req->ktr_synchronous = 1;
+ ktr_submitrequest(req);
}
void
-ktrpsig(vp, sig, action, mask, code)
- struct vnode *vp;
+ktrpsig(sig, action, mask, code)
int sig;
sig_t action;
sigset_t *mask;
int code;
{
- struct ktr_header *kth;
- struct ktr_psig kp;
- struct proc *p = curproc; /* XXX */
+ struct ktr_request *req;
+ struct ktr_psig *kp;
- /*
- * don't let vp get ripped out from under us
- */
- if (vp)
- VREF(vp);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_PSIG);
- kp.signo = (char)sig;
- kp.action = action;
- kp.mask = *mask;
- kp.code = code;
- kth->ktr_buffer = (caddr_t)&kp;
- kth->ktr_len = sizeof (struct ktr_psig);
-
- ktrwrite(vp, kth, NULL);
- if (vp)
- vrele(vp);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
+ req = ktr_getrequest(KTR_PSIG);
+ if (req == NULL)
+ return;
+ kp = &req->ktr_data.ktr_psig;
+ kp->signo = (char)sig;
+ kp->action = action;
+ kp->mask = *mask;
+ kp->code = code;
+ ktr_submitrequest(req);
}
void
-ktrcsw(vp, out, user)
- struct vnode *vp;
+ktrcsw(out, user)
int out, user;
{
- struct ktr_header *kth;
- struct ktr_csw kc;
- struct proc *p = curproc; /* XXX */
+ struct ktr_request *req;
+ struct ktr_csw *kc;
- /*
- * don't let vp get ripped out from under us
- */
- if (vp)
- VREF(vp);
- p->p_traceflag |= KTRFAC_ACTIVE;
- kth = ktrgetheader(KTR_CSW);
- kc.out = out;
- kc.user = user;
- kth->ktr_buffer = (caddr_t)&kc;
- kth->ktr_len = sizeof (struct ktr_csw);
-
- ktrwrite(vp, kth, NULL);
- if (vp)
- vrele(vp);
- FREE(kth, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
+ req = ktr_getrequest(KTR_CSW);
+ if (req == NULL)
+ return;
+ kc = &req->ktr_data.ktr_csw;
+ kc->out = out;
+ kc->user = user;
+ ktr_submitrequest(req);
}
#endif
@@ -287,7 +475,6 @@ ktrace(td, uap)
register struct ktrace_args *uap;
{
#ifdef KTRACE
- struct proc *curp = td->td_proc;
register struct vnode *vp = NULL;
register struct proc *p;
struct pgrp *pg;
@@ -298,7 +485,7 @@ ktrace(td, uap)
int flags, error = 0;
struct nameidata nd;
- curp->p_traceflag |= KTRFAC_ACTIVE;
+ td->td_inktrace = 1;
if (ops != KTROP_CLEAR) {
/*
* an operation which requires a file argument.
@@ -307,7 +494,7 @@ ktrace(td, uap)
flags = FREAD | FWRITE | O_NOFOLLOW;
error = vn_open(&nd, &flags, 0);
if (error) {
- curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ td->td_inktrace = 0;
return (error);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -315,7 +502,7 @@ ktrace(td, uap)
VOP_UNLOCK(vp, 0, td);
if (vp->v_type != VREG) {
(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ td->td_inktrace = 0;
return (EACCES);
}
}
@@ -327,9 +514,11 @@ ktrace(td, uap)
LIST_FOREACH(p, &allproc, p_list) {
PROC_LOCK(p);
if (p->p_tracep == vp) {
- if (ktrcanset(td, p) && p->p_tracep == vp) {
+ if (ktrcanset(td, p)) {
+ mtx_lock(&ktrace_mtx);
p->p_tracep = NULL;
p->p_traceflag = 0;
+ mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
(void) vn_close(vp, FREAD|FWRITE,
td->td_ucred, td);
@@ -396,7 +585,7 @@ ktrace(td, uap)
done:
if (vp != NULL)
(void) vn_close(vp, FWRITE, td->td_ucred, td);
- curp->p_traceflag &= ~KTRFAC_ACTIVE;
+ td->td_inktrace = 0;
return (error);
#else
return ENOSYS;
@@ -414,31 +603,23 @@ utrace(td, uap)
{
#ifdef KTRACE
- struct ktr_header *kth;
- struct proc *p = curproc; /* XXX */
- struct vnode *vp;
+ struct ktr_request *req;
register caddr_t cp;
- if (!KTRPOINT(p, KTR_USER))
- return (0);
if (uap->len > KTR_USER_MAXLEN)
return (EINVAL);
- p->p_traceflag |= KTRFAC_ACTIVE;
- if ((vp = p->p_tracep) != NULL)
- VREF(vp);
- kth = ktrgetheader(KTR_USER);
+ req = ktr_getrequest(KTR_USER);
+ if (req == NULL)
+ return (0);
MALLOC(cp, caddr_t, uap->len, M_KTRACE, M_WAITOK);
if (!copyin(uap->addr, cp, uap->len)) {
- kth->ktr_buffer = cp;
- kth->ktr_len = uap->len;
- ktrwrite(vp, kth, NULL);
+ req->ktr_header.ktr_buffer = cp;
+ req->ktr_header.ktr_len = uap->len;
+ ktr_submitrequest(req);
+ } else {
+ ktr_freerequest(req);
+ td->td_inktrace = 0;
}
- if (vp)
- vrele(vp);
- FREE(kth, M_KTRACE);
- FREE(cp, M_KTRACE);
- p->p_traceflag &= ~KTRFAC_ACTIVE;
-
return (0);
#else
return (ENOSYS);
@@ -453,23 +634,22 @@ ktrops(td, p, ops, facs, vp)
int ops, facs;
struct vnode *vp;
{
- struct vnode *vtmp = NULL, *newvp = NULL;
+ struct vnode *tracevp = NULL;
PROC_LOCK(p);
if (!ktrcanset(td, p)) {
PROC_UNLOCK(p);
return (0);
}
+ mtx_lock(&ktrace_mtx);
if (ops == KTROP_SET) {
if (p->p_tracep != vp) {
- struct vnode *vtmp;
-
/*
* if trace file already in use, relinquish below
*/
- newvp = vp;
- vtmp = p->p_tracep;
- p->p_tracep = NULL;
+ tracevp = p->p_tracep;
+ VREF(vp);
+ p->p_tracep = vp;
}
p->p_traceflag |= facs;
if (td->td_ucred->cr_uid == 0)
@@ -477,33 +657,17 @@ ktrops(td, p, ops, facs, vp)
} else {
/* KTROP_CLEAR */
if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
- struct vnode *vtmp;
-
/* no more tracing */
p->p_traceflag = 0;
- vtmp = p->p_tracep;
+ tracevp = p->p_tracep;
p->p_tracep = NULL;
}
}
+ mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
+ if (tracevp != NULL)
+ vrele(tracevp);
- /* Release old trace file if requested. */
- if (vtmp != NULL)
- vrele(vtmp);
-
- /* Setup new trace file if requested. */
- /*
- * XXX: Doing this before the PROC_UNLOCK above would result in
- * fewer lock operations but would break old behavior where the
- * above vrele() would not be traced when changing trace files.
- */
- if (newvp != NULL) {
- VREF(newvp);
- PROC_LOCK(p);
- p->p_tracep = newvp;
- PROC_UNLOCK(p);
- }
-
return (1);
}
@@ -544,20 +708,32 @@ ktrsetchildren(td, top, ops, facs, vp)
}
static void
-ktrwrite(vp, kth, uio)
- struct vnode *vp;
- register struct ktr_header *kth;
- struct uio *uio;
+ktr_writerequest(struct ktr_request *req)
{
+ struct ktr_header *kth;
+ struct vnode *vp;
+ struct uio *uio = NULL;
+ struct proc *p;
+ struct thread *td;
+ struct ucred *cred;
struct uio auio;
- struct iovec aiov[2];
- struct thread *td = curthread; /* XXX */
- struct proc *p = td->td_proc; /* XXX */
+ struct iovec aiov[3];
struct mount *mp;
+ int datalen, buflen, vrele_count;
int error;
+ vp = req->ktr_vp;
+ /*
+ * If vp is NULL, the vp has been cleared out from under this
+ * request, so just drop it.
+ */
if (vp == NULL)
return;
+ kth = &req->ktr_header;
+ datalen = data_lengths[kth->ktr_type];
+ buflen = kth->ktr_len;
+ cred = req->ktr_cred;
+ td = curthread;
auio.uio_iov = &aiov[0];
auio.uio_offset = 0;
auio.uio_segflg = UIO_SYSSPACE;
@@ -566,42 +742,85 @@ ktrwrite(vp, kth, uio)
aiov[0].iov_len = sizeof(struct ktr_header);
auio.uio_resid = sizeof(struct ktr_header);
auio.uio_iovcnt = 1;
- auio.uio_td = curthread;
- if (kth->ktr_len > 0) {
+ auio.uio_td = td;
+ if (datalen != 0) {
+ aiov[1].iov_base = (caddr_t)&req->ktr_data;
+ aiov[1].iov_len = datalen;
+ auio.uio_resid += datalen;
auio.uio_iovcnt++;
- aiov[1].iov_base = kth->ktr_buffer;
- aiov[1].iov_len = kth->ktr_len;
- auio.uio_resid += kth->ktr_len;
- if (uio != NULL)
- kth->ktr_len += uio->uio_resid;
+ kth->ktr_len += datalen;
}
+ if (buflen != 0) {
+ KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
+ aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
+ aiov[auio.uio_iovcnt].iov_len = buflen;
+ auio.uio_resid += buflen;
+ auio.uio_iovcnt++;
+ } else
+ uio = kth->ktr_buffer;
+ KASSERT((uio == NULL) ^ (kth->ktr_type == KTR_GENIO),
+ ("ktrace: uio and genio mismatch"));
+ if (uio != NULL)
+ kth->ktr_len += uio->uio_resid;
+ mtx_lock(&Giant);
vn_start_write(vp, &mp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- (void)VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
- error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, td->td_ucred);
+ (void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
+ error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
if (error == 0 && uio != NULL) {
- (void)VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
- error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, td->td_ucred);
+ (void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
+ error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, cred);
}
VOP_UNLOCK(vp, 0, td);
vn_finished_write(mp);
+ mtx_unlock(&Giant);
+ if (buflen != 0)
+ free(kth->ktr_buffer, M_KTRACE);
if (!error)
return;
/*
- * If error encountered, give up tracing on this vnode. XXX what
- * happens to the loop if vrele() blocks?
+ * If error encountered, give up tracing on this vnode. We defer
+ * all the vrele()'s on the vnode until after we are finished walking
+ * the various lists to avoid needlessly holding locks.
*/
log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
error);
+ vrele_count = 0;
+ /*
+ * First, clear this vnode from being used by any processes in the
+ * system.
+ * XXX - If one process gets an EPERM writing to the vnode, should
+ * we really do this? Other processes might have suitable
+ * credentials for the operation.
+ */
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
+ PROC_LOCK(p);
if (p->p_tracep == vp) {
+ mtx_lock(&ktrace_mtx);
p->p_tracep = NULL;
p->p_traceflag = 0;
- vrele(vp);
+ mtx_unlock(&ktrace_mtx);
+ vrele_count++;
}
+ PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
+ /*
+ * Second, clear this vnode from any pending requests.
+ */
+ mtx_lock(&ktrace_mtx);
+ STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
+ if (req->ktr_vp == vp) {
+ req->ktr_vp = NULL;
+ vrele_count++;
+ }
+ }
+ mtx_unlock(&ktrace_mtx);
+ mtx_lock(&Giant);
+ while (vrele_count-- > 0)
+ vrele(vp);
+ mtx_unlock(&Giant);
}
/*
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
index e72a4cb..2970ce0 100644
--- a/sys/sys/ktrace.h
+++ b/sys/sys/ktrace.h
@@ -58,14 +58,19 @@ struct ktr_header {
pid_t ktr_pid; /* process id */
char ktr_comm[MAXCOMLEN+1]; /* command name */
struct timeval ktr_time; /* timestamp */
- caddr_t ktr_buffer;
+ void *ktr_buffer;
};
/*
- * Test for kernel trace point (MP SAFE)
+ * Test for kernel trace point (MP SAFE).
+ *
+ * KTRCHECK() just checks that the type is enabled and is only for
+ * internal use in the ktrace subsystem. KTRPOINT() checks against
+ * ktrace recursion as well as checking that the type is enabled and
+ * is the public interface.
*/
-#define KTRPOINT(p, type) \
- (((p)->p_traceflag & ((1<<(type))|KTRFAC_ACTIVE)) == (1<<(type)))
+#define KTRCHECK(td, type) ((td)->td_proc->p_traceflag & (1 << type))
+#define KTRPOINT(td, type) (KTRCHECK((td), (type)) && !(td)->td_inktrace)
/*
* ktrace record types
@@ -155,15 +160,16 @@ struct ktr_csw {
*/
#define KTRFAC_ROOT 0x80000000 /* root set this trace */
#define KTRFAC_INHERIT 0x40000000 /* pass trace flags to children */
-#define KTRFAC_ACTIVE 0x20000000 /* ktrace logging in progress, ignore */
#ifdef _KERNEL
-void ktrnamei(struct vnode *,char *);
-void ktrcsw(struct vnode *,int,int);
-void ktrpsig(struct vnode *, int, sig_t, sigset_t *, int);
-void ktrgenio(struct vnode *, int, enum uio_rw, struct uio *, int);
-void ktrsyscall(struct vnode *, int, int narg, register_t args[]);
-void ktrsysret(struct vnode *, int, int, register_t);
+extern struct mtx ktrace_mtx;
+
+void ktrnamei(char *);
+void ktrcsw(int, int);
+void ktrpsig(int, sig_t, sigset_t *, int);
+void ktrgenio(int, enum uio_rw, struct uio *, int);
+void ktrsyscall(int, int narg, register_t args[]);
+void ktrsysret(int, int, register_t);
#else
OpenPOWER on IntegriCloud