Diffstat (limited to 'sys')
 sys/kern/init_main.c   |   1
 sys/kern/kern_exit.c   |   4
 sys/kern/kern_fork.c   |   1
 sys/kern/kern_ktrace.c | 282
 sys/kern/kern_proc.c   |   1
 sys/kern/subr_trap.c   |   4
 sys/sys/ktrace.h       |  11
 sys/sys/proc.h         |   1
 8 files changed, 213 insertions(+), 92 deletions(-)
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 7e2396d..837b066 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -369,6 +369,7 @@ proc0_init(void *dummy __unused)
 	p->p_sflag = PS_INMEM;
 	p->p_state = PRS_NORMAL;
 	knlist_init(&p->p_klist, &p->p_mtx, NULL, NULL, NULL);
+	STAILQ_INIT(&p->p_ktr);
 	p->p_nice = NZERO;
 	td->td_state = TDS_RUNNING;
 	kg->kg_pri_class = PRI_TIMESHARE;
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 46f411f..6be9c07 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -366,8 +366,10 @@ retry:
 	(void)acct_process(td);
 #ifdef KTRACE
 	/*
-	 * release trace file
+	 * Drain any pending records on the thread and release the trace
+	 * file.  It might be better if drain-and-clear were atomic.
 	 */
+	ktrprocexit(td);
 	PROC_LOCK(p);
 	mtx_lock(&ktrace_mtx);
 	p->p_traceflag = 0;	/* don't trace the vrele() */
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 56ad939..5d300c9 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -272,6 +272,7 @@ fork1(td, flags, pages, procp)
 	mac_init_proc(newproc);
 #endif
 	knlist_init(&newproc->p_klist, &newproc->p_mtx, NULL, NULL, NULL);
+	STAILQ_INIT(&newproc->p_ktr);
 
 	/* We have to lock the process tree while we look for a pid. */
 	sx_slock(&proctree_lock);
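
Both proc0_init() and fork1() must initialize p_ktr before any record can be
queued, because the STAILQ macros from <sys/queue.h> assume an initialized
head.  As a refresher on the queue primitives the patch leans on, here is a
minimal userland sketch -- not kernel code; the "record" type and its "link"
field are made-up illustrations standing in for ktr_request and ktr_list:

	#include <sys/queue.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct record {
		int			seq;
		STAILQ_ENTRY(record)	link;	/* like ktr_request's ktr_list */
	};

	STAILQ_HEAD(record_head, record);

	int
	main(void)
	{
		struct record_head head;	/* like struct proc's p_ktr */
		struct record *r;
		int i;

		STAILQ_INIT(&head);		/* must run before first use */
		for (i = 0; i < 3; i++) {
			r = malloc(sizeof(*r));
			r->seq = i;
			STAILQ_INSERT_TAIL(&head, r, link);	/* FIFO append */
		}
		while ((r = STAILQ_FIRST(&head)) != NULL) {
			STAILQ_REMOVE_HEAD(&head, link);
			printf("%d\n", r->seq);
			free(r);
		}
		return (0);
	}

The singly-linked tail queue is a good fit here: the producers only append,
the consumer only drains from the head, and the head structure is small
enough to embed in every struct proc.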
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index a276e63..73a2439 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -1,6 +1,8 @@
 /*-
  * Copyright (c) 1989, 1993
- *	The Regents of the University of California.  All rights reserved.
+ *	The Regents of the University of California.
+ * Copyright (c) 2005 Robert N. M. Watson
+ * All rights reserved.
 *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -54,6 +56,25 @@ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 
+/*
+ * The ktrace facility allows the tracing of certain key events in user space
+ * processes, such as system calls, signal delivery, context switches, and
+ * user-generated events using utrace(2).  It works by streaming event
+ * records and data to a vnode associated with the process using the
+ * ktrace(2) system call.  In general, records can be written directly from
+ * the context that generates the event.  One important exception to this is
+ * during a context switch, where sleeping is not permitted.  To handle this
+ * case, trace events are generated using in-kernel ktr_request records, and
+ * then delivered to disk at a convenient moment -- either immediately, the
+ * next traceable event, at system call return, or at process exit.
+ *
+ * When dealing with multiple threads or processes writing to the same event
+ * log, ordering guarantees are weak: specifically, if an event has multiple
+ * records (i.e., system call enter and return), they may be interlaced with
+ * records from another event.  Process and thread ID information is provided
+ * in the record, and user applications can de-interlace events if required.
+ */
+
 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
 
 #ifdef KTRACE
@@ -65,8 +86,6 @@ static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
 struct ktr_request {
 	struct	ktr_header ktr_header;
 	void	*ktr_buffer;
-	struct	ucred *ktr_cred;
-	struct	vnode *ktr_vp;
 	union {
 		struct	ktr_syscall ktr_syscall;
 		struct	ktr_sysret ktr_sysret;
@@ -88,7 +107,6 @@ static int data_lengths[] = {
 	0					/* KTR_USER */
 };
 
-static STAILQ_HEAD(, ktr_request) ktr_todo;
 static STAILQ_HEAD(, ktr_request) ktr_free;
 
 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");
@@ -104,19 +122,48 @@ SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
 static int print_message = 1;
 struct mtx ktrace_mtx;
 static struct cv ktrace_cv;
+static struct sx ktrace_sx;
 
 static void ktrace_init(void *dummy);
 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
 static u_int ktrace_resize_pool(u_int newsize);
 static struct ktr_request *ktr_getrequest(int type);
-static void ktr_submitrequest(struct ktr_request *req);
+static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
 static void ktr_freerequest(struct ktr_request *req);
-static void ktr_loop(void *dummy);
-static void ktr_writerequest(struct ktr_request *req);
+static void ktr_writerequest(struct thread *td, struct ktr_request *req);
 static int ktrcanset(struct thread *,struct proc *);
 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
 
+/*
+ * ktrace itself generates events, such as context switches, which we do not
+ * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
+ * whether or not it is in a region where tracing of events should be
+ * suppressed.
+ */
+static void
+ktrace_enter(struct thread *td)
+{
+
+	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
+	td->td_pflags |= TDP_INKTRACE;
+}
+
+static void
+ktrace_exit(struct thread *td)
+{
+
+	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
+	td->td_pflags &= ~TDP_INKTRACE;
+}
+
+static void
+ktrace_assert(struct thread *td)
+{
+
+	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
+}
+
 static void
 ktrace_init(void *dummy)
 {
@@ -124,14 +171,13 @@ ktrace_init(void *dummy)
 	int i;
 
 	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
+	sx_init(&ktrace_sx, "ktrace_sx");
 	cv_init(&ktrace_cv, "ktrace");
-	STAILQ_INIT(&ktr_todo);
 	STAILQ_INIT(&ktr_free);
 	for (i = 0; i < ktr_requestpool; i++) {
 		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
 		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
 	}
-	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, 0, "ktrace");
 }
 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
 
@@ -154,12 +200,12 @@ sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
 	if (error)
 		return (error);
 	td = curthread;
-	td->td_pflags |= TDP_INKTRACE;
+	ktrace_enter(td);
 	mtx_lock(&ktrace_mtx);
 	oldsize = ktr_requestpool;
 	newsize = ktrace_resize_pool(wantsize);
 	mtx_unlock(&ktrace_mtx);
-	td->td_pflags &= ~TDP_INKTRACE;
+	ktrace_exit(td);
 	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
 	if (error)
 		return (error);
@@ -214,27 +260,21 @@ ktr_getrequest(int type)
 	struct proc *p = td->td_proc;
 	int pm;
 
-	td->td_pflags |= TDP_INKTRACE;
-	mtx_lock(&ktrace_mtx);
+	ktrace_enter(td);	/* XXX: In caller instead? */
 	if (!KTRCHECK(td, type)) {
-		mtx_unlock(&ktrace_mtx);
-		td->td_pflags &= ~TDP_INKTRACE;
+		ktrace_exit(td);
 		return (NULL);
 	}
+	mtx_lock(&ktrace_mtx);
 	req = STAILQ_FIRST(&ktr_free);
 	if (req != NULL) {
 		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
+		mtx_unlock(&ktrace_mtx);
 		req->ktr_header.ktr_type = type;
 		if (p->p_traceflag & KTRFAC_DROP) {
 			req->ktr_header.ktr_type |= KTR_DROP;
 			p->p_traceflag &= ~KTRFAC_DROP;
 		}
-		KASSERT(p->p_tracevp != NULL, ("ktrace: no trace vnode"));
-		KASSERT(p->p_tracecred != NULL, ("ktrace: no trace cred"));
-		req->ktr_vp = p->p_tracevp;
-		VREF(p->p_tracevp);
-		req->ktr_cred = crhold(p->p_tracecred);
-		mtx_unlock(&ktrace_mtx);
 		microtime(&req->ktr_header.ktr_time);
 		req->ktr_header.ktr_pid = p->p_pid;
 		req->ktr_header.ktr_tid = td->td_tid;
@@ -248,69 +288,87 @@ ktr_getrequest(int type)
 		mtx_unlock(&ktrace_mtx);
 		if (pm)
 			printf("Out of ktrace request objects.\n");
-		td->td_pflags &= ~TDP_INKTRACE;
+		ktrace_exit(td);
 	}
 	return (req);
 }
 
+/*
+ * Some trace generation environments don't permit direct access to VFS,
+ * such as during a context switch, where sleeping is not allowed.  Under
+ * these circumstances, queue a request to the thread to be written
+ * asynchronously later.
+ */
 static void
-ktr_submitrequest(struct ktr_request *req)
+ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
 {
 
 	mtx_lock(&ktrace_mtx);
-	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
-	cv_signal(&ktrace_cv);
+	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
 	mtx_unlock(&ktrace_mtx);
-	curthread->td_pflags &= ~TDP_INKTRACE;
+	ktrace_exit(td);
 }
 
+/*
+ * Drain any pending ktrace records from the per-thread queue to disk.  This
+ * is used both internally before committing other records, and also on
+ * system call return.  We drain all the ones we can find at the time when
+ * drain is requested, but don't keep draining after that, as those events
+ * may be approximately "after" the current event.
+ */
 static void
-ktr_freerequest(struct ktr_request *req)
+ktr_drain(struct thread *td)
 {
+	struct ktr_request *queued_req;
+	STAILQ_HEAD(, ktr_request) local_queue;
 
-	crfree(req->ktr_cred);
-	if (req->ktr_vp != NULL) {
-		mtx_lock(&Giant);
-		vrele(req->ktr_vp);
-		mtx_unlock(&Giant);
+	ktrace_assert(td);
+	sx_assert(&ktrace_sx, SX_XLOCKED);
+
+	STAILQ_INIT(&local_queue);	/* XXXRW: needed? */
+
+	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
+		mtx_lock(&ktrace_mtx);
+		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
+		mtx_unlock(&ktrace_mtx);
+
+		while ((queued_req = STAILQ_FIRST(&local_queue))) {
+			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
+			ktr_writerequest(td, queued_req);
+			ktr_freerequest(queued_req);
+		}
 	}
-	if (req->ktr_buffer != NULL)
-		free(req->ktr_buffer, M_KTRACE);
-	mtx_lock(&ktrace_mtx);
-	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
-	mtx_unlock(&ktrace_mtx);
 }
 
+/*
+ * Submit a trace record for immediate commit to disk -- to be used only
+ * where entering VFS is OK.  First drain any pending records that may have
+ * been cached in the thread.
+ */
 static void
-ktr_loop(void *dummy)
+ktr_submitrequest(struct thread *td, struct ktr_request *req)
 {
-	struct ktr_request *req;
-	struct thread *td;
-	struct ucred *cred;
 
-	/* Only cache these values once. */
-	td = curthread;
-	cred = td->td_ucred;
-	for (;;) {
-		mtx_lock(&ktrace_mtx);
-		while (STAILQ_EMPTY(&ktr_todo))
-			cv_wait(&ktrace_cv, &ktrace_mtx);
-		req = STAILQ_FIRST(&ktr_todo);
-		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
-		KASSERT(req != NULL, ("got a NULL request"));
-		mtx_unlock(&ktrace_mtx);
-		/*
-		 * It is not enough just to pass the cached cred
-		 * to the VOP's in ktr_writerequest().  Some VFS
-		 * operations use curthread->td_ucred, so we need
-		 * to modify our thread's credentials as well.
-		 * Evil.
-		 */
-		td->td_ucred = req->ktr_cred;
-		ktr_writerequest(req);
-		td->td_ucred = cred;
-		ktr_freerequest(req);
-	}
+	ktrace_assert(td);
+
+	sx_xlock(&ktrace_sx);
+	ktr_drain(td);
+	ktr_writerequest(td, req);
+	ktr_freerequest(req);
+	sx_xunlock(&ktrace_sx);
+
+	ktrace_exit(td);
+}
+
+static void
+ktr_freerequest(struct ktr_request *req)
+{
+
+	if (req->ktr_buffer != NULL)
+		free(req->ktr_buffer, M_KTRACE);
+	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
+	mtx_unlock(&ktrace_mtx);
 }
 
 /*
@@ -344,7 +402,7 @@ ktrsyscall(code, narg, args)
 		req->ktr_header.ktr_len = buflen;
 		req->ktr_buffer = buf;
 	}
-	ktr_submitrequest(req);
+	ktr_submitrequest(curthread, req);
 }
 
 /*
@@ -365,7 +423,36 @@ ktrsysret(code, error, retval)
 	ktp->ktr_code = code;
 	ktp->ktr_error = error;
 	ktp->ktr_retval = retval;		/* what about val2 ? */
-	ktr_submitrequest(req);
+	ktr_submitrequest(curthread, req);
+}
+
+/*
+ * When a process exits, drain per-process asynchronous trace records.
+ */
+void
+ktrprocexit(struct thread *td)
+{
+
+	ktrace_enter(td);
+	sx_xlock(&ktrace_sx);
+	ktr_drain(td);
+	sx_xunlock(&ktrace_sx);
+	ktrace_exit(td);
+}
+
+/*
+ * When a thread returns, drain any asynchronous records generated by the
+ * system call.
+ */
+void
+ktruserret(struct thread *td)
+{
+
+	ktrace_enter(td);
+	sx_xlock(&ktrace_sx);
+	ktr_drain(td);
+	sx_xunlock(&ktrace_sx);
+	ktrace_exit(td);
 }
 
 void
@@ -391,7 +478,7 @@ ktrnamei(path)
 		req->ktr_header.ktr_len = namelen;
 		req->ktr_buffer = buf;
 	}
-	ktr_submitrequest(req);
+	ktr_submitrequest(curthread, req);
 }
 
 /*
@@ -439,7 +526,7 @@ ktrgenio(fd, rw, uio, error)
 	ktg->ktr_rw = rw;
 	req->ktr_header.ktr_len = datalen;
 	req->ktr_buffer = buf;
-	ktr_submitrequest(req);
+	ktr_submitrequest(curthread, req);
 }
 
 void
@@ -460,7 +547,7 @@ ktrpsig(sig, action, mask, code)
 	kp->action = action;
 	kp->mask = *mask;
 	kp->code = code;
-	ktr_submitrequest(req);
+	ktr_enqueuerequest(curthread, req);
 }
 
 void
@@ -476,7 +563,7 @@ ktrcsw(out, user)
 	kc = &req->ktr_data.ktr_csw;
 	kc->out = out;
 	kc->user = user;
-	ktr_submitrequest(req);
+	ktr_enqueuerequest(curthread, req);
 }
 #endif /* KTRACE */
 
@@ -519,7 +606,7 @@ ktrace(td, uap)
 	if (ops != KTROP_CLEARFILE && facs == 0)
 		return (EINVAL);
 
-	td->td_pflags |= TDP_INKTRACE;
+	ktrace_enter(td);
 	if (ops != KTROP_CLEAR) {
 		/*
 		 * an operation which requires a file argument.
@@ -530,7 +617,7 @@ ktrace(td, uap)
 		error = vn_open(&nd, &flags, 0, -1);
 		if (error) {
 			mtx_unlock(&Giant);
-			td->td_pflags &= ~TDP_INKTRACE;
+			ktrace_exit(td);
 			return (error);
 		}
 		NDFREE(&nd, NDF_ONLY_PNBUF);
@@ -539,7 +626,7 @@ ktrace(td, uap)
 		if (vp->v_type != VREG) {
 			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
 			mtx_unlock(&Giant);
-			td->td_pflags &= ~TDP_INKTRACE;
+			ktrace_exit(td);
 			return (EACCES);
 		}
 		mtx_unlock(&Giant);
@@ -647,7 +734,7 @@ done:
 		(void) vn_close(vp, FWRITE, td->td_ucred, td);
 		mtx_unlock(&Giant);
 	}
-	td->td_pflags &= ~TDP_INKTRACE;
+	ktrace_exit(td);
 	return (error);
 #else /* !KTRACE */
 	return (ENOSYS);
@@ -688,7 +775,7 @@ utrace(td, uap)
 	}
 	req->ktr_buffer = cp;
 	req->ktr_header.ktr_len = uap->len;
-	ktr_submitrequest(req);
+	ktr_submitrequest(td, req);
 	return (0);
 #else /* !KTRACE */
 	return (ENOSYS);
@@ -787,12 +874,11 @@ ktrsetchildren(td, top, ops, facs, vp)
 }
 
 static void
-ktr_writerequest(struct ktr_request *req)
+ktr_writerequest(struct thread *td, struct ktr_request *req)
 {
 	struct ktr_header *kth;
 	struct vnode *vp;
 	struct proc *p;
-	struct thread *td;
 	struct ucred *cred;
 	struct uio auio;
 	struct iovec aiov[3];
@@ -800,18 +886,36 @@ ktr_writerequest(struct ktr_request *req)
 	int datalen, buflen, vrele_count;
 	int error;
 
-	vp = req->ktr_vp;
+	/*
+	 * We hold the vnode and credential for use in I/O in case ktrace is
+	 * disabled on the process as we write out the request.
+	 *
+	 * XXXRW: This is not ideal: we could end up performing a write after
+	 * the vnode has been closed.
+	 */
+	mtx_lock(&ktrace_mtx);
+	vp = td->td_proc->p_tracevp;
+	if (vp != NULL)
+		VREF(vp);
+	cred = td->td_proc->p_tracecred;
+	if (cred != NULL)
+		crhold(cred);
+	mtx_unlock(&ktrace_mtx);
+
 	/*
 	 * If vp is NULL, the vp has been cleared out from under this
-	 * request, so just drop it.
+	 * request, so just drop it.  Make sure the credential and vnode are
+	 * in sync: we should have both or neither.
 	 */
-	if (vp == NULL)
+	if (vp == NULL) {
+		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
 		return;
+	}
+	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
+
 	kth = &req->ktr_header;
 	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
 	buflen = kth->ktr_len;
-	cred = req->ktr_cred;
-	td = curthread;
 	auio.uio_iov = &aiov[0];
 	auio.uio_offset = 0;
 	auio.uio_segflg = UIO_SYSSPACE;
@@ -835,6 +939,7 @@ ktr_writerequest(struct ktr_request *req)
 		auio.uio_resid += buflen;
 		auio.uio_iovcnt++;
 	}
+
 	mtx_lock(&Giant);
 	vn_start_write(vp, &mp, V_WAIT);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
@@ -884,17 +989,12 @@
 		}
 	}
 	sx_sunlock(&allproc_lock);
+
 	/*
-	 * Second, clear this vnode from any pending requests.
+	 * We can't clear any pending requests in threads that have cached
+	 * them but not yet committed them, as those are per-thread.  The
+	 * thread will have to clear it itself on system call return.
 	 */
-	mtx_lock(&ktrace_mtx);
-	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
-		if (req->ktr_vp == vp) {
-			req->ktr_vp = NULL;
-			vrele_count++;
-		}
-	}
-	mtx_unlock(&ktrace_mtx);
 	mtx_lock(&Giant);
 	while (vrele_count-- > 0)
 		vrele(vp);
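
The heart of the change is ktr_drain()'s handoff pattern: the whole pending
queue is moved under the mutex in O(1) with STAILQ_CONCAT, and the slow vnode
writes then happen without holding it.  A rough userland analogue, assuming
FreeBSD's <sys/queue.h> (STAILQ_CONCAT is not in every libc's copy of the
header), with a pthread mutex standing in for ktrace_mtx and a hypothetical
write_one() standing in for ktr_writerequest():

	#include <sys/queue.h>
	#include <pthread.h>
	#include <stdlib.h>

	struct record {
		STAILQ_ENTRY(record) link;
	};
	STAILQ_HEAD(record_head, record);

	static struct record_head pending = STAILQ_HEAD_INITIALIZER(pending);
	static pthread_mutex_t pending_mtx = PTHREAD_MUTEX_INITIALIZER;

	void write_one(struct record *r);	/* slow path; may block */

	void
	drain(void)
	{
		struct record_head local_queue =
		    STAILQ_HEAD_INITIALIZER(local_queue);
		struct record *r;

		/* O(1) handoff: CONCAT empties `pending` into `local_queue`. */
		pthread_mutex_lock(&pending_mtx);
		STAILQ_CONCAT(&local_queue, &pending);
		pthread_mutex_unlock(&pending_mtx);

		while ((r = STAILQ_FIRST(&local_queue)) != NULL) {
			STAILQ_REMOVE_HEAD(&local_queue, link);
			write_one(r);
			free(r);	/* the kernel returns it to ktr_free */
		}
	}

Anything enqueued after the unlock simply waits for the next drain point,
which matches the weak cross-thread ordering the file's new header comment
promises.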
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 43f1c0e..28003d9 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -156,6 +156,7 @@ proc_dtor(void *mem, int size, void *arg)
 	KASSERT((td != NULL), ("proc_dtor: bad thread pointer"));
 	kg = FIRST_KSEGRP_IN_PROC(p);
 	KASSERT((kg != NULL), ("proc_dtor: bad kg pointer"));
+	KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
 #endif
 
 	/* Dispose of an alternate kstack, if it exists.
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index c305dc0..7b6c5b8 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -94,6 +94,10 @@ userret(td, frame, oticks)
 	PROC_UNLOCK(p);
 #endif
 
+#ifdef KTRACE
+	KTRUSERRET(td);
+#endif
+
 	/*
 	 * If this thread tickled GEOM, we need to wait for the giggling to
 	 * stop before we return to userland
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
index f4171c3..337062c 100644
--- a/sys/sys/ktrace.h
+++ b/sys/sys/ktrace.h
@@ -68,6 +68,15 @@ struct ktr_header {
 #define KTRCHECK(td, type)	((td)->td_proc->p_traceflag & (1 << type))
 #define KTRPOINT(td, type) \
 	(KTRCHECK((td), (type)) && !((td)->td_pflags & TDP_INKTRACE))
+#define KTRCHECKDRAIN(td)	(!(STAILQ_EMPTY(&(td)->td_proc->p_ktr)))
+#define KTRUSERRET(td) do {						\
+	if (KTRCHECKDRAIN(td))						\
+		ktruserret(td);						\
+} while (0)
+#define KTRPROCEXIT(td) do {						\
+	if (KTRCHECKDRAIN(td))						\
+		ktrprocexit(td);					\
+} while (0)
 
 /*
  * ktrace record types
@@ -174,6 +183,8 @@ void	ktrpsig(int, sig_t, sigset_t *, int);
 void	ktrgenio(int, enum uio_rw, struct uio *, int);
 void	ktrsyscall(int, int narg, register_t args[]);
 void	ktrsysret(int, int, register_t);
+void	ktrprocexit(struct thread *);
+void	ktruserret(struct thread *);
 
 #else
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 661ea70..b997e21 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -608,6 +608,7 @@ struct proc {
 	void		*p_emuldata;	/* (c) Emulator state data. */
 	struct label	*p_label;	/* (*) Proc (not subject) MAC label. */
 	struct p_sched	*p_sched;	/* (*) Scheduler-specific data. */
+	STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */
 };
 
 #define p_session	p_pgrp->pg_session
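
Note that subr_trap.c invokes KTRUSERRET() rather than calling ktruserret()
directly: the macro peeks at p_ktr without taking ktrace_mtx, so the common
case (nothing queued) adds almost nothing to the return-to-user path.  A
sketch of that fast-path shape, reusing the hypothetical `pending` and
`drain()` names from the sketch above; a stale unlocked read merely defers
the drain to the next crossing, which this design tolerates:

	#include <sys/queue.h>

	struct record;
	STAILQ_HEAD(record_head, record);
	extern struct record_head pending;
	void drain(void);

	#define DRAIN_IF_PENDING() do {				\
		if (!STAILQ_EMPTY(&pending))	/* unlocked peek */	\
			drain();	/* locks and re-checks */	\
	} while (0)

	void
	userret_hook(void)
	{
		DRAIN_IF_PENDING();
	}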