-rw-r--r--  sys/security/audit/audit_worker.c  247
1 file changed, 91 insertions(+), 156 deletions(-)
diff --git a/sys/security/audit/audit_worker.c b/sys/security/audit/audit_worker.c
index 102e9f9..467c5b2 100644
--- a/sys/security/audit/audit_worker.c
+++ b/sys/security/audit/audit_worker.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 1999-2005 Apple Computer, Inc.
- * Copyright (c) 2006 Robert N. M. Watson
+ * Copyright (c) 2006-2008 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,6 +48,7 @@
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/domain.h>
+#include <sys/sx.h>
#include <sys/sysproto.h>
#include <sys/sysent.h>
#include <sys/systm.h>
@@ -75,31 +76,18 @@
static struct proc *audit_thread;
/*
- * When an audit log is rotated, the actual rotation must be performed by the
- * audit worker thread, as it may have outstanding writes on the current
- * audit log. audit_replacement_vp holds the vnode replacing the current
- * vnode. We can't let more than one replacement occur at a time, so if more
- * than one thread requests a replacement, only one can have the replacement
- * "in progress" at any given moment. If a thread tries to replace the audit
- * vnode and discovers a replacement is already in progress (i.e.,
- * audit_replacement_flag != 0), then it will sleep on audit_replacement_cv
- * waiting its turn to perform a replacement. When a replacement is
- * completed, this cv is signalled by the worker thread so a waiting thread
- * can start another replacement. We also store a credential to perform
- * audit log write operations with.
- *
- * The current credential and vnode are thread-local to audit_worker.
- */
-static struct cv audit_replacement_cv;
-
-static int audit_replacement_flag;
-static struct vnode *audit_replacement_vp;
-static struct ucred *audit_replacement_cred;
-
-/*
- * Flags related to Kernel->user-space communication.
+ * audit_cred and audit_vp are the stored credential and vnode to use for
+ * the active audit trail. They are protected by audit_worker_sx, which will be
+ * held across all I/O and all rotation to prevent them from being replaced
+ * (rotated) while in use. The audit_file_rotate_wait flag is set when the
+ * kernel has delivered a trigger to auditd to rotate the trail, and is
+ * cleared when the next rotation takes place. It is also protected by
+ * audit_worker_sx.
*/
-static int audit_file_rotate_wait;
+static int audit_file_rotate_wait;
+static struct sx audit_worker_sx;
+static struct ucred *audit_cred;
+static struct vnode *audit_vp;
/*
* Write an audit record to a file, performed as the last stage after both
@@ -110,8 +98,8 @@ static int audit_file_rotate_wait;
* the audit daemon, since the message is asynchronous anyway.
*/
static void
-audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
- void *data, size_t len)
+audit_record_write(struct vnode *vp, struct ucred *cred, void *data,
+ size_t len)
{
static struct timeval last_lowspace_trigger;
static struct timeval last_fail;
@@ -122,6 +110,8 @@ audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
struct vattr vattr;
long temp;
+ sx_assert(&audit_worker_sx, SA_LOCKED); /* audit_file_rotate_wait. */
+
if (vp == NULL)
return;
@@ -133,11 +123,11 @@ audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
* that we know how we're doing on space. Consider failure of these
* operations to indicate a future inability to write to the file.
*/
- error = VFS_STATFS(vp->v_mount, mnt_stat, td);
+ error = VFS_STATFS(vp->v_mount, mnt_stat, curthread);
if (error)
goto fail;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- error = VOP_GETATTR(vp, &vattr, cred, td);
+ error = VOP_GETATTR(vp, &vattr, cred, curthread);
VOP_UNLOCK(vp, 0);
if (error)
goto fail;
@@ -200,6 +190,8 @@ audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
*/
if ((audit_fstat.af_filesz != 0) && (audit_file_rotate_wait == 0) &&
(vattr.va_size >= audit_fstat.af_filesz)) {
+ sx_assert(&audit_worker_sx, SA_XLOCKED);
+
audit_file_rotate_wait = 1;
(void)send_trigger(AUDIT_TRIGGER_ROTATE_KERNEL);
}
@@ -234,7 +226,7 @@ audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
}
error = vn_rdwr(UIO_WRITE, vp, data, len, (off_t)0, UIO_SYSSPACE,
- IO_APPEND|IO_UNIT, cred, NULL, NULL, td);
+ IO_APPEND|IO_UNIT, cred, NULL, NULL, curthread);
if (error == ENOSPC)
goto fail_enospc;
else if (error)
@@ -252,7 +244,7 @@ audit_record_write(struct vnode *vp, struct ucred *cred, struct thread *td,
if (audit_in_failure) {
if (audit_q_len == 0 && audit_pre_q_len == 0) {
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
- (void)VOP_FSYNC(vp, MNT_WAIT, td);
+ (void)VOP_FSYNC(vp, MNT_WAIT, curthread);
VOP_UNLOCK(vp, 0);
panic("Audit store overflow; record queue drained.");
}
@@ -269,7 +261,7 @@ fail_enospc:
*/
if (audit_fail_stop) {
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
- (void)VOP_FSYNC(vp, MNT_WAIT, td);
+ (void)VOP_FSYNC(vp, MNT_WAIT, curthread);
VOP_UNLOCK(vp, 0);
panic("Audit log space exhausted and fail-stop set.");
}
@@ -284,7 +276,7 @@ fail:
*/
if (audit_panic_on_write_fail) {
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
- (void)VOP_FSYNC(vp, MNT_WAIT, td);
+ (void)VOP_FSYNC(vp, MNT_WAIT, curthread);
VOP_UNLOCK(vp, 0);
panic("audit_worker: write error %d\n", error);
} else if (ppsratecheck(&last_fail, &cur_fail, 1))
@@ -293,62 +285,6 @@ fail:
}
/*
- * If an appropriate signal has been received rotate the audit log based on
- * the global replacement variables. Signal consumers as needed that the
- * rotation has taken place.
- *
- * The global variables and CVs used to signal the audit_worker to perform a
- * rotation are essentially a message queue of depth 1. It would be much
- * nicer to actually use a message queue.
- */
-static void
-audit_worker_rotate(struct ucred **audit_credp, struct vnode **audit_vpp,
- struct thread *audit_td)
-{
- int do_replacement_signal, vfslocked;
- struct ucred *old_cred;
- struct vnode *old_vp;
-
- mtx_assert(&audit_mtx, MA_OWNED);
-
- do_replacement_signal = 0;
- while (audit_replacement_flag != 0) {
- old_cred = *audit_credp;
- old_vp = *audit_vpp;
- *audit_credp = audit_replacement_cred;
- *audit_vpp = audit_replacement_vp;
- audit_replacement_cred = NULL;
- audit_replacement_vp = NULL;
- audit_replacement_flag = 0;
-
- audit_enabled = (*audit_vpp != NULL);
-
- if (old_vp != NULL) {
- mtx_unlock(&audit_mtx);
- vfslocked = VFS_LOCK_GIANT(old_vp->v_mount);
- vn_close(old_vp, AUDIT_CLOSE_FLAGS, old_cred,
- audit_td);
- VFS_UNLOCK_GIANT(vfslocked);
- crfree(old_cred);
- mtx_lock(&audit_mtx);
- old_cred = NULL;
- old_vp = NULL;
- }
- do_replacement_signal = 1;
- }
-
- /*
- * Signal that replacement have occurred to wake up and start any
- * other replacements started in parallel. We can continue about our
- * business in the mean time. We broadcast so that both new
- * replacements can be inserted, but also so that the source(s) of
- * replacement can return successfully.
- */
- if (do_replacement_signal)
- cv_broadcast(&audit_replacement_cv);
-}
-
-/*
* Given a kernel audit record, process as required. Kernel audit records
* are converted to one, or possibly two, BSM records, depending on whether
* there is a user audit record present also. Kernel records need be
@@ -356,23 +292,38 @@ audit_worker_rotate(struct ucred **audit_credp, struct vnode **audit_vpp,
* written to disk, and audit pipes.
*/
static void
-audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
- struct thread *audit_td, struct kaudit_record *ar)
+audit_worker_process_record(struct kaudit_record *ar)
{
struct au_record *bsm;
au_class_t class;
au_event_t event;
au_id_t auid;
int error, sorf;
+ int trail_locked;
+
+ /*
+ * We hold the audit_worker_sx lock over both writes, if there are
+ * two, so that the two records won't be split across a rotation and
+ * end up in two different trail files.
+ */
+ if (((ar->k_ar_commit & AR_COMMIT_USER) &&
+ (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL)) ||
+ (ar->k_ar_commit & AR_PRESELECT_TRAIL)) {
+ sx_xlock(&audit_worker_sx);
+ trail_locked = 1;
+ } else
+ trail_locked = 0;
/*
* First, handle the user record, if any: commit to the system trail
* and audit pipes as selected.
*/
if ((ar->k_ar_commit & AR_COMMIT_USER) &&
- (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL))
- audit_record_write(audit_vp, audit_cred, audit_td,
- ar->k_udata, ar->k_ulen);
+ (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL)) {
+ sx_assert(&audit_worker_sx, SA_XLOCKED);
+ audit_record_write(audit_vp, audit_cred, ar->k_udata,
+ ar->k_ulen);
+ }
if ((ar->k_ar_commit & AR_COMMIT_USER) &&
(ar->k_ar_commit & AR_PRESELECT_USER_PIPE))
@@ -381,7 +332,7 @@ audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
if (!(ar->k_ar_commit & AR_COMMIT_KERNEL) ||
((ar->k_ar_commit & AR_PRESELECT_PIPE) == 0 &&
(ar->k_ar_commit & AR_PRESELECT_TRAIL) == 0))
- return;
+ goto out;
auid = ar->k_ar.ar_subj_auid;
event = ar->k_ar.ar_event;
@@ -394,11 +345,11 @@ audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
error = kaudit_to_bsm(ar, &bsm);
switch (error) {
case BSM_NOAUDIT:
- return;
+ goto out;
case BSM_FAILURE:
printf("audit_worker_process_record: BSM_FAILURE\n");
- return;
+ goto out;
case BSM_SUCCESS:
break;
@@ -407,9 +358,10 @@ audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
panic("kaudit_to_bsm returned %d", error);
}
- if (ar->k_ar_commit & AR_PRESELECT_TRAIL)
- audit_record_write(audit_vp, audit_cred, audit_td, bsm->data,
- bsm->len);
+ if (ar->k_ar_commit & AR_PRESELECT_TRAIL) {
+ sx_assert(&audit_worker_sx, SA_XLOCKED);
+ audit_record_write(audit_vp, audit_cred, bsm->data, bsm->len);
+ }
if (ar->k_ar_commit & AR_PRESELECT_PIPE)
audit_pipe_submit(auid, event, class, sorf,
@@ -417,50 +369,39 @@ audit_worker_process_record(struct vnode *audit_vp, struct ucred *audit_cred,
bsm->len);
kau_free(bsm);
+out:
+ if (trail_locked)
+ sx_xunlock(&audit_worker_sx);
}
/*
* The audit_worker thread is responsible for watching the event queue,
* dequeueing records, converting them to BSM format, and committing them to
* disk. In order to minimize lock thrashing, records are dequeued in sets
- * to a thread-local work queue. In addition, the audit_work performs the
- * actual exchange of audit log vnode pointer, as audit_vp is a thread-local
- * variable.
+ * to a thread-local work queue.
+ *
+ * Note: this means that the effective bound on the size of the pending record
+ * queue is 2x the length of the global queue.
*/
static void
audit_worker(void *arg)
{
struct kaudit_queue ar_worklist;
struct kaudit_record *ar;
- struct ucred *audit_cred;
- struct thread *audit_td;
- struct vnode *audit_vp;
int lowater_signal;
- /*
- * These are thread-local variables requiring no synchronization.
- */
TAILQ_INIT(&ar_worklist);
- audit_cred = NULL;
- audit_td = curthread;
- audit_vp = NULL;
-
mtx_lock(&audit_mtx);
while (1) {
mtx_assert(&audit_mtx, MA_OWNED);
/*
- * Wait for record or rotation events.
+ * Wait for a record.
*/
- while (!audit_replacement_flag && TAILQ_EMPTY(&audit_q))
+ while (TAILQ_EMPTY(&audit_q))
cv_wait(&audit_worker_cv, &audit_mtx);
/*
- * First priority: replace the audit log target if requested.
- */
- audit_worker_rotate(&audit_cred, &audit_vp, audit_td);
-
- /*
* If there are records in the global audit record queue,
* transfer them to a thread-local queue and process them
* one by one. If we cross the low watermark threshold,
@@ -481,8 +422,7 @@ audit_worker(void *arg)
mtx_unlock(&audit_mtx);
while ((ar = TAILQ_FIRST(&ar_worklist))) {
TAILQ_REMOVE(&ar_worklist, ar, k_q);
- audit_worker_process_record(audit_vp, audit_cred,
- audit_td, ar);
+ audit_worker_process_record(ar);
audit_free(ar);
}
mtx_lock(&audit_mtx);
@@ -492,50 +432,45 @@ audit_worker(void *arg)
/*
* audit_rotate_vnode() is called by a user or kernel thread to configure or
* de-configure auditing on a vnode. The arguments are the replacement
- * credential and vnode to substitute for the current credential and vnode,
- * if any. If either is set to NULL, both should be NULL, and this is used
- * to indicate that audit is being disabled. The real work is done in the
- * audit_worker thread, but audit_rotate_vnode() waits synchronously for that
- * to complete.
- *
- * The vnode should be referenced and opened by the caller. The credential
- * should be referenced. audit_rotate_vnode() will own both references as of
- * this call, so the caller should not release either.
- *
- * XXXAUDIT: Review synchronize communication logic. Really, this is a
- * message queue of depth 1. We are essentially acquiring ownership of the
- * communications queue, inserting our message, and waiting for an
- * acknowledgement.
+ * credential (referenced) and vnode (referenced and opened) to substitute
+ * for the current credential and vnode, if any. If either is set to NULL,
+ * both should be NULL, and this is used to indicate that audit is being
+ * disabled. Any previous cred/vnode will be closed and freed. We re-enable
+ * generating rotation requests to auditd.
*/
void
audit_rotate_vnode(struct ucred *cred, struct vnode *vp)
{
+ struct ucred *old_audit_cred;
+ struct vnode *old_audit_vp;
+ int vfslocked;
- /*
- * If other parallel log replacements have been requested, we wait
- * until they've finished before continuing.
- */
- mtx_lock(&audit_mtx);
- while (audit_replacement_flag != 0)
- cv_wait(&audit_replacement_cv, &audit_mtx);
- audit_replacement_cred = cred;
- audit_replacement_flag = 1;
- audit_replacement_vp = vp;
+ KASSERT((cred != NULL && vp != NULL) || (cred == NULL && vp == NULL),
+ ("audit_rotate_vnode: cred %p vp %p", cred, vp));
/*
- * Wake up the audit worker to perform the exchange once we release
- * the mutex.
+ * Rotate the vnode/cred, and clear the rotate flag so that we will
+ * send a rotate trigger if the new file fills.
*/
- cv_signal(&audit_worker_cv);
+ sx_xlock(&audit_worker_sx);
+ old_audit_cred = audit_cred;
+ old_audit_vp = audit_vp;
+ audit_cred = cred;
+ audit_vp = vp;
+ audit_file_rotate_wait = 0;
+ audit_enabled = (audit_vp != NULL);
+ sx_xunlock(&audit_worker_sx);
/*
- * Wait for the audit_worker to broadcast that a replacement has
- * taken place; we know that once this has happened, our vnode has
- * been replaced in, so we can return successfully.
+ * If there was an old vnode/credential, close and free.
*/
- cv_wait(&audit_replacement_cv, &audit_mtx);
- audit_file_rotate_wait = 0; /* We can now request another rotation */
- mtx_unlock(&audit_mtx);
+ if (old_audit_vp != NULL) {
+ vfslocked = VFS_LOCK_GIANT(old_audit_vp->v_mount);
+ vn_close(old_audit_vp, AUDIT_CLOSE_FLAGS, old_audit_cred,
+ curthread);
+ VFS_UNLOCK_GIANT(vfslocked);
+ crfree(old_audit_cred);
+ }
}
void
@@ -543,7 +478,7 @@ audit_worker_init(void)
{
int error;
- cv_init(&audit_replacement_cv, "audit_replacement_cv");
+ sx_init(&audit_worker_sx, "audit_worker_sx");
error = kproc_create(audit_worker, NULL, &audit_thread, RFHIGHPID,
0, "audit");
if (error)
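
The heart of this diff is that the active trail's credential and vnode become shared state guarded by a single sx lock that is held across writes, while rotation installs the replacement under the same lock and closes the old vnode only after dropping it. The fragment below is a minimal userland analogue of that pattern, not part of the kernel code: a pthread mutex stands in for audit_worker_sx, a file descriptor for the audit_cred/audit_vp pair, and trail_rotate_wait for audit_file_rotate_wait. All names in it are illustrative.

/*
 * Userland sketch of the trail-rotation pattern introduced by this diff.
 */
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define TRAIL_MAX_SIZE	32		/* stand-in for audit_fstat.af_filesz */

static pthread_mutex_t trail_lock = PTHREAD_MUTEX_INITIALIZER;
static int trail_fd = -1;		/* -1 means auditing disabled */
static int trail_rotate_wait;		/* one rotation request at a time */

/*
 * Write one record with the lock held for the whole operation, so a
 * concurrent rotation cannot swap the trail out from underneath the write.
 */
static void
trail_write(const void *data, size_t len)
{
	struct stat sb;

	pthread_mutex_lock(&trail_lock);
	if (trail_fd != -1) {
		if (write(trail_fd, data, len) == -1)
			perror("trail_write");
		if (trail_rotate_wait == 0 && fstat(trail_fd, &sb) == 0 &&
		    sb.st_size >= TRAIL_MAX_SIZE) {
			/* Ask for one rotation, then stay quiet until it happens. */
			trail_rotate_wait = 1;
			printf("(would send rotate trigger here)\n");
		}
	}
	pthread_mutex_unlock(&trail_lock);
}

/*
 * Install a new trail under the lock, clear the pending-rotation flag, and
 * close the old descriptor only after the lock is dropped, mirroring
 * audit_rotate_vnode().  Passing -1 disables the trail, like NULL cred/vp.
 */
static void
trail_rotate(int newfd)
{
	int oldfd;

	pthread_mutex_lock(&trail_lock);
	oldfd = trail_fd;
	trail_fd = newfd;
	trail_rotate_wait = 0;
	pthread_mutex_unlock(&trail_lock);

	if (oldfd != -1)
		close(oldfd);
}

int
main(void)
{
	trail_rotate(open("trail.0", O_WRONLY | O_CREAT | O_APPEND, 0600));
	trail_write("record one\n", 11);
	trail_rotate(open("trail.1", O_WRONLY | O_CREAT | O_APPEND, 0600));
	trail_write("record two\n", 11);
	trail_rotate(-1);
	return (0);
}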
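
The condition-variable handshake that used to drive rotation is replaced here by an sx(9) shared/exclusive lock (audit_worker_sx). As a quick reference, the fragment below sketches the sx(9) calls the new code relies on, roughly as they appear in the diff; it assumes a FreeBSD kernel compilation environment, the example_* identifiers are illustrative, and it is not intended to build as a standalone module.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx example_sx;

static void
example_init(void)
{
	/* One-time setup, as audit_worker_init() does for audit_worker_sx. */
	sx_init(&example_sx, "example_sx");
}

static void
example_exclusive(void)
{
	/* Exclusive acquisition around state changes, as in audit_rotate_vnode(). */
	sx_xlock(&example_sx);
	sx_assert(&example_sx, SA_XLOCKED);	/* callee-side sanity check */
	sx_xunlock(&example_sx);
}

static void
example_shared(void)
{
	/* SA_LOCKED accepts either mode, as asserted in audit_record_write(). */
	sx_slock(&example_sx);
	sx_assert(&example_sx, SA_LOCKED);
	sx_sunlock(&example_sx);
}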