1 files changed, 62 insertions, 123 deletions
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index e508105..1d99a56 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -130,12 +130,12 @@ static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "Async IO management");
 static int max_aio_procs = MAX_AIO_PROCS;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs,
 	CTLFLAG_RW, &max_aio_procs, 0,
-	"Maximum number of kernel threads to use for handling async IO ");
+	"Maximum number of kernel processes to use for handling async IO ");
 
 static int num_aio_procs = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
 	CTLFLAG_RD, &num_aio_procs, 0,
-	"Number of presently active kernel threads for async IO");
+	"Number of presently active kernel processes for async IO");
 
 /*
  * The code will adjust the actual number of AIO processes towards this
@@ -143,7 +143,7 @@ SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs,
  */
 static int target_aio_procs = TARGET_AIO_PROCS;
 SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs,
-	0, "Preferred number of ready kernel threads for async IO");
+	0, "Preferred number of ready kernel processes for async IO");
 
 static int max_queue_count = MAX_AIO_QUEUE;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0,
@@ -157,7 +157,7 @@ static int num_buf_aio = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0,
     "Number of aio requests presently handled by the buf subsystem");
 
-/* Number of async I/O thread in the process of being started */
+/* Number of async I/O processes in the process of being started */
 /* XXX This should be local to aio_aqueue() */
 static int num_aio_resv_start = 0;
 
@@ -210,8 +210,8 @@ typedef struct oaiocb {
  * Current, there is only two backends: BIO and generic file I/O.
  * socket I/O is served by generic file I/O, this is not a good idea, since
  * disk file I/O and any other types without O_NONBLOCK flag can block daemon
- * threads, if there is no thread to serve socket I/O, the socket I/O will be
- * delayed too long or starved, we should create some threads dedicated to
+ * processes, if there is no thread to serve socket I/O, the socket I/O will be
+ * delayed too long or starved, we should create some processes dedicated to
  * sockets to do non-blocking I/O, same for pipe and fifo, for these I/O
  * systems we really need non-blocking interface, fiddling O_NONBLOCK in file
  * structure is not safe because there is race between userland and aio
@@ -253,10 +253,10 @@ struct aiocblist {
  */
 #define AIOP_FREE	0x1			/* proc on free queue */
 
-struct aiothreadlist {
-	int aiothreadflags;			/* (c) AIO proc flags */
-	TAILQ_ENTRY(aiothreadlist) list;	/* (c) list of processes */
-	struct thread *aiothread;		/* (*) the AIO thread */
+struct aioproc {
+	int aioprocflags;			/* (c) AIO proc flags */
+	TAILQ_ENTRY(aioproc) list;		/* (c) list of processes */
+	struct proc *aioproc;			/* (*) the AIO proc */
 };
 
 /*
@@ -293,11 +293,8 @@ struct kaioinfo {
 	TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */
 	TAILQ_HEAD(,aiocblist) kaio_jobqueue;	/* (a) job queue for process */
 	TAILQ_HEAD(,aiocblist) kaio_bufqueue;	/* (a) buffer job queue for process */
-	TAILQ_HEAD(,aiocblist) kaio_sockqueue;  /* (a) queue for aios waiting on sockets,
-						 *  NOT USED YET.
-						 */
 	TAILQ_HEAD(,aiocblist) kaio_syncqueue;	/* (a) queue for aio_fsync */
-	struct	task	kaio_task;	/* (*) task to kick aio threads */
+	struct	task	kaio_task;	/* (*) task to kick aio processes */
 };
 
 #define AIO_LOCK(ki)		mtx_lock(&(ki)->kaio_mtx)
@@ -322,10 +319,9 @@ struct aiocb_ops {
 	int	(*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
 };
 
-static TAILQ_HEAD(,aiothreadlist) aio_freeproc;		/* (c) Idle daemons */
+static TAILQ_HEAD(,aioproc) aio_freeproc;		/* (c) Idle daemons */
 static struct sema aio_newproc_sem;
 static struct mtx aio_job_mtx;
-static struct mtx aio_sock_mtx;
 static TAILQ_HEAD(,aiocblist) aio_jobs;			/* (c) Async job list */
 static struct unrhdr *aiod_unr;
 
@@ -361,7 +357,7 @@ static int	filt_lio(struct knote *kn, long hint);
 /*
  * Zones for:
  * 	kaio	Per process async io info
- *	aiop	async io thread data
+ *	aiop	async io process data
  *	aiocb	async io jobs
  *	aiol	list io job pointer - internal to aio_suspend XXX
  *	aiolio	list io jobs
@@ -483,12 +479,11 @@ aio_onceonly(void)
 	TAILQ_INIT(&aio_freeproc);
 	sema_init(&aio_newproc_sem, 0, "aio_new_proc");
 	mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF);
-	mtx_init(&aio_sock_mtx, "aio_sock", NULL, MTX_DEF);
 	TAILQ_INIT(&aio_jobs);
 	aiod_unr = new_unrhdr(1, INT_MAX, NULL);
 	kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-	aiop_zone = uma_zcreate("AIOP", sizeof(struct aiothreadlist), NULL,
+	aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	aiocb_zone = uma_zcreate("AIOCB", sizeof(struct aiocblist), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
@@ -556,7 +551,6 @@ aio_unload(void)
 	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
 	EVENTHANDLER_DEREGISTER(process_exec, exec_tag);
 	mtx_destroy(&aio_job_mtx);
-	mtx_destroy(&aio_sock_mtx);
 	sema_destroy(&aio_newproc_sem);
 	p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, -1);
 	p31b_setcfg(CTL_P1003_1B_AIO_MAX, -1);
@@ -587,7 +581,6 @@ aio_init_aioinfo(struct proc *p)
 	TAILQ_INIT(&ki->kaio_jobqueue);
 	TAILQ_INIT(&ki->kaio_bufqueue);
 	TAILQ_INIT(&ki->kaio_liojoblist);
-	TAILQ_INIT(&ki->kaio_sockqueue);
 	TAILQ_INIT(&ki->kaio_syncqueue);
 	TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p);
 	PROC_LOCK(p);
@@ -803,7 +796,7 @@ restart:
  * Select a job to run (called by an AIO daemon).
  */
 static struct aiocblist *
-aio_selectjob(struct aiothreadlist *aiop)
+aio_selectjob(struct aioproc *aiop)
 {
 	struct aiocblist *aiocbe;
 	struct kaioinfo *ki;
@@ -1048,6 +1041,13 @@ notification_done:
 	}
 }
 
+static void
+aio_switch_vmspace(struct aiocblist *aiocbe)
+{
+
+	vmspace_switch_aio(aiocbe->userproc->p_vmspace);
+}
+
 /*
  * The AIO daemon, most of the actual work is done in aio_process_*,
  * but the setup (and address space mgmt) is done in this routine.
@@ -1056,31 +1056,30 @@ static void
 aio_daemon(void *_id)
 {
 	struct aiocblist *aiocbe;
-	struct aiothreadlist *aiop;
+	struct aioproc *aiop;
 	struct kaioinfo *ki;
-	struct proc *curcp, *mycp, *userp;
-	struct vmspace *myvm, *tmpvm;
+	struct proc *p, *userp;
+	struct vmspace *myvm;
 	struct thread *td = curthread;
 	int id = (intptr_t)_id;
 
 	/*
-	 * Local copies of curproc (cp) and vmspace (myvm)
+	 * Grab an extra reference on the daemon's vmspace so that it
+	 * doesn't get freed by jobs that switch to a different
+	 * vmspace.
 	 */
-	mycp = td->td_proc;
-	myvm = mycp->p_vmspace;
+	p = td->td_proc;
+	myvm = vmspace_acquire_ref(p);
 
-	KASSERT(mycp->p_textvp == NULL, ("kthread has a textvp"));
+	KASSERT(p->p_textvp == NULL, ("kthread has a textvp"));
 
 	/*
 	 * Allocate and ready the aio control info.  There is one aiop structure
 	 * per daemon.
 	 */
 	aiop = uma_zalloc(aiop_zone, M_WAITOK);
-	aiop->aiothread = td;
-	aiop->aiothreadflags = 0;
-
-	/* The daemon resides in its own pgrp. */
-	sys_setsid(td, NULL);
+	aiop->aioproc = p;
+	aiop->aioprocflags = 0;
 
 	/*
 	 * Wakeup parent process.  (Parent sleeps to keep from blasting away
@@ -1091,17 +1090,11 @@ aio_daemon(void *_id)
 	mtx_lock(&aio_job_mtx);
 	for (;;) {
 		/*
-		 * curcp is the current daemon process context.
-		 * userp is the current user process context.
-		 */
-		curcp = mycp;
-
-		/*
 		 * Take daemon off of free queue
 		 */
-		if (aiop->aiothreadflags & AIOP_FREE) {
+		if (aiop->aioprocflags & AIOP_FREE) {
 			TAILQ_REMOVE(&aio_freeproc, aiop, list);
-			aiop->aiothreadflags &= ~AIOP_FREE;
+			aiop->aioprocflags &= ~AIOP_FREE;
 		}
 
 		/*
@@ -1114,34 +1107,7 @@ aio_daemon(void *_id)
 			/*
 			 * Connect to process address space for user program.
 			 */
-			if (userp != curcp) {
-				/*
-				 * Save the current address space that we are
-				 * connected to.
-				 */
-				tmpvm = mycp->p_vmspace;
-
-				/*
-				 * Point to the new user address space, and
-				 * refer to it.
-				 */
-				mycp->p_vmspace = userp->p_vmspace;
-				atomic_add_int(&mycp->p_vmspace->vm_refcnt, 1);
-
-				/* Activate the new mapping. */
-				pmap_activate(FIRST_THREAD_IN_PROC(mycp));
-
-				/*
-				 * If the old address space wasn't the daemons
-				 * own address space, then we need to remove the
-				 * daemon's reference from the other process
-				 * that it was acting on behalf of.
-				 */
-				if (tmpvm != myvm) {
-					vmspace_free(tmpvm);
-				}
-				curcp = userp;
-			}
+			aio_switch_vmspace(aiocbe);
 
 			ki = userp->p_aioinfo;
 
@@ -1175,34 +1141,13 @@ aio_daemon(void *_id)
 		/*
 		 * Disconnect from user address space.
 		 */
-		if (curcp != mycp) {
-
+		if (p->p_vmspace != myvm) {
 			mtx_unlock(&aio_job_mtx);
-
-			/* Get the user address space to disconnect from. */
-			tmpvm = mycp->p_vmspace;
-
-			/* Get original address space for daemon. */
-			mycp->p_vmspace = myvm;
-
-			/* Activate the daemon's address space. */
-			pmap_activate(FIRST_THREAD_IN_PROC(mycp));
-#ifdef DIAGNOSTIC
-			if (tmpvm == myvm) {
-				printf("AIOD: vmspace problem -- %d\n",
-				    mycp->p_pid);
-			}
-#endif
-			/* Remove our vmspace reference. */
-			vmspace_free(tmpvm);
-
-			curcp = mycp;
-
+			vmspace_switch_aio(myvm);
 			mtx_lock(&aio_job_mtx);
 			/*
 			 * We have to restart to avoid race, we only sleep if
-			 * no job can be selected, that should be
-			 * curcp == mycp.
+			 * no job can be selected.
 			 */
 			continue;
 		}
@@ -1210,36 +1155,30 @@ aio_daemon(void *_id)
 		mtx_assert(&aio_job_mtx, MA_OWNED);
 
 		TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
-		aiop->aiothreadflags |= AIOP_FREE;
+		aiop->aioprocflags |= AIOP_FREE;
 
 		/*
 		 * If daemon is inactive for a long time, allow it to exit,
 		 * thereby freeing resources.
 		 */
-		if (msleep(aiop->aiothread, &aio_job_mtx, PRIBIO, "aiordy",
-		    aiod_lifetime)) {
-			if (TAILQ_EMPTY(&aio_jobs)) {
-				if ((aiop->aiothreadflags & AIOP_FREE) &&
-				    (num_aio_procs > target_aio_procs)) {
-					TAILQ_REMOVE(&aio_freeproc, aiop, list);
-					num_aio_procs--;
-					mtx_unlock(&aio_job_mtx);
-					uma_zfree(aiop_zone, aiop);
-					free_unr(aiod_unr, id);
-#ifdef DIAGNOSTIC
-					if (mycp->p_vmspace->vm_refcnt <= 1) {
-						printf("AIOD: bad vm refcnt for"
-						    " exiting daemon: %d\n",
-						    mycp->p_vmspace->vm_refcnt);
-					}
-#endif
-					kproc_exit(0);
-				}
-			}
-		}
+		if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy",
+		    aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) &&
+		    (aiop->aioprocflags & AIOP_FREE) &&
+		    num_aio_procs > target_aio_procs)
+			break;
 	}
+	TAILQ_REMOVE(&aio_freeproc, aiop, list);
+	num_aio_procs--;
 	mtx_unlock(&aio_job_mtx);
-	panic("shouldn't be here\n");
+	uma_zfree(aiop_zone, aiop);
+	free_unr(aiod_unr, id);
+	vmspace_free(myvm);
+
+	KASSERT(p->p_vmspace == myvm,
+	    ("AIOD: bad vmspace for exiting daemon"));
+	KASSERT(myvm->vm_refcnt > 1,
+	    ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt));
+	kproc_exit(0);
 }
 
 /*
@@ -1842,13 +1781,13 @@ static void
 aio_kick_nowait(struct proc *userp)
 {
 	struct kaioinfo *ki = userp->p_aioinfo;
-	struct aiothreadlist *aiop;
+	struct aioproc *aiop;
 
 	mtx_assert(&aio_job_mtx, MA_OWNED);
 	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
 		TAILQ_REMOVE(&aio_freeproc, aiop, list);
-		aiop->aiothreadflags &= ~AIOP_FREE;
-		wakeup(aiop->aiothread);
+		aiop->aioprocflags &= ~AIOP_FREE;
+		wakeup(aiop->aioproc);
 	} else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
 	    ((ki->kaio_active_count + num_aio_resv_start) <
 	    ki->kaio_maxactive_count)) {
@@ -1860,15 +1799,15 @@ static int
 aio_kick(struct proc *userp)
 {
 	struct kaioinfo *ki = userp->p_aioinfo;
-	struct aiothreadlist *aiop;
+	struct aioproc *aiop;
 	int error, ret = 0;
 
 	mtx_assert(&aio_job_mtx, MA_OWNED);
 retryproc:
 	if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) {
 		TAILQ_REMOVE(&aio_freeproc, aiop, list);
-		aiop->aiothreadflags &= ~AIOP_FREE;
-		wakeup(aiop->aiothread);
+		aiop->aioprocflags &= ~AIOP_FREE;
+		wakeup(aiop->aioproc);
 	} else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
 	    ((ki->kaio_active_count + num_aio_resv_start) <
 	    ki->kaio_maxactive_count)) {