author | grog <grog@FreeBSD.org> | 2000-05-04 07:33:40 +0000 |
---|---|---|
committer | grog <grog@FreeBSD.org> | 2000-05-04 07:33:40 +0000 |
commit | d0290ff674c66a4238f0e4143605558dbcb1c348 (patch) | |
tree | 7256a7bb23343347d248e203e4dbac1d070059ee /sys/dev/vinum | |
parent | a18a701503bed79ea18d1870ee8acfbd19d589c8 (diff) | |
download | FreeBSD-src-d0290ff674c66a4238f0e4143605558dbcb1c348.zip FreeBSD-src-d0290ff674c66a4238f0e4143605558dbcb1c348.tar.gz |
Don't hide bio structure behind macros like b_ioflags.
Get counting volume I/Os right.
Count buffer sizes correctly for architectures where ints are not 32 bits.
complete_rqe: Move decrementing active count until after call to
complete_raid5_write, thus possibly avoiding a race condition.
Suggested-by: dillon
Rename user bp to ubp to avoid confusion.
Tidy up comments.
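The buffer-size item refers to length calculations such as `length = urqe->grouplen << (DEV_BSHIFT - 2)`, which hard-code the assumption that an int is four bytes (the `- 2` is log2(4)). The following is a minimal standalone sketch, not vinum code, contrasting the old shift form with the new arithmetic form; DEV_BSHIFT/DEV_BSIZE are the usual 512-byte sector constants and `grouplen` merely stands in for urqe->grouplen:

```c
/* Standalone illustration only; not part of the commit. */
#include <stdio.h>

#define DEV_BSHIFT 9                    /* log2 of the sector size */
#define DEV_BSIZE  (1 << DEV_BSHIFT)    /* 512-byte sectors */

int
main(void)
{
    size_t grouplen = 3;                /* transfer length in sectors */

    /* old form: subtracting 2 from the shift divides by 4, i.e. assumes sizeof(int) == 4 */
    size_t old_length = grouplen << (DEV_BSHIFT - 2);

    /* new form: divides by the real int size on this architecture */
    size_t new_length = grouplen * (DEV_BSIZE / sizeof(int));

    printf("ints to process: old %zu, new %zu (sizeof(int) = %zu)\n",
        old_length, new_length, sizeof(int));
    return 0;
}
```

On any platform where sizeof(int) is not 4, the two values disagree, and the old form would make the XOR loops walk the wrong number of words.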
Diffstat (limited to 'sys/dev/vinum')
-rw-r--r-- | sys/dev/vinum/vinuminterrupt.c | 62 |
1 files changed, 35 insertions, 27 deletions
diff --git a/sys/dev/vinum/vinuminterrupt.c b/sys/dev/vinum/vinuminterrupt.c
index 7d3e163..4effd06 100644
--- a/sys/dev/vinum/vinuminterrupt.c
+++ b/sys/dev/vinum/vinuminterrupt.c
@@ -83,13 +83,13 @@ complete_rqe(struct buf *bp)
     if ((drive->active == (DRIVE_MAXACTIVE - 1))    /* we were at the drive limit */
     ||(vinum_conf.active == VINUM_MAXACTIVE))    /* or the global limit */
         wakeup(&launch_requests);    /* let another one at it */
-    if ((bp->b_ioflags & BIO_ERROR) != 0) {    /* transfer in error */
+    if ((bp->b_io.bio_flags & BIO_ERROR) != 0) {    /* transfer in error */
         if (bp->b_error != 0)    /* did it return a number? */
             rq->error = bp->b_error;    /* yes, put it in. */
         else if (rq->error == 0)    /* no: do we have one already? */
             rq->error = EIO;    /* no: catchall "I/O error" */
         SD[rqe->sdno].lasterror = rq->error;
-        if (bp->b_iocmd == BIO_READ) {
+        if (bp->b_iocmd == BIO_READ) {    /* read operation */
             log(LOG_ERR, "%s: fatal read I/O error\n", SD[rqe->sdno].name);
             set_sd_state(rqe->sdno, sd_crashed, setstate_force);    /* subdisk is crashed */
         } else {    /* write operation */
@@ -112,8 +112,10 @@ complete_rqe(struct buf *bp)
         SD[rqe->sdno].bytes_read += bp->b_bcount;
         PLEX[rqe->rqg->plexno].reads++;
         PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
-        if (PLEX[rqe->rqg->plexno].volno >= 0)
+        if (PLEX[rqe->rqg->plexno].volno >= 0) {    /* volume I/O, not plex */
+            VOL[PLEX[rqe->rqg->plexno].volno].reads++;
             VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount;
+        }
     } else {    /* write operation */
         DRIVE[rqe->driveno].writes++;
         DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
@@ -121,9 +123,11 @@ complete_rqe(struct buf *bp)
         SD[rqe->sdno].bytes_written += bp->b_bcount;
         PLEX[rqe->rqg->plexno].writes++;
         PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
-        if (PLEX[rqe->rqg->plexno].volno >= 0)
+        if (PLEX[rqe->rqg->plexno].volno >= 0) {    /* volume I/O, not plex */
+            VOL[PLEX[rqe->rqg->plexno].volno].writes++;
             VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount;
         }
+    }
     if (rqg->flags & XFR_RECOVERY_READ) {    /* recovery read, */
         int *sdata;    /* source */
         int *data;    /* and group data */
@@ -134,7 +138,7 @@ complete_rqe(struct buf *bp)
         /* XOR destination is the user data */
         sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT];    /* old data contents */
         data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT];    /* destination */
-        length = urqe->grouplen << (DEV_BSHIFT - 2);    /* and count involved */
+        length = urqe->grouplen * (DEV_BSIZE / sizeof(int));    /* and number of ints */

         for (count = 0; count < length; count++)
             data[count] ^= sdata[count];
@@ -154,9 +158,15 @@ complete_rqe(struct buf *bp)
             bcopy(src, dst, length);    /* move it */
         }
     } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE))    /* RAID 4/5 group write operation */
-    &&(rqg->active == 1))    /* and this is the last rq of phase 1 */
+    &&(rqg->active == 1))    /* and this is the last active request */
         complete_raid5_write(rqe);
-    rqg->active--;    /* one less request active */
+    /*
+     * This is the earliest place where we can be
+     * sure that the request has really finished,
+     * since complete_raid5_write can issue new
+     * requests.
+     */
+    rqg->active--;    /* this request now finished */
     if (rqg->active == 0) {    /* request group finished, */
         rq->active--;    /* one less */
         if (rqg->lock) {    /* got a lock? */
@@ -174,7 +184,7 @@ complete_rqe(struct buf *bp)
         if (rq->error) {    /* did we have an error? */
             if (rq->isplex) {    /* plex operation, */
-                ubp->b_ioflags |= BIO_ERROR;    /* yes, propagate to user */
+                ubp->b_io.bio_flags |= BIO_ERROR;    /* yes, propagate to user */
                 ubp->b_error = rq->error;
             } else    /* try to recover */
                 queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq);    /* let the daemon complete */
@@ -216,8 +226,8 @@ sdio_done(struct buf *bp)
     struct sdbuf *sbp;

     sbp = (struct sdbuf *) bp;
-    if (sbp->b.b_ioflags & BIO_ERROR) {    /* had an error */
-        sbp->bp->b_ioflags |= BIO_ERROR;    /* propagate upwards */
+    if (sbp->b.b_io.bio_flags & BIO_ERROR) {    /* had an error */
+        sbp->bp->b_io.bio_flags |= BIO_ERROR;    /* propagate upwards */
         sbp->bp->b_error = sbp->b.b_error;
     }
 #ifdef VINUMDEBUG
@@ -251,7 +261,7 @@ complete_raid5_write(struct rqelement *rqe)
     int count;    /* loop counter */
     int rqno;    /* request index */
     int rqoffset;    /* offset of request data from parity data */
-    struct buf *bp;    /* user buffer header */
+    struct buf *ubp;    /* user buffer header */
     struct request *rq;    /* pointer to our request */
     struct rqgroup *rqg;    /* and to the request group */
     struct rqelement *prqe;    /* point to the parity block */
@@ -259,7 +269,7 @@ complete_raid5_write(struct rqelement *rqe)

     rqg = rqe->rqg;    /* and to our request group */
     rq = rqg->rq;    /* point to our request */
-    bp = rq->bp;    /* user's buffer header */
+    ubp = rq->bp;    /* user's buffer header */
     prqe = &rqg->rqe[0];    /* point to the parity block */

     /*
@@ -270,25 +280,18 @@ complete_raid5_write(struct rqelement *rqe)
      * difference is the origin of the data and the
      * address range.
      */
-
     if (rqe->flags & XFR_DEGRADED_WRITE) {    /* do the degraded write stuff */
         pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]);    /* parity data pointer */
         bzero(pdata, prqe->grouplen << DEV_BSHIFT);    /* start with nothing in the parity block */

         /* Now get what data we need from each block */
         for (rqno = 1; rqno < rqg->count; rqno++) {    /* for all the data blocks */
-            /*
-             * This can do with improvement. If we're doing
-             * both a degraded and a normal write, we don't
-             * need to xor (nor to read) the part of the block
-             * that we're going to overwrite. FIXME XXX
-             */
             rqe = &rqg->rqe[rqno];    /* this request */
             sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]);    /* old data */
             length = rqe->grouplen << (DEV_BSHIFT - 2);    /* and count involved */

             /*
-             * add the data block to the parity block. Before
+             * Add the data block to the parity block. Before
              * we started the request, we zeroed the parity
              * block, so the result of adding all the other
              * blocks and the block we want to write will be
@@ -312,7 +315,8 @@ complete_raid5_write(struct rqelement *rqe)
             sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT];    /* old data contents */
             rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset;    /* corresponding parity block offset */
             pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]);    /* parity data pointer */
-            length = rqe->datalen << (DEV_BSHIFT - 2);    /* and count involved */
+            length = rqe->datalen * (DEV_BSIZE / sizeof(int));    /* and number of ints */
+
             /*
              * "remove" the old data block
              * from the parity block
@@ -326,9 +330,9 @@ complete_raid5_write(struct rqelement *rqe)
                 pdata[count] ^= sdata[count];

             /* "add" the new data block */
-            sdata = (int *) (&bp->b_data[rqe->useroffset << DEV_BSHIFT]);    /* new data */
-            if ((sdata < ((int *) bp->b_data))
-            || (&sdata[length] > ((int *) (bp->b_data + bp->b_bcount))))
+            sdata = (int *) (&ubp->b_data[rqe->useroffset << DEV_BSHIFT]);    /* new data */
+            if ((sdata < ((int *) ubp->b_data))
+            || (&sdata[length] > ((int *) (ubp->b_data + ubp->b_bcount))))
                 panic("complete_raid5_write: bounds overflow");
             for (count = 0; count < length; count++)
                 pdata[count] ^= sdata[count];
@@ -346,7 +350,7 @@ complete_raid5_write(struct rqelement *rqe)
         rqe->b.b_iocmd = BIO_WRITE;    /* we're writing now */
         rqe->b.b_iodone = complete_rqe;    /* call us here when done */
         rqe->flags &= ~XFR_PARITYOP;    /* reset flags that brought us here */
-        rqe->b.b_data = &bp->b_data[rqe->useroffset << DEV_BSHIFT];    /* point to the user data */
+        rqe->b.b_data = &ubp->b_data[rqe->useroffset << DEV_BSHIFT];    /* point to the user data */
         rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT;    /* length to write */
         rqe->b.b_bufsize = rqe->b.b_bcount;    /* don't claim more */
         rqe->b.b_resid = rqe->b.b_bcount;    /* nothing transferred */
@@ -373,7 +377,7 @@ complete_raid5_write(struct rqelement *rqe)
                 rqe->b.b_blkno,
                 rqe->b.b_bcount);
         if (debug & DEBUG_LASTREQS)
-            logrq(loginfo_raid5_data, (union rqinfou) rqe, bp);
+            logrq(loginfo_raid5_data, (union rqinfou) rqe, ubp);
 #endif
         DEV_STRATEGY(&rqe->b, 0);
     }
@@ -412,7 +416,7 @@ complete_raid5_write(struct rqelement *rqe)
                 rqe->b.b_blkno,
                 rqe->b.b_bcount);
         if (debug & DEBUG_LASTREQS)
-            logrq(loginfo_raid5_parity, (union rqinfou) rqe, bp);
+            logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubp);
 #endif
         DEV_STRATEGY(&rqe->b, 0);
     }
+
+/* Local Variables: */
+/* fill-column: 50 */
+/* End: */
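The comment added ahead of `rqg->active--` records the reasoning behind the race-condition item in the commit message: the request must still be counted as active while complete_raid5_write may launch further I/O, otherwise the group can momentarily look finished. A deliberately simplified, single-threaded sketch of that general hazard follows; the counters and helper names here are purely illustrative, and the real code is interrupt-driven with different details:

```c
/* Illustrative only; loosely modelled on the idea, not on vinum's data structures. */
#include <stdio.h>

struct group_sketch {
    int active;                         /* requests still outstanding in the group */
};

/* stands in for complete_raid5_write(): launches the follow-up writes */
static void
issue_followup_writes(struct group_sketch *g)
{
    g->active += 2;                     /* two more requests now in flight */
}

static void
finish_request(struct group_sketch *g, int decrement_first)
{
    if (decrement_first) {              /* hazardous ordering */
        g->active--;
        if (g->active == 0)
            printf("group looks finished, but more I/O is about to be issued\n");
        issue_followup_writes(g);
    } else {                            /* ordering described in the new comment */
        issue_followup_writes(g);
        g->active--;                    /* only now is this request really finished */
        if (g->active == 0)
            printf("group finished\n");
    }
}

int
main(void)
{
    struct group_sketch a = { 1 }, b = { 1 };

    finish_request(&a, 1);              /* prints the premature-completion message */
    finish_request(&b, 0);              /* stays active until the follow-up writes complete */
    return 0;
}
```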
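The XOR loops in complete_raid5_write implement the read-modify-write parity update that the in-code comments describe: "remove" the old data from the parity block, then "add" the new data. A small self-contained sketch of that arithmetic, not vinum code, with the blocks shrunk to a few ints:

```c
/* Standalone illustration of the word-by-word parity update; not vinum code. */
#include <stdio.h>

#define NWORDS 4

int
main(void)
{
    int old_data[NWORDS] = { 1, 2, 3, 4 };      /* block contents currently on disk */
    int new_data[NWORDS] = { 5, 6, 7, 8 };      /* data being written */
    int other[NWORDS]    = { 9, 10, 11, 12 };   /* the other data block in the stripe */
    int parity[NWORDS];
    int count;

    for (count = 0; count < NWORDS; count++)    /* parity as it stands on disk */
        parity[count] = old_data[count] ^ other[count];

    for (count = 0; count < NWORDS; count++) {
        parity[count] ^= old_data[count];       /* "remove" the old data block */
        parity[count] ^= new_data[count];       /* "add" the new data block */
    }

    for (count = 0; count < NWORDS; count++)    /* parity now covers the new data */
        if (parity[count] != (new_data[count] ^ other[count]))
            printf("parity mismatch at word %d\n", count);
    printf("parity block updated\n");
    return 0;
}
```

Because the unchanged blocks cancel out of the XOR, only the parity block and the blocks actually being written need to be read back before the update.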