summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--block/blk-core.c82
-rw-r--r--block/cfq-iosched.c7
-rw-r--r--block/deadline-iosched.c2
-rw-r--r--drivers/block/aoe/aoecmd.c2
-rw-r--r--drivers/block/brd.c3
-rw-r--r--drivers/block/floppy.c1
-rw-r--r--drivers/block/pktcdvd.c102
-rw-r--r--drivers/block/rbd.c2
-rw-r--r--drivers/md/dm-crypt.c3
-rw-r--r--drivers/md/dm-raid1.c2
-rw-r--r--drivers/md/dm-stripe.c2
-rw-r--r--drivers/md/dm-verity.c4
-rw-r--r--drivers/md/faulty.c6
-rw-r--r--drivers/md/linear.c3
-rw-r--r--drivers/md/md.c17
-rw-r--r--drivers/md/raid0.c9
-rw-r--r--drivers/md/raid1.c133
-rw-r--r--drivers/md/raid10.c78
-rw-r--r--drivers/md/raid5.c49
-rw-r--r--drivers/message/fusion/mptsas.c6
-rw-r--r--drivers/s390/block/dcssblk.c3
-rw-r--r--drivers/scsi/libsas/sas_expander.c6
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_transport.c10
-rw-r--r--fs/bio-integrity.c144
-rw-r--r--fs/bio.c366
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/exofs/ore.c2
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/logfs/dev_bdev.c5
-rw-r--r--include/linux/bio.h115
-rw-r--r--include/linux/blk_types.h3
-rw-r--r--include/trace/events/block.h12
-rw-r--r--mm/bounce.c75
-rw-r--r--mm/page_io.c1
39 files changed, 645 insertions, 630 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 441f348..f224d17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -159,20 +159,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
error = -EIO;
- if (unlikely(nbytes > bio->bi_size)) {
- printk(KERN_ERR "%s: want %u bytes done, %u left\n",
- __func__, nbytes, bio->bi_size);
- nbytes = bio->bi_size;
- }
-
if (unlikely(rq->cmd_flags & REQ_QUIET))
set_bit(BIO_QUIET, &bio->bi_flags);
- bio->bi_size -= nbytes;
- bio->bi_sector += (nbytes >> 9);
-
- if (bio_integrity(bio))
- bio_integrity_advance(bio, nbytes);
+ bio_advance(bio, nbytes);
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
@@ -1609,7 +1599,7 @@ static void handle_bad_sector(struct bio *bio)
printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
bdevname(bio->bi_bdev, b),
bio->bi_rw,
- (unsigned long long)bio->bi_sector + bio_sectors(bio),
+ (unsigned long long)bio_end_sector(bio),
(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
set_bit(BIO_EOF, &bio->bi_flags);
@@ -2292,8 +2282,7 @@ EXPORT_SYMBOL(blk_fetch_request);
**/
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
{
- int total_bytes, bio_nbytes, next_idx = 0;
- struct bio *bio;
+ int total_bytes;
if (!req->bio)
return false;
@@ -2339,56 +2328,21 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
blk_account_io_completion(req, nr_bytes);
- total_bytes = bio_nbytes = 0;
- while ((bio = req->bio) != NULL) {
- int nbytes;
+ total_bytes = 0;
+ while (req->bio) {
+ struct bio *bio = req->bio;
+ unsigned bio_bytes = min(bio->bi_size, nr_bytes);
- if (nr_bytes >= bio->bi_size) {
+ if (bio_bytes == bio->bi_size)
req->bio = bio->bi_next;
- nbytes = bio->bi_size;
- req_bio_endio(req, bio, nbytes, error);
- next_idx = 0;
- bio_nbytes = 0;
- } else {
- int idx = bio->bi_idx + next_idx;
-
- if (unlikely(idx >= bio->bi_vcnt)) {
- blk_dump_rq_flags(req, "__end_that");
- printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
- __func__, idx, bio->bi_vcnt);
- break;
- }
-
- nbytes = bio_iovec_idx(bio, idx)->bv_len;
- BIO_BUG_ON(nbytes > bio->bi_size);
-
- /*
- * not a complete bvec done
- */
- if (unlikely(nbytes > nr_bytes)) {
- bio_nbytes += nr_bytes;
- total_bytes += nr_bytes;
- break;
- }
- /*
- * advance to the next vector
- */
- next_idx++;
- bio_nbytes += nbytes;
- }
+ req_bio_endio(req, bio, bio_bytes, error);
- total_bytes += nbytes;
- nr_bytes -= nbytes;
+ total_bytes += bio_bytes;
+ nr_bytes -= bio_bytes;
- bio = req->bio;
- if (bio) {
- /*
- * end more in this run, or just return 'not-done'
- */
- if (unlikely(nr_bytes <= 0))
- break;
- }
+ if (!nr_bytes)
+ break;
}
/*
@@ -2404,16 +2358,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
return false;
}
- /*
- * if the request wasn't completed, update state
- */
- if (bio_nbytes) {
- req_bio_endio(req, bio, bio_nbytes, error);
- bio->bi_idx += next_idx;
- bio_iovec(bio)->bv_offset += nr_bytes;
- bio_iovec(bio)->bv_len -= nr_bytes;
- }
-
req->__data_len -= total_bytes;
req->buffer = bio_data(req->bio);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4f0ade7..d5cd3131 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2270,11 +2270,8 @@ cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
return NULL;
cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
- if (cfqq) {
- sector_t sector = bio->bi_sector + bio_sectors(bio);
-
- return elv_rb_find(&cfqq->sort_list, sector);
- }
+ if (cfqq)
+ return elv_rb_find(&cfqq->sort_list, bio_end_sector(bio));
return NULL;
}
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 90037b5..ba19a3a 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -132,7 +132,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
* check for front merge
*/
if (dd->front_merges) {
- sector_t sector = bio->bi_sector + bio_sectors(bio);
+ sector_t sector = bio_end_sector(bio);
__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
if (__rq) {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 25ef5c0..8188ad1 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -927,7 +927,7 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
buf->resid = bio->bi_size;
buf->sector = bio->bi_sector;
bio_pageinc(bio);
- buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
+ buf->bv = bio_iovec(bio);
buf->bv_resid = bv->bv_len;
WARN_ON(buf->bv_resid == 0);
}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 531ceb3..f1a29f8 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -334,8 +334,7 @@ static void brd_make_request(struct request_queue *q, struct bio *bio)
int err = -EIO;
sector = bio->bi_sector;
- if (sector + (bio->bi_size >> SECTOR_SHIFT) >
- get_capacity(bdev->bd_disk))
+ if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
goto out;
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2ddd64a..8323263 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3777,7 +3777,6 @@ static int __floppy_read_block_0(struct block_device *bdev)
bio_vec.bv_len = size;
bio_vec.bv_offset = 0;
bio.bi_vcnt = 1;
- bio.bi_idx = 0;
bio.bi_size = size;
bio.bi_bdev = bdev;
bio.bi_sector = 0;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 2e7de7a..1119042 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -901,7 +901,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
pd->iosched.successive_reads += bio->bi_size >> 10;
else {
pd->iosched.successive_reads = 0;
- pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
+ pd->iosched.last_write = bio_end_sector(bio);
}
if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
if (pd->read_speed == pd->write_speed) {
@@ -948,31 +948,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
}
/*
- * Copy CD_FRAMESIZE bytes from src_bio into a destination page
- */
-static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
-{
- unsigned int copy_size = CD_FRAMESIZE;
-
- while (copy_size > 0) {
- struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
- void *vfrom = kmap_atomic(src_bvl->bv_page) +
- src_bvl->bv_offset + offs;
- void *vto = page_address(dst_page) + dst_offs;
- int len = min_t(int, copy_size, src_bvl->bv_len - offs);
-
- BUG_ON(len < 0);
- memcpy(vto, vfrom, len);
- kunmap_atomic(vfrom);
-
- seg++;
- offs = 0;
- dst_offs += len;
- copy_size -= len;
- }
-}
-
-/*
* Copy all data for this packet to pkt->pages[], so that
* a) The number of required segments for the write bio is minimized, which
* is necessary for some scsi controllers.
@@ -1181,16 +1156,15 @@ static int pkt_start_recovery(struct packet_data *pkt)
new_sector = new_block * (CD_FRAMESIZE >> 9);
pkt->sector = new_sector;
+ bio_reset(pkt->bio);
+ pkt->bio->bi_bdev = pd->bdev;
+ pkt->bio->bi_rw = REQ_WRITE;
pkt->bio->bi_sector = new_sector;
- pkt->bio->bi_next = NULL;
- pkt->bio->bi_flags = 1 << BIO_UPTODATE;
- pkt->bio->bi_idx = 0;
+ pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
+ pkt->bio->bi_vcnt = pkt->frames;
- BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
- BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
- BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
- BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
- BUG_ON(pkt->bio->bi_private != pkt);
+ pkt->bio->bi_end_io = pkt_end_io_packet_write;
+ pkt->bio->bi_private = pkt;
drop_super(sb);
return 1;
@@ -1325,55 +1299,35 @@ try_next_bio:
*/
static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
- struct bio *bio;
int f;
- int frames_write;
struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
+ bio_reset(pkt->w_bio);
+ pkt->w_bio->bi_sector = pkt->sector;
+ pkt->w_bio->bi_bdev = pd->bdev;
+ pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+ pkt->w_bio->bi_private = pkt;
+
+ /* XXX: locking? */
for (f = 0; f < pkt->frames; f++) {
bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
+ BUG();
}
+ VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
/*
* Fill-in bvec with data from orig_bios.
*/
- frames_write = 0;
spin_lock(&pkt->lock);
- bio_list_for_each(bio, &pkt->orig_bios) {
- int segment = bio->bi_idx;
- int src_offs = 0;
- int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
- int num_frames = bio->bi_size / CD_FRAMESIZE;
- BUG_ON(first_frame < 0);
- BUG_ON(first_frame + num_frames > pkt->frames);
- for (f = first_frame; f < first_frame + num_frames; f++) {
- struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
-
- while (src_offs >= src_bvl->bv_len) {
- src_offs -= src_bvl->bv_len;
- segment++;
- BUG_ON(segment >= bio->bi_vcnt);
- src_bvl = bio_iovec_idx(bio, segment);
- }
+ bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
- if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
- bvec[f].bv_page = src_bvl->bv_page;
- bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
- } else {
- pkt_copy_bio_data(bio, segment, src_offs,
- bvec[f].bv_page, bvec[f].bv_offset);
- }
- src_offs += CD_FRAMESIZE;
- frames_write++;
- }
- }
pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
spin_unlock(&pkt->lock);
VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
- frames_write, (unsigned long long)pkt->sector);
- BUG_ON(frames_write != pkt->write_size);
+ pkt->write_size, (unsigned long long)pkt->sector);
if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
pkt_make_local_copy(pkt, bvec);
@@ -1383,16 +1337,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
}
/* Start the write request */
- bio_reset(pkt->w_bio);
- pkt->w_bio->bi_sector = pkt->sector;
- pkt->w_bio->bi_bdev = pd->bdev;
- pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
- pkt->w_bio->bi_private = pkt;
- for (f = 0; f < pkt->frames; f++)
- if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
- BUG();
- VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
-
atomic_set(&pkt->io_wait, 1);
pkt->w_bio->bi_rw = WRITE;
pkt_queue_bio(pd, pkt->w_bio);
@@ -2433,7 +2377,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
cloned_bio->bi_bdev = pd->bdev;
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
- pd->stats.secs_r += bio->bi_size >> 9;
+ pd->stats.secs_r += bio_sectors(bio);
pkt_queue_bio(pd, cloned_bio);
return;
}
@@ -2454,7 +2398,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
zone = ZONE(bio->bi_sector, pd);
VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_sector,
- (unsigned long long)(bio->bi_sector + bio_sectors(bio)));
+ (unsigned long long)bio_end_sector(bio));
/* Check if we have to split the bio */
{
@@ -2462,7 +2406,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
sector_t last_zone;
int first_sectors;
- last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
+ last_zone = ZONE(bio_end_sector(bio) - 1, pd);
if (last_zone != zone) {
BUG_ON(last_zone != zone + pd->settings.size);
first_sectors = last_zone - bio->bi_sector;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6c81a4c..11e1798 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -952,7 +952,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
/* Find first affected segment... */
resid = offset;
- __bio_for_each_segment(bv, bio_src, idx, 0) {
+ bio_for_each_segment(bv, bio_src, idx) {
if (resid < bv->bv_len)
break;
resid -= bv->bv_len;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 13c1548..6d2d41a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -858,8 +858,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
unsigned int i;
struct bio_vec *bv;
- for (i = 0; i < clone->bi_vcnt; i++) {
- bv = bio_iovec_idx(clone, i);
+ bio_for_each_segment_all(bv, clone, i) {
BUG_ON(!bv->bv_page);
mempool_free(bv->bv_page, cc->page_pool);
bv->bv_page = NULL;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d053098..699b5be 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -458,7 +458,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m,
{
io->bdev = m->dev->bdev;
io->sector = map_sector(m, bio);
- io->count = bio->bi_size >> 9;
+ io->count = bio_sectors(bio);
}
static void hold_bio(struct mirror_set *ms, struct bio *bio)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index d8837d3..ea5e878 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -258,7 +258,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
sector_t begin, end;
stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
- stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio),
+ stripe_map_range_sector(sc, bio_end_sector(bio),
target_stripe, &end);
if (begin < end) {
bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 6ad5383..4f06d9a 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -472,7 +472,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
return -EIO;
}
- if ((bio->bi_sector + bio_sectors(bio)) >>
+ if (bio_end_sector(bio) >>
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
DMERR_LIMIT("io out of range");
return -EIO;
@@ -490,7 +490,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
bio->bi_end_io = verity_end_io;
bio->bi_private = io;
- io->io_vec_size = bio->bi_vcnt - bio->bi_idx;
+ io->io_vec_size = bio_segments(bio);
if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
io->io_vec = io->io_vec_inline;
else
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 5e7dc77..3193aef 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -185,8 +185,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
return;
}
- if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9),
- WRITE))
+ if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
failit = 1;
if (check_mode(conf, WritePersistent)) {
add_sector(conf, bio->bi_sector, WritePersistent);
@@ -196,8 +195,7 @@ static void make_request(struct mddev *mddev, struct bio *bio)
failit = 1;
} else {
/* read request */
- if (check_sector(conf, bio->bi_sector, bio->bi_sector + (bio->bi_size>>9),
- READ))
+ if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
failit = 1;
if (check_mode(conf, ReadTransient))
failit = 1;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 2101483..f03fabd 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -317,8 +317,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
bio_io_error(bio);
return;
}
- if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
- tmp_dev->end_sector)) {
+ if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcb878f..d323676 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -194,21 +194,12 @@ void md_trim_bio(struct bio *bio, int offset, int size)
if (offset == 0 && size == bio->bi_size)
return;
- bio->bi_sector += offset;
- bio->bi_size = size;
- offset <<= 9;
clear_bit(BIO_SEG_VALID, &bio->bi_flags);
- while (bio->bi_idx < bio->bi_vcnt &&
- bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
- /* remove this whole bio_vec */
- offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
- bio->bi_idx++;
- }
- if (bio->bi_idx < bio->bi_vcnt) {
- bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
- bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
- }
+ bio_advance(bio, offset << 9);
+
+ bio->bi_size = size;
+
/* avoid any complications with bi_idx being non-zero*/
if (bio->bi_idx) {
memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 0505452..fcf65e5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -502,11 +502,11 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
{
if (likely(is_power_of_2(chunk_sects))) {
return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
- + (bio->bi_size >> 9));
+ + bio_sectors(bio));
} else{
sector_t sector = bio->bi_sector;
return chunk_sects >= (sector_div(sector, chunk_sects)
- + (bio->bi_size >> 9));
+ + bio_sectors(bio));
}
}
@@ -527,8 +527,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
sector_t sector = bio->bi_sector;
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
- if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
- bio->bi_idx != 0)
+ if (bio_segments(bio) > 1)
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
@@ -567,7 +566,7 @@ bad_map:
printk("md/raid0:%s: make_request bug: can't convert block across chunks"
" or bigger than %dk %llu %d\n",
mdname(mddev), chunk_sects / 2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+ (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
bio_io_error(bio);
return;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fd86b37..aeb4e3f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -92,7 +92,6 @@ static void r1bio_pool_free(void *r1_bio, void *data)
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
{
struct pool_info *pi = data;
- struct page *page;
struct r1bio *r1_bio;
struct bio *bio;
int i, j;
@@ -122,14 +121,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
j = 1;
while(j--) {
bio = r1_bio->bios[j];
- for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
- if (unlikely(!page))
- goto out_free_pages;
+ bio->bi_vcnt = RESYNC_PAGES;
- bio->bi_io_vec[i].bv_page = page;
- bio->bi_vcnt = i+1;
- }
+ if (bio_alloc_pages(bio, gfp_flags))
+ goto out_free_bio;
}
/* If not user-requests, copy the page pointers to all bios */
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
@@ -143,11 +138,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
return r1_bio;
-out_free_pages:
- for (j=0 ; j < pi->raid_disks; j++)
- for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++)
- put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
- j = -1;
out_free_bio:
while (++j < pi->raid_disks)
bio_put(r1_bio->bios[j]);
@@ -267,7 +257,7 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
(bio_data_dir(bio) == WRITE) ? "write" : "read",
(unsigned long long) bio->bi_sector,
(unsigned long long) bio->bi_sector +
- (bio->bi_size >> 9) - 1);
+ bio_sectors(bio) - 1);
call_bio_endio(r1_bio);
}
@@ -458,7 +448,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
" %llu-%llu\n",
(unsigned long long) mbio->bi_sector,
(unsigned long long) mbio->bi_sector +
- (mbio->bi_size >> 9) - 1);
+ bio_sectors(mbio) - 1);
call_bio_endio(r1_bio);
}
}
@@ -925,7 +915,7 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
if (unlikely(!bvecs))
return;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
bvecs[i] = *bvec;
bvecs[i].bv_page = alloc_page(GFP_NOIO);
if (unlikely(!bvecs[i].bv_page))
@@ -1018,7 +1008,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */
if (bio_data_dir(bio) == WRITE &&
- bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
+ bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_sector < mddev->suspend_hi) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
@@ -1029,7 +1019,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
flush_signals(current);
prepare_to_wait(&conf->wait_barrier,
&w, TASK_INTERRUPTIBLE);
- if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
+ if (bio_end_sector(bio) <= mddev->suspend_lo ||
bio->bi_sector >= mddev->suspend_hi)
break;
schedule();
@@ -1049,7 +1039,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = bio->bi_size >> 9;
+ r1_bio->sectors = bio_sectors(bio);
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector;
@@ -1127,7 +1117,7 @@ read_again:
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1284,14 +1274,10 @@ read_again:
struct bio_vec *bvec;
int j;
- /* Yes, I really want the '__' version so that
- * we clear any unused pointer in the io_vec, rather
- * than leave them unchanged. This is important
- * because when we come to free the pages, we won't
- * know the original bi_idx, so we just free
- * them all
+ /*
+ * We trimmed the bio, so _all is legit
*/
- __bio_for_each_segment(bvec, mbio, j, 0)
+ bio_for_each_segment_all(bvec, mbio, j)
bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining);
@@ -1329,14 +1315,14 @@ read_again:
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
*/
- if (sectors_handled < (bio->bi_size >> 9)) {
+ if (sectors_handled < bio_sectors(bio)) {
r1_bio_write_done(r1_bio);
/* We need another r1_bio. It has already been counted
* in bio->bi_phys_segments
*/
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio;
- r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r1_bio->sectors = bio_sectors(bio) - sectors_handled;
r1_bio->state = 0;
r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector + sectors_handled;
@@ -1862,7 +1848,7 @@ static int process_checks(struct r1bio *r1_bio)
struct bio *sbio = r1_bio->bios[i];
int size;
- if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+ if (sbio->bi_end_io != end_sync_read)
continue;
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
@@ -1887,16 +1873,15 @@ static int process_checks(struct r1bio *r1_bio)
continue;
}
/* fixup the bio for reuse */
+ bio_reset(sbio);
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
- sbio->bi_idx = 0;
- sbio->bi_phys_segments = 0;
- sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bi_next = NULL;
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ sbio->bi_end_io = end_sync_read;
+ sbio->bi_private = r1_bio;
+
size = sbio->bi_size;
for (j = 0; j < vcnt ; j++) {
struct bio_vec *bi;
@@ -1907,10 +1892,9 @@ static int process_checks(struct r1bio *r1_bio)
else
bi->bv_len = size;
size -= PAGE_SIZE;
- memcpy(page_address(bi->bv_page),
- page_address(pbio->bi_io_vec[j].bv_page),
- PAGE_SIZE);
}
+
+ bio_copy_data(sbio, pbio);
}
return 0;
}
@@ -1947,7 +1931,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
wbio->bi_rw = WRITE;
wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
- md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
+ md_sync_acct(conf->mirrors[i].rdev->bdev, bio_sectors(wbio));
generic_make_request(wbio);
}
@@ -2059,32 +2043,11 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
}
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
- struct completion event;
- rw |= REQ_SYNC;
-
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(rw, bio);
- wait_for_completion(&event);
-
- return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int narrow_write_error(struct r1bio *r1_bio, int i)
{
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
struct md_rdev *rdev = conf->mirrors[i].rdev;
- int vcnt, idx;
- struct bio_vec *vec;
/* bio has the data to be written to device 'i' where
* we just recently had a write error.
@@ -2112,30 +2075,32 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
& ~(sector_t)(block_sectors - 1))
- sector;
- if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
- vcnt = r1_bio->behind_page_count;
- vec = r1_bio->behind_bvecs;
- idx = 0;
- while (vec[idx].bv_page == NULL)
- idx++;
- } else {
- vcnt = r1_bio->master_bio->bi_vcnt;
- vec = r1_bio->master_bio->bi_io_vec;
- idx = r1_bio->master_bio->bi_idx;
- }
while (sect_to_write) {
struct bio *wbio;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors'*/
- wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
- memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
- wbio->bi_sector = r1_bio->sector;
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+ unsigned vcnt = r1_bio->behind_page_count;
+ struct bio_vec *vec = r1_bio->behind_bvecs;
+
+ while (!vec->bv_page) {
+ vec++;
+ vcnt--;
+ }
+
+ wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
+ memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
+
+ wbio->bi_vcnt = vcnt;
+ } else {
+ wbio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
+ }
+
wbio->bi_rw = WRITE;
- wbio->bi_vcnt = vcnt;
+ wbio->bi_sector = r1_bio->sector;
wbio->bi_size = r1_bio->sectors << 9;
- wbio->bi_idx = idx;
md_trim_bio(wbio, sector - r1_bio->sector, sectors);
wbio->bi_sector += rdev->data_offset;
@@ -2284,8 +2249,7 @@ read_more:
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = mbio;
- r1_bio->sectors = (mbio->bi_size >> 9)
- - sectors_handled;
+ r1_bio->sectors = bio_sectors(mbio) - sectors_handled;
r1_bio->state = 0;
set_bit(R1BIO_ReadError, &r1_bio->state);
r1_bio->mddev = mddev;
@@ -2459,18 +2423,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
bio = r1_bio->bios[i];
-
- /* take from bio_init */
- bio->bi_next = NULL;
- bio->bi_flags &= ~(BIO_POOL_MASK-1);
- bio->bi_flags |= 1 << BIO_UPTODATE;
- bio->bi_rw = READ;
- bio->bi_vcnt = 0;
- bio->bi_idx = 0;
- bio->bi_phys_segments = 0;
- bio->bi_size = 0;
- bio->bi_end_io = NULL;
- bio->bi_private = NULL;
+ bio_reset(bio);
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 77b562d..e32e8b1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1169,14 +1169,13 @@ static void make_request(struct mddev *mddev, struct bio * bio)
/* If this request crosses a chunk boundary, we need to
* split it. This will only happen for 1 PAGE (or less) requests.
*/
- if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9)
+ if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
> chunk_sects
&& (conf->geo.near_copies < conf->geo.raid_disks
|| conf->prev.near_copies < conf->prev.raid_disks))) {
struct bio_pair *bp;
/* Sanity check -- queue functions should prevent this happening */
- if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
- bio->bi_idx != 0)
+ if (bio_segments(bio) > 1)
goto bad_map;
/* This is a one page bio that upper layers
* refuse to split for us, so we need to split it.
@@ -1209,7 +1208,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
bad_map:
printk("md/raid10:%s: make_request bug: can't convert block across chunks"
" or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
- (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
+ (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
bio_io_error(bio);
return;
@@ -1224,7 +1223,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
*/
wait_barrier(conf);
- sectors = bio->bi_size >> 9;
+ sectors = bio_sectors(bio);
while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
bio->bi_sector < conf->reshape_progress &&
bio->bi_sector + sectors > conf->reshape_progress) {
@@ -1326,8 +1325,7 @@ read_again:
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
r10_bio->master_bio = bio;
- r10_bio->sectors = ((bio->bi_size >> 9)
- - sectors_handled);
+ r10_bio->sectors = bio_sectors(bio) - sectors_handled;
r10_bio->state = 0;
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -1569,7 +1567,7 @@ retry_write:
* after checking if we need to go around again.
*/
- if (sectors_handled < (bio->bi_size >> 9)) {
+ if (sectors_handled < bio_sectors(bio)) {
one_write_done(r10_bio);
/* We need another r10_bio. It has already been counted
* in bio->bi_phys_segments.
@@ -1577,7 +1575,7 @@ retry_write:
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
r10_bio->master_bio = bio;
- r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+ r10_bio->sectors = bio_sectors(bio) - sectors_handled;
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector + sectors_handled;
@@ -2079,13 +2077,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* First we need to fixup bv_offset, bv_len and
* bi_vecs, as the read request might have corrupted these
*/
+ bio_reset(tbio);
+
tbio->bi_vcnt = vcnt;
tbio->bi_size = r10_bio->sectors << 9;
- tbio->bi_idx = 0;
- tbio->bi_phys_segments = 0;
- tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
- tbio->bi_flags |= 1 << BIO_UPTODATE;
- tbio->bi_next = NULL;
tbio->bi_rw = WRITE;
tbio->bi_private = r10_bio;
tbio->bi_sector = r10_bio->devs[i].addr;
@@ -2103,7 +2098,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
d = r10_bio->devs[i].devnum;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
- md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9);
+ md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
@@ -2128,7 +2123,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
d = r10_bio->devs[i].devnum;
atomic_inc(&r10_bio->remaining);
md_sync_acct(conf->mirrors[d].replacement->bdev,
- tbio->bi_size >> 9);
+ bio_sectors(tbio));
generic_make_request(tbio);
}
@@ -2254,13 +2249,13 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
wbio2 = r10_bio->devs[1].repl_bio;
if (wbio->bi_end_io) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
- md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
+ md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
generic_make_request(wbio);
}
if (wbio2 && wbio2->bi_end_io) {
atomic_inc(&conf->mirrors[d].replacement->nr_pending);
md_sync_acct(conf->mirrors[d].replacement->bdev,
- wbio2->bi_size >> 9);
+ bio_sectors(wbio2));
generic_make_request(wbio2);
}
}
@@ -2531,25 +2526,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
}
}
-static void bi_complete(struct bio *bio, int error)
-{
- complete((struct completion *)bio->bi_private);
-}
-
-static int submit_bio_wait(int rw, struct bio *bio)
-{
- struct completion event;
- rw |= REQ_SYNC;
-
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(rw, bio);
- wait_for_completion(&event);
-
- return test_bit(BIO_UPTODATE, &bio->bi_flags);
-}
-
static int narrow_write_error(struct r10bio *r10_bio, int i)
{
struct bio *bio = r10_bio->master_bio;
@@ -2690,8 +2666,7 @@ read_more:
r10_bio = mempool_alloc(conf->r10bio_pool,
GFP_NOIO);
r10_bio->master_bio = mbio;
- r10_bio->sectors = (mbio->bi_size >> 9)
- - sectors_handled;
+ r10_bio->sectors = bio_sectors(mbio) - sectors_handled;
r10_bio->state = 0;
set_bit(R10BIO_ReadError,
&r10_bio->state);
@@ -3112,6 +3087,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
}
}
bio = r10_bio->devs[0].bio;
+ bio_reset(bio);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
@@ -3136,6 +3112,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
rdev = mirror->rdev;
if (!test_bit(In_sync, &rdev->flags)) {
bio = r10_bio->devs[1].bio;
+ bio_reset(bio);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
@@ -3164,6 +3141,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
if (rdev == NULL || bio == NULL ||
test_bit(Faulty, &rdev->flags))
break;
+ bio_reset(bio);
bio->bi_next = biolist;
biolist = bio;
bio->bi_private = r10_bio;
@@ -3262,7 +3240,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
bio = r10_bio->devs[i].bio;
- bio->bi_end_io = NULL;
+ bio_reset(bio);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
if (conf->mirrors[d].rdev == NULL ||
test_bit(Faulty, &conf->mirrors[d].rdev->flags))
@@ -3299,6 +3277,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
+ bio_reset(bio);
clear_bit(BIO_UPTODATE, &bio->bi_flags);
sector = r10_bio->devs[i].addr;
@@ -3332,17 +3311,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
}
}
- for (bio = biolist; bio ; bio=bio->bi_next) {
-
- bio->bi_flags &= ~(BIO_POOL_MASK - 1);
- if (bio->bi_end_io)
- bio->bi_flags |= 1 << BIO_UPTODATE;
- bio->bi_vcnt = 0;
- bio->bi_idx = 0;
- bio->bi_phys_segments = 0;
- bio->bi_size = 0;
- }
-
nr_sectors = 0;
if (sector_nr + max_sync < max_sector)
max_sector = sector_nr + max_sync;
@@ -4389,7 +4357,6 @@ read_more:
read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
read_bio->bi_flags |= 1 << BIO_UPTODATE;
read_bio->bi_vcnt = 0;
- read_bio->bi_idx = 0;
read_bio->bi_size = 0;
r10_bio->master_bio = read_bio;
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
@@ -4413,17 +4380,14 @@ read_more:
}
if (!rdev2 || test_bit(Faulty, &rdev2->flags))
continue;
+
+ bio_reset(b);
b->bi_bdev = rdev2->bdev;
b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
b->bi_private = r10_bio;
b->bi_end_io = end_reshape_write;
b->bi_rw = WRITE;
- b->bi_flags &= ~(BIO_POOL_MASK - 1);
- b->bi_flags |= 1 << BIO_UPTODATE;
b->bi_next = blist;
- b->bi_vcnt = 0;
- b->bi_idx = 0;
- b->bi_size = 0;
blist = b;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912..7bbd285 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -90,7 +90,7 @@ static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
*/
static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
{
- int sectors = bio->bi_size >> 9;
+ int sectors = bio_sectors(bio);
if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
return bio->bi_next;
else
@@ -567,14 +567,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi = &sh->dev[i].req;
rbi = &sh->dev[i].rreq; /* For writing to replacement */
- bi->bi_rw = rw;
- rbi->bi_rw = rw;
- if (rw & WRITE) {
- bi->bi_end_io = raid5_end_write_request;
- rbi->bi_end_io = raid5_end_write_request;
- } else
- bi->bi_end_io = raid5_end_read_request;
-
rcu_read_lock();
rrdev = rcu_dereference(conf->disks[i].replacement);
smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */
@@ -649,7 +641,14 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(STRIPE_IO_STARTED, &sh->state);
+ bio_reset(bi);
bi->bi_bdev = rdev->bdev;
+ bi->bi_rw = rw;
+ bi->bi_end_io = (rw & WRITE)
+ ? raid5_end_write_request
+ : raid5_end_read_request;
+ bi->bi_private = sh;
+
pr_debug("%s: for %llu schedule op %ld on disc %d\n",
__func__, (unsigned long long)sh->sector,
bi->bi_rw, i);
@@ -663,12 +662,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_FLUSH;
- bi->bi_flags = 1 << BIO_UPTODATE;
- bi->bi_idx = 0;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
- bi->bi_next = NULL;
if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
@@ -683,7 +679,13 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
set_bit(STRIPE_IO_STARTED, &sh->state);
+ bio_reset(rbi);
rbi->bi_bdev = rrdev->bdev;
+ rbi->bi_rw = rw;
+ BUG_ON(!(rw & WRITE));
+ rbi->bi_end_io = raid5_end_write_request;
+ rbi->bi_private = sh;
+
pr_debug("%s: for %llu schedule op %ld on "
"replacement disc %d\n",
__func__, (unsigned long long)sh->sector,
@@ -695,12 +697,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
else
rbi->bi_sector = (sh->sector
+ rrdev->data_offset);
- rbi->bi_flags = 1 << BIO_UPTODATE;
- rbi->bi_idx = 0;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
- rbi->bi_next = NULL;
trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
rbi, disk_devt(conf->mddev->gendisk),
sh->dev[i].sector);
@@ -2384,11 +2383,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
} else
bip = &sh->dev[dd_idx].toread;
while (*bip && (*bip)->bi_sector < bi->bi_sector) {
- if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector)
+ if (bio_end_sector(*bip) > bi->bi_sector)
goto overlap;
bip = & (*bip)->bi_next;
}
- if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
+ if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
goto overlap;
BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2404,8 +2403,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
bi && bi->bi_sector <= sector;
bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
- if (bi->bi_sector + (bi->bi_size>>9) >= sector)
- sector = bi->bi_sector + (bi->bi_size>>9);
+ if (bio_end_sector(bi) >= sector)
+ sector = bio_end_sector(bi);
}
if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
@@ -3804,7 +3803,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
{
sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
unsigned int chunk_sectors = mddev->chunk_sectors;
- unsigned int bio_sectors = bio->bi_size >> 9;
+ unsigned int bio_sectors = bio_sectors(bio);
if (mddev->new_chunk_sectors < mddev->chunk_sectors)
chunk_sectors = mddev->new_chunk_sectors;
@@ -3894,7 +3893,7 @@ static int bio_fits_rdev(struct bio *bi)
{
struct request_queue *q = bdev_get_queue(bi->bi_bdev);
- if ((bi->bi_size>>9) > queue_max_sectors(q))
+ if (bio_sectors(bi) > queue_max_sectors(q))
return 0;
blk_recount_segments(q, bi);
if (bi->bi_phys_segments > queue_max_segments(q))
@@ -3941,7 +3940,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
0,
&dd_idx, NULL);
- end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
+ end_sector = bio_end_sector(align_bi);
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].replacement);
if (!rdev || test_bit(Faulty, &rdev->flags) ||
@@ -3964,7 +3963,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
if (!bio_fits_rdev(align_bi) ||
- is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+ is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
&first_bad, &bad_sectors)) {
/* too big in some way, or has a known bad block */
bio_put(align_bi);
@@ -4216,7 +4215,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
}
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
- last_sector = bi->bi_sector + (bi->bi_size>>9);
+ last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -4679,7 +4678,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
sector = raid5_compute_sector(conf, logical_sector,
0, &dd_idx, NULL);
- last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
+ last_sector = bio_end_sector(raid_bio);
for (; logical_sector < last_sector;
logical_sector += STRIPE_SECTORS,
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index fa43c39..2bb0154 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2235,10 +2235,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
}
/* do we need to support multiple segments? */
- if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
+ if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
- ioc->name, __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
- rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
+ ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req),
+ bio_segments(rsp->bio), blk_rq_bytes(rsp));
return -EINVAL;
}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index b6ad0de..12d08b4 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -826,8 +826,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
/* Request is not page-aligned. */
goto fail;
- if (((bio->bi_size >> 9) + bio->bi_sector)
- > get_capacity(bio->bi_bdev->bd_disk)) {
+ if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
/* Request beyond end of DCSS segment. */
goto fail;
}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index aec2e0d..7af7767 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2151,10 +2151,10 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
}
/* do we need to support multiple segments? */
- if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) {
+ if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
printk("%s: multiple segments req %u %u, rsp %u %u\n",
- __func__, req->bio->bi_vcnt, blk_rq_bytes(req),
- rsp->bio->bi_vcnt, blk_rq_bytes(rsp));
+ __func__, bio_segments(req->bio), blk_rq_bytes(req),
+ bio_segments(rsp->bio), blk_rq_bytes(rsp));
return -EINVAL;
}
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 8c2ffbe..193e7ae 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1939,7 +1939,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
ioc->transport_cmds.status = MPT2_CMD_PENDING;
/* Check if the request is split across multiple segments */
- if (req->bio->bi_vcnt > 1) {
+ if (bio_segments(req->bio) > 1) {
u32 offset = 0;
/* Allocate memory and copy the request */
@@ -1971,7 +1971,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
/* Check if the response needs to be populated across
* multiple segments */
- if (rsp->bio->bi_vcnt > 1) {
+ if (bio_segments(rsp->bio) > 1) {
pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
&pci_dma_in);
if (!pci_addr_in) {
@@ -2038,7 +2038,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
- if (req->bio->bi_vcnt > 1) {
+ if (bio_segments(req->bio) > 1) {
ioc->base_add_sg_single(psge, sgl_flags |
(blk_rq_bytes(req) - 4), pci_dma_out);
} else {
@@ -2054,7 +2054,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
MPI2_SGE_FLAGS_END_OF_LIST);
sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
- if (rsp->bio->bi_vcnt > 1) {
+ if (bio_segments(rsp->bio) > 1) {
ioc->base_add_sg_single(psge, sgl_flags |
(blk_rq_bytes(rsp) + 4), pci_dma_in);
} else {
@@ -2099,7 +2099,7 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
le16_to_cpu(mpi_reply->ResponseDataLength);
/* check if the resp needs to be copied from the allocated
* pci mem */
- if (rsp->bio->bi_vcnt > 1) {
+ if (bio_segments(rsp->bio) > 1) {
u32 offset = 0;
u32 bytes_to_copy =
le16_to_cpu(mpi_reply->ResponseDataLength);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a3f28f3..8fb42916 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -27,48 +27,11 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
-struct integrity_slab {
- struct kmem_cache *slab;
- unsigned short nr_vecs;
- char name[8];
-};
-
-#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
-struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
- IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
-};
-#undef IS
+#define BIP_INLINE_VECS 4
+static struct kmem_cache *bip_slab;
static struct workqueue_struct *kintegrityd_wq;
-static inline unsigned int vecs_to_idx(unsigned int nr)
-{
- switch (nr) {
- case 1:
- return 0;
- case 2 ... 4:
- return 1;
- case 5 ... 16:
- return 2;
- case 17 ... 64:
- return 3;
- case 65 ... 128:
- return 4;
- case 129 ... BIO_MAX_PAGES:
- return 5;
- default:
- BUG();
- }
-}
-
-static inline int use_bip_pool(unsigned int idx)
-{
- if (idx == BIOVEC_MAX_IDX)
- return 1;
-
- return 0;
-}
-
/**
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
@@ -84,37 +47,41 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
unsigned int nr_vecs)
{
struct bio_integrity_payload *bip;
- unsigned int idx = vecs_to_idx(nr_vecs);
struct bio_set *bs = bio->bi_pool;
-
- if (!bs)
- bs = fs_bio_set;
-
- BUG_ON(bio == NULL);
- bip = NULL;
-
- /* Lower order allocations come straight from slab */
- if (!use_bip_pool(idx))
- bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
-
- /* Use mempool if lower order alloc failed or max vecs were requested */
- if (bip == NULL) {
- idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */
+ unsigned long idx = BIO_POOL_NONE;
+ unsigned inline_vecs;
+
+ if (!bs) {
+ bip = kmalloc(sizeof(struct bio_integrity_payload) +
+ sizeof(struct bio_vec) * nr_vecs, gfp_mask);
+ inline_vecs = nr_vecs;
+ } else {
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
-
- if (unlikely(bip == NULL)) {
- printk(KERN_ERR "%s: could not alloc bip\n", __func__);
- return NULL;
- }
+ inline_vecs = BIP_INLINE_VECS;
}
+ if (unlikely(!bip))
+ return NULL;
+
memset(bip, 0, sizeof(*bip));
+ if (nr_vecs > inline_vecs) {
+ bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
+ bs->bvec_integrity_pool);
+ if (!bip->bip_vec)
+ goto err;
+ } else {
+ bip->bip_vec = bip->bip_inline_vecs;
+ }
+
bip->bip_slab = idx;
bip->bip_bio = bio;
bio->bi_integrity = bip;
return bip;
+err:
+ mempool_free(bip, bs->bio_integrity_pool);
+ return NULL;
}
EXPORT_SYMBOL(bio_integrity_alloc);
@@ -130,20 +97,18 @@ void bio_integrity_free(struct bio *bio)
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_set *bs = bio->bi_pool;
- if (!bs)
- bs = fs_bio_set;
-
- BUG_ON(bip == NULL);
-
- /* A cloned bio doesn't own the integrity metadata */
- if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
- && bip->bip_buf != NULL)
+ if (bip->bip_owns_buf)
kfree(bip->bip_buf);
- if (use_bip_pool(bip->bip_slab))
+ if (bs) {
+ if (bip->bip_slab != BIO_POOL_NONE)
+ bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
+ bip->bip_slab);
+
mempool_free(bip, bs->bio_integrity_pool);
- else
- kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
+ } else {
+ kfree(bip);
+ }
bio->bi_integrity = NULL;
}
@@ -419,6 +384,7 @@ int bio_integrity_prep(struct bio *bio)
return -EIO;
}
+ bip->bip_owns_buf = 1;
bip->bip_buf = buf;
bip->bip_size = len;
bip->bip_sector = bio->bi_sector;
@@ -694,11 +660,11 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
bp->bio1.bi_integrity = &bp->bip1;
bp->bio2.bi_integrity = &bp->bip2;
- bp->iv1 = bip->bip_vec[0];
- bp->iv2 = bip->bip_vec[0];
+ bp->iv1 = bip->bip_vec[bip->bip_idx];
+ bp->iv2 = bip->bip_vec[bip->bip_idx];
- bp->bip1.bip_vec[0] = bp->iv1;
- bp->bip2.bip_vec[0] = bp->iv2;
+ bp->bip1.bip_vec = &bp->iv1;
+ bp->bip2.bip_vec = &bp->iv2;
bp->iv1.bv_len = sectors * bi->tuple_size;
bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -746,13 +712,14 @@ EXPORT_SYMBOL(bio_integrity_clone);
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
- unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
-
if (bs->bio_integrity_pool)
return 0;
- bs->bio_integrity_pool =
- mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
+ bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
+
+ bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
+ if (!bs->bvec_integrity_pool)
+ return -1;
if (!bs->bio_integrity_pool)
return -1;
@@ -765,13 +732,14 @@ void bioset_integrity_free(struct bio_set *bs)
{
if (bs->bio_integrity_pool)
mempool_destroy(bs->bio_integrity_pool);
+
+ if (bs->bvec_integrity_pool)
+ mempool_destroy(bs->bio_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
void __init bio_integrity_init(void)
{
- unsigned int i;
-
/*
* kintegrityd won't block much but may burn a lot of CPU cycles.
* Make it highpri CPU intensive wq with max concurrency of 1.
@@ -781,14 +749,10 @@ void __init bio_integrity_init(void)
if (!kintegrityd_wq)
panic("Failed to create kintegrityd\n");
- for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
- unsigned int size;
-
- size = sizeof(struct bio_integrity_payload)
- + bip_slab[i].nr_vecs * sizeof(struct bio_vec);
-
- bip_slab[i].slab =
- kmem_cache_create(bip_slab[i].name, size, 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- }
+ bip_slab = kmem_cache_create("bio_integrity_payload",
+ sizeof(struct bio_integrity_payload) +
+ sizeof(struct bio_vec) * BIP_INLINE_VECS,
+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ if (!bip_slab)
+ panic("Failed to create slab\n");
}
diff --git a/fs/bio.c b/fs/bio.c
index bb5768f..9238a54 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -160,12 +160,12 @@ unsigned int bvec_nr_vecs(unsigned short idx)
return bvec_slabs[idx].nr_vecs;
}
-void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
{
BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
if (idx == BIOVEC_MAX_IDX)
- mempool_free(bv, bs->bvec_pool);
+ mempool_free(bv, pool);
else {
struct biovec_slab *bvs = bvec_slabs + idx;
@@ -173,8 +173,8 @@ void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
}
}
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
- struct bio_set *bs)
+struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
+ mempool_t *pool)
{
struct bio_vec *bvl;
@@ -210,7 +210,7 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
*/
if (*idx == BIOVEC_MAX_IDX) {
fallback:
- bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+ bvl = mempool_alloc(pool, gfp_mask);
} else {
struct biovec_slab *bvs = bvec_slabs + *idx;
gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
@@ -252,8 +252,8 @@ static void bio_free(struct bio *bio)
__bio_free(bio);
if (bs) {
- if (bio_has_allocated_vec(bio))
- bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
+ if (bio_flagged(bio, BIO_OWNS_VEC))
+ bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -297,6 +297,54 @@ void bio_reset(struct bio *bio)
}
EXPORT_SYMBOL(bio_reset);
+static void bio_alloc_rescue(struct work_struct *work)
+{
+ struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
+ struct bio *bio;
+
+ while (1) {
+ spin_lock(&bs->rescue_lock);
+ bio = bio_list_pop(&bs->rescue_list);
+ spin_unlock(&bs->rescue_lock);
+
+ if (!bio)
+ break;
+
+ generic_make_request(bio);
+ }
+}
+
+static void punt_bios_to_rescuer(struct bio_set *bs)
+{
+ struct bio_list punt, nopunt;
+ struct bio *bio;
+
+ /*
+ * In order to guarantee forward progress we must punt only bios that
+ * were allocated from this bio_set; otherwise, if there was a bio on
+ * there for a stacking driver higher up in the stack, processing it
+ * could require allocating bios from this bio_set, and doing that from
+ * our own rescuer would be bad.
+ *
+ * Since bio lists are singly linked, pop them all instead of trying to
+ * remove from the middle of the list:
+ */
+
+ bio_list_init(&punt);
+ bio_list_init(&nopunt);
+
+ while ((bio = bio_list_pop(current->bio_list)))
+ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+
+ *current->bio_list = nopunt;
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_merge(&bs->rescue_list, &punt);
+ spin_unlock(&bs->rescue_lock);
+
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+}
+
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
@@ -314,11 +362,27 @@ EXPORT_SYMBOL(bio_reset);
* previously allocated bio for IO before attempting to allocate a new one.
* Failure to do so can cause deadlocks under memory pressure.
*
+ * Note that when running under generic_make_request() (i.e. any block
+ * driver), bios are not submitted until after you return - see the code in
+ * generic_make_request() that converts recursion into iteration, to prevent
+ * stack overflows.
+ *
+ * This would normally mean allocating multiple bios under
+ * generic_make_request() would be susceptible to deadlocks, but we have
+ * deadlock avoidance code that resubmits any blocked bios from a rescuer
+ * thread.
+ *
+ * However, we do not guarantee forward progress for allocations from other
+ * mempools. Doing multiple allocations from the same mempool under
+ * generic_make_request() should be avoided - instead, use bio_set's front_pad
+ * for per bio allocations.
+ *
* RETURNS:
* Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
+ gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
unsigned long idx = BIO_POOL_NONE;
@@ -336,7 +400,37 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
front_pad = 0;
inline_vecs = nr_iovecs;
} else {
+ /*
+ * generic_make_request() converts recursion to iteration; this
+ * means if we're running beneath it, any bios we allocate and
+ * submit will not be submitted (and thus freed) until after we
+ * return.
+ *
+ * This exposes us to a potential deadlock if we allocate
+ * multiple bios from the same bio_set() while running
+ * underneath generic_make_request(). If we were to allocate
+ * multiple bios (say a stacking block driver that was splitting
+ * bios), we would deadlock if we exhausted the mempool's
+ * reserve.
+ *
+ * We solve this, and guarantee forward progress, with a rescuer
+ * workqueue per bio_set. If we go to allocate and there are
+ * bios on current->bio_list, we first try the allocation
+ * without __GFP_WAIT; if that fails, we punt those bios we
+ * would be blocking to the rescuer workqueue before we retry
+ * with the original gfp_flags.
+ */
+
+ if (current->bio_list && !bio_list_empty(current->bio_list))
+ gfp_mask &= ~__GFP_WAIT;
+
p = mempool_alloc(bs->bio_pool, gfp_mask);
+ if (!p && gfp_mask != saved_gfp) {
+ punt_bios_to_rescuer(bs);
+ gfp_mask = saved_gfp;
+ p = mempool_alloc(bs->bio_pool, gfp_mask);
+ }
+
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}
@@ -348,9 +442,17 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
bio_init(bio);
if (nr_iovecs > inline_vecs) {
- bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+ bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+ if (!bvl && gfp_mask != saved_gfp) {
+ punt_bios_to_rescuer(bs);
+ gfp_mask = saved_gfp;
+ bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+ }
+
if (unlikely(!bvl))
goto err_free;
+
+ bio->bi_flags |= 1 << BIO_OWNS_VEC;
} else if (nr_iovecs) {
bvl = bio->bi_inline_vecs;
}
@@ -652,6 +754,181 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
}
EXPORT_SYMBOL(bio_add_page);
+struct submit_bio_ret {
+ struct completion event;
+ int error;
+};
+
+static void submit_bio_wait_endio(struct bio *bio, int error)
+{
+ struct submit_bio_ret *ret = bio->bi_private;
+
+ ret->error = error;
+ complete(&ret->event);
+}
+
+/**
+ * submit_bio_wait - submit a bio, and wait until it completes
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bio: The &struct bio which describes the I/O
+ *
+ * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
+ * bio_endio() on failure.
+ */
+int submit_bio_wait(int rw, struct bio *bio)
+{
+ struct submit_bio_ret ret;
+
+ rw |= REQ_SYNC;
+ init_completion(&ret.event);
+ bio->bi_private = &ret;
+ bio->bi_end_io = submit_bio_wait_endio;
+ submit_bio(rw, bio);
+ wait_for_completion(&ret.event);
+
+ return ret.error;
+}
+EXPORT_SYMBOL(submit_bio_wait);
+
+/**
+ * bio_advance - increment/complete a bio by some number of bytes
+ * @bio: bio to advance
+ * @bytes: number of bytes to complete
+ *
+ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
+ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
+ * be updated on the last bvec as well.
+ *
+ * @bio will then represent the remaining, uncompleted portion of the io.
+ */
+void bio_advance(struct bio *bio, unsigned bytes)
+{
+ if (bio_integrity(bio))
+ bio_integrity_advance(bio, bytes);
+
+ bio->bi_sector += bytes >> 9;
+ bio->bi_size -= bytes;
+
+ if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+ return;
+
+ while (bytes) {
+ if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
+ WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
+ bio->bi_idx, bio->bi_vcnt);
+ break;
+ }
+
+ if (bytes >= bio_iovec(bio)->bv_len) {
+ bytes -= bio_iovec(bio)->bv_len;
+ bio->bi_idx++;
+ } else {
+ bio_iovec(bio)->bv_len -= bytes;
+ bio_iovec(bio)->bv_offset += bytes;
+ bytes = 0;
+ }
+ }
+}
+EXPORT_SYMBOL(bio_advance);
+
+/**
+ * bio_alloc_pages - allocates a single page for each bvec in a bio
+ * @bio: bio to allocate pages for
+ * @gfp_mask: flags for allocation
+ *
+ * Allocates pages up to @bio->bi_vcnt.
+ *
+ * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
+ * freed.
+ */
+int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
+{
+ int i;
+ struct bio_vec *bv;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ bv->bv_page = alloc_page(gfp_mask);
+ if (!bv->bv_page) {
+ while (--bv >= bio->bi_io_vec)
+ __free_page(bv->bv_page);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_alloc_pages);
+
+/**
+ * bio_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
+ * @src and @dst as linked lists of bios.
+ *
+ * Stops when it reaches the end of either @src or @dst - that is, copies
+ * min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
+ */
+void bio_copy_data(struct bio *dst, struct bio *src)
+{
+ struct bio_vec *src_bv, *dst_bv;
+ unsigned src_offset, dst_offset, bytes;
+ void *src_p, *dst_p;
+
+ src_bv = bio_iovec(src);
+ dst_bv = bio_iovec(dst);
+
+ src_offset = src_bv->bv_offset;
+ dst_offset = dst_bv->bv_offset;
+
+ while (1) {
+ if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
+ src_bv++;
+ if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
+ src = src->bi_next;
+ if (!src)
+ break;
+
+ src_bv = bio_iovec(src);
+ }
+
+ src_offset = src_bv->bv_offset;
+ }
+
+ if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
+ dst_bv++;
+ if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
+ dst = dst->bi_next;
+ if (!dst)
+ break;
+
+ dst_bv = bio_iovec(dst);
+ }
+
+ dst_offset = dst_bv->bv_offset;
+ }
+
+ bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
+ src_bv->bv_offset + src_bv->bv_len - src_offset);
+
+ src_p = kmap_atomic(src_bv->bv_page);
+ dst_p = kmap_atomic(dst_bv->bv_page);
+
+ memcpy(dst_p + dst_bv->bv_offset,
+ src_p + src_bv->bv_offset,
+ bytes);
+
+ kunmap_atomic(dst_p);
+ kunmap_atomic(src_p);
+
+ src_offset += bytes;
+ dst_offset += bytes;
+ }
+}
+EXPORT_SYMBOL(bio_copy_data);
+
struct bio_map_data {
struct bio_vec *iovecs;
struct sg_iovec *sgvecs;
@@ -714,7 +991,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
int iov_idx = 0;
unsigned int iov_off = 0;
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment_all(bvec, bio, i) {
char *bv_addr = page_address(bvec->bv_page);
unsigned int bv_len = iovecs[i].bv_len;
@@ -896,7 +1173,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
return bio;
cleanup:
if (!map_data)
- bio_for_each_segment(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, i)
__free_page(bvec->bv_page);
bio_put(bio);
@@ -1110,7 +1387,7 @@ static void __bio_unmap_user(struct bio *bio)
/*
* make sure we dirty pages we wrote to
*/
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment_all(bvec, bio, i) {
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
@@ -1216,7 +1493,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
int i;
char *p = bmd->sgvecs[0].iov_base;
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment_all(bvec, bio, i) {
char *addr = page_address(bvec->bv_page);
int len = bmd->iovecs[i].bv_len;
@@ -1256,7 +1533,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
if (!reading) {
void *p = data;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, i) {
char *addr = page_address(bvec->bv_page);
memcpy(addr, p, bvec->bv_len);
@@ -1301,11 +1578,11 @@ EXPORT_SYMBOL(bio_copy_kern);
*/
void bio_set_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec;
int i;
- for (i = 0; i < bio->bi_vcnt; i++) {
- struct page *page = bvec[i].bv_page;
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
if (page && !PageCompound(page))
set_page_dirty_lock(page);
@@ -1314,11 +1591,11 @@ void bio_set_pages_dirty(struct bio *bio)
static void bio_release_pages(struct bio *bio)
{
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec;
int i;
- for (i = 0; i < bio->bi_vcnt; i++) {
- struct page *page = bvec[i].bv_page;
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
if (page)
put_page(page);
@@ -1367,16 +1644,16 @@ static void bio_dirty_fn(struct work_struct *work)
void bio_check_pages_dirty(struct bio *bio)
{
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec;
int nr_clean_pages = 0;
int i;
- for (i = 0; i < bio->bi_vcnt; i++) {
- struct page *page = bvec[i].bv_page;
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
if (PageDirty(page) || PageCompound(page)) {
page_cache_release(page);
- bvec[i].bv_page = NULL;
+ bvec->bv_page = NULL;
} else {
nr_clean_pages++;
}
@@ -1479,8 +1756,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
bi->bi_sector + first_sectors);
- BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
- BUG_ON(bi->bi_idx != 0);
+ BUG_ON(bio_segments(bi) > 1);
atomic_set(&bp->cnt, 3);
bp->error = 0;
bp->bio1 = *bi;
@@ -1490,8 +1766,8 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
bp->bio1.bi_size = first_sectors << 9;
if (bi->bi_vcnt != 0) {
- bp->bv1 = bi->bi_io_vec[0];
- bp->bv2 = bi->bi_io_vec[0];
+ bp->bv1 = *bio_iovec(bi);
+ bp->bv2 = *bio_iovec(bi);
if (bio_is_rw(bi)) {
bp->bv2.bv_offset += first_sectors << 9;
@@ -1543,7 +1819,7 @@ sector_t bio_sector_offset(struct bio *bio, unsigned short index,
if (index >= bio->bi_idx)
index = bio->bi_vcnt - 1;
- __bio_for_each_segment(bv, bio, i, 0) {
+ bio_for_each_segment_all(bv, bio, i) {
if (i == index) {
if (offset > bv->bv_offset)
sectors += (offset - bv->bv_offset) / sector_sz;
@@ -1561,29 +1837,25 @@ EXPORT_SYMBOL(bio_sector_offset);
* create memory pools for biovec's in a bio_set.
* use the global biovec slabs created for general use.
*/
-static int biovec_create_pools(struct bio_set *bs, int pool_entries)
+mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
{
struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
- bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
- if (!bs->bvec_pool)
- return -ENOMEM;
-
- return 0;
-}
-
-static void biovec_free_pools(struct bio_set *bs)
-{
- mempool_destroy(bs->bvec_pool);
+ return mempool_create_slab_pool(pool_entries, bp->slab);
}
void bioset_free(struct bio_set *bs)
{
+ if (bs->rescue_workqueue)
+ destroy_workqueue(bs->rescue_workqueue);
+
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);
+ if (bs->bvec_pool)
+ mempool_destroy(bs->bvec_pool);
+
bioset_integrity_free(bs);
- biovec_free_pools(bs);
bio_put_slab(bs);
kfree(bs);
@@ -1614,6 +1886,10 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
bs->front_pad = front_pad;
+ spin_lock_init(&bs->rescue_lock);
+ bio_list_init(&bs->rescue_list);
+ INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
+
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
if (!bs->bio_slab) {
kfree(bs);
@@ -1624,9 +1900,15 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool)
goto bad;
- if (!biovec_create_pools(bs, pool_size))
- return bs;
+ bs->bvec_pool = biovec_create_pool(bs, pool_size);
+ if (!bs->bvec_pool)
+ goto bad;
+
+ bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
+ if (!bs->rescue_workqueue)
+ goto bad;
+ return bs;
bad:
bioset_free(bs);
return NULL;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5a..bed072a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2527,8 +2527,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (old_compressed)
contig = bio->bi_sector == sector;
else
- contig = bio->bi_sector + (bio->bi_size >> 9) ==
- sector;
+ contig = bio_end_sector(bio) == sector;
if (prev_bio_flags != bio_flags || !contig ||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5989a92..d90e048 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5166,7 +5166,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
}
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if ((bio->bi_size >> 9) > max_sectors)
+ if (bio_sectors(bio) > max_sectors)
return 0;
if (!q->merge_bvec_fn)
diff --git a/fs/buffer.c b/fs/buffer.c
index b4dcb34..ecd3792 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2979,7 +2979,6 @@ int submit_bh(int rw, struct buffer_head * bh)
bio->bi_io_vec[0].bv_offset = bh_offset(bh);
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = bh->b_size;
bio->bi_end_io = end_bio_bh_io_sync;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f853263..38484b0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -441,8 +441,8 @@ static struct bio *dio_await_one(struct dio *dio)
static int dio_bio_complete(struct dio *dio, struct bio *bio)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec;
- int page_no;
+ struct bio_vec *bvec;
+ unsigned i;
if (!uptodate)
dio->io_error = -EIO;
@@ -450,8 +450,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
if (dio->is_async && dio->rw == READ) {
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
- for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
- struct page *page = bvec[page_no].bv_page;
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
if (dio->rw == READ && !PageCompound(page))
set_page_dirty_lock(page);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index f936cb5..b744228 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -401,7 +401,7 @@ static void _clear_bio(struct bio *bio)
struct bio_vec *bv;
unsigned i;
- __bio_for_each_segment(bv, bio, i, 0) {
+ bio_for_each_segment_all(bv, bio, i) {
unsigned this_count = bv->bv_len;
if (likely(PAGE_SIZE == this_count))
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index b963f38..7682b97 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -432,7 +432,7 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
if (!bio)
continue;
- __bio_for_each_segment(bv, bio, i, 0) {
+ bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
SetPageUptodate(page);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a505597..5c37ef9 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -300,7 +300,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
u64 nblk;
if (bio) {
- nblk = bio->bi_sector + bio_sectors(bio);
+ nblk = bio_end_sector(bio);
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk)
return bio;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 2eb952c..8ae5e35 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2004,7 +2004,6 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = LOGPSIZE;
bio->bi_end_io = lbmIODone;
@@ -2145,7 +2144,6 @@ static void lbmStartIO(struct lbuf * bp)
bio->bi_io_vec[0].bv_offset = bp->l_offset;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = LOGPSIZE;
bio->bi_end_io = lbmIODone;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index e784a21..550475c 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -32,7 +32,6 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
bio_vec.bv_len = PAGE_SIZE;
bio_vec.bv_offset = 0;
bio.bi_vcnt = 1;
- bio.bi_idx = 0;
bio.bi_size = PAGE_SIZE;
bio.bi_bdev = bdev;
bio.bi_sector = page->index * (PAGE_SIZE >> 9);
@@ -108,7 +107,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
if (i >= max_pages) {
/* Block layer cannot split bios :( */
bio->bi_vcnt = i;
- bio->bi_idx = 0;
bio->bi_size = i * PAGE_SIZE;
bio->bi_bdev = super->s_bdev;
bio->bi_sector = ofs >> 9;
@@ -136,7 +134,6 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
unlock_page(page);
}
bio->bi_vcnt = nr_pages;
- bio->bi_idx = 0;
bio->bi_size = nr_pages * PAGE_SIZE;
bio->bi_bdev = super->s_bdev;
bio->bi_sector = ofs >> 9;
@@ -202,7 +199,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
if (i >= max_pages) {
/* Block layer cannot split bios :( */
bio->bi_vcnt = i;
- bio->bi_idx = 0;
bio->bi_size = i * PAGE_SIZE;
bio->bi_bdev = super->s_bdev;
bio->bi_sector = ofs >> 9;
@@ -224,7 +220,6 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
bio->bi_io_vec[i].bv_offset = 0;
}
bio->bi_vcnt = nr_pages;
- bio->bi_idx = 0;
bio->bi_size = nr_pages * PAGE_SIZE;
bio->bi_bdev = super->s_bdev;
bio->bi_sector = ofs >> 9;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 820e7aa..ef24466 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -67,6 +67,7 @@
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
+#define bio_end_sector(bio) ((bio)->bi_sector + bio_sectors((bio)))
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
@@ -84,11 +85,6 @@ static inline void *bio_data(struct bio *bio)
return NULL;
}
-static inline int bio_has_allocated_vec(struct bio *bio)
-{
- return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
-}
-
/*
* will die
*/
@@ -136,16 +132,27 @@ static inline int bio_has_allocated_vec(struct bio *bio)
#define bio_io_error(bio) bio_endio((bio), -EIO)
/*
- * drivers should not use the __ version unless they _really_ want to
- * run through the entire bio and not just pending pieces
+ * drivers should not use the __ version unless they _really_ know what
+ * they're doing
*/
#define __bio_for_each_segment(bvl, bio, i, start_idx) \
for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \
i < (bio)->bi_vcnt; \
bvl++, i++)
+/*
+ * drivers should _never_ use the all version - the bio may have been split
+ * before it got to the driver and the driver won't own all of it
+ */
+#define bio_for_each_segment_all(bvl, bio, i) \
+ for (i = 0; \
+ bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
+ i++)
+
#define bio_for_each_segment(bvl, bio, i) \
- __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
+ for (i = (bio)->bi_idx; \
+ bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt; \
+ i++)
/*
* get a reference to a bio, so it won't disappear. the intended use is
@@ -180,9 +187,12 @@ struct bio_integrity_payload {
unsigned short bip_slab; /* slab the bip came from */
unsigned short bip_vcnt; /* # of integrity bio_vecs */
unsigned short bip_idx; /* current bip_vec index */
+ unsigned bip_owns_buf:1; /* should free bip_buf */
struct work_struct bip_work; /* I/O completion */
- struct bio_vec bip_vec[0]; /* embedded bvec array */
+
+ struct bio_vec *bip_vec;
+ struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
};
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -211,6 +221,7 @@ extern void bio_pair_release(struct bio_pair *dbio);
extern struct bio_set *bioset_create(unsigned int, unsigned int);
extern void bioset_free(struct bio_set *);
+extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
extern void bio_put(struct bio *);
@@ -245,6 +256,9 @@ extern void bio_endio(struct bio *, int);
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
+extern int submit_bio_wait(int rw, struct bio *bio);
+extern void bio_advance(struct bio *, unsigned);
+
extern void bio_init(struct bio *);
extern void bio_reset(struct bio *);
@@ -279,6 +293,9 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
}
#endif
+extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
+
extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
unsigned long, unsigned int, int, gfp_t);
extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -286,8 +303,8 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
int, int, gfp_t);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio(struct bio *bio);
-extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
-extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
+extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
+extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);
#ifdef CONFIG_BLK_CGROUP
@@ -298,39 +315,6 @@ static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
static inline void bio_disassociate_task(struct bio *bio) { }
#endif /* CONFIG_BLK_CGROUP */
-/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-#define BIO_POOL_SIZE 2
-#define BIOVEC_NR_POOLS 6
-#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
-
-struct bio_set {
- struct kmem_cache *bio_slab;
- unsigned int front_pad;
-
- mempool_t *bio_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- mempool_t *bio_integrity_pool;
-#endif
- mempool_t *bvec_pool;
-};
-
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
-
#ifdef CONFIG_HIGHMEM
/*
* remember never ever reenable interrupts between a bvec_kmap_irq and
@@ -527,6 +511,49 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
return bio;
}
+/*
+ * bio_set is used to allow other portions of the IO system to
+ * allocate their own private memory pools for bio and iovec structures.
+ * These memory pools in turn all allocate from the bio_slab
+ * and the bvec_slabs[].
+ */
+#define BIO_POOL_SIZE 2
+#define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
+
+struct bio_set {
+ struct kmem_cache *bio_slab;
+ unsigned int front_pad;
+
+ mempool_t *bio_pool;
+ mempool_t *bvec_pool;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ mempool_t *bio_integrity_pool;
+ mempool_t *bvec_integrity_pool;
+#endif
+
+ /*
+ * Deadlock avoidance for stacking block drivers: see comments in
+ * bio_alloc_bioset() for details
+ */
+ spinlock_t rescue_lock;
+ struct bio_list rescue_list;
+ struct work_struct rescue_work;
+ struct workqueue_struct *rescue_workqueue;
+};
+
+struct biovec_slab {
+ int nr_vecs;
+ char *name;
+ struct kmem_cache *slab;
+};
+
+/*
+ * a small number of entries is fine, not going to be performance critical.
+ * basically we just need to survive
+ */
+#define BIO_SPLIT_ENTRIES 2
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fcc1ce2..e8de670 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -117,6 +117,7 @@ struct bio {
* BIO_POOL_IDX()
*/
#define BIO_RESET_BITS 12
+#define BIO_OWNS_VEC 12 /* bio_free() should free bvec */
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
@@ -198,6 +199,8 @@ enum rq_flag_bits {
REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
+#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
+
/* This mask is used for both bio and request merge checking */
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 9961726..5a28843 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -244,7 +244,7 @@ TRACE_EVENT(block_bio_bounce,
__entry->dev = bio->bi_bdev ?
bio->bi_bdev->bd_dev : 0;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -281,7 +281,7 @@ TRACE_EVENT(block_bio_complete,
__entry->dev = bio->bi_bdev ?
bio->bi_bdev->bd_dev : 0;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
__entry->error = error;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
),
@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(block_bio_merge,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -376,7 +376,7 @@ TRACE_EVENT(block_bio_queue,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -404,7 +404,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
TP_fast_assign(
__entry->dev = bio ? bio->bi_bdev->bd_dev : 0;
__entry->sector = bio ? bio->bi_sector : 0;
- __entry->nr_sector = bio ? bio->bi_size >> 9 : 0;
+ __entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
bio ? bio->bi_rw : 0, __entry->nr_sector);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
@@ -580,7 +580,7 @@ TRACE_EVENT(block_bio_remap,
TP_fast_assign(
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
- __entry->nr_sector = bio->bi_size >> 9;
+ __entry->nr_sector = bio_sectors(bio);
__entry->old_dev = dev;
__entry->old_sector = from;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
diff --git a/mm/bounce.c b/mm/bounce.c
index 5f89017..f5326b2 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
struct bio_vec *tovec, *fromvec;
int i;
- __bio_for_each_segment(tovec, to, i, 0) {
+ bio_for_each_segment(tovec, to, i) {
fromvec = from->bi_io_vec + i;
/*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
/*
* free up bounce indirect pages used
*/
- __bio_for_each_segment(bvec, bio, i, 0) {
+ bio_for_each_segment_all(bvec, bio, i) {
org_vec = bio_orig->bi_io_vec + i;
if (bvec->bv_page == org_vec->bv_page)
continue;
@@ -218,78 +218,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
mempool_t *pool, int force)
{
- struct page *page;
- struct bio *bio = NULL;
- int i, rw = bio_data_dir(*bio_orig);
+ struct bio *bio;
+ int rw = bio_data_dir(*bio_orig);
struct bio_vec *to, *from;
+ unsigned i;
- bio_for_each_segment(from, *bio_orig, i) {
- page = from->bv_page;
+ bio_for_each_segment(from, *bio_orig, i)
+ if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
+ goto bounce;
- /*
- * is destination page below bounce pfn?
- */
- if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
- continue;
+ return;
+bounce:
+ bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
- /*
- * irk, bounce it
- */
- if (!bio) {
- unsigned int cnt = (*bio_orig)->bi_vcnt;
-
- bio = bio_alloc(GFP_NOIO, cnt);
- memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
- }
-
+ bio_for_each_segment_all(to, bio, i) {
+ struct page *page = to->bv_page;
- to = bio->bi_io_vec + i;
+ if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
+ continue;
- to->bv_page = mempool_alloc(pool, q->bounce_gfp);
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
inc_zone_page_state(to->bv_page, NR_BOUNCE);
+ to->bv_page = mempool_alloc(pool, q->bounce_gfp);
if (rw == WRITE) {
char *vto, *vfrom;
- flush_dcache_page(from->bv_page);
+ flush_dcache_page(page);
+
vto = page_address(to->bv_page) + to->bv_offset;
- vfrom = kmap(from->bv_page) + from->bv_offset;
+ vfrom = kmap_atomic(page) + to->bv_offset;
memcpy(vto, vfrom, to->bv_len);
- kunmap(from->bv_page);
+ kunmap_atomic(vfrom);
}
}
- /*
- * no pages bounced
- */
- if (!bio)
- return;
-
trace_block_bio_bounce(q, *bio_orig);
- /*
- * at least one page was bounced, fill in possible non-highmem
- * pages
- */
- __bio_for_each_segment(from, *bio_orig, i, 0) {
- to = bio_iovec_idx(bio, i);
- if (!to->bv_page) {
- to->bv_page = from->bv_page;
- to->bv_len = from->bv_len;
- to->bv_offset = from->bv_offset;
- }
- }
-
- bio->bi_bdev = (*bio_orig)->bi_bdev;
bio->bi_flags |= (1 << BIO_BOUNCED);
- bio->bi_sector = (*bio_orig)->bi_sector;
- bio->bi_rw = (*bio_orig)->bi_rw;
-
- bio->bi_vcnt = (*bio_orig)->bi_vcnt;
- bio->bi_idx = (*bio_orig)->bi_idx;
- bio->bi_size = (*bio_orig)->bi_size;
if (pool == page_pool) {
bio->bi_end_io = bounce_end_io_write;
diff --git a/mm/page_io.c b/mm/page_io.c
index 78eee32..8d3c0c0 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -35,7 +35,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
bio->bi_io_vec[0].bv_len = PAGE_SIZE;
bio->bi_io_vec[0].bv_offset = 0;
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
bio->bi_size = PAGE_SIZE;
bio->bi_end_io = end_io;
}
OpenPOWER on IntegriCloud