summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/block/biodoc.txt2
-rw-r--r--arch/s390/include/asm/eadm.h6
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--block/badblocks.c1
-rw-r--r--block/bfq-iosched.c59
-rw-r--r--block/bio-integrity.c8
-rw-r--r--block/bio.c85
-rw-r--r--block/blk-core.c331
-rw-r--r--block/blk-exec.c4
-rw-r--r--block/blk-flush.c16
-rw-r--r--block/blk-integrity.c4
-rw-r--r--block/blk-map.c7
-rw-r--r--block/blk-merge.c48
-rw-r--r--block/blk-mq-cpumap.c68
-rw-r--r--block/blk-mq-debugfs.c101
-rw-r--r--block/blk-mq-sched.c158
-rw-r--r--block/blk-mq-sched.h28
-rw-r--r--block/blk-mq.c399
-rw-r--r--block/blk-mq.h11
-rw-r--r--block/blk-settings.c5
-rw-r--r--block/blk-tag.c15
-rw-r--r--block/blk-timeout.c4
-rw-r--r--block/blk.h15
-rw-r--r--block/bounce.c47
-rw-r--r--block/bsg-lib.c5
-rw-r--r--block/bsg.c13
-rw-r--r--block/cfq-iosched.c9
-rw-r--r--block/elevator.c1
-rw-r--r--block/genhd.c4
-rw-r--r--block/ioprio.c3
-rw-r--r--block/kyber-iosched.c31
-rw-r--r--block/scsi_ioctl.c13
-rw-r--r--block/t10-pi.c32
-rw-r--r--drivers/block/DAC960.c2
-rw-r--r--drivers/block/amiflop.c10
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/aoe/aoecmd.c12
-rw-r--r--drivers/block/aoe/aoedev.c2
-rw-r--r--drivers/block/ataflop.c16
-rw-r--r--drivers/block/brd.c1
-rw-r--r--drivers/block/cciss.c4
-rw-r--r--drivers/block/drbd/drbd_actlog.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c6
-rw-r--r--drivers/block/drbd/drbd_int.h5
-rw-r--r--drivers/block/drbd/drbd_main.c14
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c6
-rw-r--r--drivers/block/drbd/drbd_req.c8
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_worker.c16
-rw-r--r--drivers/block/floppy.c9
-rw-r--r--drivers/block/loop.c64
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c54
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h2
-rw-r--r--drivers/block/nbd.c44
-rw-r--r--drivers/block/null_blk.c125
-rw-r--r--drivers/block/paride/pcd.c9
-rw-r--r--drivers/block/paride/pd.c3
-rw-r--r--drivers/block/paride/pf.c19
-rw-r--r--drivers/block/pktcdvd.c40
-rw-r--r--drivers/block/ps3disk.c11
-rw-r--r--drivers/block/ps3vram.c16
-rw-r--r--drivers/block/rbd.c28
-rw-r--r--drivers/block/rsxx/dev.c17
-rw-r--r--drivers/block/rsxx/dma.c13
-rw-r--r--drivers/block/rsxx/rsxx_priv.h2
-rw-r--r--drivers/block/skd_main.c32
-rw-r--r--drivers/block/sunvdc.c4
-rw-r--r--drivers/block/swim.c8
-rw-r--r--drivers/block/swim3.c29
-rw-r--r--drivers/block/sx8.c20
-rw-r--r--drivers/block/umem.c4
-rw-r--r--drivers/block/virtio_blk.c23
-rw-r--r--drivers/block/xen-blkback/blkback.c19
-rw-r--r--drivers/block/xen-blkfront.c81
-rw-r--r--drivers/block/xsysace.c9
-rw-r--r--drivers/block/z2ram.c4
-rw-r--r--drivers/cdrom/cdrom.c7
-rw-r--r--drivers/cdrom/gdrom.c10
-rw-r--r--drivers/ide/ide-atapi.c12
-rw-r--r--drivers/ide/ide-cd.c11
-rw-r--r--drivers/ide/ide-cd_ioctl.c1
-rw-r--r--drivers/ide/ide-devsets.c1
-rw-r--r--drivers/ide/ide-disk.c1
-rw-r--r--drivers/ide/ide-dma.c2
-rw-r--r--drivers/ide/ide-eh.c16
-rw-r--r--drivers/ide/ide-floppy.c6
-rw-r--r--drivers/ide/ide-io.c10
-rw-r--r--drivers/ide/ide-ioctls.c2
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/ide/ide-pm.c8
-rw-r--r--drivers/ide/ide-probe.c7
-rw-r--r--drivers/ide/ide-tape.c3
-rw-r--r--drivers/ide/ide-taskfile.c7
-rw-r--r--drivers/ide/siimage.c6
-rw-r--r--drivers/lightnvm/core.c13
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c617
-rw-r--r--drivers/lightnvm/pblk-gc.c475
-rw-r--r--drivers/lightnvm/pblk-init.c389
-rw-r--r--drivers/lightnvm/pblk-map.c75
-rw-r--r--drivers/lightnvm/pblk-rb.c106
-rw-r--r--drivers/lightnvm/pblk-read.c93
-rw-r--r--drivers/lightnvm/pblk-recovery.c275
-rw-r--r--drivers/lightnvm/pblk-rl.c90
-rw-r--r--drivers/lightnvm/pblk-sysfs.c94
-rw-r--r--drivers/lightnvm/pblk-write.c353
-rw-r--r--drivers/lightnvm/pblk.h296
-rw-r--r--drivers/lightnvm/rrpc.c10
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/btree.c6
-rw-r--r--drivers/md/bcache/debug.c2
-rw-r--r--drivers/md/bcache/io.c6
-rw-r--r--drivers/md/bcache/journal.c2
-rw-r--r--drivers/md/bcache/movinggc.c10
-rw-r--r--drivers/md/bcache/request.c28
-rw-r--r--drivers/md/bcache/request.h2
-rw-r--r--drivers/md/bcache/super.c14
-rw-r--r--drivers/md/bcache/writeback.c4
-rw-r--r--drivers/md/dm-bio-prison-v1.c4
-rw-r--r--drivers/md/dm-bio-prison-v1.h2
-rw-r--r--drivers/md/dm-bufio.c28
-rw-r--r--drivers/md/dm-cache-target.c36
-rw-r--r--drivers/md/dm-crypt.c41
-rw-r--r--drivers/md/dm-flakey.c13
-rw-r--r--drivers/md/dm-integrity.c30
-rw-r--r--drivers/md/dm-io.c13
-rw-r--r--drivers/md/dm-log-writes.c13
-rw-r--r--drivers/md/dm-mpath.c85
-rw-r--r--drivers/md/dm-raid1.c29
-rw-r--r--drivers/md/dm-rq.c30
-rw-r--r--drivers/md/dm-rq.h2
-rw-r--r--drivers/md/dm-snap.c15
-rw-r--r--drivers/md/dm-stripe.c17
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-thin.c67
-rw-r--r--drivers/md/dm-verity-target.c16
-rw-r--r--drivers/md/dm-zero.c4
-rw-r--r--drivers/md/dm.c88
-rw-r--r--drivers/md/md.c14
-rw-r--r--drivers/md/multipath.c10
-rw-r--r--drivers/md/raid1.c38
-rw-r--r--drivers/md/raid10.c38
-rw-r--r--drivers/md/raid5-cache.c6
-rw-r--r--drivers/md/raid5-ppl.c4
-rw-r--r--drivers/md/raid5.c24
-rw-r--r--drivers/memstick/core/ms_block.c7
-rw-r--r--drivers/memstick/core/mspro_block.c8
-rw-r--r--drivers/mmc/core/block.c37
-rw-r--r--drivers/mmc/core/queue.c3
-rw-r--r--drivers/mtd/mtd_blkdevs.c31
-rw-r--r--drivers/mtd/ubi/block.c8
-rw-r--r--drivers/nvdimm/blk.c5
-rw-r--r--drivers/nvdimm/btt.c5
-rw-r--r--drivers/nvdimm/pmem.c29
-rw-r--r--drivers/nvme/host/Kconfig12
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c525
-rw-r--r--drivers/nvme/host/fabrics.c69
-rw-r--r--drivers/nvme/host/fabrics.h4
-rw-r--r--drivers/nvme/host/fc.c137
-rw-r--r--drivers/nvme/host/lightnvm.c18
-rw-r--r--drivers/nvme/host/nvme.h42
-rw-r--r--drivers/nvme/host/pci.c647
-rw-r--r--drivers/nvme/host/rdma.c212
-rw-r--r--drivers/nvme/host/scsi.c2460
-rw-r--r--drivers/nvme/target/admin-cmd.c65
-rw-r--r--drivers/nvme/target/configfs.c68
-rw-r--r--drivers/nvme/target/core.c3
-rw-r--r--drivers/nvme/target/discovery.c4
-rw-r--r--drivers/nvme/target/fc.c10
-rw-r--r--drivers/nvme/target/fcloop.c2
-rw-r--r--drivers/nvme/target/io-cmd.c4
-rw-r--r--drivers/nvme/target/loop.c67
-rw-r--r--drivers/nvme/target/nvmet.h1
-rw-r--r--drivers/nvme/target/rdma.c102
-rw-r--r--drivers/s390/block/dasd.c36
-rw-r--r--drivers/s390/block/dcssblk.c2
-rw-r--r--drivers/s390/block/scm_blk.c8
-rw-r--r--drivers/s390/block/scm_blk.h4
-rw-r--r--drivers/s390/block/xpram.c2
-rw-r--r--drivers/s390/cio/eadm_sch.c6
-rw-r--r--drivers/s390/cio/scm.c2
-rw-r--r--drivers/sbus/char/jsflash.c5
-rw-r--r--drivers/scsi/osd/osd_initiator.c29
-rw-r--r--drivers/scsi/osst.c3
-rw-r--r--drivers/scsi/scsi_error.c3
-rw-r--r--drivers/scsi/scsi_lib.c104
-rw-r--r--drivers/scsi/scsi_transport_sas.c10
-rw-r--r--drivers/scsi/sg.c8
-rw-r--r--drivers/scsi/st.c3
-rw-r--r--drivers/target/target_core_iblock.c12
-rw-r--r--drivers/target/target_core_pscsi.c6
-rw-r--r--fs/aio.c15
-rw-r--r--fs/block_dev.c25
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/check-integrity.c4
-rw-r--r--fs/btrfs/compression.c46
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c75
-rw-r--r--fs/btrfs/disk-io.h12
-rw-r--r--fs/btrfs/extent_io.c27
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/file-item.c14
-rw-r--r--fs/btrfs/file.c33
-rw-r--r--fs/btrfs/inode.c82
-rw-r--r--fs/btrfs/raid56.c16
-rw-r--r--fs/btrfs/scrub.c26
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/buffer.c15
-rw-r--r--fs/crypto/bio.c2
-rw-r--r--fs/direct-io.c25
-rw-r--r--fs/ext4/file.c35
-rw-r--r--fs/ext4/page-io.c15
-rw-r--r--fs/ext4/readpage.c4
-rw-r--r--fs/f2fs/data.c10
-rw-r--r--fs/f2fs/segment.c2
-rw-r--r--fs/fcntl.c67
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/inode.c1
-rw-r--r--fs/iomap.c13
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_metapage.c4
-rw-r--r--fs/mpage.c4
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/open.c1
-rw-r--r--fs/read_write.c12
-rw-r--r--fs/xfs/xfs_aops.c9
-rw-r--r--fs/xfs/xfs_buf.c7
-rw-r--r--fs/xfs/xfs_file.c32
-rw-r--r--fs/xfs/xfs_iomap.c22
-rw-r--r--fs/xfs/xfs_super.c3
-rw-r--r--include/linux/bio.h21
-rw-r--r--include/linux/blk-mq.h28
-rw-r--r--include/linux/blk_types.h26
-rw-r--r--include/linux/blkdev.h72
-rw-r--r--include/linux/device-mapper.h4
-rw-r--r--include/linux/elevator.h7
-rw-r--r--include/linux/fs.h74
-rw-r--r--include/linux/ide.h6
-rw-r--r--include/linux/iomap.h1
-rw-r--r--include/linux/nvme.h102
-rw-r--r--include/linux/scatterlist.h2
-rw-r--r--include/scsi/osd_initiator.h2
-rw-r--r--include/scsi/scsi_cmnd.h1
-rw-r--r--include/scsi/scsi_request.h2
-rw-r--r--include/uapi/linux/aio_abi.h2
-rw-r--r--include/uapi/linux/dm-ioctl.h4
-rw-r--r--include/uapi/linux/fcntl.h21
-rw-r--r--include/uapi/linux/fs.h4
-rw-r--r--include/uapi/linux/loop.h3
-rw-r--r--include/uapi/linux/nbd.h4
-rw-r--r--kernel/power/swap.c14
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--lib/scatterlist.c35
-rw-r--r--mm/filemap.c64
-rw-r--r--mm/page_io.c4
265 files changed, 5912 insertions, 6237 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf..9490f28 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
i/o is issued (since the bio may otherwise get freed in case i/o completion
happens in the meantime).
-The bio_clone() routine may be used to duplicate a bio, where the clone
+The bio_clone_fast() routine may be used to duplicate a bio, where the clone
shares the bio_vec_list with the original bio (i.e. both point to the
same bio_vec_list). This would typically be used for splitting i/o requests
in lvm or md.
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 6702630..144809a 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
#include <linux/types.h>
#include <linux/device.h>
+#include <linux/blkdev.h>
struct arqb {
u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
int (*probe) (struct scm_device *scmdev);
int (*remove) (struct scm_device *scmdev);
void (*notify) (struct scm_device *scmdev, enum scm_event event);
- void (*handler) (struct scm_device *scmdev, void *data, int error);
+ void (*handler) (struct scm_device *scmdev, void *data,
+ blk_status_t error);
};
int scm_driver_register(struct scm_driver *scmdrv);
void scm_driver_unregister(struct scm_driver *scmdrv);
int eadm_start_aob(struct aob *aob);
-void scm_irq_handler(struct aob *aob, int error);
+void scm_irq_handler(struct aob *aob, blk_status_t error);
#endif /* _ASM_S390_EADM_H */
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 8541027..b55fe9b 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
blk_end_request(
(*irq_req_buffer)[count]->req,
- 0,
+ BLK_STS_OK,
(*irq_req_buffer)[count]->length
);
kfree((*irq_req_buffer)[count]);
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef2..43c7116 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
case 3:
if (newline != '\n')
return -EINVAL;
+ /* fall through */
case 2:
if (length <= 0)
return -EINVAL;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ed93da2..12bbc6b 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
}
static void
-bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
+ struct bfq_io_cq *bic, bool bfq_already_existing)
{
+ unsigned int old_wr_coeff = bfqq->wr_coeff;
+ bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
+
if (bic->saved_idle_window)
bfq_mark_bfqq_idle_window(bfqq);
else
@@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
/* make sure weight will be updated, however we got here */
bfqq->entity.prio_changed = 1;
+
+ if (likely(!busy))
+ return;
+
+ if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
+ bfqd->wr_busy_queues++;
+ else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
+ bfqd->wr_busy_queues--;
}
static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -4290,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
+static void bfq_finish_request(struct request *rq)
{
- struct bfq_queue *bfqq = RQ_BFQQ(rq);
- struct bfq_data *bfqd = bfqq->bfqd;
+ struct bfq_queue *bfqq;
+ struct bfq_data *bfqd;
+
+ if (!rq->elv.icq)
+ return;
+
+ bfqq = RQ_BFQQ(rq);
+ bfqd = bfqq->bfqd;
if (rq->rq_flags & RQF_STARTED)
bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4324,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
*/
if (!RB_EMPTY_NODE(&rq->rb_node))
- bfq_remove_request(q, rq);
+ bfq_remove_request(rq->q, rq);
bfq_put_rq_priv_body(bfqq);
}
@@ -4394,20 +4412,21 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
/*
* Allocate bfq data structures associated with this request.
*/
-static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
- struct bio *bio)
+static void bfq_prepare_request(struct request *rq, struct bio *bio)
{
+ struct request_queue *q = rq->q;
struct bfq_data *bfqd = q->elevator->elevator_data;
- struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq);
+ struct bfq_io_cq *bic;
const int is_sync = rq_is_sync(rq);
struct bfq_queue *bfqq;
bool new_queue = false;
- bool split = false;
+ bool bfqq_already_existing = false, split = false;
- spin_lock_irq(&bfqd->lock);
+ if (!rq->elv.icq)
+ return;
+ bic = icq_to_bic(rq->elv.icq);
- if (!bic)
- goto queue_fail;
+ spin_lock_irq(&bfqd->lock);
bfq_check_ioprio_change(bic, bio);
@@ -4432,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
true, is_sync,
NULL);
+ else
+ bfqq_already_existing = true;
}
}
@@ -4457,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
* queue: restore the idle window and the
* possible weight raising period.
*/
- bfq_bfqq_resume_state(bfqq, bic);
+ bfq_bfqq_resume_state(bfqq, bfqd, bic,
+ bfqq_already_existing);
}
}
@@ -4465,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
bfq_handle_burst(bfqd, bfqq);
spin_unlock_irq(&bfqd->lock);
-
- return 0;
-
-queue_fail:
- spin_unlock_irq(&bfqd->lock);
-
- return 1;
}
static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4950,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
static struct elevator_type iosched_bfq_mq = {
.ops.mq = {
- .get_rq_priv = bfq_get_rq_private,
- .put_rq_priv = bfq_put_rq_private,
+ .prepare_request = bfq_prepare_request,
+ .finish_request = bfq_finish_request,
.exit_icq = bfq_exit_icq,
.insert_requests = bfq_insert_requests,
.dispatch_request = bfq_dispatch_request,
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index b5009a8..b8a3a65 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -224,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
* @bio: bio to generate/verify integrity metadata for
* @proc_fn: Pointer to the relevant processing function
*/
-static int bio_integrity_process(struct bio *bio,
+static blk_status_t bio_integrity_process(struct bio *bio,
integrity_processing_fn *proc_fn)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -232,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
struct bvec_iter bviter;
struct bio_vec bv;
struct bio_integrity_payload *bip = bio_integrity(bio);
- unsigned int ret = 0;
+ blk_status_t ret = BLK_STS_OK;
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
@@ -369,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
struct bio *bio = bip->bip_bio;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn);
+ bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
/* Restore original bio completion handler */
bio->bi_end_io = bip->bip_end_io;
@@ -398,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
* integrity metadata. Restore original bio end_io handler
* and run it.
*/
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio->bi_end_io = bip->bip_end_io;
bio_endio(bio);
diff --git a/block/bio.c b/block/bio.c
index 26b0810..1cfcd0d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -315,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
{
struct bio *parent = bio->bi_private;
- if (!parent->bi_error)
- parent->bi_error = bio->bi_error;
+ if (!parent->bi_status)
+ parent->bi_status = bio->bi_status;
bio_put(bio);
return parent;
}
@@ -369,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
struct bio_list punt, nopunt;
struct bio *bio;
+ if (WARN_ON_ONCE(!bs->rescue_workqueue))
+ return;
/*
* In order to guarantee forward progress we must punt only bios that
* were allocated from this bio_set; otherwise, if there was a bio on
@@ -480,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
if (current->bio_list &&
(!bio_list_empty(&current->bio_list[0]) ||
- !bio_list_empty(&current->bio_list[1])))
+ !bio_list_empty(&current->bio_list[1])) &&
+ bs->rescue_workqueue)
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -550,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
*
* Description:
* Put a reference to a &struct bio, either one you have gotten with
- * bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
+ * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
**/
void bio_put(struct bio *bio)
{
@@ -599,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
bio->bi_opf = bio_src->bi_opf;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
@@ -682,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
return NULL;
bio->bi_bdev = bio_src->bi_bdev;
bio->bi_opf = bio_src->bi_opf;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
@@ -924,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
{
struct submit_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->error = blk_status_to_errno(bio->bi_status);
complete(&ret->event);
}
@@ -1823,8 +1828,8 @@ again:
}
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
- trace_block_bio_complete(bdev_get_queue(bio->bi_bdev),
- bio, bio->bi_error);
+ trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
+ blk_status_to_errno(bio->bi_status));
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
@@ -1927,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
}
EXPORT_SYMBOL(bioset_free);
-static struct bio_set *__bioset_create(unsigned int pool_size,
- unsigned int front_pad,
- bool create_bvec_pool)
+/**
+ * bioset_create - Create a bio_set
+ * @pool_size: Number of bio and bio_vecs to cache in the mempool
+ * @front_pad: Number of bytes to allocate in front of the returned bio
+ * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
+ * and %BIOSET_NEED_RESCUER
+ *
+ * Description:
+ * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ * to ask for a number of bytes to be allocated in front of the bio.
+ * Front pad allocation is useful for embedding the bio inside
+ * another structure, to avoid allocating extra data to go with the bio.
+ * Note that the bio must be embedded at the END of that structure always,
+ * or things will break badly.
+ * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
+ * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
+ * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
+ * dispatch queued requests when the mempool runs out of space.
+ *
+ */
+struct bio_set *bioset_create(unsigned int pool_size,
+ unsigned int front_pad,
+ int flags)
{
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
struct bio_set *bs;
@@ -1954,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
if (!bs->bio_pool)
goto bad;
- if (create_bvec_pool) {
+ if (flags & BIOSET_NEED_BVECS) {
bs->bvec_pool = biovec_create_pool(pool_size);
if (!bs->bvec_pool)
goto bad;
}
+ if (!(flags & BIOSET_NEED_RESCUER))
+ return bs;
+
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
if (!bs->rescue_workqueue)
goto bad;
@@ -1969,41 +1997,8 @@ bad:
bioset_free(bs);
return NULL;
}
-
-/**
- * bioset_create - Create a bio_set
- * @pool_size: Number of bio and bio_vecs to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
- * to ask for a number of bytes to be allocated in front of the bio.
- * Front pad allocation is useful for embedding the bio inside
- * another structure, to avoid allocating extra data to go with the bio.
- * Note that the bio must be embedded at the END of that structure always,
- * or things will break badly.
- */
-struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
-{
- return __bioset_create(pool_size, front_pad, true);
-}
EXPORT_SYMBOL(bioset_create);
-/**
- * bioset_create_nobvec - Create a bio_set without bio_vec mempool
- * @pool_size: Number of bio to cache in the mempool
- * @front_pad: Number of bytes to allocate in front of the returned bio
- *
- * Description:
- * Same functionality as bioset_create() except that mempool is not
- * created for bio_vecs. Saving some memory for bio_clone_fast() users.
- */
-struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
-{
- return __bioset_create(pool_size, front_pad, false);
-}
-EXPORT_SYMBOL(bioset_create_nobvec);
-
#ifdef CONFIG_BLK_CGROUP
/**
@@ -2118,7 +2113,7 @@ static int __init init_bio(void)
bio_integrity_init();
biovec_init_slabs();
- fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!fs_bio_set)
panic("bio: can't allocate bios\n");
diff --git a/block/blk-core.c b/block/blk-core.c
index a7421b7..af393d5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL(blk_rq_init);
+static const struct {
+ int errno;
+ const char *name;
+} blk_errors[] = {
+ [BLK_STS_OK] = { 0, "" },
+ [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
+ [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
+ [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
+ [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
+ [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
+ [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
+ [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
+ [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
+ [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
+ [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
+
+ /* device mapper special case, should not leak out: */
+ [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
+
+ /* everything else not covered above: */
+ [BLK_STS_IOERR] = { -EIO, "I/O" },
+};
+
+blk_status_t errno_to_blk_status(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
+ if (blk_errors[i].errno == errno)
+ return (__force blk_status_t)i;
+ }
+
+ return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(errno_to_blk_status);
+
+int blk_status_to_errno(blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return -EIO;
+ return blk_errors[idx].errno;
+}
+EXPORT_SYMBOL_GPL(blk_status_to_errno);
+
+static void print_req_error(struct request *req, blk_status_t status)
+{
+ int idx = (__force int)status;
+
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
+ return;
+
+ printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+ __func__, blk_errors[idx].name, req->rq_disk ?
+ req->rq_disk->disk_name : "?",
+ (unsigned long long)blk_rq_pos(req));
+}
+
static void req_bio_endio(struct request *rq, struct bio *bio,
- unsigned int nbytes, int error)
+ unsigned int nbytes, blk_status_t error)
{
if (error)
- bio->bi_error = error;
+ bio->bi_status = error;
if (unlikely(rq->rq_flags & RQF_QUIET))
bio_set_flag(bio, BIO_QUIET);
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
* Description:
* Sometimes queueing needs to be postponed for a little while, to allow
* resources to come back. This function will make sure that queueing is
- * restarted around the specified time. Queue lock must be held.
+ * restarted around the specified time.
*/
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (likely(!blk_queue_dead(q)))
queue_delayed_work(kblockd_workqueue, &q->delay_work,
msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
**/
void blk_start_queue_async(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
blk_run_queue_async(q);
}
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
* Description:
* blk_start_queue() will clear the stop flag on the queue, and call
* the request_fn for the queue if it was in a stopped state when
- * entered. Also see blk_stop_queue(). Queue lock must be held.
+ * entered. Also see blk_stop_queue().
**/
void blk_start_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
WARN_ON(!irqs_disabled());
+ WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
__blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
* or if it simply chooses not to queue more I/O at one point, it can
* call this function to prevent the request_fn from being called until
* the driver has signalled it's ready to go again. This happens by calling
- * blk_start_queue() to restart queue operations. Queue lock must be held.
+ * blk_start_queue() to restart queue operations.
**/
void blk_stop_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
cancel_delayed_work(&q->delay_work);
queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
*/
inline void __blk_run_queue_uncond(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (unlikely(blk_queue_dead(q)))
return;
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
* @q: The queue to run
*
* Description:
- * See @blk_run_queue. This variant must be called with the queue lock
- * held and interrupts disabled.
+ * See @blk_run_queue.
*/
void __blk_run_queue(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (unlikely(blk_queue_stopped(q)))
return;
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
*
* Description:
* Tells kblockd to perform the equivalent of @blk_run_queue on behalf
- * of us. The caller must hold the queue lock.
+ * of us.
+ *
+ * Note:
+ * Since it is not allowed to run q->delay_work after blk_cleanup_queue()
+ * has canceled q->delay_work, callers must hold the queue lock to avoid
+ * race conditions between blk_cleanup_queue() and blk_run_queue_async().
*/
void blk_run_queue_async(struct request_queue *q)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
{
unsigned long flags;
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irqsave(q->queue_lock, flags);
__blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
int i;
lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
while (true) {
bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
*/
void blk_queue_bypass_start(struct request_queue *q)
{
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irq(q->queue_lock);
q->bypass_depth++;
queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
* @q: queue of interest
*
* Leave bypass mode and restore the normal queueing behavior.
+ *
+ * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
+ * this function is called for both blk-sq and blk-mq queues.
*/
void blk_queue_bypass_end(struct request_queue *q)
{
@@ -732,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (q->id < 0)
goto fail_q;
- q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!q->bio_split)
goto fail_id;
@@ -878,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
int blk_init_allocated_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(q->mq_ops);
+
q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
if (!q->fq)
return -ENOMEM;
@@ -1015,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
struct request_list *rl;
int on_thresh, off_thresh;
+ WARN_ON_ONCE(q->mq_ops);
+
spin_lock_irq(q->queue_lock);
q->nr_requests = nr;
blk_queue_congestion_threshold(q);
@@ -1077,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
int may_queue;
req_flags_t rq_flags = RQF_ALLOCED;
+ lockdep_assert_held(q->queue_lock);
+
if (unlikely(blk_queue_dying(q)))
return ERR_PTR(-ENODEV);
@@ -1250,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
struct request_list *rl;
struct request *rq;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
rq = __get_request(rl, op, bio, gfp_mask);
if (!IS_ERR(rq))
return rq;
+ if (op & REQ_NOWAIT) {
+ blk_put_rl(rl);
+ return ERR_PTR(-EAGAIN);
+ }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
@@ -1283,16 +1388,18 @@ retry:
goto retry;
}
-static struct request *blk_old_get_request(struct request_queue *q, int rw,
- gfp_t gfp_mask)
+static struct request *blk_old_get_request(struct request_queue *q,
+ unsigned int op, gfp_t gfp_mask)
{
struct request *rq;
+ WARN_ON_ONCE(q->mq_ops);
+
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
spin_lock_irq(q->queue_lock);
- rq = get_request(q, rw, NULL, gfp_mask);
+ rq = get_request(q, op, NULL, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
return rq;
@@ -1305,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
return rq;
}
-struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+ gfp_t gfp_mask)
{
- if (q->mq_ops)
- return blk_mq_alloc_request(q, rw,
+ struct request *req;
+
+ if (q->mq_ops) {
+ req = blk_mq_alloc_request(q, op,
(gfp_mask & __GFP_DIRECT_RECLAIM) ?
0 : BLK_MQ_REQ_NOWAIT);
- else
- return blk_old_get_request(q, rw, gfp_mask);
+ if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
+ q->mq_ops->initialize_rq_fn(req);
+ } else {
+ req = blk_old_get_request(q, op, gfp_mask);
+ if (!IS_ERR(req) && q->initialize_rq_fn)
+ q->initialize_rq_fn(req);
+ }
+
+ return req;
}
EXPORT_SYMBOL(blk_get_request);
@@ -1328,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
*/
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
@@ -1402,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
static inline void blk_pm_put_request(struct request *rq) {}
#endif
-/*
- * queue lock must be held
- */
void __blk_put_request(struct request_queue *q, struct request *req)
{
req_flags_t rq_flags = req->rq_flags;
@@ -1417,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
return;
}
+ lockdep_assert_held(q->queue_lock);
+
blk_pm_put_request(req);
elv_completed_request(q, req);
@@ -1646,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
req->ioprio = ioc->ioprio;
else
req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
+ req->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(req->q, req, bio);
}
EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1665,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
*/
blk_queue_bounce(q, &bio);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -1726,7 +1846,10 @@ get_rq:
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
if (IS_ERR(req)) {
__wbt_done(q->rq_wb, wb_acct);
- bio->bi_error = PTR_ERR(req);
+ if (PTR_ERR(req) == -ENOMEM)
+ bio->bi_status = BLK_STS_RESOURCE;
+ else
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
goto out_unlock;
}
@@ -1881,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
{
struct request_queue *q;
int nr_sectors = bio_sectors(bio);
- int err = -EIO;
+ blk_status_t status = BLK_STS_IOERR;
char b[BDEVNAME_SIZE];
struct hd_struct *part;
@@ -1900,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
+ /*
+ * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+ * if queue is not a request based queue.
+ */
+
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
+ goto not_supported;
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(&part_to_disk(part)->part0,
@@ -1924,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!nr_sectors) {
- err = 0;
+ status = BLK_STS_OK;
goto end_io;
}
}
@@ -1976,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
return true;
not_supported:
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
end_io:
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
return false;
}
@@ -2057,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (likely(blk_queue_enter(q, false) == 0)) {
+ if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
@@ -2082,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
- bio_io_error(bio);
+ if (unlikely(!blk_queue_dying(q) &&
+ (bio->bi_opf & REQ_NOWAIT)))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
@@ -2183,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
* @q: the queue to submit the request
* @rq: the request being queued
*/
-int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
int where = ELEVATOR_INSERT_BACK;
if (blk_cloned_rq_check_limits(q, rq))
- return -EIO;
+ return BLK_STS_IOERR;
if (rq->rq_disk &&
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
- return -EIO;
+ return BLK_STS_IOERR;
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_sched_insert_request(rq, false, true, false, false);
- return 0;
+ return BLK_STS_OK;
}
spin_lock_irqsave(q->queue_lock, flags);
if (unlikely(blk_queue_dying(q))) {
spin_unlock_irqrestore(q->queue_lock, flags);
- return -ENODEV;
+ return BLK_STS_IOERR;
}
/*
@@ -2222,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
__blk_run_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- return 0;
+ return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -2238,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
*
* Return:
* The number of bytes to fail.
- *
- * Context:
- * queue_lock must be held.
*/
unsigned int blk_rq_err_bytes(const struct request *rq)
{
@@ -2380,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
* Return:
* Pointer to the request at the top of @q if available. Null
* otherwise.
- *
- * Context:
- * queue_lock must be held.
*/
struct request *blk_peek_request(struct request_queue *q)
{
struct request *rq;
int ret;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
while ((rq = __elv_next_request(q)) != NULL) {
rq = blk_pm_peek_request(q, rq);
@@ -2456,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
rq = NULL;
break;
} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
- int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
-
rq->rq_flags |= RQF_QUIET;
/*
* Mark this request as started so we don't trigger
* any debug logic in the end I/O path.
*/
blk_start_request(rq);
- __blk_end_request_all(rq, err);
+ __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
+ BLK_STS_TARGET : BLK_STS_IOERR);
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
break;
@@ -2505,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
*
* Block internal functions which don't want to start timer should
* call blk_dequeue_request().
- *
- * Context:
- * queue_lock must be held.
*/
void blk_start_request(struct request *req)
{
+ lockdep_assert_held(req->q->queue_lock);
+ WARN_ON_ONCE(req->q->mq_ops);
+
blk_dequeue_request(req);
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2535,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
* Return:
* Pointer to the request at the top of @q if available. Null
* otherwise.
- *
- * Context:
- * queue_lock must be held.
*/
struct request *blk_fetch_request(struct request_queue *q)
{
struct request *rq;
+ lockdep_assert_held(q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
rq = blk_peek_request(q);
if (rq)
blk_start_request(rq);
@@ -2553,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
/**
* blk_update_request - Special helper function for request stacking drivers
* @req: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @req
*
* Description:
@@ -2572,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
* %false - this request doesn't have any more data
* %true - this request has more data
**/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, blk_status_t error,
+ unsigned int nr_bytes)
{
int total_bytes;
- trace_block_rq_complete(req, error, nr_bytes);
+ trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
if (!req->bio)
return false;
- if (error && !blk_rq_is_passthrough(req) &&
- !(req->rq_flags & RQF_QUIET)) {
- char *error_type;
-
- switch (error) {
- case -ENOLINK:
- error_type = "recoverable transport";
- break;
- case -EREMOTEIO:
- error_type = "critical target";
- break;
- case -EBADE:
- error_type = "critical nexus";
- break;
- case -ETIMEDOUT:
- error_type = "timeout";
- break;
- case -ENOSPC:
- error_type = "critical space allocation";
- break;
- case -ENODATA:
- error_type = "critical medium";
- break;
- case -EIO:
- default:
- error_type = "I/O";
- break;
- }
- printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
- __func__, error_type, req->rq_disk ?
- req->rq_disk->disk_name : "?",
- (unsigned long long)blk_rq_pos(req));
-
- }
+ if (unlikely(error && !blk_rq_is_passthrough(req) &&
+ !(req->rq_flags & RQF_QUIET)))
+ print_req_error(req, error);
blk_account_io_completion(req, nr_bytes);
@@ -2680,7 +2781,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
}
EXPORT_SYMBOL_GPL(blk_update_request);
-static bool blk_update_bidi_request(struct request *rq, int error,
+static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes,
unsigned int bidi_bytes)
{
@@ -2718,13 +2819,13 @@ void blk_unprep_request(struct request *req)
}
EXPORT_SYMBOL_GPL(blk_unprep_request);
-/*
- * queue lock must be held
- */
-void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, blk_status_t error)
{
struct request_queue *q = req->q;
+ lockdep_assert_held(req->q->queue_lock);
+ WARN_ON_ONCE(q->mq_ops);
+
if (req->rq_flags & RQF_STATS)
blk_stat_add(req);
@@ -2758,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
/**
* blk_end_bidi_request - Complete a bidi request
* @rq: the request to complete
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @rq
* @bidi_bytes: number of bytes to complete @rq->next_rq
*
@@ -2772,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool blk_end_bidi_request(struct request *rq, int error,
+static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
struct request_queue *q = rq->q;
unsigned long flags;
+ WARN_ON_ONCE(q->mq_ops);
+
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
@@ -2791,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
/**
* __blk_end_bidi_request - Complete a bidi request with queue lock held
* @rq: the request to complete
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete @rq
* @bidi_bytes: number of bytes to complete @rq->next_rq
*
@@ -2803,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
@@ -2817,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
/**
* blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete
*
* Description:
@@ -2828,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool blk_end_request(struct request *rq, blk_status_t error,
+ unsigned int nr_bytes)
{
+ WARN_ON_ONCE(rq->q->mq_ops);
return blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(blk_end_request);
@@ -2837,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
/**
* blk_end_request_all - Helper function for drives to finish the request.
* @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Completely finish @rq.
*/
-void blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all(struct request *rq, blk_status_t error)
{
bool pending;
unsigned int bidi_bytes = 0;
@@ -2858,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
/**
* __blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
- * @error: %0 for success, < %0 for error
+ * @error: block status code
* @nr_bytes: number of bytes to complete
*
* Description:
@@ -2868,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
+bool __blk_end_request(struct request *rq, blk_status_t error,
+ unsigned int nr_bytes)
{
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
return __blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(__blk_end_request);
@@ -2877,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
/**
* __blk_end_request_all - Helper function for drives to finish the request.
* @rq: the request to finish
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Completely finish @rq. Must be called with queue lock held.
*/
-void __blk_end_request_all(struct request *rq, int error)
+void __blk_end_request_all(struct request *rq, blk_status_t error)
{
bool pending;
unsigned int bidi_bytes = 0;
+ lockdep_assert_held(rq->q->queue_lock);
+ WARN_ON_ONCE(rq->q->mq_ops);
+
if (unlikely(blk_bidi_rq(rq)))
bidi_bytes = blk_rq_bytes(rq->next_rq);
@@ -2898,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
/**
* __blk_end_request_cur - Helper function to finish the current request chunk.
* @rq: the request to finish the current chunk for
- * @error: %0 for success, < %0 for error
+ * @error: block status code
*
* Description:
* Complete the current consecutively mapped chunk from @rq. Must
@@ -2908,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
* %false - we are done with this request
* %true - still buffers pending for this request
*/
-bool __blk_end_request_cur(struct request *rq, int error)
+bool __blk_end_request_cur(struct request *rq, blk_status_t error)
{
return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
@@ -3151,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
bool from_schedule)
__releases(q->queue_lock)
{
+ lockdep_assert_held(q->queue_lock);
+
trace_block_unplug(q, depth, !from_schedule);
if (from_schedule)
@@ -3249,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
* Short-circuit if @q is dead
*/
if (unlikely(blk_queue_dying(q))) {
- __blk_end_request_all(rq, -ENODEV);
+ __blk_end_request_all(rq, BLK_STS_IOERR);
continue;
}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3..5c0f3dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
* @rq: request to complete
* @error: end I/O status of the request
*/
-static void blk_end_sync_rq(struct request *rq, int error)
+static void blk_end_sync_rq(struct request *rq, blk_status_t error)
{
struct completion *waiting = rq->end_io_data;
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
- __blk_end_request_all(rq, -ENXIO);
+ __blk_end_request_all(rq, BLK_STS_IOERR);
spin_unlock_irq(q->queue_lock);
return;
}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880..ed5fe32 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
*/
static bool blk_flush_complete_seq(struct request *rq,
struct blk_flush_queue *fq,
- unsigned int seq, int error)
+ unsigned int seq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
return kicked | queued;
}
-static void flush_end_io(struct request *flush_rq, int error)
+static void flush_end_io(struct request *flush_rq, blk_status_t error)
{
struct request_queue *q = flush_rq->q;
struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
return blk_flush_queue_rq(flush_rq, false);
}
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ lockdep_assert_held(q->queue_lock);
+
/*
* Updating q->in_flight[] here for making this tag usable
* early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
blk_run_queue_async(q);
}
-static void mq_flush_data_end_io(struct request *rq, int error)
+static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
{
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
* or __blk_mq_run_hw_queue() to dispatch request.
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock) in !mq case
*/
void blk_insert_flush(struct request *rq)
{
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
/*
* @policy now records what operations need to be done. Adjust
* REQ_PREFLUSH and FUA for the driver.
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9..feb3057 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
.sysfs_ops = &integrity_ops,
};
-static int blk_integrity_nop_fn(struct blk_integrity_iter *iter)
+static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
{
- return 0;
+ return BLK_STS_OK;
}
static const struct blk_integrity_profile nop_profile = {
diff --git a/block/blk-map.c b/block/blk-map.c
index 3b5cb86..2547016 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
+ blk_queue_bounce(rq->q, &bio);
+
if (!rq->bio) {
blk_rq_bio_prep(rq->q, rq, bio);
} else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
map_data->offset += bio->bi_iter.bi_size;
orig_bio = bio;
- blk_queue_bounce(q, &bio);
/*
* We link the bounce buffer in and could have to traverse it
* later so we have to get a ref to prevent it from being freed
*/
- bio_get(bio);
-
ret = blk_rq_append_bio(rq, bio);
+ bio_get(bio);
if (ret) {
bio_endio(bio);
__blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
return ret;
}
- blk_queue_bounce(q, &rq->bio);
return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae4..99038830 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
bool do_split = true;
struct bio *new = NULL;
const unsigned max_sectors = get_max_io_size(q, bio);
- unsigned bvecs = 0;
bio_for_each_segment(bv, bio, iter) {
/*
- * With arbitrary bio size, the incoming bio may be very
- * big. We have to split the bio into small bios so that
- * each holds at most BIO_MAX_PAGES bvecs because
- * bio_clone() can fail to allocate big bvecs.
- *
- * It should have been better to apply the limit per
- * request queue in which bio_clone() is involved,
- * instead of globally. The biggest blocker is the
- * bio_clone() in bio bounce.
- *
- * If bio is splitted by this reason, we should have
- * allowed to continue bios merging, but don't do
- * that now for making the change simple.
- *
- * TODO: deal with bio bounce's bio_clone() gracefully
- * and convert the global limit into per-queue limit.
- */
- if (bvecs++ >= BIO_MAX_PAGES)
- goto split;
-
- /*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
@@ -202,8 +180,7 @@ split:
return do_split ? new : NULL;
}
-void blk_queue_split(struct request_queue *q, struct bio **bio,
- struct bio_set *bs)
+void blk_queue_split(struct request_queue *q, struct bio **bio)
{
struct bio *split, *res;
unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
- split = blk_bio_discard_split(q, *bio, bs, &nsegs);
+ split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_ZEROES:
- split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs);
+ split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
break;
case REQ_OP_WRITE_SAME:
- split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
+ split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
break;
default:
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
{
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
return NULL;
/*
+ * Don't allow merge of different write hints, or for a hint with
+ * non-hint IO.
+ */
+ if (req->write_hint != next->write_hint)
+ return NULL;
+
+ /*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
* will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
!blk_write_same_mergeable(rq->bio, bio))
return false;
+ /*
+ * Don't allow merge of different write hints, or for a hint with
+ * non-hint IO.
+ */
+ if (rq->write_hint != bio->bi_write_hint)
+ return false;
+
return true;
}
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e86..2cca4fc 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
#include "blk.h"
#include "blk-mq.h"
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
- const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+ const struct cpumask *online_mask)
{
- return cpu * nr_queues / nr_cpus;
+ /*
+ * Non online CPU will be mapped to queue index 0.
+ */
+ if (!cpumask_test_cpu(cpu, online_mask))
+ return 0;
+ return cpu % nr_queues;
}
static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
unsigned int *map = set->mq_map;
unsigned int nr_queues = set->nr_hw_queues;
const struct cpumask *online_mask = cpu_online_mask;
- unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
- cpumask_var_t cpus;
-
- if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
- return -ENOMEM;
-
- cpumask_clear(cpus);
- nr_cpus = nr_uniq_cpus = 0;
- for_each_cpu(i, online_mask) {
- nr_cpus++;
- first_sibling = get_first_sibling(i);
- if (!cpumask_test_cpu(first_sibling, cpus))
- nr_uniq_cpus++;
- cpumask_set_cpu(i, cpus);
- }
-
- queue = 0;
- for_each_possible_cpu(i) {
- if (!cpumask_test_cpu(i, online_mask)) {
- map[i] = 0;
- continue;
- }
+ unsigned int cpu, first_sibling;
+ for_each_possible_cpu(cpu) {
/*
- * Easy case - we have equal or more hardware queues. Or
- * there are no thread siblings to take into account. Do
- * 1:1 if enough, or sequential mapping if less.
+ * First do sequential mapping between CPUs and queues.
+ * In case we still have CPUs to map, and we have some number of
+ * threads per cores then map sibling threads to the same queue for
+ * performace optimizations.
*/
- if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
- map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
- queue++;
- continue;
+ if (cpu < nr_queues) {
+ map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+ } else {
+ first_sibling = get_first_sibling(cpu);
+ if (first_sibling == cpu)
+ map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+ else
+ map[cpu] = map[first_sibling];
}
-
- /*
- * Less then nr_cpus queues, and we have some number of
- * threads per cores. Map sibling threads to the same
- * queue.
- */
- first_sibling = get_first_sibling(i);
- if (first_sibling == i) {
- map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
- queue);
- queue++;
- } else
- map[i] = map[first_sibling];
}
- free_cpumask_var(cpus);
return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_map_queues);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4..9ebc294 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
blk_mq_run_hw_queues(q, true);
} else if (strcmp(op, "start") == 0) {
blk_mq_start_stopped_hw_queues(q, true);
+ } else if (strcmp(op, "kick") == 0) {
+ blk_mq_kick_requeue_list(q);
} else {
pr_err("%s: unsupported operation '%s'\n", __func__, op);
inval:
- pr_err("%s: use either 'run' or 'start'\n", __func__);
+ pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
return -EINVAL;
}
return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
}
}
+static int queue_write_hint_show(void *data, struct seq_file *m)
+{
+ struct request_queue *q = data;
+ int i;
+
+ for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+ seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
+
+ return 0;
+}
+
+static ssize_t queue_write_hint_store(void *data, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct request_queue *q = data;
+ int i;
+
+ for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
+ q->write_hints[i] = 0;
+
+ return count;
+}
+
static int queue_poll_stat_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
};
#undef RQF_NAME
+#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
+static const char *const rqaf_name[] = {
+ RQAF_NAME(COMPLETE),
+ RQAF_NAME(STARTED),
+ RQAF_NAME(POLL_SLEPT),
+};
+#undef RQAF_NAME
+
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
{
const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
seq_puts(m, ", .rq_flags=");
blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
ARRAY_SIZE(rqf_name));
+ seq_puts(m, ", .atomic_flags=");
+ blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
rq->internal_tag);
if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
}
EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+ __acquires(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_lock_irq(&q->requeue_lock);
+ return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct request_queue *q = m->private;
+
+ return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+ __releases(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+ .start = queue_requeue_list_start,
+ .next = queue_requeue_list_next,
+ .stop = queue_requeue_list_stop,
+ .show = blk_mq_debugfs_rq_show,
+};
+
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
__acquires(&hctx->lock)
{
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
.show = blk_mq_debugfs_rq_show,
};
+struct show_busy_params {
+ struct seq_file *m;
+ struct blk_mq_hw_ctx *hctx;
+};
+
+/*
+ * Note: the state of a request may change while this function is in progress,
+ * e.g. due to a concurrent blk_mq_finish_request() call.
+ */
+static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
+{
+ const struct show_busy_params *params = data;
+
+ if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
+ test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+ __blk_mq_debugfs_rq_show(params->m,
+ list_entry_rq(&rq->queuelist));
+}
+
+static int hctx_busy_show(void *data, struct seq_file *m)
+{
+ struct blk_mq_hw_ctx *hctx = data;
+ struct show_busy_params params = { .m = m, .hctx = hctx };
+
+ blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
+ &params);
+
+ return 0;
+}
+
static int hctx_ctx_map_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
{"poll_stat", 0400, queue_poll_stat_show},
+ {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
{"state", 0600, queue_state_show, queue_state_write},
+ {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
{},
};
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"state", 0400, hctx_state_show},
{"flags", 0400, hctx_flags_show},
{"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
+ {"busy", 0400, hctx_busy_show},
{"ctx_map", 0400, hctx_ctx_map_show},
{"tags", 0400, hctx_tags_show},
{"tags_bitmap", 0400, hctx_tags_bitmap_show},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 0ded5e8..7f0dc48 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
-static void __blk_mq_sched_assign_ioc(struct request_queue *q,
- struct request *rq,
- struct bio *bio,
- struct io_context *ioc)
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
{
+ struct request_queue *q = rq->q;
+ struct io_context *ioc = rq_ioc(bio);
struct io_cq *icq;
spin_lock_irq(q->queue_lock);
@@ -47,25 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
if (!icq)
return;
}
-
+ get_io_context(icq->ioc);
rq->elv.icq = icq;
- if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
- rq->rq_flags |= RQF_ELVPRIV;
- get_io_context(icq->ioc);
- return;
- }
-
- rq->elv.icq = NULL;
-}
-
-static void blk_mq_sched_assign_ioc(struct request_queue *q,
- struct request *rq, struct bio *bio)
-{
- struct io_context *ioc;
-
- ioc = rq_ioc(bio);
- if (ioc)
- __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
}
/*
@@ -107,71 +89,6 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
return false;
}
-struct request *blk_mq_sched_get_request(struct request_queue *q,
- struct bio *bio,
- unsigned int op,
- struct blk_mq_alloc_data *data)
-{
- struct elevator_queue *e = q->elevator;
- struct request *rq;
-
- blk_queue_enter_live(q);
- data->q = q;
- if (likely(!data->ctx))
- data->ctx = blk_mq_get_ctx(q);
- if (likely(!data->hctx))
- data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
-
- if (e) {
- data->flags |= BLK_MQ_REQ_INTERNAL;
-
- /*
- * Flush requests are special and go directly to the
- * dispatch list.
- */
- if (!op_is_flush(op) && e->type->ops.mq.get_request) {
- rq = e->type->ops.mq.get_request(q, op, data);
- if (rq)
- rq->rq_flags |= RQF_QUEUED;
- } else
- rq = __blk_mq_alloc_request(data, op);
- } else {
- rq = __blk_mq_alloc_request(data, op);
- }
-
- if (rq) {
- if (!op_is_flush(op)) {
- rq->elv.icq = NULL;
- if (e && e->type->icq_cache)
- blk_mq_sched_assign_ioc(q, rq, bio);
- }
- data->hctx->queued++;
- return rq;
- }
-
- blk_queue_exit(q);
- return NULL;
-}
-
-void blk_mq_sched_put_request(struct request *rq)
-{
- struct request_queue *q = rq->q;
- struct elevator_queue *e = q->elevator;
-
- if (rq->rq_flags & RQF_ELVPRIV) {
- blk_mq_sched_put_rq_priv(rq->q, rq);
- if (rq->elv.icq) {
- put_io_context(rq->elv.icq->ioc);
- rq->elv.icq = NULL;
- }
- }
-
- if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
- e->type->ops.mq.put_request(rq);
- else
- blk_mq_finish_request(rq);
-}
-
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
@@ -180,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
bool did_work = false;
LIST_HEAD(rq_list);
- if (unlikely(blk_mq_hctx_stopped(hctx)))
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
@@ -260,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+ struct blk_mq_ctx *ctx, struct bio *bio)
+{
+ struct request *rq;
+ int checked = 8;
+
+ lockdep_assert_held(&ctx->lock);
+
+ list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+ bool merged = false;
+
+ if (!checked--)
+ break;
+
+ if (!blk_rq_merge_ok(rq, bio))
+ continue;
+
+ switch (blk_try_merge(rq, bio)) {
+ case ELEVATOR_BACK_MERGE:
+ if (blk_mq_sched_allow_merge(q, rq, bio))
+ merged = bio_attempt_back_merge(q, rq, bio);
+ break;
+ case ELEVATOR_FRONT_MERGE:
+ if (blk_mq_sched_allow_merge(q, rq, bio))
+ merged = bio_attempt_front_merge(q, rq, bio);
+ break;
+ case ELEVATOR_DISCARD_MERGE:
+ merged = bio_attempt_discard_merge(q, rq, bio);
+ break;
+ default:
+ continue;
+ }
+
+ if (merged)
+ ctx->rq_merged++;
+ return merged;
+ }
+
+ return false;
+}
+
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
+ struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ bool ret = false;
- if (e->type->ops.mq.bio_merge) {
- struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
+ if (e && e->type->ops.mq.bio_merge) {
blk_mq_put_ctx(ctx);
return e->type->ops.mq.bio_merge(hctx, bio);
}
- return false;
+ if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
+ /* default per sw-queue merge */
+ spin_lock(&ctx->lock);
+ ret = blk_mq_attempt_merge(q, ctx, bio);
+ spin_unlock(&ctx->lock);
+ }
+
+ blk_mq_put_ctx(ctx);
+ return ret;
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5007ede..9267d0b7 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,8 +7,7 @@
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
-struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
-void blk_mq_sched_put_request(struct request *rq);
+void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
- struct elevator_queue *e = q->elevator;
-
- if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
+ if (blk_queue_nomerges(q) || !bio_mergeable(bio))
return false;
return __blk_mq_sched_bio_merge(q, bio);
}
-static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
- struct request *rq,
- struct bio *bio)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.mq.get_rq_priv)
- return e->type->ops.mq.get_rq_priv(q, rq, bio);
-
- return 0;
-}
-
-static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
- struct request *rq)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.mq.put_rq_priv)
- e->type->ops.mq.put_rq_priv(q, rq);
-}
-
static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 958ceda..05dfa3f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+/*
+ * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
+ * mpt3sas driver such that this function can be removed.
+ */
+void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
+
/**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
* @q: request queue.
*
* Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
*/
void blk_mq_quiesce_queue(struct request_queue *q)
{
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
unsigned int i;
bool rcu = false;
- __blk_mq_stop_hw_queues(q, true);
+ blk_mq_quiesce_queue_nowait(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
- synchronize_srcu(&hctx->queue_rq_srcu);
+ synchronize_srcu(hctx->queue_rq_srcu);
else
rcu = true;
}
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function recovers queue into the state before quiescing
+ * which is done by blk_mq_quiesce_queue.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ /* dispatch requests which are inserted during quiescing */
+ blk_mq_run_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
}
EXPORT_SYMBOL(blk_mq_can_queue);
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, unsigned int op)
+static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+ unsigned int tag, unsigned int op)
{
+ struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+ struct request *rq = tags->static_rqs[tag];
+
+ rq->rq_flags = 0;
+
+ if (data->flags & BLK_MQ_REQ_INTERNAL) {
+ rq->tag = -1;
+ rq->internal_tag = tag;
+ } else {
+ if (blk_mq_tag_busy(data->hctx)) {
+ rq->rq_flags = RQF_MQ_INFLIGHT;
+ atomic_inc(&data->hctx->nr_active);
+ }
+ rq->tag = tag;
+ rq->internal_tag = -1;
+ data->hctx->tags->rqs[rq->tag] = rq;
+ }
+
INIT_LIST_HEAD(&rq->queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
- rq->q = q;
- rq->mq_ctx = ctx;
+ rq->q = data->q;
+ rq->mq_ctx = data->ctx;
rq->cmd_flags = op;
- if (blk_queue_io_stat(q))
+ if (blk_queue_io_stat(data->q))
rq->rq_flags |= RQF_IO_STAT;
/* do not touch atomic flags, it needs atomic ops against the timer */
rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
rq->end_io_data = NULL;
rq->next_rq = NULL;
- ctx->rq_dispatched[op_is_sync(op)]++;
+ data->ctx->rq_dispatched[op_is_sync(op)]++;
+ return rq;
}
-EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
- unsigned int op)
+static struct request *blk_mq_get_request(struct request_queue *q,
+ struct bio *bio, unsigned int op,
+ struct blk_mq_alloc_data *data)
{
+ struct elevator_queue *e = q->elevator;
struct request *rq;
unsigned int tag;
- tag = blk_mq_get_tag(data);
- if (tag != BLK_MQ_TAG_FAIL) {
- struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
+ blk_queue_enter_live(q);
+ data->q = q;
+ if (likely(!data->ctx))
+ data->ctx = blk_mq_get_ctx(q);
+ if (likely(!data->hctx))
+ data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+ if (op & REQ_NOWAIT)
+ data->flags |= BLK_MQ_REQ_NOWAIT;
- rq = tags->static_rqs[tag];
+ if (e) {
+ data->flags |= BLK_MQ_REQ_INTERNAL;
- if (data->flags & BLK_MQ_REQ_INTERNAL) {
- rq->tag = -1;
- rq->internal_tag = tag;
- } else {
- if (blk_mq_tag_busy(data->hctx)) {
- rq->rq_flags = RQF_MQ_INFLIGHT;
- atomic_inc(&data->hctx->nr_active);
- }
- rq->tag = tag;
- rq->internal_tag = -1;
- data->hctx->tags->rqs[rq->tag] = rq;
- }
+ /*
+ * Flush requests are special and go directly to the
+ * dispatch list.
+ */
+ if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+ e->type->ops.mq.limit_depth(op, data);
+ }
- blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
- return rq;
+ tag = blk_mq_get_tag(data);
+ if (tag == BLK_MQ_TAG_FAIL) {
+ blk_queue_exit(q);
+ return NULL;
}
- return NULL;
+ rq = blk_mq_rq_ctx_init(data, tag, op);
+ if (!op_is_flush(op)) {
+ rq->elv.icq = NULL;
+ if (e && e->type->ops.mq.prepare_request) {
+ if (e->type->icq_cache && rq_ioc(bio))
+ blk_mq_sched_assign_ioc(rq, bio);
+
+ e->type->ops.mq.prepare_request(rq, bio);
+ rq->rq_flags |= RQF_ELVPRIV;
+ }
+ }
+ data->hctx->queued++;
+ return rq;
}
-EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
unsigned int flags)
{
struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
if (ret)
return ERR_PTR(ret);
- rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, op, &alloc_data);
blk_mq_put_ctx(alloc_data.ctx);
blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
}
EXPORT_SYMBOL(blk_mq_alloc_request);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
- unsigned int flags, unsigned int hctx_idx)
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ unsigned int op, unsigned int flags, unsigned int hctx_idx)
{
struct blk_mq_alloc_data alloc_data = { .flags = flags };
struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
cpu = cpumask_first(alloc_data.hctx->cpumask);
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
- rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
+ rq = blk_mq_get_request(q, NULL, op, &alloc_data);
blk_queue_exit(q);
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
- struct request *rq)
+void blk_mq_free_request(struct request *rq)
{
- const int sched_tag = rq->internal_tag;
struct request_queue *q = rq->q;
+ struct elevator_queue *e = q->elevator;
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+ const int sched_tag = rq->internal_tag;
+ if (rq->rq_flags & RQF_ELVPRIV) {
+ if (e && e->type->ops.mq.finish_request)
+ e->type->ops.mq.finish_request(rq);
+ if (rq->elv.icq) {
+ put_io_context(rq->elv.icq->ioc);
+ rq->elv.icq = NULL;
+ }
+ }
+
+ ctx->rq_completed[rq_is_sync(rq)]++;
if (rq->rq_flags & RQF_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
wbt_done(q->rq_wb, &rq->issue_stat);
- rq->rq_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
}
-
-static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
-{
- struct blk_mq_ctx *ctx = rq->mq_ctx;
-
- ctx->rq_completed[rq_is_sync(rq)]++;
- __blk_mq_finish_request(hctx, ctx, rq);
-}
-
-void blk_mq_finish_request(struct request *rq)
-{
- blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_finish_request);
-
-void blk_mq_free_request(struct request *rq)
-{
- blk_mq_sched_put_request(rq);
-}
EXPORT_SYMBOL_GPL(blk_mq_free_request);
-inline void __blk_mq_end_request(struct request *rq, int error)
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
blk_account_io_done(rq);
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
}
EXPORT_SYMBOL(__blk_mq_end_request);
-void blk_mq_end_request(struct request *rq, int error)
+void blk_mq_end_request(struct request *rq, blk_status_t error)
{
if (blk_update_request(rq, error, blk_rq_bytes(rq)))
BUG();
@@ -753,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
blk_queue_exit(q);
}
-/*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
- */
-static bool blk_mq_attempt_merge(struct request_queue *q,
- struct blk_mq_ctx *ctx, struct bio *bio)
-{
- struct request *rq;
- int checked = 8;
-
- list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
- bool merged = false;
-
- if (!checked--)
- break;
-
- if (!blk_rq_merge_ok(rq, bio))
- continue;
-
- switch (blk_try_merge(rq, bio)) {
- case ELEVATOR_BACK_MERGE:
- if (blk_mq_sched_allow_merge(q, rq, bio))
- merged = bio_attempt_back_merge(q, rq, bio);
- break;
- case ELEVATOR_FRONT_MERGE:
- if (blk_mq_sched_allow_merge(q, rq, bio))
- merged = bio_attempt_front_merge(q, rq, bio);
- break;
- case ELEVATOR_DISCARD_MERGE:
- merged = bio_attempt_discard_merge(q, rq, bio);
- break;
- default:
- continue;
- }
-
- if (merged)
- ctx->rq_merged++;
- return merged;
- }
-
- return false;
-}
-
struct flush_busy_ctx_data {
struct blk_mq_hw_ctx *hctx;
struct list_head *list;
@@ -968,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
{
struct blk_mq_hw_ctx *hctx;
struct request *rq;
- int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
+ int errors, queued;
if (list_empty(list))
return false;
@@ -979,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
errors = queued = 0;
do {
struct blk_mq_queue_data bd;
+ blk_status_t ret;
rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1019,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
}
ret = q->mq_ops->queue_rq(hctx, &bd);
- switch (ret) {
- case BLK_MQ_RQ_QUEUE_OK:
- queued++;
- break;
- case BLK_MQ_RQ_QUEUE_BUSY:
+ if (ret == BLK_STS_RESOURCE) {
blk_mq_put_driver_tag_hctx(hctx, rq);
list_add(&rq->queuelist, list);
__blk_mq_requeue_request(rq);
break;
- default:
- pr_err("blk-mq: bad return on queue: %d\n", ret);
- case BLK_MQ_RQ_QUEUE_ERROR:
+ }
+
+ if (unlikely(ret != BLK_STS_OK)) {
errors++;
- blk_mq_end_request(rq, -EIO);
- break;
+ blk_mq_end_request(rq, BLK_STS_IOERR);
+ continue;
}
- if (ret == BLK_MQ_RQ_QUEUE_BUSY)
- break;
+ queued++;
} while (!list_empty(list));
hctx->dispatched[queued_to_index(queued)]++;
@@ -1075,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
* - blk_mq_run_hw_queue() checks whether or not a queue has
* been stopped before rerunning a queue.
* - Some but not all block drivers stop a queue before
- * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq
+ * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq.
*/
if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1100,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
} else {
might_sleep();
- srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+ srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
blk_mq_sched_dispatch_requests(hctx);
- srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+ srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
}
@@ -1134,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
unsigned long msecs)
{
- if (unlikely(blk_mq_hctx_stopped(hctx) ||
- !blk_mq_hw_queue_mapped(hctx)))
+ if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
+ return;
+
+ if (unlikely(blk_mq_hctx_stopped(hctx)))
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1201,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
}
EXPORT_SYMBOL(blk_mq_queue_stopped);
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
- if (sync)
- cancel_delayed_work_sync(&hctx->run_work);
- else
- cancel_delayed_work(&hctx->run_work);
+ cancel_delayed_work(&hctx->run_work);
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
-
-void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
-{
- __blk_mq_stop_hw_queue(hctx, false);
-}
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
+/*
+ * This function is often used for pausing .queue_rq() by driver when
+ * there isn't enough resource or some conditions aren't satisfied, and
+ * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
+ *
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
+void blk_mq_stop_hw_queues(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i)
- __blk_mq_stop_hw_queue(hctx, sync);
-}
-
-void blk_mq_stop_hw_queues(struct request_queue *q)
-{
- __blk_mq_stop_hw_queues(q, false);
+ blk_mq_stop_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queues);
@@ -1295,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{
- if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+ if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
return;
/*
@@ -1317,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ lockdep_assert_held(&ctx->lock);
+
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
@@ -1330,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
+ lockdep_assert_held(&ctx->lock);
+
__blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}
@@ -1427,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
!blk_queue_nomerges(hctx->queue);
}
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
- struct request *rq, struct bio *bio)
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+ struct blk_mq_ctx *ctx,
+ struct request *rq)
{
- if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
- blk_mq_bio_to_request(rq, bio);
- spin_lock(&ctx->lock);
-insert_rq:
- __blk_mq_insert_request(hctx, rq, false);
- spin_unlock(&ctx->lock);
- return false;
- } else {
- struct request_queue *q = hctx->queue;
-
- spin_lock(&ctx->lock);
- if (!blk_mq_attempt_merge(q, ctx, bio)) {
- blk_mq_bio_to_request(rq, bio);
- goto insert_rq;
- }
-
- spin_unlock(&ctx->lock);
- __blk_mq_finish_request(hctx, ctx, rq);
- return true;
- }
+ spin_lock(&ctx->lock);
+ __blk_mq_insert_request(hctx, rq, false);
+ spin_unlock(&ctx->lock);
}
static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1471,10 +1476,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
.last = true,
};
blk_qc_t new_cookie;
- int ret;
+ blk_status_t ret;
bool run_queue = true;
- if (blk_mq_hctx_stopped(hctx)) {
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
goto insert;
}
@@ -1493,18 +1499,19 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
* would have done
*/
ret = q->mq_ops->queue_rq(hctx, &bd);
- if (ret == BLK_MQ_RQ_QUEUE_OK) {
+ switch (ret) {
+ case BLK_STS_OK:
*cookie = new_cookie;
return;
- }
-
- if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+ case BLK_STS_RESOURCE:
+ __blk_mq_requeue_request(rq);
+ goto insert;
+ default:
*cookie = BLK_QC_T_NONE;
- blk_mq_end_request(rq, -EIO);
+ blk_mq_end_request(rq, ret);
return;
}
- __blk_mq_requeue_request(rq);
insert:
blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
}
@@ -1521,9 +1528,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
might_sleep();
- srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+ srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
__blk_mq_try_issue_directly(hctx, rq, cookie, true);
- srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+ srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
}
@@ -1541,7 +1548,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_queue_bounce(q, &bio);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_io_error(bio);
@@ -1559,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
trace_block_getrq(q, bio, bio->bi_opf);
- rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
+ rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
@@ -1639,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
blk_mq_sched_insert_request(rq, false, true, true, true);
- } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+ } else {
blk_mq_put_ctx(data.ctx);
+ blk_mq_bio_to_request(rq, bio);
+ blk_mq_queue_io(data.hctx, data.ctx, rq);
blk_mq_run_hw_queue(data.hctx, true);
- } else
- blk_mq_put_ctx(data.ctx);
+ }
return cookie;
}
@@ -1866,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
set->ops->exit_hctx(hctx, hctx_idx);
if (hctx->flags & BLK_MQ_F_BLOCKING)
- cleanup_srcu_struct(&hctx->queue_rq_srcu);
+ cleanup_srcu_struct(hctx->queue_rq_srcu);
blk_mq_remove_cpuhp(hctx);
blk_free_flush_queue(hctx->fq);
@@ -1900,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
hctx->queue = q;
- hctx->queue_num = hctx_idx;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1939,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
goto free_fq;
if (hctx->flags & BLK_MQ_F_BLOCKING)
- init_srcu_struct(&hctx->queue_rq_srcu);
+ init_srcu_struct(hctx->queue_rq_srcu);
blk_mq_debugfs_register_hctx(q, hctx);
@@ -2224,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_init_queue);
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+ int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+ BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
+ __alignof__(struct blk_mq_hw_ctx)) !=
+ sizeof(struct blk_mq_hw_ctx));
+
+ if (tag_set->flags & BLK_MQ_F_BLOCKING)
+ hw_ctx_size += sizeof(struct srcu_struct);
+
+ return hw_ctx_size;
+}
+
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
@@ -2238,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
continue;
node = blk_mq_hw_queue_to_node(q->mq_map, i);
- hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+ hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
GFP_KERNEL, node);
if (!hctxs[i])
break;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48..1a06fdf 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
return data->hctx->tags;
}
-/*
- * Internal helpers for request allocation/init/free
- */
-void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, unsigned int op);
-void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
- struct request *rq);
-void blk_mq_finish_request(struct request *rq);
-struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
- unsigned int op);
-
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4fa81ed3..be1f115 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->nr_batching = BLK_BATCH_REQ;
blk_set_default_limits(&q->limits);
-
- /*
- * by default assume old behaviour and bounce for any highmem page
- */
- blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}
EXPORT_SYMBOL(blk_queue_make_request);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 07cc329..2290f65 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
* all transfers have been done for a request. It's important to call
* this function before end_that_request_last(), as that will put the
* request back on the free list thus corrupting the internal tag list.
- *
- * Notes:
- * queue lock must be held.
**/
void blk_queue_end_tag(struct request_queue *q, struct request *rq)
{
struct blk_queue_tag *bqt = q->queue_tags;
unsigned tag = rq->tag; /* negative tags invalid */
+ lockdep_assert_held(q->queue_lock);
+
BUG_ON(tag >= bqt->real_max_depth);
list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
* calling this function. The request will also be removed from
* the request queue, so it's the drivers responsibility to readd
* it if it should need to be restarted for some reason.
- *
- * Notes:
- * queue lock must be held.
**/
int blk_queue_start_tag(struct request_queue *q, struct request *rq)
{
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
unsigned max_depth;
int tag;
+ lockdep_assert_held(q->queue_lock);
+
if (unlikely((rq->rq_flags & RQF_QUEUED))) {
printk(KERN_ERR
"%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
* Hardware conditions may dictate a need to stop all pending requests.
* In this case, we will safely clear the block side of the tag queue and
* readd all requests to the request queue in the right order.
- *
- * Notes:
- * queue lock must be held.
**/
void blk_queue_invalidate_tags(struct request_queue *q)
{
struct list_head *tmp, *n;
+ lockdep_assert_held(q->queue_lock);
+
list_for_each_safe(tmp, n, &q->tag_busy_list)
blk_requeue_request(q, list_entry_rq(tmp));
}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index cbff183..17ec83b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
- * Queue lock must be held for the non-mq case, mq case doesn't care.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
+ if (!q->mq_ops)
+ lockdep_assert_held(q->queue_lock);
+
/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
if (!q->mq_ops && !q->rq_timed_out_fn)
return;
diff --git a/block/blk.h b/block/blk.h
index 83c8e11..01ebb81 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
struct request *rq;
struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
+ WARN_ON_ONCE(q->mq_ops);
+
while (1) {
if (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif
+#ifdef CONFIG_BOUNCE
+extern int init_emergency_isa_pool(void);
+extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
+#else
+static inline int init_emergency_isa_pool(void)
+{
+ return 0;
+}
+static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+{
+}
+#endif /* CONFIG_BOUNCE */
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3..5793c2d 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,10 +22,12 @@
#include <asm/tlbflush.h>
#include <trace/events/block.h>
+#include "blk.h"
#define POOL_SIZE 64
#define ISA_POOL_SIZE 16
+static struct bio_set *bounce_bio_set, *bounce_bio_split;
static mempool_t *page_pool, *isa_page_pool;
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
BUG_ON(!page_pool);
pr_info("pool size: %d pages\n", POOL_SIZE);
+ bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ BUG_ON(!bounce_bio_set);
+ if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+ BUG_ON(1);
+
+ bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
+ BUG_ON(!bounce_bio_split);
+
return 0;
}
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
mempool_free(bvec->bv_page, pool);
}
- bio_orig->bi_error = bio->bi_error;
+ bio_orig->bi_status = bio->bi_status;
bio_endio(bio_orig);
bio_put(bio);
}
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
{
struct bio *bio_orig = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
copy_to_high_bio_irq(bio_orig, bio);
bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
int rw = bio_data_dir(*bio_orig);
struct bio_vec *to, from;
struct bvec_iter iter;
- unsigned i;
-
- bio_for_each_segment(from, *bio_orig, iter)
- if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
- goto bounce;
+ unsigned i = 0;
+ bool bounce = false;
+ int sectors = 0;
+
+ bio_for_each_segment(from, *bio_orig, iter) {
+ if (i++ < BIO_MAX_PAGES)
+ sectors += from.bv_len >> 9;
+ if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
+ bounce = true;
+ }
+ if (!bounce)
+ return;
- return;
-bounce:
- bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
+ if (sectors < bio_sectors(*bio_orig)) {
+ bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+ bio_chain(bio, *bio_orig);
+ generic_make_request(*bio_orig);
+ *bio_orig = bio;
+ }
+ bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;
- if (page_to_pfn(page) <= queue_bounce_pfn(q))
+ if (page_to_pfn(page) <= q->limits.bounce_pfn)
continue;
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
* don't waste time iterating over bio segments
*/
if (!(q->bounce_gfp & GFP_DMA)) {
- if (queue_bounce_pfn(q) >= blk_max_pfn)
+ if (q->limits.bounce_pfn >= blk_max_pfn)
return;
pool = page_pool;
} else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
*/
__blk_queue_bounce(q, bio_orig, pool);
}
-
-EXPORT_SYMBOL(blk_queue_bounce);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbb..c4513b2 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
struct request *rq = job->req;
- blk_end_request_all(rq, scsi_req(rq)->result);
+ blk_end_request_all(rq, BLK_STS_OK);
put_device(job->dev); /* release reference for the request */
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
ret = bsg_create_job(dev, req);
if (ret) {
scsi_req(req)->result = ret;
- blk_end_request_all(req, ret);
+ blk_end_request_all(req, BLK_STS_OK);
spin_lock_irq(q->queue_lock);
continue;
}
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
diff --git a/block/bsg.c b/block/bsg.c
index 6fd0854..37663b6 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
rq = blk_get_request(q, op, GFP_KERNEL);
if (IS_ERR(rq))
return rq;
- scsi_req_init(rq);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
if (ret)
@@ -294,14 +293,14 @@ out:
* async completion call-back from the block layer, when scsi/ide/whatever
* calls end_that_request_last() on a request
*/
-static void bsg_rq_end_io(struct request *rq, int uptodate)
+static void bsg_rq_end_io(struct request *rq, blk_status_t status)
{
struct bsg_command *bc = rq->end_io_data;
struct bsg_device *bd = bc->bd;
unsigned long flags;
- dprintk("%s: finished rq %p bc %p, bio %p stat %d\n",
- bd->name, rq, bc, bc->bio, uptodate);
+ dprintk("%s: finished rq %p bc %p, bio %p\n",
+ bd->name, rq, bc, bc->bio);
bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
#ifdef BSG_DEBUG
unsigned char buf[32];
#endif
+
+ if (!blk_queue_scsi_passthrough(rq)) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ return ERR_PTR(-EINVAL);
+ }
+
if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b7e9c7f..3d5c289 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -982,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
return min_vdisktime;
}
-static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
-{
- s64 delta = (s64)(vdisktime - min_vdisktime);
- if (delta < 0)
- min_vdisktime = vdisktime;
-
- return min_vdisktime;
-}
-
static void update_min_vdisktime(struct cfq_rb_root *st)
{
struct cfq_group *cfqg;
diff --git a/block/elevator.c b/block/elevator.c
index dac99fb..4bb2f0c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
*/
if (elv_attempt_insert_merge(q, rq))
break;
+ /* fall through */
case ELEVATOR_INSERT_SORT:
BUG_ON(blk_rq_is_passthrough(rq));
rq->rq_flags |= RQF_SORTED;
diff --git a/block/genhd.c b/block/genhd.c
index d252d29..7f520fa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -36,7 +36,7 @@ struct kobject *block_depr;
static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr);
-static struct device_type disk_type;
+static const struct device_type disk_type;
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
return NULL;
}
-static struct device_type disk_type = {
+static const struct device_type disk_type = {
.name = "disk",
.groups = disk_attr_groups,
.release = disk_release,
diff --git a/block/ioprio.c b/block/ioprio.c
index 4b120c9..6f5d0b6 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
case IOPRIO_CLASS_RT:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- /* fall through, rt has prio field too */
+ /* fall through */
+ /* rt has prio field too */
case IOPRIO_CLASS_BE:
if (data >= IOPRIO_BE_NR || data < 0)
return -EINVAL;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc..a9f6fd3 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
}
}
-static struct request *kyber_get_request(struct request_queue *q,
- unsigned int op,
- struct blk_mq_alloc_data *data)
+static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
- struct kyber_queue_data *kqd = q->elevator->elevator_data;
- struct request *rq;
-
/*
* We use the scheduler tags as per-hardware queue queueing tokens.
* Async requests can be limited at this stage.
*/
- if (!op_is_sync(op))
+ if (!op_is_sync(op)) {
+ struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
+
data->shallow_depth = kqd->async_depth;
+ }
+}
- rq = __blk_mq_alloc_request(data, op);
- if (rq)
- rq_set_domain_token(rq, -1);
- return rq;
+static void kyber_prepare_request(struct request *rq, struct bio *bio)
+{
+ rq_set_domain_token(rq, -1);
}
-static void kyber_put_request(struct request *rq)
+static void kyber_finish_request(struct request *rq)
{
- struct request_queue *q = rq->q;
- struct kyber_queue_data *kqd = q->elevator->elevator_data;
+ struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
rq_clear_domain_token(kqd, rq);
- blk_mq_finish_request(rq);
}
static void kyber_completed_request(struct request *rq)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
.exit_sched = kyber_exit_sched,
.init_hctx = kyber_init_hctx,
.exit_hctx = kyber_exit_hctx,
- .get_request = kyber_get_request,
- .put_request = kyber_put_request,
+ .limit_depth = kyber_limit_depth,
+ .prepare_request = kyber_prepare_request,
+ .finish_request = kyber_finish_request,
.completed_request = kyber_completed_request,
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4a294a5..7440de4 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
if (IS_ERR(rq))
return PTR_ERR(rq);
req = scsi_req(rq);
- scsi_req_init(rq);
if (hdr->cmd_len > BLK_MAX_CDB) {
req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
goto error_free_buffer;
}
req = scsi_req(rq);
- scsi_req_init(rq);
cmdlen = COMMAND_SIZE(opcode);
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
if (IS_ERR(rq))
return PTR_ERR(rq);
- scsi_req_init(rq);
rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
scsi_req(rq)->cmd[0] = cmd;
scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
}
EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
-void scsi_req_init(struct request *rq)
+/**
+ * scsi_req_init - initialize certain fields of a scsi_request structure
+ * @req: Pointer to a scsi_request structure.
+ * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
+ * of struct scsi_request.
+ */
+void scsi_req_init(struct scsi_request *req)
{
- struct scsi_request *req = scsi_req(rq);
-
memset(req->__cmd, 0, sizeof(req->__cmd));
req->cmd = req->__cmd;
req->cmd_len = BLK_MAX_CDB;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d6..3416dad 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
* 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
* tag.
*/
-static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
- unsigned int type)
+static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
+ csum_fn *fn, unsigned int type)
{
unsigned int i;
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
iter->seed++;
}
- return 0;
+ return BLK_STS_OK;
}
-static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
- unsigned int type)
+static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
+ csum_fn *fn, unsigned int type)
{
unsigned int i;
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
"(rcvd %u)\n", iter->disk_name,
(unsigned long long)
iter->seed, be32_to_cpu(pi->ref_tag));
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
}
break;
case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
"(rcvd %04x, want %04x)\n", iter->disk_name,
(unsigned long long)iter->seed,
be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
}
next:
@@ -117,45 +117,45 @@ next:
iter->seed++;
}
- return 0;
+ return BLK_STS_OK;
}
-static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, 1);
}
-static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, 1);
}
-static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, 1);
}
-static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, 1);
}
-static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, 3);
}
-static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, 3);
}
-static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, 3);
}
-static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, 3);
}
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be..245a879 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
bool SuccessfulIO)
{
struct request *Request = Command->Request;
- int Error = SuccessfulIO ? 0 : -EIO;
+ blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f67..49908c7 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
struct amiga_floppy_struct *floppy;
char *data;
unsigned long flags;
- int err;
+ blk_status_t err;
next_req:
rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
next_segment:
/* Here someone could investigate to be more efficient */
- for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+ for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
#ifdef DEBUG
printk("fd: sector %ld + %d requested for %s\n",
blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
#endif
block = blk_rq_pos(rq) + cnt;
if ((int)block > floppy->blocks) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
@@ -1413,7 +1413,7 @@ next_segment:
#endif
if (get_track(drive, track) == -1) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
@@ -1424,7 +1424,7 @@ next_segment:
/* keep the drive spinning while writes are scheduled */
if (!fd_motor_on(drive)) {
- err = -EIO;
+ err = BLK_STS_IOERR;
break;
}
/*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876..6797e6c 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
d->aoemajor, d->aoeminor);
goto err_mempool;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
spin_lock_irqsave(&d->lock, flags);
WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c0..dc43254 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
d->ip.rq = NULL;
do {
bio = rq->bio;
- bok = !fastfail && !bio->bi_error;
- } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
+ bok = !fastfail && !bio->bi_status;
+ } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
/* cf. http://lkml.org/lkml/2006/10/31/28 */
if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
noskb: if (buf)
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -1144,7 +1144,7 @@ noskb: if (buf)
"aoe: runt data size in read from",
(long) d->aoemajor, d->aoeminor,
skb->len, n);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb: if (buf)
"aoe: too-large data size in read from",
(long) d->aoemajor, d->aoeminor,
n, f->iter.bi_size);
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
break;
}
bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
if (buf == NULL)
return;
buf->iter.bi_size = 0;
- buf->bio->bi_error = -EIO;
+ buf->bio->bi_status = BLK_STS_IOERR;
if (buf->nframesout == 0)
aoe_end_buf(d, buf);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947..b28fefb 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
if (rq == NULL)
return;
while ((bio = d->ip.nxbio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
n = (unsigned long) rq->special;
rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd..92da886 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
static DEFINE_TIMER(fd_timer, check_change, 0, 0);
-static void fd_end_request_cur(int err)
+static void fd_end_request_cur(blk_status_t err)
{
if (!__blk_end_request_cur(fd_request, err))
fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
fd_request->error_count++;
if (fd_request->error_count >= MAX_ERRORS) {
printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
}
else if (fd_request->error_count == RECALIBRATE_ERRORS) {
printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
return;
}
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
}
else {
/* all sectors finished */
- fd_end_request_cur(0);
+ fd_end_request_cur(BLK_STS_OK);
redo_fd_request();
}
return;
@@ -1445,7 +1445,7 @@ repeat:
if (!UD.connected) {
/* drive not connected */
printk(KERN_ERR "Unknown Device: fd%d\n", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
@@ -1461,12 +1461,12 @@ repeat:
/* user supplied disk type */
if (--type >= NUM_DISK_MINORS) {
printk(KERN_WARNING "fd%d: invalid disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
if (minor2disktype[type].drive_types > DriveType) {
printk(KERN_WARNING "fd%d: unsupported disk format", drive );
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
}
if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
- fd_end_request_cur(-EIO);
+ fd_end_request_cur(BLK_STS_IOERR);
goto repeat;
}
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f..6112e99 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
blk_queue_make_request(brd->brd_queue, brd_make_request);
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
- blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
/* This is so fdisk will align partitions on 4k, because of
* direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd37550..02a6119 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
/* set the residual count for pc requests */
if (blk_rq_is_passthrough(rq))
scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
- blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0);
+ blk_end_request_all(rq, scsi_req(rq)->result ?
+ BLK_STS_IOERR : BLK_STS_OK);
spin_lock_irqsave(&h->lock, flags);
cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
disk->queue->cmd_size = sizeof(struct scsi_request);
disk->queue->request_fn = do_cciss_request;
disk->queue->queue_lock = &h->lock;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
if (blk_init_allocated_queue(disk->queue) < 0)
goto cleanup_queue;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa..e02c45c 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
else
submit_bio(bio);
wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
- if (!bio->bi_error)
+ if (!bio->bi_status)
err = device->md_io.error;
out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a41..809fd24 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
!bm_test_page_unchanged(b->bm_pages[idx]))
drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* ctx error will hold the completed-last non-zero error code,
* in case error codes differ. */
- ctx->error = bio->bi_error;
+ ctx->error = blk_status_to_errno(bio->bi_status);
bm_set_page_io_err(b->bm_pages[idx]);
/* Not identical to on disk version of it.
* Is BM_PAGE_IO_ERROR enough? */
if (__ratelimit(&drbd_ratelimit_state))
drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
- bio->bi_error, idx);
+ bio->bi_status, idx);
} else {
bm_clear_page_io_err(b->bm_pages[idx]);
dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45b..d17b6e6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
+/* And a bio_set for cloning */
+extern struct bio_set *drbd_io_bio_set;
+
extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
__release(local);
if (!bio->bi_bdev) {
drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
- bio->bi_error = -ENODEV;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c3..5fb99e0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;
mempool_t *drbd_md_io_page_pool;
struct bio_set *drbd_md_io_bio_set;
+struct bio_set *drbd_io_bio_set;
/* I do not use a standard mempool, because:
1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
+ if (drbd_io_bio_set)
+ bioset_free(drbd_io_bio_set);
if (drbd_md_io_bio_set)
bioset_free(drbd_md_io_bio_set);
if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
if (drbd_al_ext_cache)
kmem_cache_destroy(drbd_al_ext_cache);
+ drbd_io_bio_set = NULL;
drbd_md_io_bio_set = NULL;
drbd_md_io_page_pool = NULL;
drbd_ee_mempool = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
drbd_pp_pool = NULL;
drbd_md_io_page_pool = NULL;
drbd_md_io_bio_set = NULL;
+ drbd_io_bio_set = NULL;
/* caches */
drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
goto Enomem;
/* mempools */
- drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0);
+ drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
+ if (drbd_io_bio_set == NULL)
+ goto Enomem;
+
+ drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER);
if (drbd_md_io_bio_set == NULL)
goto Enomem;
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0..ad0fcb4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
static enum drbd_ret_code
check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
{
- static enum drbd_ret_code rv;
+ enum drbd_ret_code rv;
struct drbd_peer_device *peer_device;
int i;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be..c7e95e6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
struct drbd_device *device = octx->device;
struct issue_flush_context *ctx = octx->ctx;
- if (bio->bi_error) {
- ctx->error = bio->bi_error;
- drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
+ if (bio->bi_status) {
+ ctx->error = blk_status_to_errno(bio->bi_status);
+ drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
}
kfree(octx);
bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 6566243..f6e865b 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m)
{
- m->bio->bi_error = m->error;
+ m->bio->bi_status = errno_to_blk_status(m->error);
bio_endio(m->bio);
dec_ap_bio(device);
}
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
GFP_NOIO, 0))
- req->private_bio->bi_error = -EIO;
+ req->private_bio->bi_status = BLK_STS_IOERR;
bio_endio(req->private_bio);
}
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
/* only pass the error to the upper layers.
* if user cannot handle io errors, that's not our business. */
drbd_err(device, "could not kmalloc() req\n");
- bio->bi_error = -ENOMEM;
+ bio->bi_status = BLK_STS_RESOURCE;
bio_endio(bio);
return ERR_PTR(-ENOMEM);
}
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
struct drbd_device *device = (struct drbd_device *) q->queuedata;
unsigned long start_jif;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
start_jif = jiffies;
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f..9e1866a 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
{
struct bio *bio;
- bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+ bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
req->private_bio = bio;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e..1d8726a 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
struct drbd_device *device;
device = bio->bi_private;
- device->md_io.error = bio->bi_error;
+ device->md_io.error = blk_status_to_errno(bio->bi_status);
/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
bio_op(bio) == REQ_OP_DISCARD;
- if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
+ if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
is_write ? (is_discard ? "discard" : "write")
- : "read", bio->bi_error,
+ : "read", bio->bi_status,
(unsigned long long)peer_req->i.sector);
- if (bio->bi_error)
+ if (bio->bi_status)
set_bit(__EE_WAS_ERROR, &peer_req->flags);
bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
if (__ratelimit(&drbd_ratelimit_state))
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
- if (!bio->bi_error)
+ if (!bio->bi_status)
drbd_panic_after_delayed_completion_of_aborted_request(device);
}
/* to avoid recursion in __req_mod */
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
switch (bio_op(bio)) {
case REQ_OP_WRITE_ZEROES:
case REQ_OP_DISCARD:
- if (bio->bi_error == -EOPNOTSUPP)
+ if (bio->bi_status == BLK_STS_NOTSUPP)
what = DISCARD_COMPLETED_NOTSUPP;
else
what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
}
bio_put(req->private_bio);
- req->private_bio = ERR_PTR(bio->bi_error);
+ req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
/* not req_mod(), we need irqsave here! */
spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c76..ce82364 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
* =============================
*/
-static void floppy_end_request(struct request *req, int error)
+static void floppy_end_request(struct request *req, blk_status_t error)
{
unsigned int nr_sectors = current_count_sectors;
unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
DRWE->last_error_generation = DRS->generation;
}
spin_lock_irqsave(q->queue_lock, flags);
- floppy_end_request(req, -EIO);
+ floppy_end_request(req, BLK_STS_IOERR);
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
int drive = cbdata->drive;
- if (bio->bi_error) {
+ if (bio->bi_status) {
pr_info("floppy: error %d while reading block 0\n",
- bio->bi_error);
+ bio->bi_status);
set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
}
complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
goto out_put_disk;
}
+ blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(disks[drive]->queue, 64);
disks[drive]->major = FLOPPY_MAJOR;
disks[drive]->first_minor = TOMINOR(drive);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebbd0c3..0de1144 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
}
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
+ loff_t logical_blocksize)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ lo->lo_logical_blocksize = logical_blocksize;
+ blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
+ blk_queue_logical_block_size(lo->lo_queue,
+ lo->lo_logical_blocksize);
+ }
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
zero_fill_bio(bio);
}
- blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0);
+ blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -813,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
struct file *file = lo->lo_backing_file;
struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue;
+ int lo_bits = 9;
/*
* We use punch hole to reclaim the free space used by the
@@ -832,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
- blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
- blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+ if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
+ lo_bits = blksize_bits(lo->lo_logical_blocksize);
+
+ blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
+ blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
@@ -843,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
kthread_stop(lo->worker_task);
}
+static int loop_kthread_worker_fn(void *worker_ptr)
+{
+ current->flags |= PF_LESS_THROTTLE;
+ return kthread_worker_fn(worker_ptr);
+}
+
static int loop_prepare_queue(struct loop_device *lo)
{
kthread_init_worker(&lo->worker);
- lo->worker_task = kthread_run(kthread_worker_fn,
+ lo->worker_task = kthread_run(loop_kthread_worker_fn,
&lo->worker, "loop%d", lo->lo_number);
if (IS_ERR(lo->worker_task))
return -ENOMEM;
@@ -921,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
+ lo->lo_logical_blocksize = 512;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -1086,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
+ int lo_flags = lo->lo_flags;
if (lo->lo_encrypt_key_size &&
!uid_eq(lo->lo_key_owner, uid) &&
@@ -1118,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
if (err)
goto exit;
+ if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
+ if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
+ lo->lo_logical_blocksize = 512;
+ lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
+ if (LO_INFO_BLOCKSIZE(info) != 512 &&
+ LO_INFO_BLOCKSIZE(info) != 1024 &&
+ LO_INFO_BLOCKSIZE(info) != 2048 &&
+ LO_INFO_BLOCKSIZE(info) != 4096)
+ return -EINVAL;
+ if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
+ return -EINVAL;
+ }
+
if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit)
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
+ lo->lo_sizelimit != info->lo_sizelimit ||
+ lo->lo_flags != lo_flags ||
+ ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
+ lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
+ if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
+ LO_INFO_BLOCKSIZE(info))) {
err = -EFBIG;
goto exit;
}
+ }
loop_config_discard(lo);
@@ -1306,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
return err;
}
-static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+static int loop_set_capacity(struct loop_device *lo)
{
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_logical_blocksize);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1369,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_CAPACITY:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
- err = loop_set_capacity(lo, bdev);
+ err = loop_set_capacity(lo);
break;
case LOOP_SET_DIRECT_IO:
err = -EPERM;
@@ -1645,7 +1683,7 @@ int loop_unregister_transfer(int number)
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
-static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1654,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
if (lo->lo_state != Lo_bound)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
switch (req_op(cmd->rq)) {
case REQ_OP_FLUSH:
@@ -1669,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
kthread_queue_work(&lo->worker, &cmd->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void loop_handle_cmd(struct loop_cmd *cmd)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f9..2c096b9 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
unsigned lo_blocksize;
+ unsigned lo_logical_blocksize;
void *key_data;
gfp_t old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4..61b046f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
struct smart_attr *attrib);
-static void mtip_complete_command(struct mtip_cmd *cmd, int status)
+static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
{
struct request *req = blk_mq_rq_from_pdu(cmd);
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
return;
}
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
tag,
fail_reason != NULL ?
fail_reason : "unknown");
- mtip_complete_command(cmd, -ENODATA);
+ mtip_complete_command(cmd, BLK_STS_MEDIUM);
continue;
}
}
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
dev_warn(&port->dd->pdev->dev,
"retiring tag %d\n", tag);
- mtip_complete_command(cmd, -EIO);
+ mtip_complete_command(cmd, BLK_STS_IOERR);
}
}
print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
/* insert request and run queue */
blk_execute_rq(rq->q, NULL, rq, true);
- rv = int_cmd->status;
- if (rv < 0) {
- if (rv == -ERESTARTSYS) { /* interrupted */
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] was interrupted after %u ms\n",
- fis->command,
- jiffies_to_msecs(jiffies - start));
- rv = -EINTR;
- goto exec_ic_exit;
- } else if (rv == 0) /* timeout */
- dev_err(&dd->pdev->dev,
- "Internal command did not complete [%02X] within timeout of %lu ms\n",
- fis->command, timeout);
- else
- dev_err(&dd->pdev->dev,
- "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
- fis->command, rv, timeout);
+ if (int_cmd->status) {
+ dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
+ fis->command, int_cmd->status);
+ rv = -EIO;
if (mtip_check_surprise_removal(dd->pdev) ||
test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
clear_bit(req->tag, dd->port->cmds_to_issue);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
mtip_softirq_done_fn(req);
}
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
int err;
err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
- blk_mq_end_request(rq, err);
+ blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
return 0;
}
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
return false;
}
-static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
- struct request *rq)
+static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
{
struct driver_data *dd = hctx->queue->queuedata;
struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
struct mtip_cmd_sg *command_sg;
if (mtip_commands_active(dd->port))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
/* Populate the SG list */
cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
mtip_issue_non_ncq_command(dd->port, rq->tag);
- return BLK_MQ_RQ_QUEUE_OK;
+ return 0;
}
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
return mtip_issue_reserved_cmd(hctx, rq);
if (unlikely(mtip_check_unal_depth(hctx, rq)))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
blk_mq_start_request(rq);
ret = mtip_submit_request(hctx, rq);
if (likely(!ret))
- return BLK_MQ_RQ_QUEUE_OK;
-
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_OK;
+ return BLK_STS_IOERR;
}
static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
if (reserved) {
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -ETIME;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
{
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
- cmd->status = -ENODEV;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(rq);
}
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e..e8286af 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
int retries; /* The number of retries left for this command. */
int direction; /* Data transfer direction */
- int status;
+ blk_status_t status;
};
/* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f3f191b..977ec96 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
int index;
int cookie;
struct completion send_complete;
- int status;
+ blk_status_t status;
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_config *config;
if (!refcount_inc_not_zero(&nbd->config_refs)) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_TIMEOUT;
return BLK_EH_HANDLED;
}
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
"Connection timed out\n");
}
set_bit(NBD_TIMEDOUT, &config->runtime_flags);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
sock_shutdown(nbd);
nbd_config_put(nbd);
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
unsigned long size = blk_rq_bytes(req);
struct bio *bio;
u32 type;
+ u32 nbd_cmd_flags = 0;
u32 tag = blk_mq_unique_tag(req);
int sent = nsock->sent, skip = 0;
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
return -EIO;
}
+ if (req->cmd_flags & REQ_FUA)
+ nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
+
/* We did a partial send previously, and we at least sent the whole
* request struct, so just go and send the rest of the pages in the
* request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
}
cmd->index = index;
cmd->cookie = nsock->cookie;
- request.type = htonl(type);
+ request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
nsock->pending = req;
nsock->sent = sent;
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
*/
nsock->pending = req;
nsock->sent = sent;
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
dev_err(disk_to_dev(nbd->disk),
"Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (ntohl(reply.error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
ntohl(reply.error));
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
*/
if (nbd_disconnected(config) ||
config->num_connections <= 1) {
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
return cmd;
}
return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
if (!blk_mq_request_started(req))
return;
cmd = blk_mq_rq_to_pdu(req);
- cmd->status = -EIO;
+ cmd->status = BLK_STS_IOERR;
blk_mq_complete_request(req);
}
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
nbd_config_put(nbd);
return -EINVAL;
}
- cmd->status = 0;
+ cmd->status = BLK_STS_OK;
again:
nsock = config->socks[index];
mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
return ret;
}
-static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
* appropriate.
*/
ret = nbd_handle_cmd(cmd, hctx->queue_num);
- if (ret < 0)
- ret = BLK_MQ_RQ_QUEUE_ERROR;
- if (!ret)
- ret = BLK_MQ_RQ_QUEUE_OK;
complete(&cmd->send_complete);
- return ret;
+ return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
}
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
continue;
}
sk_set_memalloc(sock->sk);
+ sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
old = nsock->sock;
@@ -957,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
set_disk_ro(nbd->disk, false);
if (config->flags & NBD_FLAG_SEND_TRIM)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
- if (config->flags & NBD_FLAG_SEND_FLUSH)
- blk_queue_write_cache(nbd->disk->queue, true, false);
+ if (config->flags & NBD_FLAG_SEND_FLUSH) {
+ if (config->flags & NBD_FLAG_SEND_FUA)
+ blk_queue_write_cache(nbd->disk->queue, true, true);
+ else
+ blk_queue_write_cache(nbd->disk->queue, true, false);
+ }
else
blk_queue_write_cache(nbd->disk->queue, false, false);
}
@@ -1071,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
return -ENOMEM;
}
sk_set_memalloc(config->socks[i]->sock->sk);
+ config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
atomic_inc(&config->recv_threads);
refcount_inc(&nbd->config_refs);
INIT_WORK(&args->work, recv_work);
@@ -1305,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
seq_puts(s, "NBD_FLAG_READ_ONLY\n");
if (flags & NBD_FLAG_SEND_FLUSH)
seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
+ if (flags & NBD_FLAG_SEND_FUA)
+ seq_puts(s, "NBD_FLAG_SEND_FUA\n");
if (flags & NBD_FLAG_SEND_TRIM)
seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1e..71f4422 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
struct request_queue *q;
struct gendisk *disk;
struct nvm_dev *ndev;
- struct blk_mq_tag_set tag_set;
+ struct blk_mq_tag_set *tag_set;
+ struct blk_mq_tag_set __tag_set;
struct hrtimer timer;
unsigned int queue_depth;
spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
+static struct blk_mq_tag_set tag_set;
enum {
NULL_IRQ_NONE = 0,
@@ -109,7 +111,7 @@ static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
-static int nr_devices = 2;
+static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
@@ -121,6 +123,10 @@ static bool blocking;
module_param(blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
+static bool shared_tags;
+module_param(shared_tags, bool, S_IRUGO);
+MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
+
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
switch (queue_mode) {
case NULL_Q_MQ:
- blk_mq_end_request(cmd->rq, 0);
+ blk_mq_end_request(cmd->rq, BLK_STS_OK);
return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, 0);
+ blk_end_request_all(cmd->rq, BLK_STS_OK);
break;
case NULL_Q_BIO:
bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
}
}
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq);
null_handle_cmd(cmd);
- return BLK_MQ_RQ_QUEUE_OK;
-}
-
-static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
-{
- BUG_ON(!nullb);
- BUG_ON(!nq);
-
- init_waitqueue_head(&nq->wait);
- nq->queue_depth = nullb->queue_depth;
-}
-
-static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
- unsigned int index)
-{
- struct nullb *nullb = data;
- struct nullb_queue *nq = &nullb->queues[index];
-
- hctx->driver_data = nq;
- null_init_queue(nullb, nq);
- nullb->nr_queues++;
-
- return 0;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
- .init_hctx = null_init_hctx,
.complete = null_softirq_done_fn,
};
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
#ifdef CONFIG_NVM
-static void null_lnvm_end_io(struct request *rq, int error)
+static void null_lnvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
- rqd->error = error;
+ /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
+ rqd->error = status ? -EIO : 0;
nvm_end_io(rqd);
blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
else
del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
if (!use_lightnvm)
put_disk(nullb->disk);
cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
.release = null_release,
};
+static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
+{
+ BUG_ON(!nullb);
+ BUG_ON(!nq);
+
+ init_waitqueue_head(&nq->wait);
+ nq->queue_depth = nullb->queue_depth;
+}
+
+static void null_init_queues(struct nullb *nullb)
+{
+ struct request_queue *q = nullb->q;
+ struct blk_mq_hw_ctx *hctx;
+ struct nullb_queue *nq;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (!hctx->nr_ctx || !hctx->tags)
+ continue;
+ nq = &nullb->queues[i];
+ hctx->driver_data = nq;
+ null_init_queue(nullb, nq);
+ nullb->nr_queues++;
+ }
+}
+
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
return 0;
}
+static int null_init_tag_set(struct blk_mq_tag_set *set)
+{
+ set->ops = &null_mq_ops;
+ set->nr_hw_queues = submit_queues;
+ set->queue_depth = hw_queue_depth;
+ set->numa_node = home_node;
+ set->cmd_size = sizeof(struct nullb_cmd);
+ set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->driver_data = NULL;
+
+ if (blocking)
+ set->flags |= BLK_MQ_F_BLOCKING;
+
+ return blk_mq_alloc_tag_set(set);
+}
+
static int null_add_dev(void)
{
struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
goto out_free_nullb;
if (queue_mode == NULL_Q_MQ) {
- nullb->tag_set.ops = &null_mq_ops;
- nullb->tag_set.nr_hw_queues = submit_queues;
- nullb->tag_set.queue_depth = hw_queue_depth;
- nullb->tag_set.numa_node = home_node;
- nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
- nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
- nullb->tag_set.driver_data = nullb;
-
- if (blocking)
- nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
-
- rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+ if (shared_tags) {
+ nullb->tag_set = &tag_set;
+ rv = 0;
+ } else {
+ nullb->tag_set = &nullb->__tag_set;
+ rv = null_init_tag_set(nullb->tag_set);
+ }
+
if (rv)
goto out_cleanup_queues;
- nullb->q = blk_mq_init_queue(&nullb->tag_set);
+ nullb->q = blk_mq_init_queue(nullb->tag_set);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
}
+ null_init_queues(nullb);
} else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
- if (queue_mode == NULL_Q_MQ)
- blk_mq_free_tag_set(&nullb->tag_set);
+ if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
+ blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
cleanup_queues(nullb);
out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
queue_mode = NULL_Q_MQ;
}
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ null_init_tag_set(&tag_set);
+
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
}
mutex_unlock(&lock);
+ if (queue_mode == NULL_Q_MQ && shared_tags)
+ blk_mq_free_tag_set(&tag_set);
+
kmem_cache_destroy(ppa_cache);
}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef..7b8c636 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
put_disk(disk);
continue;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
cd->disk = disk;
cd->pi = &cd->pia;
cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
ps_set_intr(do_pcd_read, NULL, 0, nice);
return;
} else {
- __blk_end_request_all(pcd_req, -EIO);
+ __blk_end_request_all(pcd_req, BLK_STS_IOERR);
pcd_req = NULL;
}
}
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
pcd_request();
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
@@ -837,7 +838,7 @@ static void pcd_start(void)
if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
return;
}
pcd_bufblk = -1;
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f..27a44b9 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
phase = NULL;
spin_lock_irqsave(&pd_lock, saved_flags);
if (!__blk_end_request_cur(pd_req,
- res == Ok ? 0 : -EIO)) {
+ res == Ok ? 0 : BLK_STS_IOERR)) {
if (!set_next_request())
stop = 1;
}
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
return;
}
blk_queue_max_hw_sectors(p->queue, cluster);
+ blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
if (disk->drive == -1) {
for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca73..eef7a91 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
return;
}
blk_queue_max_segments(disk->queue, cluster);
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
pf->disk = disk;
pf->pi = &pf->pia;
pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
return pf_req != NULL;
}
-static void pf_end_request(int err)
+static void pf_end_request(blk_status_t err)
{
if (pf_req && !__blk_end_request_cur(pf_req, err))
pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
pf_count = blk_rq_cur_sectors(pf_req);
if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
@@ -836,7 +837,7 @@ repeat:
pi_do_claimed(pf_current->pi, do_pf_write);
else {
pf_busy = 0;
- pf_end_request(-EIO);
+ pf_end_request(BLK_STS_IOERR);
goto repeat;
}
}
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
return 0;
}
-static inline void next_request(int err)
+static inline void next_request(blk_status_t err)
{
unsigned long saved_flags;
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
pi_do_claimed(pf_current->pi, do_pf_read_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
pi_do_claimed(pf_current->pi, do_pf_write_start);
return;
}
- next_request(-EIO);
+ next_request(BLK_STS_IOERR);
return;
}
pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865..467beca 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
static mempool_t *psd_pool;
+static struct bio_set *pkt_bio_set;
static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
if (IS_ERR(rq))
return PTR_ERR(rq);
- scsi_req_init(rq);
if (cgc->buflen) {
ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&pkt->io_errors);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
pd->stats.pkt_ended++;
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
pkt_queue_bio(pd, pkt->w_bio);
}
-static void pkt_finish_packet(struct packet_data *pkt, int error)
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
{
struct bio *bio;
- if (error)
+ if (status)
pkt->cache_valid = 0;
/* Finish all bios corresponding to this packet */
while ((bio = bio_list_pop(&pkt->orig_bios))) {
- bio->bi_error = error;
+ bio->bi_status = status;
bio_endio(bio);
}
}
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
if (atomic_read(&pkt->io_wait) > 0)
return;
- if (!pkt->w_bio->bi_error) {
+ if (!pkt->w_bio->bi_status) {
pkt_set_state(pkt, PACKET_FINISHED_STATE);
} else {
pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
break;
case PACKET_FINISHED_STATE:
- pkt_finish_packet(pkt, pkt->w_bio->bi_error);
+ pkt_finish_packet(pkt, pkt->w_bio->bi_status);
return;
default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
- psd->bio->bi_error = bio->bi_error;
+ psd->bio->bi_status = bio->bi_status;
bio_put(bio);
bio_endio(psd->bio);
mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
- struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
char b[BDEVNAME_SIZE];
struct bio *split;
- blk_queue_bounce(q, &bio);
-
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
pd = q->queuedata;
if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
split = bio_split(bio, last_zone -
bio->bi_iter.bi_sector,
- GFP_NOIO, fs_bio_set);
+ GFP_NOIO, pkt_bio_set);
bio_chain(split, bio);
} else {
split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
bdev = bdget(dev);
if (!bdev)
return -ENOMEM;
+ if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+ WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
+ bdput(bdev);
+ return -EINVAL;
+ }
ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
if (ret)
return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
sizeof(struct packet_stacked_data));
if (!psd_pool)
return -ENOMEM;
+ pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pkt_bio_set) {
+ mempool_destroy(psd_pool);
+ return -ENOMEM;
+ }
ret = register_blkdev(pktdev_major, DRIVER_NAME);
if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
unregister_blkdev(pktdev_major, DRIVER_NAME);
out2:
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
return ret;
}
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
unregister_blkdev(pktdev_major, DRIVER_NAME);
mempool_destroy(psd_pool);
+ bioset_free(pkt_bio_set);
}
MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e..075662f 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
if (res) {
dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
__LINE__, op, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
if (res) {
dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
__func__, __LINE__, res);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return 0;
}
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
break;
default:
blk_dump_rq_flags(req, DEVICE_NAME " bad request");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
}
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
struct ps3_storage_device *dev = data;
struct ps3disk_private *priv;
struct request *req;
- int res, read, error;
+ int res, read;
+ blk_status_t error;
u64 tag, status;
const char *op;
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
if (status) {
dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
__LINE__, op, status);
- error = -EIO;
+ error = BLK_STS_IOERR;
} else {
dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
__LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe..e0e81ca 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
kfree(priv->cache.tags);
}
-static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
size_t len, size_t *retlen, u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
(unsigned int)from, len);
if (from >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - from)
len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
return 0;
}
-static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
size_t len, size_t *retlen, const u_char *buf)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
unsigned int cached, count;
if (to >= priv->size)
- return -EIO;
+ return BLK_STS_IOERR;
if (len > priv->size - to)
len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
int write = bio_data_dir(bio) == WRITE;
const char *op = write ? "write" : "read";
loff_t offset = bio->bi_iter.bi_sector << 9;
- int error = 0;
+ blk_status_t error = 0;
struct bio_vec bvec;
struct bvec_iter iter;
struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
if (retlen != len) {
dev_err(&dev->core, "Short %s\n", op);
- error = -EIO;
+ error = BLK_STS_IOERR;
goto out;
}
@@ -593,7 +593,7 @@ out:
next = bio_list_peek(&priv->list);
spin_unlock_irq(&priv->lock);
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
return next;
}
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c16f745..b008b6a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
static struct kmem_cache *rbd_img_request_cache;
static struct kmem_cache *rbd_obj_request_cache;
+static struct bio_set *rbd_bio_clone;
+
static int rbd_major;
static DEFINE_IDA(rbd_dev_id_ida);
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
{
struct bio *bio;
- bio = bio_clone(bio_src, gfpmask);
+ bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
if (!bio)
return NULL; /* ENOMEM */
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
rbd_assert(img_request->obj_request != NULL);
more = obj_request->which < img_request->obj_request_count - 1;
} else {
+ blk_status_t status = errno_to_blk_status(result);
+
rbd_assert(img_request->rq != NULL);
- more = blk_update_request(img_request->rq, result, xferred);
+ more = blk_update_request(img_request->rq, status, xferred);
if (!more)
- __blk_mq_end_request(img_request->rq, result);
+ __blk_mq_end_request(img_request->rq, status);
}
return more;
@@ -4150,17 +4154,17 @@ err_rq:
obj_op_name(op_type), length, offset, result);
ceph_put_snap_context(snapc);
err:
- blk_mq_end_request(rq, result);
+ blk_mq_end_request(rq, errno_to_blk_status(result));
}
-static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
struct work_struct *work = blk_mq_rq_to_pdu(rq);
queue_work(rbd_wq, work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -6414,8 +6418,16 @@ static int rbd_slab_init(void)
if (!rbd_obj_request_cache)
goto out_err;
+ rbd_assert(!rbd_bio_clone);
+ rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!rbd_bio_clone)
+ goto out_err_clone;
+
return 0;
+out_err_clone:
+ kmem_cache_destroy(rbd_obj_request_cache);
+ rbd_obj_request_cache = NULL;
out_err:
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
@@ -6431,6 +6443,10 @@ static void rbd_slab_exit(void)
rbd_assert(rbd_img_request_cache);
kmem_cache_destroy(rbd_img_request_cache);
rbd_img_request_cache = NULL;
+
+ rbd_assert(rbd_bio_clone);
+ bioset_free(rbd_bio_clone);
+ rbd_bio_clone = NULL;
}
static int __init rbd_init(void)
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c56636..7f4aceb 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
{
struct rsxx_cardinfo *card = q->queuedata;
struct rsxx_bio_meta *bio_meta;
- int st = -EINVAL;
+ blk_status_t st = BLK_STS_IOERR;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
might_sleep();
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
if (bio_end_sector(bio) > get_capacity(card->gendisk))
goto req_err;
- if (unlikely(card->halt)) {
- st = -EFAULT;
+ if (unlikely(card->halt))
goto req_err;
- }
- if (unlikely(card->dma_fault)) {
- st = (-EFAULT);
+ if (unlikely(card->dma_fault))
goto req_err;
- }
if (bio->bi_iter.bi_size == 0) {
dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
if (!bio_meta) {
- st = -ENOMEM;
+ st = BLK_STS_RESOURCE;
goto req_err;
}
@@ -205,7 +201,7 @@ queue_err:
kmem_cache_free(bio_meta_pool, bio_meta);
req_err:
if (st)
- bio->bi_error = st;
+ bio->bi_status = st;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
}
blk_queue_make_request(card->queue, rsxx_make_request);
- blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f8..6a1b217 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
mutex_unlock(&ctrl->work_lock);
}
-static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
struct list_head *q,
unsigned int laddr,
rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = HW_CMD_BLK_DISCARD;
dma->laddr = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
return 0;
}
-static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
struct list_head *q,
int dir,
unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
if (!dma)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
dma->laddr = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
return 0;
}
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
unsigned int dma_len;
int dma_cnt[RSXX_MAX_TARGETS];
int tgt;
- int st;
+ blk_status_t st;
int i;
addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
for (i = 0; i < card->n_targets; i++)
rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
FREE_DMA);
-
return st;
}
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d..277f27e 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
int rsxx_dma_configure(struct rsxx_cardinfo *card);
-int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4..d036868 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
struct skd_special_context *skspcl);
static void skd_request_fn(struct request_queue *rq);
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error);
-static int skd_preop_sg_list(struct skd_device *skdev,
+ struct skd_request_context *skreq, blk_status_t status);
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
static void skd_postop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
if (req == NULL)
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
}
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
struct request *req = NULL;
struct skd_scsi_request *scsi_req;
unsigned long io_flags;
- int error;
u32 lba;
u32 count;
int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
if (!req->bio)
goto skip_sg;
- error = skd_preop_sg_list(skdev, skreq);
-
- if (error != 0) {
+ if (!skd_preop_sg_list(skdev, skreq)) {
/*
* Complete the native request with error.
* Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
*/
pr_debug("%s:%s:%d error Out\n",
skdev->name, __func__, __LINE__);
- skd_end_request(skdev, skreq, error);
+ skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
continue;
}
@@ -805,7 +802,7 @@ skip_sg:
}
static void skd_end_request(struct skd_device *skdev,
- struct skd_request_context *skreq, int error)
+ struct skd_request_context *skreq, blk_status_t error)
{
if (unlikely(error)) {
struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
__blk_end_request_all(skreq->req, error);
}
-static int skd_preop_sg_list(struct skd_device *skdev,
+static bool skd_preop_sg_list(struct skd_device *skdev,
struct skd_request_context *skreq)
{
struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
n_sg = blk_rq_map_sg(skdev->queue, req, sg);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
/*
* Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
*/
n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
if (n_sg <= 0)
- return -EINVAL;
+ return false;
SKD_ASSERT(n_sg <= skdev->sgs_per_request);
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
}
}
- return 0;
+ return true;
}
static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
case SKD_CHECK_STATUS_REPORT_GOOD:
case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
break;
case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
case SKD_CHECK_STATUS_REPORT_ERROR:
default:
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
break;
}
}
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
* native request.
*/
if (likely(cmp_status == SAM_STAT_GOOD))
- skd_end_request(skdev, skreq, 0);
+ skd_end_request(skdev, skreq, BLK_STS_OK);
else
skd_resolve_req_exception(skdev, skreq);
}
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
SKD_MAX_RETRIES)
blk_requeue_request(skdev->queue, skreq->req);
else
- skd_end_request(skdev, skreq, -EIO);
+ skd_end_request(skdev, skreq, BLK_STS_IOERR);
skreq->req = NULL;
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
rc = -ENOMEM;
goto err_out;
}
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
skdev->queue = q;
disk->queue = q;
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab..6b16ead 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
rqe->req = NULL;
- __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+ __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
vdc_blk_queue_start(port);
}
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
struct request *req;
while ((req = blk_fetch_request(port->disk->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6..84434d3 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
return ret;
}
-static int floppy_read_sectors(struct floppy_state *fs,
+static blk_status_t floppy_read_sectors(struct floppy_state *fs,
int req_sector, int sectors_nb,
unsigned char *buffer)
{
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
ret = swim_read_sector(fs, side, track, sector,
buffer);
if (try-- == 0)
- return -EIO;
+ return BLK_STS_IOERR;
} while (ret != 512);
buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
req = swim_next_request(swd);
while (req) {
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
fs = req->rq_disk->private_data;
if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
put_disk(swd->unit[drive].disk);
goto exit_put_disks;
}
+ blk_queue_bounce_limit(swd->unit[drive].disk->queue,
+ BLK_BOUNCE_HIGH);
swd->unit[drive].disk->queue->queuedata = swd;
swd->unit[drive].swd = swd;
}
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c..9f931f8 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
unsigned int clearing);
static int floppy_revalidate(struct gendisk *disk);
-static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes)
+static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
{
struct request *req = fs->cur_req;
int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
if (fs->mdev->media_bay &&
check_media_bay(fs->mdev->media_bay) != MB_FD) {
swim3_dbg("%s", " media bay absent, dropping req\n");
- swim3_end_request(fs, -ENODEV, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
if (blk_rq_pos(req) >= fs->total_secs) {
swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
(long)blk_rq_pos(req), (long)fs->total_secs);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
if (fs->ejected) {
swim3_dbg("%s", " disk ejected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
if (fs->write_prot) {
swim3_dbg("%s", " try to write, disk write protected\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
continue;
}
}
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
if (fs->retries > 5) {
swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
fs->req_cyl, fs->cur_cyl);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
return;
}
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
out_8(&sw->intr_enable, 0);
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
out_8(&sw->select, RELAX);
out_8(&sw->intr_enable, 0);
swim3_err("%s", "Seek timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
goto unlock;
}
swim3_err("%s", "Seek settle timeout\n");
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
swim3_err("Timeout %sing sector %ld\n",
(rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
(long)blk_rq_pos(fs->cur_req));
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("%s", "Seen sector but cyl=ff?\n");
fs->cur_cyl = -1;
if (fs->retries > 5) {
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
} else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("Error %sing block %ld (err=%x)\n",
rq_data_dir(req) == WRITE? "writ": "read",
(long)blk_rq_pos(req), err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
}
} else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
fs->state, rq_data_dir(req), intr, err);
- swim3_end_request(fs, -EIO, 0);
+ swim3_end_request(fs, BLK_STS_IOERR, 0);
fs->state = idle;
start_request(fs);
break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
put_disk(disk);
return -ENOMEM;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
disk->queue->queuedata = &floppy_states[index];
if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
return 0;
}
-static struct of_device_id swim3_match[] =
+static const struct of_device_id swim3_match[] =
{
{
.name = "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072c..08586dc 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
static inline void carm_end_request_queued(struct carm_host *host,
struct carm_request *crq,
- int error)
+ blk_status_t error)
{
struct request *req = crq->rq;
int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
}
static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
- int error)
+ blk_status_t error)
{
carm_end_request_queued(host, crq, error);
if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
sg = &crq->sg[0];
n_elem = blk_rq_map_sg(q, rq, sg);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
/* map scatterlist to PCI bus addresses */
n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
if (n_elem <= 0) {
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
return; /* request with no s/g entries? */
}
crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
static void carm_handle_array_info(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
struct carm_port *port;
u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
static void carm_handle_scan_chan(struct carm_host *host,
struct carm_request *crq, u8 *mem,
- int error)
+ blk_status_t error)
{
u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
}
static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, int error,
+ struct carm_request *crq, blk_status_t error,
int cur_state, int next_state)
{
DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
}
static inline void carm_handle_rw(struct carm_host *host,
- struct carm_request *crq, int error)
+ struct carm_request *crq, blk_status_t error)
{
int pci_dir;
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
u32 handle = le32_to_cpu(ret_handle_le);
unsigned int msg_idx;
struct carm_request *crq;
- int error = (status == RMSG_OK) ? 0 : -EIO;
+ blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
u8 *mem;
VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
err_out:
printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- carm_end_rq(host, crq, -EIO);
+ carm_end_rq(host, crq, BLK_STS_IOERR);
}
static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3..0677d25 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
if (control & DMASCR_HARD_ERROR) {
/* error */
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
dev_printk(KERN_WARNING, &card->dev->dev,
"I/O error on sector %d/%d\n",
le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size);
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
spin_lock_irq(&card->lock);
*card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c..0297ad7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
struct scatterlist sg[];
};
-static inline int virtblk_result(struct virtblk_req *vbr)
+static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
switch (vbr->status) {
case VIRTIO_BLK_S_OK:
- return 0;
+ return BLK_STS_OK;
case VIRTIO_BLK_S_UNSUPP:
- return -ENOTTY;
+ return BLK_STS_NOTSUPP;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
/* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC)
- return BLK_MQ_RQ_QUEUE_BUSY;
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
if (notify)
virtqueue_notify(vblk->vqs[qid].vq);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
/* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
goto out;
blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
- err = virtblk_result(blk_mq_rq_to_pdu(req));
+ err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
blk_put_request(req);
return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
/* We can handle whatever the host told us to handle. */
blk_queue_max_segments(q, vblk->sg_elems-2);
- /* No need to bounce any requests */
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0e82409..fe7cd58 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1066,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
atomic_set(&blkif->drain, 0);
}
-/*
- * Completion callback on the bio's. Called as bh->b_end_io()
- */
-
-static void __end_block_io_op(struct pending_req *pending_req, int error)
+static void __end_block_io_op(struct pending_req *pending_req,
+ blk_status_t error)
{
/* An error fails the entire request. */
- if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
- (error == -EOPNOTSUPP)) {
+ if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("flush diskcache op failed, not supported\n");
xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
- } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
- (error == -EOPNOTSUPP)) {
+ } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
+ error == BLK_STS_NOTSUPP) {
pr_debug("write barrier op failed, not supported\n");
xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1103,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
*/
static void end_block_io_op(struct bio *bio)
{
- __end_block_io_op(bio->bi_private, bio->bi_error);
+ __end_block_io_op(bio->bi_private, bio->bi_status);
bio_put(bio);
}
@@ -1420,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
for (i = 0; i < nbio; i++)
bio_put(biolist[i]);
atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, -EINVAL);
+ __end_block_io_op(pending_req, BLK_STS_RESOURCE);
msleep(1); /* back off a bit */
return -EIO;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3945963..c852ed3 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
unsigned long associated_id;
};
-struct split_bio {
- struct bio *bio;
- atomic_t pending;
-};
-
struct blkif_req {
int error;
};
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
!info->feature_fua));
}
-static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
flush_requests(rinfo);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_err:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
out_busy:
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
blk_mq_stop_hw_queue(hctx);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
-
- /* Make sure we don't use bounce buffers. */
- blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
}
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
continue;
}
- blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+ if (bret->status == BLKIF_RSP_OKAY)
+ blkif_req(req)->error = BLK_STS_OK;
+ else
+ blkif_req(req)->error = BLK_STS_IOERR;
+
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
printk(KERN_WARNING "blkfront: %s: %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
- blkif_req(req)->error = -EOPNOTSUPP;
+ blkif_req(req)->error = BLK_STS_NOTSUPP;
}
if (unlikely(blkif_req(req)->error)) {
- if (blkif_req(req)->error == -EOPNOTSUPP)
- blkif_req(req)->error = 0;
+ if (blkif_req(req)->error == BLK_STS_NOTSUPP)
+ blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
return 0;
}
-static void split_bio_end(struct bio *bio)
-{
- struct split_bio *split_bio = bio->bi_private;
-
- if (atomic_dec_and_test(&split_bio->pending)) {
- split_bio->bio->bi_phys_segments = 0;
- split_bio->bio->bi_error = bio->bi_error;
- bio_endio(split_bio->bio);
- kfree(split_bio);
- }
- bio_put(bio);
-}
-
static int blkif_recover(struct blkfront_info *info)
{
- unsigned int i, r_index;
+ unsigned int r_index;
struct request *req, *n;
int rc;
- struct bio *bio, *cloned_bio;
- unsigned int segs, offset;
- int pending, size;
- struct split_bio *split_bio;
+ struct bio *bio;
+ unsigned int segs;
blkfront_gather_backend_features(info);
/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
/* Traverse the list of pending bios and re-queue them */
- if (bio_segments(bio) > segs) {
- /*
- * This bio has more segments than what we can
- * handle, we have to split it.
- */
- pending = (bio_segments(bio) + segs - 1) / segs;
- split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
- BUG_ON(split_bio == NULL);
- atomic_set(&split_bio->pending, pending);
- split_bio->bio = bio;
- for (i = 0; i < pending; i++) {
- offset = (i * segs * XEN_PAGE_SIZE) >> 9;
- size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
- (unsigned int)bio_sectors(bio) - offset);
- cloned_bio = bio_clone(bio, GFP_NOIO);
- BUG_ON(cloned_bio == NULL);
- bio_trim(cloned_bio, offset, size);
- cloned_bio->bi_private = split_bio;
- cloned_bio->bi_end_io = split_bio_end;
- submit_bio(cloned_bio);
- }
- /*
- * Now we have to wait for all those smaller bios to
- * end, so we can also end the "parent" bio.
- */
- continue;
- }
- /* We don't need to split this bio */
submit_bio(bio);
}
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
merge_bio.tail = shadow[j].request->biotail;
bio_list_merge(&info->bio_list, &merge_bio);
shadow[j].request->bio = NULL;
- blk_mq_end_request(shadow[j].request, 0);
+ blk_mq_end_request(shadow[j].request, BLK_STS_OK);
}
}
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2..14459d6 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
if (!blk_rq_is_passthrough(req))
break;
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return req;
}
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
/* Drop all in-flight and pending requests */
if (ace->req) {
- __blk_end_request_all(ace->req, -EIO);
+ __blk_end_request_all(ace->req, BLK_STS_IOERR);
ace->req = NULL;
}
while ((req = blk_fetch_request(ace->queue)) != NULL)
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
/* Drop back to IDLE state and notify waiters */
ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
}
/* bio finished; is there another one? */
- if (__blk_end_request_cur(ace->req, 0)) {
+ if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
/* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
* blk_rq_sectors(ace->req),
* blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
if (ace->queue == NULL)
goto err_blk_initq;
blk_queue_logical_block_size(ace->queue, 512);
+ blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
/*
* Allocate and initialize GD structure
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e5..41c95c9 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
while (req) {
unsigned long start = blk_rq_pos(req) << 9;
unsigned long len = blk_rq_cur_bytes(req);
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (start + len > z2ram_size) {
pr_err(DEVICE_NAME ": bad access: block=%llu, "
"count=%u\n",
(unsigned long long)blk_rq_pos(req),
blk_rq_cur_sectors(req));
- err = -EIO;
+ err = BLK_STS_IOERR;
goto done;
}
while (len) {
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952f..e36d160 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
if (!q)
return -ENXIO;
+ if (!blk_queue_scsi_passthrough(q)) {
+ WARN_ONCE(true,
+ "Attempt read CDDA info through a non-SCSI queue\n");
+ return -EINVAL;
+ }
+
cdi->last_sense = 0;
while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
break;
}
req = scsi_req(rq);
- scsi_req_init(rq);
ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
if (ret) {
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763..6495b03f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
*/
static void gdrom_readdisk_dma(struct work_struct *work)
{
- int err, block, block_cnt;
+ int block, block_cnt;
+ blk_status_t err;
struct packet_command *read_command;
struct list_head *elem, *next;
struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
__raw_writeb(1, GDROM_DMA_STATUS_REG);
wait_event_interruptible_timeout(request_queue,
gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
- err = gd.transfer ? -EIO : 0;
+ err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
gd.transfer = 0;
gd.pending = 0;
/* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
break;
case REQ_OP_WRITE:
pr_notice("Read only device - write request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
default:
printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
break;
}
}
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
err = -ENOMEM;
goto probe_fail_requestq;
}
+ blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
err = probe_gdrom_setupqueue();
if (err)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937..14d1e7d 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
int error;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->special = (char *)pc;
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
memset(sense, 0, sizeof(*sense));
blk_rq_init(rq->q, sense_rq);
- scsi_req_init(sense_rq);
+ scsi_req_init(req);
err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
ide_requeue_and_plug(drive, failed_rq);
if (ide_queue_sense_rq(drive, pc)) {
blk_start_request(failed_rq);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
}
}
EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
/* No more interrupts */
if ((stat & ATA_DRQ) == 0) {
- int uptodate, error;
+ int uptodate;
+ blk_status_t error;
debug_log("Packet command completed, %d bytes transferred\n",
blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- error = 0;
+ error = BLK_STS_OK;
} else {
if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
scsi_req(rq)->result = -EIO;
}
- error = uptodate ? 0 : -EIO;
+ error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
}
ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3..81e18f96 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
cdrom_analyze_sense_data(drive, failed);
- if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
+ if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
BUG();
} else
cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
rq = blk_get_request(drive->queue,
write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
ide_req(rq)->type = ATA_PRIV_PC;
rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
nr_bytes -= cmd->last_xfer_len;
if (nr_bytes > 0) {
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
return true;
}
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
out_end:
if (blk_rq_is_scsi(rq) && rc == 0) {
scsi_req(rq)->resid_len = 0;
- blk_end_request_all(rq, 0);
+ blk_end_request_all(rq, BLK_STS_OK);
hwif->rq = NULL;
} else {
if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
scsi_req(rq)->resid_len += cmd->last_xfer_len;
}
- ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
if (sense && rc == 2)
ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
if (nsectors == 0)
nsectors = 1;
- ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9);
+ ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
return ide_stopped;
}
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736..9d26c97 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
int ret;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
rq->rq_flags = RQF_QUIET;
blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32..ef7c8c4 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
return setting->set(drive, arg);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 5;
scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237..241983d 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
return -EBUSY;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c8122..54d4d78 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0,
+ ide_complete_rq(drive, BLK_STS_OK,
blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7..47d5f337 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
return ide_stopped;
}
scsi_req(rq)->result = err;
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
}
EXPORT_SYMBOL_GPL(ide_error);
-static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
+static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
{
struct request *rq = drive->hwif->rq;
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
if (err <= 0 && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err, blk_rq_bytes(rq));
}
}
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
}
/* done polling */
hwif->polling = 0;
- ide_complete_drive_reset(drive, 0);
+ ide_complete_drive_reset(drive, BLK_STS_OK);
return ide_stopped;
}
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
ide_hwif_t *hwif = drive->hwif;
const struct ide_port_ops *port_ops = hwif->port_ops;
u8 tmp;
- int err = 0;
+ blk_status_t err = BLK_STS_OK;
if (port_ops && port_ops->reset_poll) {
err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
hwif->name, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
} else {
tmp = ide_read_error(drive);
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
} else {
ide_reset_report_error(hwif, tmp);
drive->failures++;
- err = -EIO;
+ err = BLK_STS_IOERR;
}
}
out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
if (io_ports->ctl_addr == 0) {
spin_unlock_irqrestore(&hwif->lock, flags);
- ide_complete_drive_reset(drive, -ENXIO);
+ ide_complete_drive_reset(drive, BLK_STS_IOERR);
return ide_stopped;
}
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048..627b1f6 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, done);
+ ide_complete_rq(drive, BLK_STS_IOERR, done);
return ide_stopped;
}
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
if (ata_misc_request(rq)) {
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
} else
goto out_end;
@@ -303,7 +303,7 @@ out_end:
drive->failed_pc = NULL;
if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
scsi_req(rq)->result = -EIO;
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af72..3a234701 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
#include <linux/uaccess.h>
#include <asm/io.h>
-int ide_end_rq(ide_drive_t *drive, struct request *rq, int error,
+int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
unsigned int nr_bytes)
{
/*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
}
}
-int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
+int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
{
ide_hwif_t *hwif = drive->hwif;
struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
* if failfast is set on a request, override number of sectors
* and complete the whole request right now
*/
- if (blk_noretry_request(rq) && error <= 0)
+ if (blk_noretry_request(rq) && error)
nr_bytes = blk_rq_sectors(rq) << 9;
rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
scsi_req(rq)->result = -EIO;
}
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
}
static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
printk("%s: DRIVE_CMD (null)\n", drive->name);
#endif
scsi_req(rq)->result = 0;
- ide_complete_rq(drive, 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
return ide_stopped;
}
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d172..3661abb 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
struct request *rq;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
blk_execute_rq(drive->queue, NULL, rq, 0);
err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
int ret = 0;
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd_len = 1;
scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107..1f264d5 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
spin_unlock_irq(&hwif->lock);
rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
* timeout has expired, so power management will be reenabled.
*/
rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
- scsi_req_init(rq);
if (IS_ERR(rq))
goto out;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1..544f02d 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
rq->special = &rqpm;
rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
return ret;
}
-static void ide_end_sync_rq(struct request *rq, int error)
+static void ide_end_sync_rq(struct request *rq, blk_status_t error)
{
complete(rq->end_io_data);
}
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
if (unlikely(blk_queue_dying(q))) {
rq->rq_flags |= RQF_QUIET;
scsi_req(rq)->result = -ENXIO;
- __blk_end_request_all(rq, 0);
+ __blk_end_request_all(rq, BLK_STS_OK);
spin_unlock_irq(q->queue_lock);
return -ENXIO;
}
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
memset(&rqpm, 0, sizeof(rqpm));
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_PM_RESUME;
rq->rq_flags |= RQF_PREEMPT;
rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
drive->hwif->rq = NULL;
- if (blk_end_request(rq, 0, 0))
+ if (blk_end_request(rq, BLK_STS_OK, 0))
BUG();
}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 0235625..01b2adf 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
}
}
-static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+static void ide_initialize_rq(struct request *rq)
{
struct ide_request *req = blk_mq_rq_to_pdu(rq);
+ scsi_req_init(&req->sreq);
req->sreq.sense = req->sense;
- return 0;
}
/*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
return 1;
q->request_fn = do_ide_request;
- q->init_rq_fn = ide_init_rq;
+ q->initialize_rq_fn = ide_initialize_rq;
q->cmd_size = sizeof(struct ide_request);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return 1;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f9..fd57e8c 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
drive->failed_pc = NULL;
drive->pc_callback(drive, 0);
- ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
+ ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
return ide_stopped;
}
ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
BUG_ON(size < 0 || size % tape->blk_size);
rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_MISC;
scsi_req(rq)->cmd[13] = cmd;
rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d..4efe4c6 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
}
if (nr_bytes > 0)
- ide_complete_rq(drive, 0, nr_bytes);
+ ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
}
}
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
ide_driveid_update(drive);
}
- ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq));
+ ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
}
/*
@@ -394,7 +394,7 @@ out_end:
if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
ide_finish_cmd(drive, cmd, stat);
else
- ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9);
+ ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
return ide_stopped;
out_err:
ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
rq = blk_get_request(drive->queue,
(cmd->tf_flags & IDE_TFLAG_WRITE) ?
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
- scsi_req_init(rq);
ide_req(rq)->type = ATA_PRIV_TASKFILE;
/*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849b..57eea5a 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
* yet.
*/
-static int sil_sata_reset_poll(ide_drive_t *drive)
+static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
{
ide_hwif_t *hwif = drive->hwif;
void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
if ((sata_stat & 0x03) != 0x03) {
printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
hwif->name, sata_stat);
- return -ENXIO;
+ return BLK_STS_IOERR;
}
}
- return 0;
+ return BLK_STS_OK;
}
/**
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa60..ddae430 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
}
mutex_unlock(&dev->mlock);
- if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
- return -ENOMEM;
+ ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+ if (ret)
+ return ret;
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
if (!dev->ops->submit_io)
return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);
rqd->dev = tgt_dev;
- return dev->ops->submit_io(dev, rqd);
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io(dev, rqd);
+ if (ret)
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+ return ret;
}
EXPORT_SYMBOL(nvm_submit_io);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea8..024a8fc 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- if (ret == NVM_IO_REQUEUE) {
+ switch (ret) {
+ case NVM_IO_REQUEUE:
io_schedule();
goto retry;
+ case NVM_IO_ERR:
+ pblk_pipeline_stop(pblk);
+ goto out;
}
if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
+ pblk_rl_inserted(&pblk->rl, nr_entries);
+
out:
pblk_write_should_kick(pblk);
return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768..11fe0c5 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
*/
#include "pblk.h"
-#include <linux/time.h>
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+ pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
*ppa = rqd->ppa_addr;
pblk_mark_bb(pblk, line, ppa);
}
+
+ atomic_dec(&pblk->inflight_io);
}
/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
- up(&pblk->erase_sem);
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->r_rq_pool);
+ mempool_free(rqd, pblk->g_rq_pool);
}
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
spin_unlock(&line->lock);
return;
}
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
if (line->state == PBLK_LINESTATE_CLOSED)
move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
__pblk_map_invalidate(pblk, line, paddr);
}
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- __pblk_map_invalidate(pblk, line, paddr);
-
- pblk_rb_sync_init(&pblk->rwb, NULL);
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
- pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
unsigned int nr_secs)
{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
} else {
- pool = pblk->r_rq_pool;
- rq_size = pblk_r_rq_size;
+ pool = pblk->g_rq_pool;
+ rq_size = pblk_g_rq_size;
}
rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
if (rw == WRITE)
pool = pblk->w_rq_pool;
else
- pool = pblk->r_rq_pool;
+ pool = pblk->g_rq_pool;
mempool_free(rqd, pool);
}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
complete(waiting);
}
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
{
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio)
- return;
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
- bio->bi_private = &wait;
- bio->bi_end_io = pblk_end_bio_sync;
+ do {
+ if (!atomic_read(&pblk->inflight_io))
+ break;
- ret = pblk_write_to_cache(pblk, bio, 0);
- if (ret == NVM_IO_OK) {
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: flush cache timed out\n");
- }
- } else if (ret != NVM_IO_DONE) {
- pr_err("pblk: tear down bio failed\n");
- }
+ schedule();
+ } while (1);
+}
- if (bio->bi_error)
- pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+ pblk_rb_flush(&pblk->rwb);
+ do {
+ if (!pblk_rb_sync_count(&pblk->rwb))
+ break;
- bio_put(bio);
+ pblk_write_kick(pblk);
+ schedule();
+ } while (1);
}
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
+ int vsc = le32_to_cpu(*line->vsc);
- if (!line->vsc) {
+ lockdep_assert_held(&line->lock);
+
+ if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
}
- } else if (line->vsc < lm->mid_thrs) {
+ } else if (vsc < lm->high_thrs) {
if (line->gc_group != PBLK_LINEGC_HIGH) {
line->gc_group = PBLK_LINEGC_HIGH;
move_list = &l_mg->gc_high_list;
}
- } else if (line->vsc < lm->high_thrs) {
+ } else if (vsc < lm->mid_thrs) {
if (line->gc_group != PBLK_LINEGC_MID) {
line->gc_group = PBLK_LINEGC_MID;
move_list = &l_mg->gc_mid_list;
}
- } else if (line->vsc < line->sec_in_line) {
+ } else if (vsc < line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_LOW) {
line->gc_group = PBLK_LINEGC_LOW;
move_list = &l_mg->gc_low_list;
}
- } else if (line->vsc == line->sec_in_line) {
+ } else if (vsc == line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_EMPTY) {
line->gc_group = PBLK_LINEGC_EMPTY;
move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
line->gc_group = PBLK_LINEGC_NONE;
move_list = &l_mg->corrupt_list;
pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, line->vsc,
+ line->id, vsc,
line->sec_in_line,
lm->high_thrs, lm->mid_thrs);
}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
#endif
}
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+ pblk->sec_per_write = sec_per_write;
+}
+
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
}
}
#endif
+
+ atomic_inc(&pblk->inflight_io);
+
return nvm_submit_io(dev, rqd);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask)
+ int alloc_type, gfp_t gfp_mask)
{
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
void *kaddr = data;
struct page *page;
struct bio *bio;
int i, ret;
- if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+ if (alloc_type == PBLK_KMALLOC_META)
return bio_map_kern(dev->q, kaddr, len, gfp_mask);
bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush)
{
- int max = pblk->max_write_pgs;
+ int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
return secs_to_sync;
}
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
- int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+ u64 addr;
+ int i;
+
+ addr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ line->cur_sec = addr - nr_secs;
+
+ for (i = 0; i < nr_secs; i++, line->cur_sec--)
+ WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
u64 addr;
int i;
+ lockdep_assert_held(&line->lock);
+
/* logic error: ppa out-of-bounds. Prevent generating bad address */
if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
return addr;
}
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+ u64 paddr;
+
+ spin_lock(&line->lock);
+ paddr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ spin_unlock(&line->lock);
+
+ return paddr;
+}
+
/*
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
* taking the per LUN semaphore.
*/
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
- u64 paddr, int dir)
+ void *emeta_buf, u64 paddr, int dir)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ void *ppa_list, *meta_list;
struct bio *bio;
struct nvm_rq rqd;
- struct ppa_addr *ppa_list;
- dma_addr_t dma_ppa_list;
- void *emeta = line->emeta;
+ dma_addr_t dma_ppa_list, dma_meta_list;
int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec;
+ int left_ppas = lm->emeta_sec[0];
int id = line->id;
int rq_ppas, rq_len;
int cmd_op, bio_op;
- int flags;
int i, j;
int ret;
DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
if (dir == WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
} else
return -EINVAL;
- ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
- if (!ppa_list)
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &dma_meta_list);
+ if (!meta_list)
return -ENOMEM;
+ ppa_list = meta_list + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
rq_len = rq_ppas * geo->sec_size;
- bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
bio_set_op_attrs(bio, bio_op, 0);
rqd.bio = bio;
- rqd.opcode = cmd_op;
- rqd.flags = flags;
- rqd.nr_ppas = rq_ppas;
+ rqd.meta_list = meta_list;
rqd.ppa_list = ppa_list;
+ rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
+ rqd.opcode = cmd_op;
+ rqd.nr_ppas = rq_ppas;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
if (dir == WRITE) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+ rqd.flags = pblk_set_progr_mode(pblk, WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
spin_unlock(&line->lock);
- for (j = 0; j < min; j++, i++, paddr++)
+ for (j = 0; j < min; j++, i++, paddr++) {
+ meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
rqd.ppa_list[i] =
addr_to_gen_ppa(pblk, paddr, id);
+ }
}
} else {
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
int pos = pblk_dev_ppa_to_pos(geo, ppa);
+ int read_type = PBLK_READ_RANDOM;
+
+ if (pblk_io_aligned(pblk, rq_ppas))
+ read_type = PBLK_READ_SEQUENTIAL;
+ rqd.flags = pblk_set_read_mode(pblk, read_type);
while (test_bit(pos, line->blk_bitmap)) {
paddr += min;
@@ -645,9 +673,11 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: emeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
- bio_put(bio);
+ if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
if (rqd.error) {
if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
pblk_log_read_err(pblk, &rqd);
}
- emeta += rq_len;
+ emeta_buf += rq_len;
left_ppas -= rq_ppas;
if (left_ppas)
goto next_rq;
free_rqd_dma:
- nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
flags = pblk_set_progr_mode(pblk, WRITE);
- lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
+ flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
} else
return -EINVAL;
memset(&rqd, 0, sizeof(struct nvm_rq));
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
return -ENOMEM;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE)
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+ if (dir == WRITE) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ meta_list[i].lba = lba_list[paddr] = addr_empty;
+ }
}
/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: smeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
}
free_ppa_list:
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf)
{
- return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+ return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+ line->emeta_ssec, READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
- int ret;
+ int ret = 0;
DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
- return 0;
+ return ret;
}
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
struct ppa_addr ppa;
- int bit = -1;
+ int ret, bit = -1;
/* Erase only good blocks, one at a time */
do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
spin_unlock(&line->lock);
- if (pblk_blk_erase_sync(pblk, ppa)) {
+ ret = pblk_blk_erase_sync(pblk, ppa);
+ if (ret) {
pr_err("pblk: failed to erase line %d\n", line->id);
- return -ENOMEM;
+ return ret;
}
} while (1);
return 0;
}
+static void pblk_line_setup_metadata(struct pblk_line *line,
+ struct pblk_line_mgmt *l_mg,
+ struct pblk_line_meta *lm)
+{
+ int meta_line;
+
+ lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+ meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ if (meta_line == PBLK_DATA_LINES) {
+ spin_unlock(&l_mg->free_lock);
+ io_schedule();
+ spin_lock(&l_mg->free_lock);
+ goto retry_meta;
+ }
+
+ set_bit(meta_line, &l_mg->meta_bitmap);
+ line->meta_line = meta_line;
+
+ line->smeta = l_mg->sline_meta[meta_line];
+ line->emeta = l_mg->eline_meta[meta_line];
+
+ memset(line->smeta, 0, lm->smeta_len);
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
struct pblk_line *cur)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct line_smeta *smeta = line->smeta;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+ struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
int nr_blk_line;
/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
}
/* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+ line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
/* Mark LUNs allocated in this line (all for now) */
bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
- smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
- memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
- smeta->header.id = cpu_to_le32(line->id);
- smeta->header.type = cpu_to_le16(line->type);
- smeta->header.version = cpu_to_le16(1);
+ smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+ memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+ smeta_buf->header.id = cpu_to_le32(line->id);
+ smeta_buf->header.type = cpu_to_le16(line->type);
+ smeta_buf->header.version = cpu_to_le16(1);
/* Start metadata */
- smeta->seq_nr = cpu_to_le64(line->seq_nr);
- smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
/* Fill metadata among lines */
if (cur) {
memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta->prev_id = cpu_to_le32(cur->id);
- cur->emeta->next_id = cpu_to_le32(line->id);
+ smeta_buf->prev_id = cpu_to_le32(cur->id);
+ cur->emeta->buf->next_id = cpu_to_le32(line->id);
} else {
- smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
}
/* All smeta must be set at this point */
- smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
- smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+ smeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+ smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
/* End metadata */
- memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
- emeta->seq_nr = cpu_to_le64(line->seq_nr);
- emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta->nr_valid_lbas = cpu_to_le64(0);
- emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta->crc = cpu_to_le32(0);
- emeta->prev_id = smeta->prev_id;
+ memcpy(&emeta_buf->header, &smeta_buf->header,
+ sizeof(struct line_header));
+ emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+ emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+ emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ emeta_buf->crc = cpu_to_le32(0);
+ emeta_buf->prev_id = smeta_buf->prev_id;
return 1;
}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
off = bit * geo->sec_per_pl;
-retry_smeta:
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- off += geo->sec_per_pl;
- goto retry_smeta;
+ return 1;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
* blocks to make sure that there are enough sectors to store emeta
*/
bit = lm->sec_per_line;
- off = lm->sec_per_line - lm->emeta_sec;
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+ off = lm->sec_per_line - lm->emeta_sec[0];
+ bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
off -= geo->sec_per_pl;
if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
}
}
- line->sec_in_line -= lm->emeta_sec;
+ line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
- line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+ line->nr_valid_lbas = 0;
+ line->left_msecs = line->sec_in_line;
+ *line->vsc = cpu_to_le32(line->sec_in_line);
if (lm->sec_per_line - line->sec_in_line !=
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
+ mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ mempool_free(line->map_bitmap, pblk->line_meta_pool);
spin_unlock(&line->lock);
- WARN(1, "pblk: corrupted line state\n");
- return -EINTR;
+ WARN(1, "pblk: corrupted line %d, state %d\n",
+ line->id, line->state);
+ return -EAGAIN;
}
+
line->state = PBLK_LINESTATE_OPEN;
atomic_set(&line->left_eblks, blk_in_line);
atomic_set(&line->left_seblks, blk_in_line);
+
+ line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line = NULL;
- int bit;
+ struct pblk_line *line;
+ int ret, bit;
lockdep_assert_held(&l_mg->free_lock);
-retry_get:
+retry:
if (list_empty(&l_mg->free_list)) {
pr_err("pblk: no free lines\n");
- goto out;
+ return NULL;
}
line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
list_add_tail(&line->list, &l_mg->bad_list);
pr_debug("pblk: line %d is bad\n", line->id);
- goto retry_get;
+ goto retry;
}
- if (pblk_line_prepare(pblk, line)) {
- pr_err("pblk: failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- return NULL;
+ ret = pblk_line_prepare(pblk, line);
+ if (ret) {
+ if (ret == -EAGAIN) {
+ list_add(&line->list, &l_mg->corrupt_list);
+ goto retry;
+ } else {
+ pr_err("pblk: failed to prepare line %d\n", line->id);
+ list_add(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+ return NULL;
+ }
}
-out:
return line;
}
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *retry_line;
+retry:
spin_lock(&l_mg->free_lock);
retry_line = pblk_line_get(pblk);
if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
- if (pblk_line_erase(pblk, retry_line)) {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+ if (pblk_line_erase(pblk, retry_line))
+ goto retry;
+
return retry_line;
}
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+
+ atomic_set(&rl->rb_space, 0);
+}
+
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
- int meta_line;
int is_next = 0;
spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
line->type = PBLK_LINETYPE_DATA;
l_mg->data_line = line;
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->smeta = l_mg->sline_meta[meta_line].meta;
- line->emeta = l_mg->eline_meta[meta_line].meta;
- line->meta_line = meta_line;
+ pblk_line_setup_metadata(line, l_mg, &pblk->lm);
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_set_space_limit(pblk);
+
+ l_mg->data_next = NULL;
+ } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_erase(pblk, line)) {
+ line = pblk_line_retry(pblk, line);
+ if (!line)
+ return NULL;
+ }
+
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
- if (pblk_line_erase(pblk, line))
- return NULL;
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, line, NULL)) {
+ if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line);
if (!line)
return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
return line;
}
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+ lockdep_assert_held(&pblk->l_mg.free_lock);
+
+ pblk_set_space_limit(pblk);
+ pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int ret;
+
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state == PBLK_STATE_RECOVERING ||
+ pblk->state == PBLK_STATE_STOPPED) {
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+ pblk->state = PBLK_STATE_RECOVERING;
+ spin_unlock(&l_mg->free_lock);
+
+ pblk_flush_writer(pblk);
+ pblk_wait_for_meta(pblk);
+
+ ret = pblk_recov_pad(pblk);
+ if (ret) {
+ pr_err("pblk: could not close data on teardown(%d)\n", ret);
+ return;
+ }
+
+ flush_workqueue(pblk->bb_wq);
+ pblk_line_close_meta_sync(pblk);
+
+ spin_lock(&l_mg->free_lock);
+ pblk->state = PBLK_STATE_STOPPED;
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new;
unsigned int left_seblks;
- int meta_line;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return NULL;
+ return;
l_mg->data_line = new;
-retry_line:
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state != PBLK_STATE_RUNNING) {
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+
+ pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+ spin_unlock(&l_mg->free_lock);
+
+retry_erase:
left_seblks = atomic_read(&new->left_seblks);
if (left_seblks) {
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return NULL;
+ return;
} else {
io_schedule();
}
- goto retry_line;
+ goto retry_erase;
}
- spin_lock(&l_mg->free_lock);
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- new->smeta = l_mg->sline_meta[meta_line].meta;
- new->emeta = l_mg->eline_meta[meta_line].meta;
- new->meta_line = meta_line;
-
- memset(new->smeta, 0, lm->smeta_len);
- memset(new->emeta, 0, lm->emeta_len);
- spin_unlock(&l_mg->free_lock);
-
- if (is_next)
- pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, new, cur)) {
+ if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
@@ -1292,12 +1410,30 @@ retry_setup:
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
- return new;
+ /* Allocate next line for preparation */
+ spin_lock(&l_mg->free_lock);
+ l_mg->data_next = pblk_line_get(pblk);
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_stop_writes(pblk, new);
+ l_mg->data_next = NULL;
+ } else {
+ l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+ l_mg->data_next->type = PBLK_LINETYPE_DATA;
+ is_next = 1;
+ }
+ spin_unlock(&l_mg->free_lock);
+
+ if (is_next)
+ pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
if (line->invalid_bitmap)
mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
line->map_bitmap = NULL;
line->invalid_bitmap = NULL;
line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
struct nvm_rq *rqd;
int err;
- rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_r_rq_size);
+ rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
pblk_setup_e_rq(pblk, rqd, ppa);
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
return pblk->l_mg.data_line;
}
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
return pblk->l_mg.data_next;
}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
return (line->left_msecs == 0);
}
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line *line, *tline;
+ LIST_HEAD(list);
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return;
+ }
+
+ list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+ spin_unlock(&l_mg->close_lock);
+
+ list_for_each_entry_safe(line, tline, &list, list) {
+ struct pblk_emeta *emeta = line->emeta;
+
+ while (emeta->mem < lm->emeta_len[0]) {
+ int ret;
+
+ ret = pblk_submit_meta_io(pblk, line);
+ if (ret) {
+ pr_err("pblk: sync meta line %d failed (%d)\n",
+ line->id, ret);
+ return;
+ }
+ }
+ }
+
+ pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+ if (pblk_rl_is_limit(&pblk->rl))
+ pblk_line_close_meta_sync(pblk);
+}
+
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
- line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
- if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
- pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+ struct pblk_line_meta *lm = &pblk->lm;
- WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+ WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
+#endif
spin_lock(&l_mg->free_lock);
WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
+
+ pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+
+ /* No need for exact vsc value; avoid a big line lock and take aprox. */
+ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+ memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+ emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+ emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+ spin_lock(&l_mg->close_lock);
+ spin_lock(&line->lock);
+ list_add_tail(&line->list, &l_mg->emeta_list);
+ spin_unlock(&line->lock);
+ spin_unlock(&l_mg->close_lock);
+
+ pblk_line_should_sync_meta(pblk);
}
void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
}
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *))
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq)
{
struct pblk_line_ws *line_ws;
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
line_ws->priv = priv;
INIT_WORK(&line_ws->ws, work);
- queue_work(pblk->kw_wq, &line_ws->ws);
+ queue_work(wq, &line_ws->ws);
}
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
int ret;
/*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
/* If the LUN has been locked for this same request, do no attempt to
* lock it again
*/
- if (test_and_set_bit(lun_id, lun_bitmap))
+ if (test_and_set_bit(pos, lun_bitmap))
return;
- rlun = &pblk->luns[lun_id];
+ rlun = &pblk->luns[pos];
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
if (ret) {
switch (ret) {
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c..6090d28 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
- kfree(gc_rq->data);
- kfree(gc_rq->lba_list);
+ vfree(gc_rq->data);
kfree(gc_rq);
}
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
return 1;
}
- list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
- list_move_tail(&gc_rq->list, &w_list);
- gc->w_entries--;
- }
+ list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+ gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
gc_rq->nr_secs, gc_rq->secs_to_gc,
gc_rq->line, PBLK_IOTYPE_GC);
- kref_put(&gc_rq->line->ref, pblk_line_put);
-
list_del(&gc_rq->list);
+ kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
* Responsible for managing all memory related to a gc request. Also in case of
* failure
*/
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
- u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq;
+ struct pblk_line *line = gc_rq->line;
void *data;
unsigned int secs_to_gc;
- int ret = NVM_IO_OK;
+ int ret = 0;
- data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+ data = vmalloc(gc_rq->nr_secs * geo->sec_size);
if (!data) {
- ret = NVM_IO_ERR;
- goto free_lba_list;
+ ret = -ENOMEM;
+ goto out;
}
/* Read from GC victim block */
- if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+ if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
&secs_to_gc, line)) {
- ret = NVM_IO_ERR;
+ ret = -EFAULT;
goto free_data;
}
if (!secs_to_gc)
- goto free_data;
-
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq) {
- ret = NVM_IO_ERR;
- goto free_data;
- }
+ goto free_rq;
- gc_rq->line = line;
gc_rq->data = data;
- gc_rq->lba_list = lba_list;
- gc_rq->nr_secs = nr_secs;
gc_rq->secs_to_gc = secs_to_gc;
- kref_get(&line->ref);
-
retry:
spin_lock(&gc->w_lock);
- if (gc->w_entries > 256) {
+ if (gc->w_entries >= PBLK_GC_W_QD) {
spin_unlock(&gc->w_lock);
- usleep_range(256, 1024);
+ pblk_gc_writer_kick(&pblk->gc);
+ usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
pblk_gc_writer_kick(&pblk->gc);
- return NVM_IO_OK;
+ return 0;
+free_rq:
+ kfree(gc_rq);
free_data:
- kfree(data);
-free_lba_list:
- kfree(lba_list);
-
+ vfree(data);
+out:
+ kref_put(&line->ref, pblk_line_put);
return ret;
}
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
static void pblk_gc_line_ws(struct work_struct *work)
{
+ struct pblk_line_ws *line_rq_ws = container_of(work,
+ struct pblk_line_ws, ws);
+ struct pblk *pblk = line_rq_ws->pblk;
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line = line_rq_ws->line;
+ struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+ up(&gc->gc_sem);
+
+ if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+ pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+ line->id, *line->vsc,
+ gc_rq->nr_secs);
+ }
+
+ mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line = line_ws->line;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- __le64 *lba_list = line_ws->priv;
- u64 *gc_list;
- int sec_left;
- int nr_ppas, bit;
- int put_line = 1;
+ struct pblk_gc *gc = &pblk->gc;
+ struct line_emeta *emeta_buf;
+ struct pblk_line_ws *line_rq_ws;
+ struct pblk_gc_rq *gc_rq;
+ __le64 *lba_list;
+ int sec_left, nr_secs, bit;
+ int ret;
- pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+ emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+ GFP_KERNEL);
+ if (!emeta_buf) {
+ pr_err("pblk: cannot use GC emeta\n");
+ return;
+ }
- spin_lock(&line->lock);
- sec_left = line->vsc;
- if (!sec_left) {
- /* Lines are erased before being used (l_mg->data_/log_next) */
- spin_unlock(&line->lock);
- goto out;
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+ goto fail_free_emeta;
+ }
+
+ /* If this read fails, it means that emeta is corrupted. For now, leave
+ * the line untouched. TODO: Implement a recovery routine that scans and
+ * moves all sectors on the line.
+ */
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+ goto fail_free_emeta;
}
- spin_unlock(&line->lock);
+ sec_left = pblk_line_vsc(line);
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
+ goto fail_free_emeta;
}
bit = -1;
next_rq:
- gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
- if (!gc_list) {
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+ if (!gc_rq)
+ goto fail_free_emeta;
- nr_ppas = 0;
+ nr_secs = 0;
do {
bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
- gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
- } while (nr_ppas < pblk->max_write_pgs);
+ gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+ } while (nr_secs < pblk->max_write_pgs);
- if (unlikely(!nr_ppas)) {
- kfree(gc_list);
+ if (unlikely(!nr_secs)) {
+ kfree(gc_rq);
goto out;
}
- if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
- pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
- line->id, line->vsc,
- nr_ppas, nr_ppas);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq->nr_secs = nr_secs;
+ gc_rq->line = line;
+
+ line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_rq_ws)
+ goto fail_free_gc_rq;
- sec_left -= nr_ppas;
+ line_rq_ws->pblk = pblk;
+ line_rq_ws->line = line;
+ line_rq_ws->priv = gc_rq;
+
+ down(&gc->gc_sem);
+ kref_get(&line->ref);
+
+ INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+ queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+ sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
mempool_free(line_ws, pblk->line_ws_pool);
- atomic_dec(&pblk->gc.inflight_gc);
- if (put_line)
- kref_put(&line->ref, pblk_line_put);
+
+ kref_put(&line->ref, pblk_line_put);
+ atomic_dec(&gc->inflight_gc);
+
+ return;
+
+fail_free_gc_rq:
+ kfree(gc_rq);
+fail_free_emeta:
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_put_line_back(pblk, line);
+ kref_put(&line->ref, pblk_line_put);
+ mempool_free(line_ws, pblk->line_ws_pool);
+ atomic_dec(&gc->inflight_gc);
+
+ pr_err("pblk: Failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
- __le64 *lba_list;
- int ret;
- line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
- line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!line->emeta) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_ws;
- }
-
- ret = pblk_line_read_emeta(pblk, line);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
+ pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
- lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
+ line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_ws)
+ return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
- line_ws->priv = lba_list;
- INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
- queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+ INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+ queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
+}
-fail_free_emeta:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
- mempool_free(line_ws, pblk->line_ws_pool);
- pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line;
+
+ spin_lock(&gc->r_lock);
+ if (list_empty(&gc->r_list)) {
+ spin_unlock(&gc->r_lock);
+ return 1;
+ }
+
+ line = list_first_entry(&gc->r_list, struct pblk_line, list);
+ list_del(&line->list);
+ spin_unlock(&gc->r_lock);
+
+ pblk_gc_kick(pblk);
- return 1;
+ if (pblk_gc_line(pblk, line))
+ pr_err("pblk: failed to GC line %d\n", line->id);
+
+ return 0;
}
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
- struct pblk_line *line, *tline;
+ wake_up_process(gc->gc_reader_ts);
+}
- list_for_each_entry_safe(line, tline, gc_list, list) {
- if (pblk_gc_line(pblk, line))
- pr_err("pblk: failed to GC line %d\n", line->id);
- list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+ struct list_head *group_list)
+{
+ struct pblk_line *line, *victim;
+ int line_vsc, victim_vsc;
+
+ victim = list_first_entry(group_list, struct pblk_line, list);
+ list_for_each_entry(line, group_list, list) {
+ line_vsc = le32_to_cpu(*line->vsc);
+ victim_vsc = le32_to_cpu(*victim->vsc);
+ if (line_vsc < victim_vsc)
+ victim = line;
}
+
+ return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+ unsigned int nr_blocks_free, nr_blocks_need;
+
+ nr_blocks_need = pblk_rl_high_thrs(rl);
+ nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+ /* This is not critical, no need to take lock here */
+ return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line, *tline;
- unsigned int nr_blocks_free, nr_blocks_need;
+ struct pblk_line *line;
struct list_head *group_list;
- int run_gc, gc_group = 0;
- int prev_gc = 0;
- int inflight_gc = atomic_read(&gc->inflight_gc);
- LIST_HEAD(gc_list);
+ bool run_gc;
+ int inflight_gc, gc_group = 0, prev_group = 0;
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(&l_mg->gc_full_list)) {
+ spin_unlock(&l_mg->gc_lock);
+ break;
+ }
+
+ line = list_first_entry(&l_mg->gc_full_list,
+ struct pblk_line, list);
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
spin_unlock(&line->lock);
list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&l_mg->gc_lock);
+ } while (1);
- nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
- nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+ return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
- spin_lock(&l_mg->gc_lock);
- while (run_gc && !list_empty(group_list)) {
- /* No need to queue up more GC lines than we can handle */
- if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(group_list)) {
spin_unlock(&l_mg->gc_lock);
- pblk_gc_lines(pblk, &gc_list);
- return;
+ break;
}
- line = list_first_entry(group_list, struct pblk_line, list);
- nr_blocks_free += atomic_read(&line->blk_in_line);
+ line = pblk_gc_get_victim_line(pblk, group_list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
- list_move_tail(&line->list, &gc_list);
- atomic_inc(&gc->inflight_gc);
- inflight_gc++;
spin_unlock(&line->lock);
- prev_gc = 1;
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
- }
- spin_unlock(&l_mg->gc_lock);
+ list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
- pblk_gc_lines(pblk, &gc_list);
+ inflight_gc = atomic_inc_return(&gc->inflight_gc);
+ pblk_gc_reader_kick(gc);
- if (!prev_gc && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_NR_GC_LISTS)
+ prev_group = 1;
+
+ /* No need to queue up more GC lines than we can handle */
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+ break;
+ } while (1);
+
+ if (!prev_group && pblk->rl.rb_state > gc_group &&
+ gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
wake_up_process(gc->gc_ts);
pblk_gc_writer_kick(gc);
+ pblk_gc_reader_kick(gc);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
return 0;
}
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
{
- pblk->gc.gc_active = 1;
+ struct pblk *pblk = data;
- pr_debug("pblk: gc start\n");
+ while (!kthread_should_stop()) {
+ if (!pblk_gc_read(pblk))
+ continue;
+ set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule();
+ }
+
+ return 0;
}
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- spin_lock(&gc->lock);
- ret = gc->gc_active;
- spin_unlock(&gc->lock);
-
- return ret;
+ pblk->gc.gc_active = 1;
+ pr_debug("pblk: gc start\n");
}
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
- lockdep_assert_held(&gc->lock);
-
if (gc->gc_enabled && !gc->gc_active)
pblk_gc_start(pblk);
-}
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- __pblk_gc_should_start(pblk);
- spin_unlock(&gc->lock);
+ pblk_gc_kick(pblk);
}
/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
*/
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
- spin_lock(&pblk->gc.lock);
pblk->gc.gc_active = 0;
- spin_unlock(&pblk->gc.lock);
-
pr_debug("pblk: gc stop\n");
}
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
spin_unlock(&gc->lock);
}
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
- int rsv = 0;
+
+ if (force < 0 || force > 1)
+ return -EINVAL;
spin_lock(&gc->lock);
- if (force) {
- gc->gc_enabled = 1;
- rsv = 64;
- }
- pblk_rl_set_gc_rsc(&pblk->rl, rsv);
gc->gc_forced = force;
- __pblk_gc_should_start(pblk);
+
+ if (force)
+ gc->gc_enabled = 1;
+ else
+ gc->gc_enabled = 0;
spin_unlock(&gc->lock);
+
+ pblk_gc_should_start(pblk);
+
+ return 0;
}
int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
goto fail_free_main_kthread;
}
+ gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+ "pblk-gc-reader-ts");
+ if (IS_ERR(gc->gc_reader_ts)) {
+ pr_err("pblk: could not allocate GC reader kthread\n");
+ ret = PTR_ERR(gc->gc_reader_ts);
+ goto fail_free_writer_kthread;
+ }
+
setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
- gc->gc_jobs_active = 8;
gc->w_entries = 0;
atomic_set(&gc->inflight_gc, 0);
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+ /* Workqueue that reads valid sectors from a line and submit them to the
+ * GC writer to be recycled.
+ */
+ gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+ if (!gc->gc_line_reader_wq) {
+ pr_err("pblk: could not allocate GC line reader workqueue\n");
+ ret = -ENOMEM;
+ goto fail_free_reader_kthread;
+ }
+
+ /* Workqueue that prepare lines for GC */
+ gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pr_err("pblk: could not allocate GC reader workqueue\n");
ret = -ENOMEM;
- goto fail_free_writer_kthread;
+ goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
+ spin_lock_init(&gc->r_lock);
+
+ sema_init(&gc->gc_sem, 128);
+
INIT_LIST_HEAD(&gc->w_list);
+ INIT_LIST_HEAD(&gc->r_list);
return 0;
+fail_free_reader_line_wq:
+ destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+ kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
struct pblk_gc *gc = &pblk->gc;
flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
del_timer(&gc->gc_timer);
pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
- if (pblk->gc.gc_reader_wq)
- destroy_workqueue(pblk->gc.gc_reader_wq);
+ if (gc->gc_reader_wq)
+ destroy_workqueue(gc->gc_reader_wq);
+
+ if (gc->gc_line_reader_wq)
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
+
+ if (gc->gc_reader_ts)
+ kthread_stop(gc->gc_reader_ts);
}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d..1b0f612 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
#include "pblk.h"
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
- *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+ *pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* constraint. Writes can be of arbitrary size.
*/
if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
ret = pblk_submit_read(pblk, bio);
if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* available for user I/O.
*/
if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
return -ENOMEM;
}
- pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+ pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
0, 0, NULL);
- if (!pblk_r_rq_cache) {
+ if (!pblk_g_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_w_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_line_meta_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
- int mod;
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
geo->nr_planes * geo->nr_luns;
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
if (pblk_init_global_caches(pblk))
return -ENOMEM;
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->page_pool)
return -ENOMEM;
- pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+ pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
pblk_blk_ws_cache);
if (!pblk->line_ws_pool)
goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->rec_pool)
goto free_blk_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
- if (!pblk->r_rq_pool)
+ pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+ pblk_g_rq_cache);
+ if (!pblk->g_rq_pool)
goto free_rec_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+ pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+ pblk_w_rq_cache);
if (!pblk->w_rq_pool)
- goto free_r_rq_pool;
+ goto free_g_rq_pool;
pblk->line_meta_pool =
- mempool_create_slab_pool(16, pblk_line_meta_cache);
+ mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+ pblk_line_meta_cache);
if (!pblk->line_meta_pool)
goto free_w_rq_pool;
- pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!pblk->kw_wq)
+ pblk->close_wq = alloc_workqueue("pblk-close-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+ if (!pblk->close_wq)
goto free_line_meta_pool;
+ pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!pblk->bb_wq)
+ goto free_close_wq;
+
if (pblk_set_ppaf(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
if (pblk_rwb_init(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
INIT_LIST_HEAD(&pblk->compl_list);
return 0;
-free_kw_wq:
- destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+ destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+ destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+ mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
static void pblk_core_free(struct pblk *pblk)
{
- if (pblk->kw_wq)
- destroy_workqueue(pblk->kw_wq);
+ if (pblk->close_wq)
+ destroy_workqueue(pblk->close_wq);
+
+ if (pblk->bb_wq)
+ destroy_workqueue(pblk->bb_wq);
mempool_destroy(pblk->page_pool);
mempool_destroy(pblk->line_ws_pool);
mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
+ mempool_destroy(pblk->g_rq_pool);
mempool_destroy(pblk->w_rq_pool);
mempool_destroy(pblk->line_meta_pool);
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
kmem_cache_destroy(pblk_line_meta_cache);
}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
kfree(pblk->luns);
}
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+ kfree(line->blk_bitmap);
+ kfree(line->erase_bitmap);
+}
+
static void pblk_lines_free(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
+ pblk_free_line_bitmaps(line);
}
spin_unlock(&l_mg->free_lock);
}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
kfree(l_mg->bb_template);
kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+ spin_lock(&l_mg->free_lock);
for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
}
+ spin_unlock(&l_mg->free_lock);
kfree(pblk->lines);
}
@@ -411,13 +417,31 @@ out:
return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+ int blk_per_line)
{
- struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
int bb_cnt = 0;
int i;
+ for (i = 0; i < blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+ continue;
+
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+ bb_cnt++;
+ }
+
+ return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
if (!line->blk_bitmap)
return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
}
- for (i = 0; i < lm->blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
-
- set_bit(i, line->blk_bitmap);
- bb_cnt++;
- }
-
- return bb_cnt;
+ return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
}
/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
{
- return (sizeof(struct line_emeta) +
- ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
- (pblk->l_mg.nr_lines * sizeof(u32)) +
- lm->blk_bitmap_len);
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ /* Round to sector size so that lba_list starts on its own sector */
+ lm->emeta_sec[1] = DIV_ROUND_UP(
+ sizeof(struct line_emeta) + lm->blk_bitmap_len,
+ geo->sec_size);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+ /* Round to sector size so that vsc_list starts on its own sector */
+ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+ lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+ geo->sec_size);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+ lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+ geo->sec_size);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+ lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+ return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ /* smeta is always small enough to fit on a kmalloc memory allocation,
+ * emeta depends on the number of LUNs allocated to the pblk instance
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+ if (!l_mg->sline_meta[i])
+ goto fail_free_smeta;
+ }
+
+ /* emeta allocates three different buffers for managing metadata with
+ * in-memory and in-media layouts
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ struct pblk_emeta *emeta;
+
+ emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+ if (!emeta)
+ goto fail_free_emeta;
+
+ if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+ l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+ emeta->buf = vmalloc(lm->emeta_len[0]);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ } else {
+ l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+ emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ }
+ }
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail_free_emeta;
+
+ for (i = 0; i < l_mg->nr_lines; i++)
+ l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+ return 0;
+
+fail_free_emeta:
+ while (--i >= 0) {
+ vfree(l_mg->eline_meta[i]->buf);
+ kfree(l_mg->eline_meta[i]);
+ }
+
+fail_free_smeta:
+ for (i = 0; i < PBLK_DATA_LINES; i++)
+ kfree(l_mg->sline_meta[i]);
+
+ return -ENOMEM;
+}
+
static int pblk_lines_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_meta_blks, nr_free_blks;
- int bb_distance;
- int i;
- int ret;
+ long nr_bad_blks, nr_free_blks;
+ int bb_distance, max_write_ppas, mod;
+ int i, ret;
+
+ pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+ pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+ max_write_ppas : nvm_max_phys_sects(dev);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: cannot support device max_phys_sect\n");
+ return -EINVAL;
+ }
+
+ div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ l_mg->nr_lines = geo->blks_per_lun;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
lm->high_thrs = lm->sec_per_line / 2;
lm->mid_thrs = lm->sec_per_line / 4;
+ lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
/* Calculate necessary pages for smeta. See comment over struct
* line_smeta definition
*/
- lm->smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
i = 1;
add_smeta_page:
lm->smeta_sec = i * geo->sec_per_pl;
lm->smeta_len = lm->smeta_sec * geo->sec_size;
- smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+ smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
i++;
goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
*/
i = 1;
add_emeta_page:
- lm->emeta_sec = i * geo->sec_per_pl;
- lm->emeta_len = lm->emeta_sec * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->sec_per_pl;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
- emeta_len = calc_emeta_len(pblk, lm);
- if (emeta_len > lm->emeta_len) {
+ emeta_len = calc_emeta_len(pblk);
+ if (emeta_len > lm->emeta_len[0]) {
i++;
goto add_emeta_page;
}
- lm->emeta_bb = geo->nr_luns - i;
-
- nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
- (geo->sec_per_blk / 2)) / geo->sec_per_blk;
- lm->min_blk_line = nr_meta_blks + 1;
-
- l_mg->nr_lines = geo->blks_per_lun;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->sline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
+ lm->emeta_bb = geo->nr_luns - i;
+ lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+ geo->sec_per_blk);
+ if (lm->min_blk_line > lm->blk_per_line) {
+ pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+ lm->blk_per_line);
+ ret = -EINVAL;
+ goto fail;
}
- if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
- l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- vfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- } else {
- l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta =
- kmalloc(lm->emeta_len, GFP_KERNEL);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- }
+ ret = pblk_lines_alloc_metadata(pblk);
+ if (ret)
+ goto fail;
l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
l_mg->gc_lists[0] = &l_mg->gc_high_list;
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
l_mg->gc_lists[2] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
spin_lock_init(&l_mg->gc_lock);
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
line->type = PBLK_LINETYPE_FREE;
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[i];
spin_lock_init(&line->lock);
- nr_bad_blks = pblk_bb_line(pblk, line);
+ ret = pblk_alloc_line_bitmaps(pblk, line);
+ if (ret)
+ goto fail_free_lines;
+
+ nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+ pblk_free_line_bitmaps(line);
ret = -EINVAL;
goto fail_free_lines;
}
@@ -713,24 +810,20 @@ add_emeta_page:
pblk_set_provision(pblk, nr_free_blks);
- sema_init(&pblk->erase_sem, 1);
-
/* Cleanup per-LUN bad block lists - managed within lines on run-time */
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
return 0;
fail_free_lines:
- kfree(pblk->lines);
+ while (--i >= 0)
+ pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
kfree(l_mg->bb_aux);
fail_free_bb_template:
kfree(l_mg->bb_template);
fail_free_meta:
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
- }
+ pblk_line_meta_free(pblk);
fail:
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
static void pblk_tear_down(struct pblk *pblk)
{
- pblk_flush_writer(pblk);
+ pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
- pblk_recov_pad(pblk);
pblk_rwb_free(pblk);
pblk_rl_free(&pblk->rl);
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk->dev = dev;
pblk->disk = tdisk;
+ pblk->state = PBLK_STATE_RUNNING;
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->compl_writes, 0);
atomic_long_set(&pblk->inflight_reads, 0);
+ atomic_long_set(&pblk->cache_reads, 0);
atomic_long_set(&pblk->sync_reads, 0);
atomic_long_set(&pblk->recov_writes, 0);
atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
static int __init pblk_module_init(void)
{
- return nvm_register_tgt_type(&tt_pblk);
+ int ret;
+
+ pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pblk_bio_set)
+ return -ENOMEM;
+ ret = nvm_register_tgt_type(&tt_pblk);
+ if (ret)
+ bioset_free(pblk_bio_set);
+ return ret;
}
static void pblk_module_exit(void)
{
+ bioset_free(pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c1695..fddb924 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
w_ctx->ppa = ppa_list[i];
meta_list[i].lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+ line->nr_valid_lbas++;
} else {
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
- pblk_map_pad_invalidate(pblk, line, paddr);
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ lba_list[paddr] = meta_list[i].lba = addr_empty;
+ __pblk_map_invalidate(pblk, line, paddr);
}
}
if (pblk_line_is_full(line)) {
- line = pblk_line_replace_data(pblk);
- if (!line)
- return;
+ struct pblk_line *prev_line = line;
+
+ pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
lun_bitmap, &meta_list[i], map_secs);
- erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
- rqd->ppa_list[i].g.ch;
+ erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- if (down_trylock(&pblk->erase_sem))
- continue;
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+ valid_secs, i + min);
+ spin_lock(&e_line->lock);
+ if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
+
*erase_ppa = rqd->ppa_list[i];
erase_ppa->g.blk = e_line->id;
+ spin_unlock(&e_line->lock);
+
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
+ spin_unlock(&e_line->lock);
}
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(ppa_empty(*erase_ppa))) {
- struct pblk_line_meta *lm = &pblk->lm;
+ d_line = pblk_line_get_data(pblk);
+
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return;
- i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
- if (i == lm->blk_per_line)
+ /* Erase blocks that are bad in this line but might not be in next */
+ if (unlikely(ppa_empty(*erase_ppa)) &&
+ bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+ int bit = -1;
+
+retry:
+ bit = find_next_bit(d_line->blk_bitmap,
+ lm->blk_per_line, bit + 1);
+ if (bit >= lm->blk_per_line)
return;
- set_bit(i, e_line->erase_bitmap);
+ spin_lock(&e_line->lock);
+ if (test_bit(bit, e_line->erase_bitmap)) {
+ spin_unlock(&e_line->lock);
+ goto retry;
+ }
+ spin_unlock(&e_line->lock);
+
+ set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+ *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->g.blk = e_line->id;
}
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384d..5ecc154 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
pblk_ppa_set_empty(&w_ctx->ppa);
+ w_ctx->lba = ADDR_EMPTY;
}
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
struct pblk_line *line;
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ unsigned int user_io = 0, gc_io = 0;
unsigned int i;
+ int flags;
for (i = 0; i < to_update; i++) {
entry = &rb->entries[*l2p_upd];
w_ctx = &entry->w_ctx;
+ flags = READ_ONCE(entry->w_ctx.flags);
+ if (flags & PBLK_IOTYPE_USER)
+ user_io++;
+ else if (flags & PBLK_IOTYPE_GC)
+ gc_io++;
+ else
+ WARN(1, "pblk: unknown IO type\n");
+
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
entry->cacheline);
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
}
+ pblk_rl_out(&pblk->rl, user_io, gc_io);
+
return 0;
}
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
/* Protect syncs */
smp_store_release(&rb->sync_point, sync_point);
+ if (!bio)
+ return 0;
+
spin_lock_irq(&rb->s_lock);
bio_list_add(&entry->w_ctx.bios, bio);
spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
return 1;
}
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ unsigned int mem = READ_ONCE(rb->mem);
+
+ if (pblk_rb_sync_point_set(rb, NULL, mem))
+ return;
+
+ pblk_write_should_kick(pblk);
+}
+
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
unsigned int *pos, struct bio *bio,
int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
unsigned int nr_entries, unsigned int *pos)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int flush_done;
+ int io_ret;
spin_lock(&rb->w_lock);
- if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+ io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+ if (io_ret) {
spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
+ return io_ret;
}
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+ if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
spin_unlock(&rb->w_lock);
return NVM_IO_REQUEUE;
}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
pblk_rl_user_in(&pblk->rl, nr_entries);
spin_unlock(&rb->w_lock);
- return flush_done;
+ return io_ret;
}
/*
@@ -521,20 +557,18 @@ out:
* This function is used by the write thread to form the write bio that will
* persist data on the write buffer to the media.
*/
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct request_queue *q = pblk->dev->q;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct pblk_rb_entry *entry;
struct page *page;
- unsigned int pad = 0, read = 0, to_read = nr_entries;
- unsigned int user_io = 0, gc_io = 0;
+ unsigned int pad = 0, to_read = nr_entries;
unsigned int i;
int flags;
- int ret;
if (count < nr_entries) {
pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
*/
try:
flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA))
+ if (!(flags & PBLK_WRITTEN_DATA)) {
+ io_schedule();
goto try;
-
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
+ }
page = virt_to_page(entry->data);
if (!page) {
@@ -570,17 +599,17 @@ try:
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
+ if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+ rb->seg_size) {
pr_err("pblk: could not add page to write bio\n");
flags &= ~PBLK_WRITTEN_DATA;
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
pos = (pos + 1) & (rb->nr_entries - 1);
}
- read = to_read;
- pblk_rl_out(&pblk->rl, user_io, gc_io);
+ if (pad) {
+ if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+ pr_err("pblk: could not pad page in write bio\n");
+ return NVM_IO_ERR;
+ }
+ }
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(pad, &((struct pblk *)
(container_of(rb, struct pblk, rwb)))->padded_writes);
#endif
-out:
- return read;
+
+ return NVM_IO_OK;
}
/*
@@ -623,15 +657,17 @@ out:
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter)
+ struct ppa_addr ppa, int bio_iter)
{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ struct ppa_addr l2p_ppa;
+ u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
- spin_lock(&rb->w_lock);
#ifdef CONFIG_NVM_DEBUG
/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
w_ctx = &entry->w_ctx;
flags = READ_ONCE(w_ctx->flags);
+ spin_lock(&rb->w_lock);
+ spin_lock(&pblk->trans_lock);
+ l2p_ppa = pblk_trans_map_get(pblk, lba);
+ spin_unlock(&pblk->trans_lock);
+
/* Check if the entry has been overwritten or is scheduled to be */
- if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+ if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+ flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14..4e5c48f 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
- pblk_addr_to_cacheline(ppa), bio_iter);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
}
WARN_ON(test_and_set_bit(i, read_bitmap));
advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
/* Read from media non-cached sectors */
rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ if (pblk_io_aligned(pblk, nr_secs))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
int err;
- rqd->flags = pblk_set_read_mode(pblk);
-
err = pblk_submit_io(pblk, rqd);
if (err)
return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio;
if (rqd->error)
pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+ WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif
- if (rqd->nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(bio);
- if (r_ctx->orig_bio) {
+ if (r_ctx->private) {
+ struct bio *orig_bio = r_ctx->private;
+
#ifdef CONFIG_NVM_DEBUG
- WARN_ONCE(r_ctx->orig_bio->bi_error,
- "pblk: corrupted read bio\n");
+ WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
- bio_endio(r_ctx->orig_bio);
- bio_put(r_ctx->orig_bio);
+ bio_endio(orig_bio);
+ bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
#endif
pblk_free_rqd(pblk, rqd, READ);
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd->end_io = NULL;
if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
goto retry;
}
WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
rqd->ppa_addr = ppa;
}
+
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
*/
bio_init_idx = pblk_get_bi_idx(bio);
+ rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd->dma_meta_list);
+ if (!rqd->meta_list) {
+ pr_err("pblk: not able to allocate ppa list\n");
+ goto fail_rqd_free;
+ }
+
if (nr_secs > 1) {
- rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("pblk: not able to allocate ppa list\n");
- goto fail_rqd_free;
- }
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio);
+ atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd);
return NVM_IO_OK;
}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
/* All sectors are to be read from the device */
if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
struct bio *int_bio = NULL;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
return NVM_IO_ERR;
}
rqd->bio = int_bio;
- r_ctx->orig_bio = bio;
+ r_ctx->private = bio;
ret = pblk_submit_read_io(pblk, rqd);
if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
struct bio *bio;
struct nvm_rq rqd;
int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
memset(&rqd, 0, sizeof(struct nvm_rq));
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
+ return NVM_IO_ERR;
+
if (nr_secs > 1) {
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
- return NVM_IO_ERR;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
nr_secs);
- if (*secs_to_gc == 1) {
- struct ppa_addr ppa;
-
- ppa = rqd.ppa_list[0];
- nvm_dev_dma_free(dev->parent, rqd.ppa_list,
- rqd.dma_ppa_list);
- rqd.ppa_addr = ppa;
- }
+ if (*secs_to_gc == 1)
+ rqd.ppa_addr = rqd.ppa_list[0];
} else {
*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
goto out;
data_len = (*secs_to_gc) * geo->sec_size;
- bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+ PBLK_KMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
rqd.nr_ppas = *secs_to_gc;
+ rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: GC read I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
#endif
out:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_OK;
err_free_dma:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_ERR;
}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f8508..0e48d3e 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
return 0;
}
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
- crc = pblk_calc_emeta_crc(pblk, emeta);
- if (le32_to_cpu(emeta->crc) != crc)
+ crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+ if (le32_to_cpu(emeta_buf->crc) != crc)
return NULL;
- if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
return NULL;
- return pblk_line_emeta_to_lbas(emeta);
+ return emeta_to_lbas(pblk, emeta_buf);
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
int data_start;
int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
int i;
- lba_list = pblk_recov_get_lba_list(pblk, emeta);
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
- nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+ nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+ nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (nr_valid_lbas != nr_lbas)
pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
- line->id, line->emeta->nr_valid_lbas, nr_lbas);
+ line->id, emeta_buf->nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+ return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
nr_bb * geo->sec_per_blk;
}
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
r_ptr_int = r_ptr;
next_read_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -295,7 +300,7 @@ next_read_rq:
pr_err("pblk: L2P recovery read timed out\n");
return -EINTR;
}
-
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
return 0;
}
+static void pblk_recov_complete(struct kref *ref)
+{
+ struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+ complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+ struct pblk_pad_rq *pad_rq = rqd->private;
+ struct pblk *pblk = pad_rq->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+ pblk_free_rqd(pblk, rqd, WRITE);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto free_rq;
+ }
+
rq_len = rq_ppas * geo->sec_size;
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto free_data;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+ memset(rqd, 0, pblk_w_rq_size);
+
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- memset(rqd, 0, pblk_r_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+ lba_list[w_ptr] = meta_list[i].lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
+ kref_get(&pad_rq->ref);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ goto free_data;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
- reinit_completion(&wait);
+ atomic_dec(&pblk->inflight_io);
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;
- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+free_rq:
+ kfree(pad_rq);
+free_data:
+ vfree(data);
+ return ret;
+
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+ kfree(pad_rq);
+ return ret;
}
/* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
rec_round = 0;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -510,6 +583,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;
- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);
@@ -552,7 +626,6 @@ next_rq:
if (ret)
pr_err("pblk: OOB read failed (err:%d)\n", ret);
- line->left_ssecs = line->left_msecs;
left_ppas = 0;
}
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
*done = 1;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -646,6 +723,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
/* Roll back failed sectors */
line->cur_sec -= nr_error_bits;
line->left_msecs += nr_error_bits;
- line->left_ssecs = line->left_msecs;
bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
- struct line_smeta *smeta;
- struct line_emeta *emeta;
+ struct pblk_smeta *smeta;
+ struct pblk_emeta *emeta;
+ struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line].meta;
- emeta = l_mg->eline_meta[meta_line].meta;
+ smeta = l_mg->sline_meta[meta_line];
+ emeta = l_mg->eline_meta[meta_line];
+ smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta)) +
+ line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
- crc = pblk_calc_smeta_crc(pblk, smeta);
- if (le32_to_cpu(smeta->crc) != crc)
+ crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+ if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
- if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (le16_to_cpu(smeta->header.version) != 1) {
+ if (le16_to_cpu(smeta_buf->header.version) != 1) {
pr_err("pblk: found incompatible line version %u\n",
- smeta->header.version);
+ smeta_buf->header.version);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
- memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+ memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
valid_uuid = 1;
}
- if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+ if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
pr_debug("pblk: ignore line %u due to uuid mismatch\n",
i);
continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
/* Update line metadata */
spin_lock(&line->lock);
- line->id = le32_to_cpu(line->smeta->header.id);
- line->type = le16_to_cpu(line->smeta->header.type);
- line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+ line->id = le32_to_cpu(smeta_buf->header.id);
+ line->type = le16_to_cpu(smeta_buf->header.type);
+ line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
pr_debug("pblk: recovering data line %d, seq:%llu\n",
- line->id, smeta->seq_nr);
+ line->id, smeta_buf->seq_nr);
}
if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
recovered_lines++;
/* Calculate where emeta starts based on the line bb */
- off = lm->sec_per_line - lm->emeta_sec;
+ off = lm->sec_per_line - lm->emeta_sec[0];
nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
off -= nr_bb * geo->sec_per_pl;
- memset(emeta, 0, lm->emeta_len);
- line->emeta = emeta;
line->emeta_ssec = off;
+ line->emeta = emeta;
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
- if (pblk_line_read_emeta(pblk, line)) {
+ if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
@@ -941,58 +1020,26 @@ out:
}
/*
- * Pad until smeta can be read on current data line
+ * Pad current line
*/
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
+ int left_msecs;
+ int ret = 0;
spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
+ left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- goto free_rqd;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data)
- goto free_meta_list;
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
- pr_err("pblk: Tear down padding failed\n");
- goto free_data;
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ if (ret) {
+ pr_err("pblk: Tear down padding failed (%d)\n", ret);
+ return ret;
}
- pblk_line_close(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_line_close_meta(pblk, line);
+ return ret;
}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb1..2e6a536 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+ int rb_space;
+
+ rb_space = atomic_read(&rl->rb_space);
+
+ return (rb_space == 0);
+}
+
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+ int rb_space = atomic_read(&rl->rb_space);
- return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+ if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+ return NVM_IO_ERR;
+
+ if (rb_user_cnt >= rl->rb_user_max)
+ return NVM_IO_REQUEUE;
+
+ return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+ int rb_space = atomic_read(&rl->rb_space);
+
+ if (unlikely(rb_space >= 0))
+ atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+ return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max - rl->rb_gc_rsv;
- rl->rb_gc_max = rl->rb_gc_rsv;
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
- int gc_max;
rl->rb_user_max = user_max;
- gc_max = max - rl->rb_user_max;
- rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
- if (free_blocks > rl->low)
- rl->rb_state = PBLK_RL_MID;
- else
- rl->rb_state = PBLK_RL_LOW;
+ rl->rb_gc_max = max - user_max;
+
+ if (free_blocks <= rl->rsv_blocks) {
+ rl->rb_user_max = 0;
+ rl->rb_gc_max = max;
+ }
+
+ /* In the worst case, we will need to GC lines in the low list
+ * (high valid sector count). If there are lines to GC on high
+ * or mid lists, these will be prioritized
+ */
+ rl->rb_state = PBLK_RL_LOW;
}
return rl->rb_state;
}
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
- rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
int blk_in_line = atomic_read(&line->blk_in_line);
- int ret;
atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+ int ret;
/* Rates will not change that often - no need to lock update */
ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
pblk_gc_should_stop(pblk);
}
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+ return rl->low;
+}
+
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
{
return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
void pblk_rl_init(struct pblk_rl *rl, int budget)
{
+ struct pblk *pblk = container_of(rl, struct pblk, rl);
+ struct pblk_line_meta *lm = &pblk->lm;
+ int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
- rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
rl->high_pw = get_count_order(rl->high);
+ rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+ if (rl->low < min_blocks)
+ rl->low = min_blocks;
+
+ rl->rsv_blocks = min_blocks;
+
/* This will always be a power-of-2 */
rb_windows = budget / PBLK_MAX_REQ_ADDRS;
- rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+ rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
- atomic_set(&rl->rb_user_cnt, 0);
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
+
+ atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
+ atomic_set(&rl->rb_space, -1);
setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
rl->rb_user_active = 0;
+ rl->rb_gc_active = 0;
}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1..95fb434 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
int free_blocks, total_blocks;
int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+ int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks);
rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_rsv = pblk->rl.rb_gc_rsv;
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
rb_budget = pblk->rl.rb_budget;
rb_state = pblk->rl.rb_state;
- total_blocks = geo->blks_per_lun * geo->nr_luns;
+ total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+ "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
rb_user_cnt,
rb_user_max,
rb_gc_cnt,
rb_gc_max,
- rb_gc_rsv,
rb_state,
rb_budget,
pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0;
+ int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int free = 0, bad = 0, cor = 0;
- int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+ int bad = 0, cor = 0;
+ int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
+ spin_lock(&l_mg->close_lock);
+ list_for_each_entry(line, &l_mg->emeta_list, list)
+ emeta_line_cnt++;
+ spin_unlock(&l_mg->close_lock);
+
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
gc_empty++;
}
- list_for_each_entry(line, &l_mg->free_list, list)
- free++;
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
- ssecs = l_mg->data_line->left_ssecs;
- vsc = l_mg->data_line->vsc;
+ vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
spin_unlock(&l_mg->free_lock);
if (nr_free_lines != free_line_cnt)
- pr_err("pblk: corrupted free line list\n");
+ pr_err("pblk: corrupted free line list:%d/%d\n",
+ nr_free_lines, free_line_cnt);
sz = snprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+ "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
- free, nr_free_lines, bad, cor,
+ nr_free_lines,
+ emeta_line_cnt, meta_weight,
closed_line_cnt,
+ bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
atomic_read(&pblk->gc.inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line, meta_weight);
+ "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+ cur_data, cur_sec, msecs, vsc, sec_in_line,
+ map_weight, lm->sec_per_line,
+ atomic_read(&pblk->inflight_io));
return sz;
}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
lm->smeta_len, lm->smeta_sec);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len, lm->emeta_sec,
+ lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
return sz;
}
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->compl_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
+ atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+ size_t len)
{
- struct pblk_gc *gc = &pblk->gc;
size_t c_len;
- int value;
+ int force;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &value))
+ if (kstrtouint(page, 0, &force))
return -EINVAL;
- spin_lock(&gc->lock);
- pblk_rl_set_gc_rsc(&pblk->rl, value);
- spin_unlock(&gc->lock);
+ pblk_gc_sysfs_force(pblk, force);
return len;
}
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+ const char *page, size_t len)
{
size_t c_len;
- int force;
+ int sec_per_write;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &force))
+ if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
- if (force < 0 || force > 1)
+ if (sec_per_write < pblk->min_write_pgs
+ || sec_per_write > pblk->max_write_pgs
+ || sec_per_write % pblk->min_write_pgs != 0)
return -EINVAL;
- pblk_gc_sysfs_force(pblk, force);
+ pblk_set_sec_per_write(pblk, sec_per_write);
return len;
}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
.mode = 0200,
};
-static struct attribute sys_gc_rl_max = {
- .name = "gc_rl_max",
- .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+ .name = "max_sec_per_write",
+ .mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
- &sys_gc_rl_max,
+ &sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_lines(pblk, buf);
else if (strcmp(attr->name, "lines_info") == 0)
return pblk_sysfs_lines_info(pblk, buf);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_get_sec_per_write(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
{
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
- if (strcmp(attr->name, "gc_rl_max") == 0)
- return pblk_sysfs_rate_store(pblk, buf, len);
- else if (strcmp(attr->name, "gc_force") == 0)
+ if (strcmp(attr->name, "gc_force") == 0)
return pblk_sysfs_gc_force(pblk, buf, len);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_set_sec_per_write(pblk, buf, len);
return 0;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7..d62a8f4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
#include "pblk.h"
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->sync_writes);
-#endif
-
- /* Counter protected by rb sync lock */
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
- struct ppa_addr p;
- struct pblk_line *line;
w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
- p = rqd->ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(p)];
- pblk_sync_line(pblk, line);
-
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+ atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
}
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->kw_wq, &recovery->ws_rec);
+ queue_work(pblk->close_wq, &recovery->ws_rec);
out:
pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
}
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
pblk_complete_write(pblk, rqd, c_ctx);
+ atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+ struct pblk *pblk = rqd->private;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_line *line = m_ctx->private;
+ struct pblk_emeta *emeta = line->emeta;
+ int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+ struct pblk_lun *rlun = &pblk->luns[pos];
+ int sync;
+
+ up(&rlun->wr_sem);
+
+ if (rqd->error) {
+ pblk_log_write_err(pblk, rqd);
+ pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+ }
+#ifdef CONFIG_NVM_DEBUG
+ else
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+ sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+ if (sync == emeta->nr_entries)
+ pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+ pblk->close_wq);
+
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, READ);
+
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs)
+ unsigned int nr_secs,
+ nvm_end_io_fn(*end_io))
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
rqd->nr_ppas = nr_secs;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->private = pblk;
- rqd->end_io = pblk_end_io_write;
+ rqd->end_io = end_io;
rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
+ struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
- struct ppa_addr erase_ppa;
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
int ret = 0;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!lun_bitmap)
+ return -ENOMEM;
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+ ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
if (ret) {
kfree(lun_bitmap);
- goto out;
+ return ret;
}
- ppa_set_empty(&erase_ppa);
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
else
pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, &erase_ppa);
-
-out:
- if (unlikely(e_line && !ppa_empty(erase_ppa))) {
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- up(&pblk->erase_sem);
- }
- }
+ valid, erase_ppa);
- return ret;
+ return 0;
}
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+ ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
if (ret)
return ret;
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *data_line;
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int i;
+
+ data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+ return 1;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result on a LUN deadlock. In this case, modify
+ * the distance to not be optimal, but allow metadata I/Os to succeed.
+ */
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+ data_line->meta_distance--;
+ return 0;
+ }
+
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+ ppa_list[i].g.lun == ppa_opt.g.lun)
+ return 1;
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa.g.ch &&
+ ppa_list[i].g.lun == ppa.g.lun)
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = meta_line->emeta;
+ struct pblk_g_ctx *m_ctx;
+ struct pblk_lun *rlun;
+ struct bio *bio;
+ struct nvm_rq *rqd;
+ void *data;
+ u64 paddr;
+ int rq_ppas = pblk->min_write_pgs;
+ int id = meta_line->id;
+ int rq_len;
+ int i, j;
+ int ret;
+
+ rqd = pblk_alloc_rqd(pblk, READ);
+ if (IS_ERR(rqd)) {
+ pr_err("pblk: cannot allocate write req.\n");
+ return PTR_ERR(rqd);
+ }
+ m_ctx = nvm_rq_to_pdu(rqd);
+ m_ctx->private = meta_line;
+
+ rq_len = rq_ppas * geo->sec_size;
+ data = ((void *)emeta->buf) + emeta->mem;
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ rqd->bio = bio;
+
+ ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+ if (ret)
+ goto fail_free_bio;
+
+ for (i = 0; i < rqd->nr_ppas; ) {
+ spin_lock(&meta_line->lock);
+ paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+ spin_unlock(&meta_line->lock);
+ for (j = 0; j < rq_ppas; j++, i++, paddr++)
+ rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+ }
+
+ rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ if (ret) {
+ pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+ goto fail_free_bio;
+ }
+
+ emeta->mem += rq_len;
+ if (emeta->mem >= lm->emeta_len[0]) {
+ spin_lock(&l_mg->close_lock);
+ list_del(&meta_line->list);
+ WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+ "pblk: corrupt meta line %d\n", meta_line->id);
+ spin_unlock(&l_mg->close_lock);
+ }
+
+ ret = pblk_submit_io(pblk, rqd);
+ if (ret) {
+ pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+ goto fail_rollback;
+ }
+
+ return NVM_IO_OK;
+
+fail_rollback:
+ spin_lock(&l_mg->close_lock);
+ pblk_dealloc_page(pblk, meta_line, rq_ppas);
+ list_add(&meta_line->list, &meta_line->list);
+ spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+ if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, READ);
+ return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+ int prev_n)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *meta_line;
+
+ spin_lock(&l_mg->close_lock);
+retry:
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+ if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+ goto retry;
+ spin_unlock(&l_mg->close_lock);
+
+ if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+ return 0;
+
+ return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr erase_ppa;
+ int err;
+
+ ppa_set_empty(&erase_ppa);
+
+ /* Assign lbas to ppas and populate request structure */
+ err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ if (err) {
+ pr_err("pblk: could not setup write request: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ if (likely(ppa_empty(erase_ppa))) {
+ /* Submit metadata write for previous data line */
+ err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+ } else {
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit available erase for next data line */
+ if (pblk_blk_erase_async(pblk, erase_ppa)) {
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int bit;
+
+ atomic_inc(&e_line->left_eblks);
+ bit = pblk_ppa_to_pos(geo, erase_ppa);
+ WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+ }
+ }
+
+ return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct bio *bio = rqd->bio;
+
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
static int pblk_submit_write(struct pblk *pblk)
{
struct bio *bio;
struct nvm_rq *rqd;
- struct pblk_c_ctx *c_ctx;
- unsigned int pgs_read;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
- int err;
/* If there are no sectors in the cache, flushes (bios without data)
* will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
pr_err("pblk: cannot allocate write req.\n");
return 1;
}
- c_ctx = nvm_rq_to_pdu(rqd);
bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
- secs_to_sync, secs_avail);
- if (!pgs_read) {
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
}
- if (c_ctx->nr_padded)
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
- goto fail_put_bio;
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx);
- if (err) {
- pr_err("pblk: could not setup write request\n");
- goto fail_free_bio;
- }
-
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: I/O submission failed: %d\n", err);
+ if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
- }
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
return 0;
fail_free_bio:
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+ pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
fail_free_rqd:
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186..1593138 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
#define PBLK_MAX_REQ_ADDRS (64)
#define PBLK_MAX_REQ_ADDRS_PW (6)
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
PBLK_BLK_ST_CLOSED = 0x2,
};
+struct pblk_sec_meta {
+ u64 reserved;
+ __le64 lba;
+};
+
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
enum {
PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
PBLK_RL_LOW = 3,
};
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
-/* write completion context */
+/* write buffer completion context */
struct pblk_c_ctx {
struct list_head list; /* Head for out-of-order completion */
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
unsigned int nr_padded;
};
-/* Read context */
-struct pblk_r_ctx {
- struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+ void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
};
/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
struct pblk_gc_rq {
struct pblk_line *line;
void *data;
- u64 *lba_list;
+ u64 lba_list[PBLK_MAX_REQ_ADDRS];
int nr_secs;
int secs_to_gc;
struct list_head list;
};
struct pblk_gc {
+ /* These states are not protected by a lock since (i) they are in the
+ * fast path, and (ii) they are not critical.
+ */
int gc_active;
int gc_enabled;
int gc_forced;
- int gc_jobs_active;
- atomic_t inflight_gc;
struct task_struct *gc_ts;
struct task_struct *gc_writer_ts;
+ struct task_struct *gc_reader_ts;
+
+ struct workqueue_struct *gc_line_reader_wq;
struct workqueue_struct *gc_reader_wq;
+
struct timer_list gc_timer;
+ struct semaphore gc_sem;
+ atomic_t inflight_gc;
int w_entries;
+
struct list_head w_list;
+ struct list_head r_list;
spinlock_t lock;
spinlock_t w_lock;
+ spinlock_t r_lock;
};
struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
*/
unsigned int high_pw; /* High rounded up as a power of 2 */
-#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
- * available blks
- */
-#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
int rb_windows_pw; /* Number of rate windows in the write buffer
* given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
*/
int rb_budget; /* Total number of entries available for I/O */
int rb_user_max; /* Max buffer entries available for user I/O */
- atomic_t rb_user_cnt; /* User I/O buffer counter */
int rb_gc_max; /* Max buffer entries available for GC I/O */
int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
int rb_state; /* Rate-limiter current state */
+
+ atomic_t rb_user_cnt; /* User I/O buffer counter */
atomic_t rb_gc_cnt; /* GC I/O buffer counter */
+ atomic_t rb_space; /* Space limit in case of reaching capacity */
+
+ int rsv_blocks; /* Reserved blocks for GC */
int rb_user_active;
+ int rb_gc_active;
+
struct timer_list u_timer;
unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
atomic_t free_blocks;
};
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
#define PBLK_LINE_EMPTY (~0U)
enum {
@@ -310,16 +334,19 @@ struct line_smeta {
__le32 window_wr_lun; /* Number of parallel LUNs to write */
__le32 rsvd[2];
+
+ __le64 lun_bitmap[];
};
/*
- * Metadata Layout:
- * 1. struct pblk_emeta
- * 2. nr_lbas u64 forming lba list
- * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- * 4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- * 3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ * First sector:
+ * 1. struct line_emeta
+ * 2. bad block bitmap (u64 * window_wr_lun)
+ * Mid sectors (start at lbas_sector):
+ * 3. nr_lbas (u64) forming lba list
+ * Last sectors (start at vsc_sector):
+ * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
*/
struct line_emeta {
struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+ struct line_emeta *buf; /* emeta buffer in media format */
+ int mem; /* Write offset - points to next
+ * writable entry in memory
+ */
+ atomic_t sync; /* Synced - backpointer that signals the
+ * last entry that has been successfully
+ * persisted to media
+ */
+ unsigned int nr_entries; /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+ struct line_smeta *buf; /* smeta buffer in persistent format */
};
struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
- struct line_smeta *smeta; /* Start metadata */
- struct line_emeta *emeta; /* End metadata */
+ struct pblk_smeta *smeta; /* Start metadata */
+ struct pblk_emeta *emeta; /* End medatada */
+
int meta_line; /* Metadata line id */
+ int meta_distance; /* Distance between data and metadata */
+
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */
@@ -374,9 +421,10 @@ struct pblk_line {
atomic_t left_seblks; /* Blocks left for sync erasing */
int left_msecs; /* Sectors left for mapping */
- int left_ssecs; /* Sectors left to sync */
unsigned int cur_sec; /* Sector map pointer */
- unsigned int vsc; /* Valid sector count in line */
+ unsigned int nr_valid_lbas; /* Number of valid lbas in line */
+
+ __le32 *vsc; /* Valid sector count in line */
struct kref ref; /* Write buffer L2P references */
@@ -385,13 +433,15 @@ struct pblk_line {
#define PBLK_DATA_LINES 4
-enum{
+enum {
PBLK_KMALLOC_META = 1,
PBLK_VMALLOC_META = 2,
};
-struct pblk_line_metadata {
- void *meta;
+enum {
+ PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
+ PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
+ PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
};
struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
struct list_head bad_list; /* Full lines bad */
/* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+ struct list_head *gc_lists[PBLK_GC_NR_LISTS];
struct list_head gc_high_list; /* Full lines ready to GC, high isc */
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
struct pblk_line *log_next; /* Next FTL log line */
struct pblk_line *data_next; /* Next data line */
+ struct list_head emeta_list; /* Lines queued to schedule emeta */
+
+ __le32 *vsc_list; /* Valid sector counts for all lines */
+
/* Metadata allocation type: VMALLOC | KMALLOC */
- int smeta_alloc_type;
int emeta_alloc_type;
/* Pre-allocated metadata for data lines */
- struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
- struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+ struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+ struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
unsigned long meta_bitmap;
/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
unsigned long l_seq_nr; /* Log line unique sequence number */
spinlock_t free_lock;
+ spinlock_t close_lock;
spinlock_t gc_lock;
};
struct pblk_line_meta {
unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta*/
- unsigned int emeta_len; /* Total length for emeta */
- unsigned int emeta_sec; /* Sectors needed for emeta*/
+ unsigned int smeta_sec; /* Sectors needed for smeta */
+
+ unsigned int emeta_len[4]; /* Lengths for emeta:
+ * [0]: Total length
+ * [1]: struct line_emeta length
+ * [2]: L2P portion length
+ * [3]: vsc list length
+ */
+ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
+ * as emeta_len
+ */
+
unsigned int emeta_bb; /* Boundary for bb that affects emeta */
+
+ unsigned int vsc_list_len; /* Length for vsc list */
unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
unsigned int blk_bitmap_len; /* Length for block bitmap in line */
unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
unsigned int blk_per_line; /* Number of blocks in a full line */
unsigned int sec_per_line; /* Number of sectors in a line */
+ unsigned int dsec_per_line; /* Number of data sectors in a line */
unsigned int min_blk_line; /* Min. number of good blocks in line */
unsigned int mid_thrs; /* Threshold for GC mid list */
unsigned int high_thrs; /* Threshold for GC high list */
+
+ unsigned int meta_distance; /* Distance between data and metadata */
};
struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
u8 sec_offset;
};
+enum {
+ PBLK_STATE_RUNNING = 0,
+ PBLK_STATE_STOPPING = 1,
+ PBLK_STATE_RECOVERING = 2,
+ PBLK_STATE_STOPPED = 3,
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
struct pblk_rb rwb;
+ int state; /* pblk line state */
+
int min_write_pgs; /* Minimum amount of pages required by controller */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
/* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl;
- struct semaphore erase_sem;
+ int sec_per_write;
unsigned char instance_uuid[16];
#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t compl_writes; /* Sectors completed in write bio */
atomic_long_t inflight_reads; /* Inflight sector read requests */
+ atomic_long_t cache_reads; /* Read requests that hit the cache */
atomic_long_t sync_reads; /* Completed sector read requests */
atomic_long_t recov_writes; /* Sectors submitted from recovery */
atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
atomic_long_t write_failed;
atomic_long_t erase_failed;
+ atomic_t inflight_io; /* General inflight I/O counter */
+
struct task_struct *writer_ts;
/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
mempool_t *page_pool;
mempool_t *line_ws_pool;
mempool_t *rec_pool;
- mempool_t *r_rq_pool;
+ mempool_t *g_rq_pool;
mempool_t *w_rq_pool;
mempool_t *line_meta_pool;
- struct workqueue_struct *kw_wq;
+ struct workqueue_struct *close_wq;
+ struct workqueue_struct *bb_wq;
+
struct timer_list wtimer;
struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
struct work_struct ws;
};
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
/*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
unsigned int pos);
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count);
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
struct list_head *list,
unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter);
+ struct ppa_addr ppa, int bio_iter);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
* pblk core
*/
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
void pblk_discard(struct pblk *pblk, struct bio *bio);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask);
+ int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
void pblk_line_mark_bb(struct work_struct *work);
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *));
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq);
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
int nr_pages);
void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr);
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
/*
* pblk read path
*/
+extern struct bio_set *pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
*/
void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
/*
* pblk gc
*/
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
/*
* pblk rate limiter
*/
void pblk_rl_init(struct pblk_rl *rl, int budget);
void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
/*
* pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
return c_ctx - sizeof(struct nvm_rq);
}
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+ return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
{
- return (emeta) + 1;
+ return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+ int vsc;
+
+ spin_lock(&line->lock);
+ vsc = le32_to_cpu(*line->vsc);
+ spin_unlock(&line->lock);
+
+ return vsc;
}
#define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
ppa_addr->ppa = ADDR_EMPTY;
}
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+ if (lppa.ppa == rppa.ppa)
+ return true;
+
+ return false;
+}
+
static inline int pblk_addr_in_cache(struct ppa_addr ppa)
{
return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
}
static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_smeta *smeta)
+ struct line_header *header)
{
u32 crc = ~(u32)0;
- crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+ crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
crc = crc32_le(crc, (unsigned char *)emeta +
sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len -
+ lm->emeta_len[0] -
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
return flags;
}
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+ PBLK_READ_RANDOM = 0,
+ PBLK_READ_SEQUENTIAL = 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int flags;
+
+ flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ if (type == PBLK_READ_SEQUENTIAL)
+ flags |= geo->plane_mode >> 1;
+
+ return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
{
- return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ return !(nr_secs % pblk->min_write_pgs);
}
#ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a..267f01a 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
{
struct completion *waiting = bio->bi_private;
- if (bio->bi_error)
- pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
complete(waiting);
}
@@ -359,7 +359,7 @@ try:
goto finished;
}
wait_for_completion_io(&wait);
- if (bio->bi_error) {
+ if (bio->bi_status) {
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
@@ -385,7 +385,7 @@ try:
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
- if (bio->bi_error)
+ if (bio->bi_status)
goto finished;
bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
struct nvm_rq *rqd;
int err;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_op(bio) == REQ_OP_DISCARD) {
rrpc_discard(rrpc, bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c..dee542f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
/* Forward declarations */
-void bch_count_io_errors(struct cache *, int, const char *);
+void bch_count_io_errors(struct cache *, blk_status_t, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- int, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *);
+ blk_status_t, const char *);
+void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
+ const char *);
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e8..866dcf7 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
bch_submit_bbio(bio, b->c, &b->key, 0);
closure_sync(&cl);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct btree *b = container_of(cl, struct btree, io);
- if (bio->bi_error)
+ if (bio->bi_status)
set_btree_node_io_error(b);
- bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree");
+ bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
closure_put(cl);
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f5505..35a5a72 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
struct bio_vec bv, cbv;
struct bvec_iter iter, citer = { 0 };
- check = bio_clone(bio, GFP_NOIO);
+ check = bio_clone_kmalloc(bio, GFP_NOIO);
if (!check)
return;
check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88..6a9b850 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
-void bch_count_io_errors(struct cache *ca, int error, const char *m)
+void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
{
/*
* The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
}
void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct bbio *b = container_of(bio, struct bbio, bio);
struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
}
void bch_bbio_endio(struct cache_set *c, struct bio *bio,
- int error, const char *m)
+ blk_status_t error, const char *m)
{
struct closure *cl = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53..0352d05 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
{
struct journal_write *w = bio->bi_private;
- cache_set_err_on(bio->bi_error, w->c, "journal io error");
+ cache_set_err_on(bio->bi_status, w->c, "journal io error");
closure_put(&w->c->journal.io);
}
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a90..f633b30 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, cl);
- if (bio->bi_error)
- io->op.error = bio->bi_error;
+ if (bio->bi_status)
+ io->op.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(io->op.c, &b->key, 0)) {
- io->op.error = -EINTR;
+ io->op.status = BLK_STS_IOERR;
}
- bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
+ bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
}
static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
- if (!op->error) {
+ if (!op->status) {
moving_init(io);
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc..019b3df 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
if (ret == -ESRCH) {
op->replace_collision = true;
} else if (ret) {
- op->error = -ENOMEM;
+ op->status = BLK_STS_RESOURCE;
op->insert_data_done = true;
}
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* TODO: We could try to recover from this. */
if (op->writeback)
- op->error = bio->bi_error;
+ op->status = bio->bi_status;
else if (!op->replace)
set_closure_fn(cl, bch_data_insert_error, op->wq);
else
set_closure_fn(cl, NULL, NULL);
}
- bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
+ bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}
static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
* from the backing device.
*/
- if (bio->bi_error)
- s->iop.error = bio->bi_error;
+ if (bio->bi_status)
+ s->iop.status = bio->bi_status;
else if (!KEY_DIRTY(&b->key) &&
ptr_stale(s->iop.c, &b->key, 0)) {
atomic_long_inc(&s->iop.c->cache_read_races);
- s->iop.error = -EINTR;
+ s->iop.status = BLK_STS_IOERR;
}
- bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
+ bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}
/*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
- s->iop.error = bio->bi_error;
+ s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
}
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
&s->d->disk->part0, s->start_time);
trace_bcache_request_end(s->d, s->orig_bio);
- s->orig_bio->bi_error = s->iop.error;
+ s->orig_bio->bi_status = s->iop.status;
bio_endio(s->orig_bio);
s->orig_bio = NULL;
}
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
s->iop.inode = d->id;
s->iop.write_point = hash_long((unsigned long) current, 16);
s->iop.write_prio = 0;
- s->iop.error = 0;
+ s->iop.status = 0;
s->iop.flags = 0;
s->iop.flush_journal = op_is_flush(bio->bi_opf);
s->iop.wq = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
/* Retry from the backing device: */
trace_bcache_read_retry(s->orig_bio);
- s->iop.error = 0;
+ s->iop.status = 0;
do_bio_hook(s, s->orig_bio);
/* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
!s->cache_miss, s->iop.bypass);
trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
- if (s->iop.error)
+ if (s->iop.status)
continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
else if (s->iop.bio || verify(dc, &s->bio.bio))
continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff3687..7689176 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
unsigned inode;
uint16_t write_point;
uint16_t write_prio;
- short error;
+ blk_status_t status;
union {
uint16_t flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e..8352fad 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- bch_count_io_errors(ca, bio->bi_error, "writing superblock");
+ bch_count_io_errors(ca, bio->bi_status, "writing superblock");
closure_put(&ca->set->sb_write);
}
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
struct closure *cl = bio->bi_private;
struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
- cache_set_err_on(bio->bi_error, c, "accessing uuids");
+ cache_set_err_on(bio->bi_status, c, "accessing uuids");
bch_bbio_free(bio, c);
closure_put(cl);
}
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
{
struct cache *ca = bio->bi_private;
- cache_set_err_on(bio->bi_error, ca->set, "accessing priorities");
+ cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
bch_bbio_free(bio, ca->set);
closure_put(&ca->prio);
}
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
minor *= BCACHE_MINORS;
- if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(d->disk = alloc_disk(BCACHE_MINORS))) {
ida_simple_remove(&bcache_minor, minor);
return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sizeof(struct bbio) + sizeof(struct bio_vec) *
bucket_pages(c))) ||
!(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
- !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
+ !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
+ BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER)) ||
!(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
!(c->moving_gc_wq = alloc_workqueue("bcache_gc",
WQ_MEM_RECLAIM, 0)) ||
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48..42c66e7 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
struct keybuf_key *w = bio->bi_private;
struct dirty_io *io = w->private;
- if (bio->bi_error)
+ if (bio->bi_status)
SET_KEY_DIRTY(&w->key, false);
closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
struct dirty_io *io = w->private;
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
- bio->bi_error, "reading dirty data from cache");
+ bio->bi_status, "reading dirty data from cache");
dirty_endio(bio);
}
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c..82d2738 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error)
+ struct dm_bio_prison_cell *cell, blk_status_t error)
{
struct bio_list bios;
struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
dm_cell_release(prison, cell, &bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac..cec52ac 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *inmates);
void dm_cell_error(struct dm_bio_prison *prison,
- struct dm_bio_prison_cell *cell, int error);
+ struct dm_bio_prison_cell *cell, blk_status_t error);
/*
* Visits the cell and then releases. Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 840c149..850ff6c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
enum data_mode data_mode;
unsigned char list_mode; /* LIST_* */
unsigned hold_count;
- int read_error;
- int write_error;
+ blk_status_t read_error;
+ blk_status_t write_error;
unsigned long state;
unsigned long last_accessed;
struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
{
struct dm_buffer *b = context;
- b->bio.bi_error = error ? -EIO : 0;
+ b->bio.bi_status = error ? BLK_STS_IOERR : 0;
b->bio.bi_end_io(&b->bio);
}
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
r = dm_io(&io_req, 1, &region, NULL);
if (r) {
- b->bio.bi_error = r;
+ b->bio.bi_status = errno_to_blk_status(r);
end_io(&b->bio);
}
}
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
static void inline_endio(struct bio *bio)
{
bio_end_io_t *end_fn = bio->bi_private;
- int error = bio->bi_error;
+ blk_status_t status = bio->bi_status;
/*
* Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
*/
bio_reset(bio);
- bio->bi_error = error;
+ bio->bi_status = status;
end_fn(bio);
}
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->write_error = bio->bi_error;
- if (unlikely(bio->bi_error)) {
+ b->write_error = bio->bi_status;
+ if (unlikely(bio->bi_status)) {
struct dm_bufio_client *c = b->c;
- int error = bio->bi_error;
- (void)cmpxchg(&c->async_write_error, 0, error);
+
+ (void)cmpxchg(&c->async_write_error, 0,
+ blk_status_to_errno(bio->bi_status));
}
BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
{
struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
- b->read_error = bio->bi_error;
+ b->read_error = bio->bi_status;
BUG_ON(!test_bit(B_READING, &b->state));
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
- int error = b->read_error;
+ int error = blk_status_to_errno(b->read_error);
dm_bufio_release(b);
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
*/
int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
{
- int a, f;
+ blk_status_t a;
+ int f;
unsigned long buffers_processed = 0;
struct dm_buffer *b, *tmp;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a05..c5ea03f 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
*/
struct continuation {
struct work_struct ws;
- int input;
+ blk_status_t input;
};
static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
/*
* The operation that everyone is waiting for.
*/
- int (*commit_op)(void *context);
+ blk_status_t (*commit_op)(void *context);
void *commit_context;
/*
@@ -171,8 +171,7 @@ struct batcher {
static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
-
- int r;
+ blk_status_t r;
unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
while ((bio = bio_list_pop(&bios))) {
if (r) {
- bio->bi_error = r;
+ bio->bi_status = r;
bio_endio(bio);
} else
b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
}
static void batcher_init(struct batcher *b,
- int (*commit_op)(void *),
+ blk_status_t (*commit_op)(void *),
void *commit_context,
void (*issue_op)(struct bio *bio, void *),
void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error) {
+ if (bio->bi_status) {
bio_endio(bio);
return;
}
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
if (read_err || write_err)
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
dm_unhook_bio(&pb->hook_info, bio);
- if (bio->bi_error)
- mg->k.input = bio->bi_error;
+ if (bio->bi_status)
+ mg->k.input = bio->bi_status;
queue_continuation(mg->cache->wq, &mg->k);
}
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
if (mg->overwrite_bio) {
if (success)
force_set_dirty(cache, cblock);
+ else if (mg->k.input)
+ mg->overwrite_bio->bi_status = mg->k.input;
else
- mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO);
+ mg->overwrite_bio->bi_status = BLK_STS_IOERR;
bio_endio(mg->overwrite_bio);
} else {
if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
r = copy(mg, is_policy_promote);
if (r) {
DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
- mg->k.input = -EIO;
+ mg->k.input = BLK_STS_IOERR;
mg_complete(mg, false);
}
}
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
/*
* Used by the batcher.
*/
-static int commit_op(void *context)
+static blk_status_t commit_op(void *context)
{
struct cache *cache = context;
if (dm_cache_changed_this_transaction(cache->cmd))
- return commit(cache, false);
+ return errno_to_blk_status(commit(cache, false));
return 0;
}
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
bio_list_init(&cache->deferred_bios);
while ((bio = bio_list_pop(&bios))) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
}
}
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
return r;
}
-static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int cache_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct cache *cache = ti->private;
unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
bio_drop_shared_lock(cache, bio);
accounted_complete(cache, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static int write_dirty_bitset(struct cache *cache)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72..9e1b72e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
struct convert_context ctx;
atomic_t io_pending;
- int error;
+ blk_status_t error;
sector_t sector;
struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
/*
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
-static int crypt_convert(struct crypt_config *cc,
+static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
*/
case -EBADMSG:
atomic_dec(&ctx->cc_pending);
- return -EILSEQ;
+ return BLK_STS_PROTECTION;
/*
* There was an error while processing the request.
*/
default:
atomic_dec(&ctx->cc_pending);
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- int error = io->error;
+ blk_status_t error = io->error;
if (!atomic_dec_and_test(&io->io_pending))
return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
else
kfree(io->integrity_metadata);
- base_bio->bi_error = error;
+ base_bio->bi_status = error;
bio_endio(base_bio);
}
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
struct dm_crypt_io *io = clone->bi_private;
struct crypt_config *cc = io->cc;
unsigned rw = bio_data_dir(clone);
- int error;
+ blk_status_t error;
/*
* free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
if (rw == WRITE)
crypt_free_buffer_pages(cc, clone);
- error = clone->bi_error;
+ error = clone->bi_status;
bio_put(clone);
if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
crypt_inc_pending(io);
if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
+ io->error = BLK_STS_RESOURCE;
crypt_dec_pending(io);
}
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
sector_t sector;
struct rb_node **rbp, *parent;
- if (unlikely(io->error < 0)) {
+ if (unlikely(io->error)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
struct bio *clone;
int crypt_finished;
sector_t sector = io->sector;
- int r;
+ blk_status_t r;
/*
* Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
if (unlikely(!clone)) {
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
goto dec;
}
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
- int r = 0;
+ blk_status_t r;
crypt_inc_pending(io);
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx);
- if (r < 0)
+ if (r)
io->error = r;
if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (error == -EBADMSG) {
DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
(unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
- io->error = -EILSEQ;
+ io->error = BLK_STS_PROTECTION;
} else if (error < 0)
- io->error = -EIO;
+ io->error = BLK_STS_IOERR;
crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0);
+ cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
* and is aligned to this size as defined in IO hints.
*/
if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
- return -EIO;
+ return DM_MAPIO_KILL;
if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, cc->per_bio_data_size);
crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a1..3d04d5c 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
if (bio_data_dir(bio) == READ) {
if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
!test_bit(ERROR_WRITES, &fc->flags))
- return -EIO;
+ return DM_MAPIO_KILL;
goto map_bio;
}
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
/*
* By default, error all I/O.
*/
- return -EIO;
+ return DM_MAPIO_KILL;
}
map_bio:
@@ -358,12 +358,13 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+ if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
/*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
* Error read during the down_interval if drop_writes
* and error_writes were not configured.
*/
- return -EIO;
+ *error = BLK_STS_IOERR;
}
}
- return error;
+ return DM_ENDIO_DONE;
}
static void flakey_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 93b1810..1b224aa 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
unsigned metadata_offset;
atomic_t in_flight;
- int bi_error;
+ blk_status_t bi_status;
struct completion *completion;
@@ -1118,8 +1118,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *
static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
{
int r = dm_integrity_failed(ic);
- if (unlikely(r) && !bio->bi_error)
- bio->bi_error = r;
+ if (unlikely(r) && !bio->bi_status)
+ bio->bi_status = errno_to_blk_status(r);
bio_endio(bio);
}
@@ -1127,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
{
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic)))
+ if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
submit_flush_bio(ic, dio);
else
do_endio(ic, bio);
@@ -1146,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- if (unlikely(dio->bi_error) && !bio->bi_error)
- bio->bi_error = dio->bi_error;
- if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
+ if (unlikely(dio->bi_status) && !bio->bi_status)
+ bio->bi_status = dio->bi_status;
+ if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
dio->range.logical_sector += dio->range.n_sectors;
bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1322,7 +1322,7 @@ skip_io:
dec_in_flight(dio);
return;
error:
- dio->bi_error = r;
+ dio->bi_status = errno_to_blk_status(r);
dec_in_flight(dio);
}
@@ -1335,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
sector_t area, offset;
dio->ic = ic;
- dio->bi_error = 0;
+ dio->bi_status = 0;
if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
submit_flush_bio(ic, dio);
@@ -1356,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
(unsigned long long)dio->range.logical_sector, bio_sectors(bio),
(unsigned long long)ic->provided_data_sectors);
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
ic->sectors_per_block,
(unsigned long long)dio->range.logical_sector, bio_sectors(bio));
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (ic->sectors_per_block > 1) {
@@ -1372,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
bv.bv_offset, bv.bv_len, ic->sectors_per_block);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
}
@@ -1387,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
} else {
if (unlikely(bip != NULL)) {
DMERR("Unexpected integrity data when using internal hash");
- return -EIO;
+ return DM_MAPIO_KILL;
}
}
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
- return -EIO;
+ return DM_MAPIO_KILL;
get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8d5ca30..2503960 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
if (!client->pool)
goto bad;
- client->bios = bioset_create(min_ios, 0);
+ client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
+ BIOSET_NEED_RESCUER));
if (!client->bios)
goto bad;
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
fn(error_bits, context);
}
-static void dec_count(struct io *io, unsigned int region, int error)
+static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
if (error)
set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
{
struct io *io;
unsigned region;
- int error;
+ blk_status_t error;
- if (bio->bi_error && bio_data_dir(bio) == READ)
+ if (bio->bi_status && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
/*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
*/
retrieve_io_and_region_from_bio(bio, &io, &region);
- error = bio->bi_error;
+ error = bio->bi_status;
bio_put(bio);
dec_count(io, region, error);
@@ -319,7 +320,7 @@ static void do_region(int op, int op_flags, unsigned region,
if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
atomic_inc(&io->count);
- dec_count(io, region, -EOPNOTSUPP);
+ dec_count(io, region, BLK_STS_NOTSUPP);
return;
}
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe386..a1da0eb 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
{
struct log_writes_c *lc = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
unsigned long flags;
- DMERR("Error writing log block, error=%d", bio->bi_error);
+ DMERR("Error writing log block, error=%d", bio->bi_status);
spin_lock_irqsave(&lc->blocks_lock, flags);
lc->logging_enabled = false;
spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
INIT_LIST_HEAD(&block->list);
pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
spin_lock_irq(&lc->blocks_lock);
lc->logging_enabled = false;
spin_unlock_irq(&lc->blocks_lock);
- return -ENOMEM;
+ return DM_MAPIO_KILL;
}
src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
return DM_MAPIO_REMAPPED;
}
-static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int normal_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct log_writes_c *lc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
spin_unlock_irqrestore(&lc->blocks_lock, flags);
}
- return error;
+ return DM_ENDIO_DONE;
}
/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3df056b..0e8ab5b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
return DM_MAPIO_REQUEUE;
dm_report_EIO(m);
- return -EIO;
+ return DM_MAPIO_KILL;
}
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
- bio->bi_error = 0;
+ bio->bi_status = 0;
bio->bi_bdev = pgpath->path.dev->bdev;
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
- if (r < 0 || r == DM_MAPIO_REQUEUE) {
- bio->bi_error = r;
+ switch (r) {
+ case DM_MAPIO_KILL:
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
- } else if (r == DM_MAPIO_REMAPPED)
+ break;
+ case DM_MAPIO_REMAPPED:
generic_make_request(bio);
+ break;
+ }
}
blk_finish_plug(&plug);
}
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
activate_or_offline_path(pgpath);
}
-static int noretry_error(int error)
+static int noretry_error(blk_status_t error)
{
switch (error) {
- case -EBADE:
- /*
- * EBADE signals an reservation conflict.
- * We shouldn't fail the path here as we can communicate with
- * the target. We should failover to the next path, but in
- * doing so we might be causing a ping-pong between paths.
- * So just return the reservation conflict error.
- */
- case -EOPNOTSUPP:
- case -EREMOTEIO:
- case -EILSEQ:
- case -ENODATA:
- case -ENOSPC:
+ case BLK_STS_NOTSUPP:
+ case BLK_STS_NOSPC:
+ case BLK_STS_TARGET:
+ case BLK_STS_NEXUS:
+ case BLK_STS_MEDIUM:
+ case BLK_STS_RESOURCE:
return 1;
}
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
}
static int multipath_end_io(struct dm_target *ti, struct request *clone,
- int error, union map_info *map_context)
+ blk_status_t error, union map_info *map_context)
{
struct dm_mpath_io *mpio = get_mpio(map_context);
struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- if (error == -EIO)
+ if (error == BLK_STS_IOERR)
dm_report_EIO(m);
/* complete with the original error */
r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
-static int do_end_io_bio(struct multipath *m, struct bio *clone,
- int error, struct dm_mpath_io *mpio)
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+ blk_status_t *error)
{
+ struct multipath *m = ti->private;
+ struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct pgpath *pgpath = mpio->pgpath;
unsigned long flags;
+ int r = DM_ENDIO_DONE;
- if (!error)
- return 0; /* I/O complete */
-
- if (noretry_error(error))
- return error;
+ if (!*error || noretry_error(*error))
+ goto done;
- if (mpio->pgpath)
- fail_path(mpio->pgpath);
+ if (pgpath)
+ fail_path(pgpath);
if (atomic_read(&m->nr_valid_paths) == 0 &&
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
dm_report_EIO(m);
- return -EIO;
+ *error = BLK_STS_IOERR;
+ goto done;
}
/* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_ENDIO_INCOMPLETE;
-}
-
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
-{
- struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
- struct pgpath *pgpath;
- struct path_selector *ps;
- int r;
-
- BUG_ON(!mpio);
-
- r = do_end_io_bio(m, clone, error, mpio);
- pgpath = mpio->pgpath;
+ r = DM_ENDIO_INCOMPLETE;
+done:
if (pgpath) {
- ps = &pgpath->pg->ps;
+ struct path_selector *ps = &pgpath->pg->ps;
+
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4da8858..a4fbd91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -491,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
* If device is suspended, complete the bio.
*/
if (dm_noflush_suspending(ms->ti))
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
else
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
@@ -627,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
* degrade the array.
*/
if (bio_op(bio) == REQ_OP_DISCARD) {
- bio->bi_error = -EOPNOTSUPP;
+ bio->bi_status = BLK_STS_NOTSUPP;
bio_endio(bio);
return;
}
@@ -1210,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
if (r < 0 && r != -EWOULDBLOCK)
- return r;
+ return DM_MAPIO_KILL;
/*
* If region is not in-sync queue the bio.
*/
if (!r || (r == -EWOULDBLOCK)) {
if (bio->bi_opf & REQ_RAHEAD)
- return -EWOULDBLOCK;
+ return DM_MAPIO_KILL;
queue_bio(ms, bio, rw);
return DM_MAPIO_SUBMITTED;
@@ -1229,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
*/
m = choose_mirror(ms, bio->bi_iter.bi_sector);
if (unlikely(!m))
- return -EIO;
+ return DM_MAPIO_KILL;
dm_bio_record(&bio_record->details, bio);
bio_record->m = m;
@@ -1239,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
-static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
int rw = bio_data_dir(bio);
struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1255,16 +1256,16 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
if (!(bio->bi_opf & REQ_PREFLUSH) &&
bio_op(bio) != REQ_OP_DISCARD)
dm_rh_dec(ms->rh, bio_record->write_region);
- return error;
+ return DM_ENDIO_DONE;
}
- if (error == -EOPNOTSUPP)
+ if (*error == BLK_STS_NOTSUPP)
goto out;
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
+ if (bio->bi_opf & REQ_RAHEAD)
goto out;
- if (unlikely(error)) {
+ if (unlikely(*error)) {
if (!bio_record->details.bi_bdev) {
/*
* There wasn't enough memory to record necessary
@@ -1272,7 +1273,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
* mirror in-sync.
*/
DMERR_LIMIT("Mirror read failed.");
- return -EIO;
+ return DM_ENDIO_DONE;
}
m = bio_record->m;
@@ -1291,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
dm_bio_restore(bd, bio);
bio_record->details.bi_bdev = NULL;
- bio->bi_error = 0;
+ bio->bi_status = 0;
queue_bio(ms, bio, rw);
return DM_ENDIO_INCOMPLETE;
@@ -1302,7 +1303,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
out:
bio_record->details.bi_bdev = NULL;
- return error;
+ return DM_ENDIO_DONE;
}
static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7..c6ebc5b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
static void dm_mq_start_queue(struct request_queue *q)
{
- blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
- int error = clone->bi_error;
+ blk_status_t error = clone->bi_status;
bio_put(clone);
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
- blk_update_request(tio->orig, 0, nr_bytes);
+ blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
}
static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
* Must be called without clone's queue lock held,
* see end_clone_request() for more details.
*/
-static void dm_end_request(struct request *clone, int error)
+static void dm_end_request(struct request *clone, blk_status_t error)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
rq_completed(md, rw, false);
}
-static void dm_done(struct request *clone, int error, bool mapped)
+static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
int r = DM_ENDIO_DONE;
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
- if (unlikely(error == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (req_op(clone) == REQ_OP_WRITE_SAME &&
!clone->q->limits.max_write_same_sectors)
disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
* Complete the clone and the original request with the error status
* through softirq context.
*/
-static void dm_complete_request(struct request *rq, int error)
+static void dm_complete_request(struct request *rq, blk_status_t error)
{
struct dm_rq_target_io *tio = tio_from_request(rq);
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
*/
-static void dm_kill_unmapped_request(struct request *rq, int error)
+static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
rq->rq_flags |= RQF_FAILED;
dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
/*
* Called with the clone's queue lock held (in the case of .request_fn)
*/
-static void end_clone_request(struct request *clone, int error)
+static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
- int r;
+ blk_status_t r;
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
break;
case DM_MAPIO_KILL:
/* The target wants to complete the I/O */
- dm_kill_unmapped_request(rq, -EIO);
+ dm_kill_unmapped_request(rq, BLK_STS_IOERR);
break;
default:
DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
return __dm_rq_init_rq(set->driver_data, rq);
}
-static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
}
if (ti->type->busy && ti->type->busy(ti))
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
dm_start_request(md, rq);
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static const struct blk_mq_ops dm_mq_ops = {
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d2..9813922 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
struct dm_target *ti;
struct request *orig, *clone;
struct kthread_work work;
- int error;
+ blk_status_t error;
union map_info info;
struct dm_stats_aux stats_aux;
unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d98..1ba4104 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
{
void *callback_data = bio->bi_private;
- dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0);
+ dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
}
static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
if (!s->valid)
- return -EIO;
+ return DM_MAPIO_KILL;
/* FIXME: should only take write lock if we need
* to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid || s->snapshot_overflowed) {
free_pending_exception(pe);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
DMERR("Snapshot overflowed: Unable to allocate exception.");
} else
__invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
+ r = DM_MAPIO_KILL;
goto out_unlock;
}
}
@@ -1851,14 +1851,15 @@ out_unlock:
return r;
}
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
struct dm_snapshot *s = ti->private;
if (is_bio_tracked(bio))
stop_tracking_chunk(s, bio);
- return 0;
+ return DM_ENDIO_DONE;
}
static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 7515248..11621a0 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
}
}
-static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio,
+ blk_status_t *error)
{
unsigned i;
char major_minor[16];
struct stripe_c *sc = ti->private;
- if (!error)
- return 0; /* I/O complete */
+ if (!*error)
+ return DM_ENDIO_DONE; /* I/O complete */
- if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
- return error;
+ if (bio->bi_opf & REQ_RAHEAD)
+ return DM_ENDIO_DONE;
- if (error == -EOPNOTSUPP)
- return error;
+ if (*error == BLK_STS_NOTSUPP)
+ return DM_ENDIO_DONE;
memset(major_minor, 0, sizeof(major_minor));
sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
schedule_work(&sc->trigger_event);
}
- return error;
+ return DM_ENDIO_DONE;
}
static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b75..c0d7e60 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
static int io_err_map(struct dm_target *tt, struct bio *bio)
{
- return -EIO;
+ return DM_MAPIO_KILL;
}
static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 28808e5..9dec2f8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
* Even if r is set, there could be sub discards in flight that we
* need to wait for.
*/
- if (r && !op->parent_bio->bi_error)
- op->parent_bio->bi_error = r;
+ if (r && !op->parent_bio->bi_status)
+ op->parent_bio->bi_status = errno_to_blk_status(r);
bio_endio(op->parent_bio);
}
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
}
static void cell_error_with_code(struct pool *pool,
- struct dm_bio_prison_cell *cell, int error_code)
+ struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
dm_cell_error(pool->prison, cell, error_code);
dm_bio_prison_free_cell(pool->prison, cell);
}
-static int get_pool_io_error_code(struct pool *pool)
+static blk_status_t get_pool_io_error_code(struct pool *pool)
{
- return pool->out_of_data_space ? -ENOSPC : -EIO;
+ return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}
static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- int error = get_pool_io_error_code(pool);
-
- cell_error_with_code(pool, cell, error);
+ cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}
static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
- cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+ cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}
/*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
bio_list_init(master);
}
-static void error_bio_list(struct bio_list *bios, int error)
+static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
struct bio *bio;
while ((bio = bio_list_pop(bios))) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
}
}
-static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
+ blk_status_t error)
{
struct bio_list bios;
unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
__merge_bio_list(&bios, &tc->retry_on_resume_list);
spin_unlock_irqrestore(&tc->lock, flags);
- error_bio_list(&bios, DM_ENDIO_REQUEUE);
+ error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
}
-static void error_retry_list_with_code(struct pool *pool, int error)
+static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
struct thin_c *tc;
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
static void error_retry_list(struct pool *pool)
{
- int error = get_pool_io_error_code(pool);
-
- error_retry_list_with_code(pool, error);
+ error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}
/*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
*/
atomic_t prepare_actions;
- int err;
+ blk_status_t status;
struct thin_c *tc;
dm_block_t virt_begin, virt_end;
dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
{
struct dm_thin_new_mapping *m = context;
- m->err = read_err || write_err ? -EIO : 0;
+ m->status = read_err || write_err ? BLK_STS_IOERR : 0;
complete_mapping_preparation(m);
}
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
bio->bi_end_io = m->saved_bi_end_io;
- m->err = bio->bi_error;
+ m->status = bio->bi_status;
complete_mapping_preparation(m);
}
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
struct bio *bio = m->bio;
int r;
- if (m->err) {
+ if (m->status) {
cell_error(pool, m->cell);
goto out;
}
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&tc->lock, flags);
}
-static int should_error_unserviceable_bio(struct pool *pool)
+static blk_status_t should_error_unserviceable_bio(struct pool *pool)
{
enum pool_mode m = get_pool_mode(pool);
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
case PM_WRITE:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
- return -EIO;
+ return BLK_STS_IOERR;
case PM_OUT_OF_DATA_SPACE:
- return pool->pf.error_if_no_space ? -ENOSPC : 0;
+ return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
case PM_READ_ONLY:
case PM_FAIL:
- return -EIO;
+ return BLK_STS_IOERR;
default:
/* Shouldn't get here */
DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
- return -EIO;
+ return BLK_STS_IOERR;
}
}
static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
- int error = should_error_unserviceable_bio(pool);
+ blk_status_t error = should_error_unserviceable_bio(pool);
if (error) {
- bio->bi_error = error;
+ bio->bi_status = error;
bio_endio(bio);
} else
retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
{
struct bio *bio;
struct bio_list bios;
- int error;
+ blk_status_t error;
error = should_error_unserviceable_bio(pool);
if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
unsigned count = 0;
if (tc->requeue_mode) {
- error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
+ error_thin_bio_list(tc, &tc->deferred_bio_list,
+ BLK_STS_DM_REQUEUE);
return;
}
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
pool->pf.error_if_no_space = true;
notify_of_pool_mode_change_to_oods(pool);
- error_retry_list_with_code(pool, -ENOSPC);
+ error_retry_list_with_code(pool, BLK_STS_NOSPC);
}
}
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
thin_hook_bio(tc, bio);
if (tc->requeue_mode) {
- bio->bi_error = DM_ENDIO_REQUEUE;
+ bio->bi_status = BLK_STS_DM_REQUEUE;
bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
return thin_bio_map(ti, bio);
}
-static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
+static int thin_endio(struct dm_target *ti, struct bio *bio,
+ blk_status_t *err)
{
unsigned long flags;
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
if (h->cell)
cell_defer_no_holder(h->tc, h->cell);
- return 0;
+ return DM_ENDIO_DONE;
}
static void thin_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 1ec9b2c..b46705e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
/*
* End one "io" structure with a given error.
*/
-static void verity_finish_io(struct dm_verity_io *io, int error)
+static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
{
struct dm_verity *v = io->v;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
bio->bi_end_io = io->orig_bi_end_io;
- bio->bi_error = error;
+ bio->bi_status = status;
verity_fec_finish_io(io);
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
{
struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
- verity_finish_io(io, verity_verify_io(io));
+ verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
}
static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
- verity_finish_io(io, bio->bi_error);
+ if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+ verity_finish_io(io, bio->bi_status);
return;
}
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
DMERR_LIMIT("unaligned io");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_end_sector(bio) >>
(v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
DMERR_LIMIT("io out of range");
- return -EIO;
+ return DM_MAPIO_KILL;
}
if (bio_data_dir(bio) == WRITE)
- return -EIO;
+ return DM_MAPIO_KILL;
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11..b65ca8d 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
case REQ_OP_READ:
if (bio->bi_opf & REQ_RAHEAD) {
/* readahead of null bytes only wastes buffer cache */
- return -EIO;
+ return DM_MAPIO_KILL;
}
zero_fill_bio(bio);
break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
/* writes get silently dropped */
break;
default:
- return -EIO;
+ return DM_MAPIO_KILL;
}
bio_endio(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 37ccd73..4029460 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
*/
struct dm_io {
struct mapped_device *md;
- int error;
+ blk_status_t status;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
-static void dec_pending(struct dm_io *io, int error)
+static void dec_pending(struct dm_io *io, blk_status_t error)
{
unsigned long flags;
- int io_error;
+ blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
- if (!(io->error > 0 && __noflush_suspending(md)))
- io->error = error;
+ if (!(io->status == BLK_STS_DM_REQUEUE &&
+ __noflush_suspending(md)))
+ io->status = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->error == DM_ENDIO_REQUEUE) {
+ if (io->status == BLK_STS_DM_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
- io->error = -EIO;
+ io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
- io_error = io->error;
+ io_error = io->status;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
- if (io_error == DM_ENDIO_REQUEUE)
+ if (io_error == BLK_STS_DM_REQUEUE)
return;
if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- bio->bi_error = io_error;
+ bio->bi_status = io_error;
bio_endio(bio);
}
}
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
static void clone_endio(struct bio *bio)
{
- int error = bio->bi_error;
- int r = error;
+ blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (endio) {
- r = endio(tio->ti, bio, error);
- if (r < 0 || r == DM_ENDIO_REQUEUE)
- /*
- * error and requeue request are handled
- * in dec_pending().
- */
- error = r;
- else if (r == DM_ENDIO_INCOMPLETE)
- /* The target will handle the io */
- return;
- else if (r) {
- DMWARN("unimplemented target endio return value: %d", r);
- BUG();
- }
- }
-
- if (unlikely(r == -EREMOTEIO)) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_WRITE_SAME &&
!bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
disable_write_zeroes(md);
}
+ if (endio) {
+ int r = endio(tio->ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
+
free_tio(tio);
dec_pending(io, error);
}
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
while ((bio = bio_list_pop(&list))) {
struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
+ if (unlikely(!bs) || bs == fs_bio_set ||
+ !bs->rescue_workqueue) {
bio_list_add(&current->bio_list[i], bio);
continue;
}
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
r = ti->type->map(ti, clone);
dm_offload_end(&o);
- if (r == DM_MAPIO_REMAPPED) {
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
-
trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
tio->io->bio->bi_bdev->bd_dev, sector);
-
generic_make_request(clone);
- } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
- /* error the io and bail out, or requeue it if needed */
- dec_pending(tio->io, r);
+ break;
+ case DM_MAPIO_KILL:
+ dec_pending(tio->io, BLK_STS_IOERR);
+ free_tio(tio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ dec_pending(tio->io, BLK_STS_DM_REQUEUE);
free_tio(tio);
- } else if (r != DM_MAPIO_SUBMITTED) {
+ break;
+ default:
DMWARN("unimplemented target map return value: %d", r);
BUG();
}
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
ci.map = map;
ci.md = md;
ci.io = alloc_io(md);
- ci.io->error = 0;
+ ci.io->status = 0;
atomic_set(&ci.io->io_count, 1);
ci.io->bio = bio;
ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
* Initialize aspects of queue that aren't relevant for blk-mq
*/
md->queue->backing_dev_info->congested_fn = dm_any_congested;
- blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}
static void cleanup_mapped_device(struct mapped_device *md)
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
BUG();
}
- pools->bs = bioset_create_nobvec(pool_size, front_pad);
+ pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
if (!pools->bs)
goto out;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84e76eb..31bcbfb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
static bool create_on_open = true;
/* bio_clone_mddev
- * like bio_clone, but with a local bio set
+ * like bio_clone_bioset, but with a local bio set
*/
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
unsigned int sectors;
int cpu;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (mddev == NULL || mddev->pers == NULL) {
bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
}
if (mddev->ro == 1 && unlikely(rw == WRITE)) {
if (bio_sectors(bio) != 0)
- bio->bi_error = -EROFS;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
- if (bio->bi_error) {
- pr_err("md: super_written gets error=%d\n", bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("md: super_written gets error=%d\n", bio->bi_status);
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
submit_bio_wait(bio);
- ret = !bio->bi_error;
+ ret = !bio->bi_status;
bio_put(bio);
return ret;
}
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
}
if (mddev->bio_set == NULL) {
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!mddev->bio_set)
return -ENOMEM;
}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521..68d036e 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
{
struct bio *bio = mp_bh->master_bio;
struct mpconf *conf = mp_bh->mddev->private;
- bio->bi_error = err;
+ bio->bi_status = status;
bio_endio(bio);
mempool_free(mp_bh, conf->pool);
}
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
struct mpconf *conf = mp_bh->mddev->private;
struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
- if (!bio->bi_error)
+ if (!bio->bi_status)
multipath_end_bh_io(mp_bh, 0);
else if (!(bio->bi_opf & REQ_RAHEAD)) {
/*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
(unsigned long long)bio->bi_iter.bi_sector);
multipath_reschedule_retry(mp_bh);
} else
- multipath_end_bh_io(mp_bh, bio->bi_error);
+ multipath_end_bh_io(mp_bh, bio->bi_status);
rdev_dec_pending(rdev, conf->mddev);
}
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
- multipath_end_bh_io(mp_bh, -EIO);
+ multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e1a7e3d..98ca2c1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
static void raid1_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = bio->bi_private;
struct r1conf *conf = r1_bio->mddev->private;
struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
/*
* 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R1BIO_Uptodate, &r1_bio->state);
if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
static void end_sync_write(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r1bio *r1_bio = get_resync_r1bio(bio);
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
idx ++;
}
set_bit(R1BIO_Uptodate, &r1_bio->state);
- bio->bi_error = 0;
+ bio->bi_status = 0;
return 1;
}
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int size;
- int error;
+ blk_status_t status;
struct bio_vec *bi;
struct bio *b = r1_bio->bios[i];
struct resync_pages *rp = get_resync_pages(b);
if (b->bi_end_io != end_sync_read)
continue;
/* fixup the bio for reuse, but preserve errno */
- error = b->bi_error;
+ status = b->bi_status;
bio_reset(b);
- b->bi_error = error;
+ b->bi_status = status;
b->bi_vcnt = vcnt;
b->bi_iter.bi_size = r1_bio->sectors << 9;
b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
}
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
- !r1_bio->bios[primary]->bi_error) {
+ !r1_bio->bios[primary]->bi_status) {
r1_bio->bios[primary]->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
int j;
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
- int error = sbio->bi_error;
+ blk_status_t status = sbio->bi_status;
struct page **ppages = get_resync_pages(pbio)->pages;
struct page **spages = get_resync_pages(sbio)->pages;
struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
- sbio->bi_error = 0;
+ sbio->bi_status = 0;
bio_for_each_segment_all(bi, sbio, j)
page_len[j] = bi->bv_len;
- if (!error) {
+ if (!status) {
for (j = vcnt; j-- ; ) {
if (memcmp(page_address(ppages[j]),
page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
if (j >= 0)
atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
- && !error)) {
+ && !status)) {
/* No need to write to this device. */
sbio->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
continue;
- if (!bio->bi_error &&
+ if (!bio->bi_status &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) {
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
}
- if (bio->bi_error &&
+ if (bio->bi_status &&
test_bit(R1BIO_WriteError, &r1_bio->state)) {
if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf->r1bio_pool)
goto abort;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 797ed60..57a250f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
struct r10conf *conf = r10_bio->mddev->private;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
static void raid10_end_read_request(struct bio *bio)
{
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
struct bio *to_put = NULL;
bool discard_error;
- discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
+ discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (bio->bi_error && !discard_error) {
+ if (bio->bi_status && !discard_error) {
if (repl)
/* Never record new bad blocks to replacement,
* just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
{
struct r10conf *conf = r10_bio->mddev->private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
set_bit(R10BIO_Uptodate, &r10_bio->state);
else
/* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
else
rdev = conf->mirrors[d].rdev;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (repl)
md_error(mddev, rdev);
else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
/* find the first device with a block */
for (i=0; i<conf->copies; i++)
- if (!r10_bio->devs[i].bio->bi_error)
+ if (!r10_bio->devs[i].bio->bi_status)
break;
if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
tpages = get_resync_pages(tbio)->pages;
d = r10_bio->devs[i].devnum;
rdev = conf->mirrors[d].rdev;
- if (!r10_bio->devs[i].bio->bi_error) {
+ if (!r10_bio->devs[i].bio->bi_status) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
continue;
- if (!r10_bio->devs[m].bio->bi_error) {
+ if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (r10_bio->devs[m].repl_bio == NULL)
continue;
- if (!r10_bio->devs[m].repl_bio->bi_error) {
+ if (!r10_bio->devs[m].repl_bio->bi_status) {
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev);
- } else if (bio != NULL && bio->bi_error) {
+ } else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
r10_bio->devs[i].repl_bio->bi_end_io = NULL;
bio = r10_bio->devs[i].bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
rcu_read_lock();
rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
/* Need to set up for writing to the replacement */
bio = r10_bio->devs[i].repl_bio;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
sector = r10_bio->devs[i].addr;
bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
- bio->bi_error = 0;
+ bio->bi_status = 0;
generic_make_request(bio);
}
}
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
if (!conf->r10bio_pool)
goto out;
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto out;
@@ -4397,7 +4397,7 @@ read_more:
read_bio->bi_end_io = end_reshape_read;
bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
- read_bio->bi_error = 0;
+ read_bio->bi_status = 0;
read_bio->bi_vcnt = 0;
read_bio->bi_iter.bi_size = 0;
r10_bio->master_bio = read_bio;
@@ -4641,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
rdev = conf->mirrors[d].rdev;
}
- if (bio->bi_error) {
+ if (bio->bi_status) {
/* FIXME should record badblock */
md_error(mddev, rdev);
}
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0a7af8b..bfa1e90 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
struct r5l_log *log = io->log;
unsigned long flags;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
unsigned long flags;
struct r5l_io_unit *io;
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(log->rdev->mddev, log->rdev);
spin_lock_irqsave(&log->io_list_lock, flags);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
if (!log->io_pool)
goto io_pool;
- log->bs = bioset_create(R5L_POOL_SIZE, 0);
+ log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!log->bs)
goto io_bs;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index ccce92e..77cce35 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
pr_debug("%s: seq: %llu\n", __func__, io->seq);
- if (bio->bi_error)
+ if (bio->bi_status)
md_error(ppl_conf->mddev, log->rdev);
list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
goto err;
}
- ppl_conf->bs = bioset_create(conf->raid_disks, 0);
+ ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
if (!ppl_conf->bs) {
ret = -ENOMEM;
goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ec0f951..62c965b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
s = sh->sector + rdev->new_data_offset;
else
s = sh->sector + rdev->data_offset;
- if (!bi->bi_error) {
+ if (!bi->bi_status) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
/* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
}
pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
(unsigned long long)sh->sector, i, atomic_read(&sh->count),
- bi->bi_error);
+ bi->bi_status);
if (i == disks) {
bio_reset(bi);
BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
}
if (replacement) {
- if (bi->bi_error)
+ if (bi->bi_status)
md_error(conf->mddev, rdev);
else if (is_badblock(rdev, sh->sector,
STRIPE_SECTORS,
&first_bad, &bad_sectors))
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
- if (bi->bi_error) {
+ if (bi->bi_status) {
set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
}
rdev_dec_pending(rdev, conf->mddev);
- if (sh->batch_head && bi->bi_error && !replacement)
+ if (sh->batch_head && bi->bi_status && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev);
bio_endio(bi);
bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
bio_endio(bi);
bi = nextbi;
}
@@ -5154,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
struct mddev *mddev;
struct r5conf *conf;
struct md_rdev *rdev;
- int error = bi->bi_error;
+ blk_status_t error = bi->bi_status;
bio_put(bi);
@@ -5731,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
- bi->bi_error = -EIO;
+ bi->bi_status = BLK_STS_IOERR;
break;
}
}
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
}
- conf->bio_split = bioset_create(BIO_POOL_SIZE, 0);
+ conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
if (!conf->bio_split)
goto abort;
conf->mddev = mddev;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c..22de7f5 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
spin_lock_irqsave(&msb->q_lock, flags);
if (len)
- if (!__blk_end_request(msb->req, 0, len))
+ if (!__blk_end_request(msb->req, BLK_STS_OK, len))
msb->req = NULL;
if (error && msb->req) {
+ blk_status_t ret = errno_to_blk_status(error);
dbg_verbose("IO: ending one sector of the request with error");
- if (!__blk_end_request(msb->req, error, msb->page_size))
+ if (!__blk_end_request(msb->req, ret, msb->page_size))
msb->req = NULL;
}
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
WARN_ON(!msb->io_queue_stopped);
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a2..8897962 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
msb->req_sg);
if (!msb->seg_count) {
- chunk = __blk_end_request_cur(msb->block_req, -ENOMEM);
+ chunk = __blk_end_request_cur(msb->block_req,
+ BLK_STS_RESOURCE);
continue;
}
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
if (error && !t_len)
t_len = blk_rq_cur_bytes(msb->block_req);
- chunk = __blk_end_request(msb->block_req, error, t_len);
+ chunk = __blk_end_request(msb->block_req,
+ errno_to_blk_status(error), t_len);
error = mspro_block_issue_req(card, chunk);
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
if (msb->eject) {
while ((req = blk_fetch_request(q)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b07..6ff94a9 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_DISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!mmc_can_erase(card)) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto fail;
}
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
if (!err)
err = mmc_erase(card, from, nr, arg);
} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
- if (!err)
+ if (err)
+ status = BLK_STS_IOERR;
+ else
mmc_blk_reset_success(md, type);
fail:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
struct mmc_card *card = md->queue.card;
unsigned int from, nr, arg;
int err = 0, type = MMC_BLK_SECDISCARD;
+ blk_status_t status = BLK_STS_OK;
if (!(mmc_can_secure_erase_trim(card))) {
- err = -EOPNOTSUPP;
+ status = BLK_STS_NOTSUPP;
goto out;
}
@@ -1254,8 +1258,10 @@ retry:
err = mmc_erase(card, from, nr, arg);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
if (arg == MMC_SECURE_TRIM1_ARG) {
if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
if (err == -EIO)
goto out_retry;
- if (err)
+ if (err) {
+ status = BLK_STS_IOERR;
goto out;
+ }
}
out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
if (!err)
mmc_blk_reset_success(md, type);
out:
- blk_end_request(req, err, blk_rq_bytes(req));
+ blk_end_request(req, status, blk_rq_bytes(req));
}
static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
int ret = 0;
ret = mmc_flush_cache(card);
- if (ret)
- ret = -EIO;
-
- blk_end_request_all(req, ret);
+ blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
}
/*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
{
if (mmc_card_removed(card))
req->rq_flags |= RQF_QUIET;
- while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req)));
+ while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
mmc_queue_req_free(mq, mqrq);
}
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
*/
if (mmc_card_removed(mq->card)) {
req->rq_flags |= RQF_QUIET;
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
mmc_queue_req_free(mq, mqrq);
return;
}
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
*/
mmc_blk_reset_success(md, type);
- req_pending = blk_end_request(old_req, 0,
+ req_pending = blk_end_request(old_req, BLK_STS_OK,
brq->data.bytes_xfered);
/*
* If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
* time, so we only reach here after trying to
* read a single sector.
*/
- req_pending = blk_end_request(old_req, -EIO,
+ req_pending = blk_end_request(old_req, BLK_STS_IOERR,
brq->data.blksz);
if (!req_pending) {
mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
ret = mmc_blk_part_switch(card, md);
if (ret) {
if (req) {
- blk_end_request_all(req, -EIO);
+ blk_end_request_all(req, BLK_STS_IOERR);
}
goto out;
}
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6b..b659a28 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
if (!mq) {
while ((req = blk_fetch_request(q)) != NULL) {
req->rq_flags |= RQF_QUIET;
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
}
return;
}
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
mmc_queue_setup_discard(mq->queue, card);
if (card->bouncesz) {
- blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
blk_queue_max_segments(mq->queue, card->bouncesz / 512);
blk_queue_max_segment_size(mq->queue, card->bouncesz);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd..f336a9b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
}
-static int do_blktrans_request(struct mtd_blktrans_ops *tr,
+static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
struct mtd_blktrans_dev *dev,
struct request *req)
{
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
buf = bio_data(req->bio);
- if (req_op(req) == REQ_OP_FLUSH)
- return tr->flush(dev);
+ if (req_op(req) == REQ_OP_FLUSH) {
+ if (tr->flush(dev))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
+ }
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
get_capacity(req->rq_disk))
- return -EIO;
+ return BLK_STS_IOERR;
switch (req_op(req)) {
case REQ_OP_DISCARD:
- return tr->discard(dev, block, nsect);
+ if (tr->discard(dev, block, nsect))
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
case REQ_OP_READ:
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
- return 0;
+ return BLK_STS_OK;
case REQ_OP_WRITE:
if (!tr->writesect)
- return -EIO;
+ return BLK_STS_IOERR;
rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
default:
- return -EIO;
+ return BLK_STS_IOERR;
}
}
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
spin_lock_irq(rq->queue_lock);
while (1) {
- int res;
+ blk_status_t res;
dev->bg_stop = false;
if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
if (!dev)
while ((req = blk_fetch_request(rq)) != NULL)
- __blk_end_request_all(req, -ENODEV);
+ __blk_end_request_all(req, BLK_STS_IOERR);
else
queue_work(dev->wq, &dev->work);
}
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
+ blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65..c3963f8 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
ret = ubiblock_read(pdu);
rq_flush_dcache_pages(req);
- blk_mq_end_request(req, ret);
+ blk_mq_end_request(req, errno_to_blk_status(ret));
}
-static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
case REQ_OP_READ:
ubi_sgl_init(&pdu->usgl);
queue_work(dev->wq, &pdu->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a..f12d23c 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
"io error in %s sector %lld, len %d,\n",
(rw == READ) ? "READ" : "WRITE",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b..b6ba061 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
* another kernel subsystem, and we just pass it through.
*/
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
goto out;
}
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
(op_is_write(bio_op(bio))) ? "WRITE" :
"READ",
(unsigned long long) iter.bi_sector, len);
- bio->bi_error = err;
+ bio->bi_status = errno_to_blk_status(err);
break;
}
}
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
blk_queue_make_request(btt->btt_queue, btt_make_request);
blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
- blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
btt->btt_queue->queuedata = btt;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d46..6b577af 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
return to_nd_region(to_dev(pmem)->parent);
}
-static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
- unsigned int len)
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
{
struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
sector = (offset - pmem->data_offset) / 512;
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared < len)
- rc = -EIO;
+ rc = BLK_STS_IOERR;
if (cleared > 0 && cleared / 512) {
cleared /= 512;
dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
kunmap_atomic(mem);
}
-static int read_pmem(struct page *page, unsigned int off,
+static blk_status_t read_pmem(struct page *page, unsigned int off,
void *pmem_addr, unsigned int len)
{
int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
rc = memcpy_mcsafe(mem + off, pmem_addr, len);
kunmap_atomic(mem);
if (rc)
- return -EIO;
- return 0;
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
-static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, bool is_write,
sector_t sector)
{
- int rc = 0;
+ blk_status_t rc = BLK_STS_OK;
bool bad_pmem = false;
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (!is_write) {
if (unlikely(bad_pmem))
- rc = -EIO;
+ rc = BLK_STS_IOERR;
else {
rc = read_pmem(page, off, pmem_addr, len);
flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
- int rc = 0;
+ blk_status_t rc = 0;
bool do_acct;
unsigned long start;
struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
bvec.bv_offset, op_is_write(bio_op(bio)),
iter.bi_sector);
if (rc) {
- bio->bi_error = rc;
+ bio->bi_status = rc;
break;
}
}
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
- int rc;
+ blk_status_t rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
if (rc == 0)
page_endio(page, is_write, 0);
- return rc;
+ return blk_status_to_errno(rc);
}
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
q->queuedata = pmem;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a6..46d6cb1 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
-config BLK_DEV_NVME_SCSI
- bool "SCSI emulation for NVMe device nodes"
- depends on NVME_CORE
- ---help---
- This adds support for the SG_IO ioctl on the NVMe character
- and block devices nodes, as well as a translation for a small
- number of selected SCSI commands to NVMe commands to the NVMe
- driver. If you don't know what this means you probably want
- to say N here, unless you run a distro that abuses the SCSI
- emulation to provide stable device names for mount by id, like
- some OpenSuSE and SLES versions.
-
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d94..cc0aacb 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
nvme-core-y := core.o
-nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 903d581..d70df1d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
-#include <scsi/sg.h>
#include <asm/unaligned.h>
#include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
-unsigned char shutdown_timeout = 5;
+static unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
@@ -65,34 +64,53 @@ static bool force_apst;
module_param(force_apst, bool, 0644);
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+static bool streams;
+module_param(streams, bool, 0644);
+MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+
+struct workqueue_struct *nvme_wq;
+EXPORT_SYMBOL_GPL(nvme_wq);
+
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
-static int nvme_error_status(struct request *req)
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ return -EBUSY;
+ if (!queue_work(nvme_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
+
+static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
+ ret = nvme_reset_ctrl(ctrl);
+ if (!ret)
+ flush_work(&ctrl->reset_work);
+ return ret;
+}
+
+static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
case NVME_SC_SUCCESS:
- return 0;
+ return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
-
- /*
- * XXX: these errors are a nasty side-band protocol to
- * drivers/md/dm-mpath.c:noretry_error() that aren't documented
- * anywhere..
- */
- case NVME_SC_CMD_SEQ_ERROR:
- return -EILSEQ;
+ return BLK_STS_NOSPC;
case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
+ return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
- return -ENODATA;
+ return BLK_STS_MEDIUM;
+ default:
+ return BLK_STS_IOERR;
}
}
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_RECONNECTING:
changed = true;
/* FALLTHRU */
default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(nvme_alloc_request);
+static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+
+ c.directive.opcode = nvme_admin_directive_send;
+ c.directive.nsid = cpu_to_le32(0xffffffff);
+ c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
+ c.directive.dtype = NVME_DIR_IDENTIFY;
+ c.directive.tdtype = NVME_DIR_STREAMS;
+ c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
+}
+
+static int nvme_disable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, false);
+}
+
+static int nvme_enable_streams(struct nvme_ctrl *ctrl)
+{
+ return nvme_toggle_streams(ctrl, true);
+}
+
+static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
+ struct streams_directive_params *s, u32 nsid)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ memset(s, 0, sizeof(*s));
+
+ c.directive.opcode = nvme_admin_directive_recv;
+ c.directive.nsid = cpu_to_le32(nsid);
+ c.directive.numd = sizeof(*s);
+ c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
+ c.directive.dtype = NVME_DIR_STREAMS;
+
+ return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
+}
+
+static int nvme_configure_directives(struct nvme_ctrl *ctrl)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
+ return 0;
+ if (!streams)
+ return 0;
+
+ ret = nvme_enable_streams(ctrl);
+ if (ret)
+ return ret;
+
+ ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
+ if (ret)
+ return ret;
+
+ ctrl->nssa = le16_to_cpu(s.nssa);
+ if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
+ dev_info(ctrl->device, "too few streams (%u) available\n",
+ ctrl->nssa);
+ nvme_disable_streams(ctrl);
+ return 0;
+ }
+
+ ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+ dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
+ return 0;
+}
+
+/*
+ * Check if 'req' has a write hint associated with it. If it does, assign
+ * a valid namespace stream to the write.
+ */
+static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
+ struct request *req, u16 *control,
+ u32 *dsmgmt)
+{
+ enum rw_hint streamid = req->write_hint;
+
+ if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
+ streamid = 0;
+ else {
+ streamid--;
+ if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
+ return;
+
+ *control |= NVME_RW_DTYPE_STREAMS;
+ *dsmgmt |= streamid << 16;
+ }
+
+ if (streamid < ARRAY_SIZE(req->q->write_hints))
+ req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
cmnd->common.nsid = cpu_to_le32(ns->ns_id);
}
-static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
+static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
if (!range)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
__rq_for_each_bio(bio, req) {
u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
if (WARN_ON_ONCE(n != segments)) {
kfree(range);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_len = sizeof(*range) * segments;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
- struct nvme_command *cmnd)
+static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 control = 0;
u32 dsmgmt = 0;
+ /*
+ * If formated with metadata, require the block layer provide a buffer
+ * unless this namespace is formated such that the metadata can be
+ * stripped/generated by the controller with PRACT=1.
+ */
+ if (ns && ns->ms &&
+ (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
+ !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
+ return BLK_STS_NOTSUPP;
+
if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;
if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
+ nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
+
if (ns->ms) {
switch (ns->pi_type) {
case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+ return 0;
}
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd)
{
- int ret = BLK_MQ_RQ_QUEUE_OK;
+ blk_status_t ret = BLK_STS_OK;
if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
break;
case REQ_OP_READ:
case REQ_OP_WRITE:
- nvme_setup_rw(ns, req, cmd);
+ ret = nvme_setup_rw(ns, req, cmd);
break;
default:
WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
result, timeout);
}
-static void nvme_keep_alive_end_io(struct request *rq, int error)
+static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
{
struct nvme_ctrl *ctrl = rq->end_io_data;
blk_mq_free_request(rq);
- if (error) {
+ if (status) {
dev_err(ctrl->device,
- "failed nvme_keep_alive_end_io error=%d\n", error);
+ "failed nvme_keep_alive_end_io error=%d\n",
+ status);
return;
}
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
if (nvme_keep_alive(ctrl)) {
/* allocation failure, reset the controller */
dev_err(ctrl->device, "keep-alive failed\n");
- ctrl->ops->reset_ctrl(ctrl);
+ nvme_reset_ctrl(ctrl);
return;
}
}
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
return error;
}
+static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
+{
+ struct nvme_command c = { };
+ int status;
+ void *data;
+ int pos;
+ int len;
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+ c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
+
+ data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
+ NVME_IDENTIFY_DATA_SIZE);
+ if (status)
+ goto free_data;
+
+ for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+ struct nvme_ns_id_desc *cur = data + pos;
+
+ if (cur->nidl == 0)
+ break;
+
+ switch (cur->nidt) {
+ case NVME_NIDT_EUI64:
+ if (cur->nidl != NVME_NIDT_EUI64_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_EUI64_LEN;
+ memcpy(ns->eui, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_NGUID:
+ if (cur->nidl != NVME_NIDT_NGUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_NGUID_LEN;
+ memcpy(ns->nguid, data + pos + sizeof(*cur), len);
+ break;
+ case NVME_NIDT_UUID:
+ if (cur->nidl != NVME_NIDT_UUID_LEN) {
+ dev_warn(ns->ctrl->device,
+ "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
+ cur->nidl);
+ goto free_data;
+ }
+ len = NVME_NIDT_UUID_LEN;
+ uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
+ break;
+ default:
+ /* Skip unnkown types */
+ len = cur->nidl;
+ break;
+ }
+
+ len += sizeof(*cur);
+ }
+free_data:
+ kfree(data);
+ return status;
+}
+
static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id)
{
struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
return error;
}
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result)
-{
- struct nvme_command c;
- union nvme_result res;
- int ret;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.fid = cpu_to_le32(fid);
-
- ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
- NVME_QID_ANY, 0, 0);
- if (ret >= 0 && result)
- *result = le32_to_cpu(res.u32);
- return ret;
-}
-
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
void *buffer, size_t buflen, u32 *result)
{
struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
return ret;
}
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
-}
-
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
* access to the admin queue, as that might be only way to fix them up.
*/
if (status > 0) {
- dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
*count = 0;
} else {
nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
case NVME_IOCTL_SUBMIT_IO:
return nvme_submit_io(ns, (void __user *)arg);
-#ifdef CONFIG_BLK_DEV_NVME_SCSI
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
-#endif
default:
#ifdef CONFIG_NVM
if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+static void nvme_set_chunk_size(struct nvme_ns *ns)
+{
+ u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
+}
+
static void nvme_config_discard(struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
+ if (ctrl->nr_streams && ns->sws && ns->sgs) {
+ unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+
+ ns->queue->limits.discard_alignment = sz;
+ ns->queue->limits.discard_granularity = sz;
+ } else {
+ ns->queue->limits.discard_alignment = logical_block_size;
+ ns->queue->limits.discard_granularity = logical_block_size;
+ }
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
- memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
+ memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
+ if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
+ /* Don't treat error as fatal we potentially
+ * already have a NGUID or EUI-64
+ */
+ if (nvme_identify_ns_descs(ns, ns->ns_id))
+ dev_warn(ns->ctrl->device,
+ "%s: Identify Descriptors failed\n", __func__);
+ }
return 0;
}
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
u16 bs;
/*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
bs = 1 << ns->lba_shift;
+ ns->noiob = le16_to_cpu(id->noiob);
blk_mq_freeze_queue(disk->queue);
- if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs);
+ if (ns->noiob)
+ nvme_set_chunk_size(ns);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
- unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+ unsigned long timeout = jiffies + (shutdown_timeout * HZ);
u32 csts;
int ret;
@@ -1372,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
if (!table)
return;
- if (ctrl->ps_max_latency_us == 0) {
+ if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
/* Turn off APST. */
apste = 0;
dev_dbg(ctrl->device, "APST disabled\n");
@@ -1528,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
string_matches(id->fr, q->fr, sizeof(id->fr));
}
+static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+ size_t nqnlen;
+ int off;
+
+ nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+ if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+ strcpy(ctrl->subnqn, id->subnqn);
+ return;
+ }
+
+ if (ctrl->vs >= NVME_VS(1, 2, 1))
+ dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+
+ /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
+ off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
+ "nqn.2014.08.org.nvmexpress:%4x%4x",
+ le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
+ memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
+ off += sizeof(id->sn);
+ memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
+ off += sizeof(id->mn);
+ memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@@ -1539,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
u64 cap;
int ret, page_shift;
u32 max_hw_sectors;
- u8 prev_apsta;
+ bool prev_apst_enabled;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
if (ret) {
@@ -1563,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
return -EIO;
}
+ nvme_init_subnqn(ctrl, id);
+
if (!ctrl->identified) {
/*
* Check for quirks. Quirk can depend on firmware version,
@@ -1582,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
- dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+ dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
}
@@ -1607,16 +1811,17 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->kas = le16_to_cpu(id->kas);
ctrl->npss = id->npss;
- prev_apsta = ctrl->apsta;
+ ctrl->apsta = id->apsta;
+ prev_apst_enabled = ctrl->apst_enabled;
if (ctrl->quirks & NVME_QUIRK_NO_APST) {
if (force_apst && id->apsta) {
- dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
- ctrl->apsta = 1;
+ dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+ ctrl->apst_enabled = true;
} else {
- ctrl->apsta = 0;
+ ctrl->apst_enabled = false;
}
} else {
- ctrl->apsta = id->apsta;
+ ctrl->apst_enabled = id->apsta;
}
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
@@ -1634,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ret = -EINVAL;
if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
- dev_err(ctrl->dev,
+ dev_err(ctrl->device,
"keep-alive support is mandatory for fabrics\n");
ret = -EINVAL;
}
} else {
ctrl->cntlid = le16_to_cpu(id->cntlid);
+ ctrl->hmpre = le32_to_cpu(id->hmpre);
+ ctrl->hmmin = le32_to_cpu(id->hmmin);
}
kfree(id);
- if (ctrl->apsta && !prev_apsta)
+ if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
- else if (!ctrl->apsta && prev_apsta)
+ else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
nvme_configure_apst(ctrl);
+ nvme_configure_directives(ctrl);
ctrl->identified = true;
@@ -1735,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
dev_warn(ctrl->device, "resetting controller\n");
- return ctrl->ops->reset_ctrl(ctrl);
+ return nvme_reset_ctrl_sync(ctrl);
case NVME_IOCTL_SUBSYS_RESET:
return nvme_reset_subsystem(ctrl);
case NVME_IOCTL_RESCAN:
@@ -1761,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
int ret;
- ret = ctrl->ops->reset_ctrl(ctrl);
+ ret = nvme_reset_ctrl_sync(ctrl);
if (ret < 0)
return ret;
return count;
@@ -1787,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
int serial_len = sizeof(ctrl->serial);
int model_len = sizeof(ctrl->model);
- if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
- return sprintf(buf, "eui.%16phN\n", ns->uuid);
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return sprintf(buf, "eui.%16phN\n", ns->nguid);
if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1803,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+ return sprintf(buf, "%pU\n", ns->nguid);
+}
+static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
+
static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
- return sprintf(buf, "%pU\n", ns->uuid);
+
+ /* For backward compatibility expose the NGUID to userspace if
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ns->uuid)) {
+ printk_ratelimited(KERN_WARNING
+ "No UUID available providing old NGUID\n");
+ return sprintf(buf, "%pU\n", ns->nguid);
+ }
+ return sprintf(buf, "%pU\n", &ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
@@ -1830,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
+ &dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
NULL,
@@ -1842,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
if (a == &dev_attr_uuid.attr) {
- if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
+ if (uuid_is_null(&ns->uuid) ||
+ !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_nguid.attr) {
+ if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
return 0;
}
if (a == &dev_attr_eui.attr) {
@@ -1931,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
{
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%s\n",
- ctrl->ops->get_subsysnqn(ctrl));
+ return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
}
static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
@@ -1961,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
NULL
};
-#define CHECK_ATTR(ctrl, a, name) \
- if ((a) == &dev_attr_##name.attr && \
- !(ctrl)->ops->get_##name) \
- return 0
-
static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- if (a == &dev_attr_delete_controller.attr) {
- if (!ctrl->ops->delete_ctrl)
- return 0;
- }
-
- CHECK_ATTR(ctrl, a, subsysnqn);
- CHECK_ATTR(ctrl, a, address);
+ if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+ return 0;
+ if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+ return 0;
return a->mode;
}
@@ -2019,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!ctrl->nr_streams)
+ return 0;
+
+ ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
+ if (ret)
+ return ret;
+
+ ns->sws = le32_to_cpu(s.sws);
+ ns->sgs = le16_to_cpu(s.sgs);
+
+ if (ns->sws) {
+ unsigned int bs = 1 << ns->lba_shift;
+
+ blk_queue_io_min(ns->queue, bs * ns->sws);
+ if (ns->sgs)
+ blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
+ }
+
+ return 0;
+}
+
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -2048,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
nvme_set_queue_limits(ctrl, ns->queue);
+ nvme_setup_streams_ns(ctrl, ns);
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
@@ -2056,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
- dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__);
+ dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
goto out_free_id;
}
@@ -2231,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
* removal.
*/
if (ctrl->state == NVME_CTRL_LIVE)
- schedule_work(&ctrl->scan_work);
+ queue_work(nvme_wq, &ctrl->scan_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_scan);
@@ -2286,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
@@ -2309,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_queue_async_events(struct nvme_ctrl *ctrl)
{
ctrl->event_limit = NVME_NR_AERS;
- schedule_work(&ctrl->async_event_work);
+ queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
@@ -2442,6 +2691,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ctrl->admin_q);
+
/* Forcibly start all queues to avoid having stuck requests */
blk_mq_start_hw_queues(ctrl->admin_q);
@@ -2455,6 +2707,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
+
/*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
@@ -2533,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_unquiesce_queue(ns->queue);
blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
@@ -2544,10 +2799,15 @@ int __init nvme_core_init(void)
{
int result;
+ nvme_wq = alloc_workqueue("nvme-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_wq)
+ return -ENOMEM;
+
result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
&nvme_dev_fops);
if (result < 0)
- return result;
+ goto destroy_wq;
else if (result > 0)
nvme_char_major = result;
@@ -2559,8 +2819,10 @@ int __init nvme_core_init(void)
return 0;
- unregister_chrdev:
+unregister_chrdev:
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+destroy_wq:
+ destroy_workqueue(nvme_wq);
return result;
}
@@ -2568,6 +2830,7 @@ void nvme_core_exit(void)
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ destroy_workqueue(nvme_wq);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c190d7e..2e582a2 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
kref_init(&host->ref);
memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
- uuid_gen(&host->id);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
return NULL;
kref_init(&host->ref);
- uuid_gen(&host->id);
snprintf(host->nqn, NVMF_NQN_SIZE,
"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
EXPORT_SYMBOL_GPL(nvmf_get_address);
/**
- * nvmf_get_subsysnqn() - Get subsystem NQN
- * @ctrl: Host NVMe controller instance which we got the NQN
- */
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
-{
- return ctrl->opts->subsysnqn;
-}
-EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
-
-/**
* nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
* @ctrl: Host NVMe controller instance maintaining the admin
* queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
}
}
break;
+
+ case NVME_SC_CONNECT_INVALID_HOST:
+ dev_err(ctrl->device,
+ "Connect for subsystem %s is not allowed, hostnqn: %s\n",
+ data->subsysnqn, data->hostnqn);
+ break;
+
+ case NVME_SC_CONNECT_CTRL_BUSY:
+ dev_err(ctrl->device,
+ "Connect command failed: controller is busy or not available\n");
+ break;
+
+ case NVME_SC_CONNECT_FORMAT:
+ dev_err(ctrl->device,
+ "Connect incompatible format: %d",
+ cmd->connect.recfmt);
+ break;
+
default:
dev_err(ctrl->device,
"Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
cmd.connect.opcode = nvme_fabrics_command;
cmd.connect.fctype = nvme_fabrics_type_connect;
cmd.connect.qid = 0;
-
- /*
- * fabrics spec sets a minimum of depth 32 for admin queue,
- * so set the queue with this depth always until
- * justification otherwise.
- */
- cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
/*
* Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
if (ctrl->opts->max_reconnects != -1 &&
- ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects)
+ ctrl->nr_reconnects < ctrl->opts->max_reconnects)
return true;
return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_KATO, "keep_alive_tmo=%d" },
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
+ { NVMF_OPT_HOST_ID, "hostid=%s" },
{ NVMF_OPT_ERR, NULL }
};
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
int token, ret = 0;
size_t nqnlen = 0;
int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+ uuid_t hostid;
/* Set defaults */
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (!options)
return -ENOMEM;
+ uuid_gen(&hostid);
+
while ((p = strsep(&o, ",\n")) != NULL) {
if (!*p)
continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->host_traddr = p;
break;
+ case NVMF_OPT_HOST_ID:
+ p = match_strdup(args);
+ if (!p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (uuid_parse(p, &hostid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->host = nvmf_default_host;
}
+ uuid_copy(&opts->host->id, &hostid);
+
out:
if (!opts->discovery_nqn && !opts->kato)
opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
- NVMF_OPT_KATO | NVMF_OPT_HOSTNQN)
+ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
+ NVMF_OPT_HOST_ID)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_unlock;
}
+ if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
+ dev_warn(ctrl->device,
+ "controller returned incorrect NQN: \"%s\".\n",
+ ctrl->subnqn);
+ mutex_unlock(&nvmf_transports_mutex);
+ ctrl->ops->delete_ctrl(ctrl);
+ return ERR_PTR(-EINVAL);
+ }
+
mutex_unlock(&nvmf_transports_mutex);
return ctrl;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 29be760..bf33663 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -56,6 +56,7 @@ enum {
NVMF_OPT_RECONNECT_DELAY = 1 << 9,
NVMF_OPT_HOST_TRADDR = 1 << 10,
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
+ NVMF_OPT_HOST_ID = 1 << 12,
};
/**
@@ -80,7 +81,6 @@ enum {
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
* @kato: Keep-alive timeout.
* @host: Virtual NVMe host, contains the NQN and Host ID.
- * @nr_reconnects: number of reconnect attempted since the last ctrl failure
* @max_reconnects: maximum number of allowed reconnect attempts before removing
* the controller, (-1) means reconnect forever, zero means remove
* immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
bool discovery_nqn;
unsigned int kato;
struct nvmf_host *host;
- int nr_reconnects;
int max_reconnects;
};
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);
void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
-const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5ee4c71..ed87214 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
*/
#define NVME_FC_NR_AEN_COMMANDS 1
#define NVME_FC_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1)
enum nvme_fc_queue_flags {
@@ -161,12 +161,12 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct delayed_work connect_work;
struct kref ref;
u32 flags;
u32 iocnt;
+ wait_queue_head_t ioabort_wait;
struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS];
@@ -214,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);
-static struct workqueue_struct *nvme_fc_wq;
@@ -1241,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
spin_lock_irqsave(&ctrl->lock, flags);
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
- if (ctrl->flags & FCCTRL_TERMIO)
- ctrl->iocnt--;
+ if (ctrl->flags & FCCTRL_TERMIO) {
+ if (!--ctrl->iocnt)
+ wake_up(&ctrl->ioabort_wait);
+ }
}
if (op->flags & FCOP_FLAGS_RELEASED)
complete_rq = true;
@@ -1449,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
{
struct nvme_fc_ctrl *ctrl = set->driver_data;
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1];
-
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
-}
-
-static int
-nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct nvme_fc_ctrl *ctrl = set->driver_data;
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
- struct nvme_fc_queue *queue = &ctrl->queues[0];
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
+ struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}
@@ -1758,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
+ /* only proceed if in LIVE state - e.g. on first error */
+ if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ return;
+
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
- /* stop the queues on error, cleanup is in reset thread */
- if (ctrl->queue_count > 1)
- nvme_stop_queues(&ctrl->ctrl);
-
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1775,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
return;
}
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- dev_err(ctrl->ctrl.device,
- "NVME-FC{%d}: error_recovery: Failed to schedule "
- "reset work\n", ctrl->cnum);
+ nvme_reset_ctrl(&ctrl->ctrl);
}
static enum blk_eh_timer_return
@@ -1887,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
* level FC exchange resource that is also outstanding. This must be
* considered in all cleanup operations.
*/
-static int
+static blk_status_t
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
struct nvme_fc_fcp_op *op, u32 data_len,
enum nvmefc_fcp_datadir io_dir)
@@ -1902,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* the target device is present
*/
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (!nvme_fc_ctrl_get(ctrl))
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
/* format the FC-NVME CMD IU and fcp_req */
cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1953,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (ret < 0) {
nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (ret == -ENOMEM || ret == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
}
@@ -1971,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
queue->lldd_handle, &op->fcp_req);
if (ret) {
- if (op->rq) { /* normal request */
+ if (op->rq) /* normal request */
nvme_fc_unmap_data(ctrl, op->rq, op);
- nvme_cleanup_cmd(op->rq);
- }
/* else - aen. no cleanup needed */
nvme_fc_ctrl_put(ctrl);
if (ret != -EBUSY)
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
if (op->rq) {
blk_mq_stop_hw_queues(op->rq->q);
blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
}
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
-static int
+static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -2005,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir;
u32 data_len;
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)
@@ -2060,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
struct nvme_fc_fcp_op *aen_op;
unsigned long flags;
bool terminating = false;
- int ret;
+ blk_status_t ret;
if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
return;
@@ -2092,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
FCOP_FLAGS_COMPLETE);
- nvme_cleanup_cmd(rq);
nvme_fc_unmap_data(ctrl, rq, op);
nvme_complete_rq(rq);
nvme_fc_ctrl_put(ctrl);
@@ -2310,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
int ret;
bool changed;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
/*
* Create the admin queue
@@ -2407,7 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
@@ -2493,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* wait for all io that had to be aborted */
spin_lock_irqsave(&ctrl->lock, flags);
- while (ctrl->iocnt) {
- spin_unlock_irqrestore(&ctrl->lock, flags);
- msleep(1000);
- spin_lock_irqsave(&ctrl->lock, flags);
- }
+ wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
ctrl->flags &= ~FCCTRL_TERMIO;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -2527,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, delete_work);
- cancel_work_sync(&ctrl->reset_work);
+ cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/*
@@ -2554,7 +2536,7 @@ __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return true;
- if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return true;
return false;
@@ -2581,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
ret = __nvme_fc_del_ctrl(ctrl);
if (!ret)
- flush_workqueue(nvme_fc_wq);
+ flush_workqueue(nvme_wq);
nvme_put_ctrl(&ctrl->ctrl);
@@ -2606,13 +2588,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
+ queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
- ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
+ ctrl->cnum, ctrl->ctrl.nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
}
}
@@ -2621,7 +2603,7 @@ static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
struct nvme_fc_ctrl *ctrl =
- container_of(work, struct nvme_fc_ctrl, reset_work);
+ container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
int ret;
/* will block will waiting for io to terminate */
@@ -2635,29 +2617,6 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}
-/*
- * called by the nvme core layer, for sysfs interface that requests
- * a reset of the nvme controller
- */
-static int
-nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
-
- dev_info(ctrl->ctrl.device,
- "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc",
.module = THIS_MODULE,
@@ -2665,11 +2624,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_fc_reset_nvme_ctrl,
.free_ctrl = nvme_fc_nvme_ctrl_freed,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_del_nvme_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
@@ -2695,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
.queue_rq = nvme_fc_queue_rq,
.complete = nvme_fc_complete_rq,
- .init_request = nvme_fc_init_admin_request,
+ .init_request = nvme_fc_init_request,
.exit_request = nvme_fc_exit_request,
.reinit_request = nvme_fc_reinit_request,
.init_hctx = nvme_fc_init_admin_hctx,
@@ -2740,7 +2697,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
kref_init(&ctrl->ref);
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
spin_lock_init(&ctrl->lock);
@@ -2807,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
+ /* Remove core ctrl ref. */
+ nvme_put_ctrl(&ctrl->ctrl);
+
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
@@ -2965,20 +2925,7 @@ static struct nvmf_transport_ops nvme_fc_transport = {
static int __init nvme_fc_init_module(void)
{
- int ret;
-
- nvme_fc_wq = create_workqueue("nvme_fc_wq");
- if (!nvme_fc_wq)
- return -ENOMEM;
-
- ret = nvmf_register_transport(&nvme_fc_transport);
- if (ret)
- goto err;
-
- return 0;
-err:
- destroy_workqueue(nvme_fc_wq);
- return ret;
+ return nvmf_register_transport(&nvme_fc_transport);
}
static void __exit nvme_fc_exit_module(void)
@@ -2989,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
nvmf_unregister_transport(&nvme_fc_transport);
- destroy_workqueue(nvme_fc_wq);
-
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);
}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5df78e..be85413 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
}
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
rqd->bio->bi_iter.bi_sector));
}
-static void nvme_nvm_end_io(struct request *rq, int error)
+static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
{
struct nvm_rq *rqd = rq->end_io_data;
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
if (IS_ERR(rq)) {
kfree(cmd);
- return -ENOMEM;
+ return PTR_ERR(rq);
}
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
.max_phys_sect = 64,
};
-static void nvme_nvm_end_user_vio(struct request *rq, int error)
-{
- struct completion *waiting = rq->end_io_data;
-
- complete(waiting);
-}
-
static int nvme_nvm_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns,
struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
- rq->end_io_data = &wait;
if (ppa_buf && ppa_len) {
ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
}
submit:
- blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
-
- wait_for_completion_io(&wait);
+ blk_execute_rq(q, NULL, rq, 0);
if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070..d70ff0f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
extern unsigned char admin_timeout;
#define ADMIN_TIMEOUT (admin_timeout * HZ)
-extern unsigned char shutdown_timeout;
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
-
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+extern struct workqueue_struct *nvme_wq;
+
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
struct device *device; /* char device */
struct list_head node;
struct ida ns_ida;
+ struct work_struct reset_work;
struct opal_dev *opal_dev;
@@ -138,6 +138,7 @@ struct nvme_ctrl {
char serial[20];
char model[40];
char firmware_rev[8];
+ char subnqn[NVMF_NQN_SIZE];
u16 cntlid;
u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
u16 oncs;
u16 vid;
u16 oacs;
+ u16 nssa;
+ u16 nr_streams;
atomic_t abort_limit;
u8 event_limit;
u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
/* Power saving configuration */
u64 ps_max_latency_us;
+ bool apst_enabled;
+
+ u32 hmpre;
+ u32 hmmin;
/* Fabrics only */
u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
u32 iorcsz;
u16 icdoff;
u16 maxcmd;
+ int nr_reconnects;
struct nvmf_ctrl_options *opts;
};
-/*
- * An NVM Express namespace is equivalent to a SCSI LUN
- */
struct nvme_ns {
struct list_head list;
@@ -189,14 +194,18 @@ struct nvme_ns {
int instance;
u8 eui[8];
- u8 uuid[16];
+ u8 nguid[16];
+ uuid_t uuid;
unsigned ns_id;
int lba_shift;
u16 ms;
+ u16 sgs;
+ u32 sws;
bool ext;
u8 pi_type;
unsigned long flags;
+ u16 noiob;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
@@ -214,11 +223,9 @@ struct nvme_ctrl_ops {
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
- int (*reset_ctrl)(struct nvme_ctrl *ctrl);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
int (*delete_ctrl)(struct nvme_ctrl *ctrl);
- const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};
@@ -296,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid);
-int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
+blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
@@ -310,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
void __user *ubuffer, unsigned bufflen,
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
- struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
- void *buffer, size_t buflen, u32 *result);
-int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
- void *buffer, size_t buflen, u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
-
-struct sg_io_hdr;
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
-int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
-int nvme_sg_get_version_num(int __user *ip);
+int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 40c7581..33c3b9d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
-#include <linux/cpu.h>
-#include <linux/delay.h>
#include <linux/dmi.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/idr.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
-#include <linux/ptrace.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
@@ -49,7 +36,6 @@
#include "nvme.h"
#define NVME_Q_DEPTH 1024
-#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static struct workqueue_struct *nvme_workq;
+static unsigned int max_host_mem_size_mb = 128;
+module_param(max_host_mem_size_mb, uint, 0444);
+MODULE_PARM_DESC(max_host_mem_size_mb,
+ "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
struct nvme_dev;
struct nvme_queue;
-static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
@@ -92,9 +80,8 @@ struct nvme_dev {
int q_depth;
u32 db_stride;
void __iomem *bar;
- struct work_struct reset_work;
+ unsigned long bar_mapped_size;
struct work_struct remove_work;
- struct timer_list watchdog_timer;
struct mutex shutdown_lock;
bool subsystem;
void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
+
+ /* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
u32 *dbbuf_eis;
dma_addr_t dbbuf_eis_dma_addr;
+
+ /* host memory buffer support: */
+ u64 host_mem_size;
+ u32 nr_host_mem_descs;
+ struct nvme_host_mem_buf_desc *host_mem_descs;
+ void **host_mem_desc_bufs;
};
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
- BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+ BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
nvmeq->tags = NULL;
}
-static int nvme_admin_init_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- struct nvme_dev *dev = set->driver_data;
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[0];
-
- BUG_ON(!nvmeq);
- iod->nvmeq = nvmeq;
- return 0;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
{
struct nvme_dev *dev = set->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
+ int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
+ struct nvme_queue *nvmeq = dev->queues[queue_idx];
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
-static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
+static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
if (!iod->sg)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
} else {
iod->sg = iod->inline_sg;
}
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->nents = 0;
iod->length = size;
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
return true;
}
-static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct request_queue *q = req->q;
enum dma_data_direction dma_dir = rq_data_dir(req) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
- int ret = BLK_MQ_RQ_QUEUE_ERROR;
+ blk_status_t ret = BLK_STS_IOERR;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
if (!iod->nents)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
DMA_ATTR_NO_WARN))
goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
if (!nvme_setup_prps(dev, req))
goto out_unmap;
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
if (blk_integrity_rq(req)) {
if (blk_rq_count_integrity_sg(q, req->bio) != 1)
goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_unmap:
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
/*
* NOTE: ns is NULL when called on the admin queue.
*/
-static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_command cmnd;
- int ret = BLK_MQ_RQ_QUEUE_OK;
-
- /*
- * If formated with metadata, require the block layer provide a buffer
- * unless this namespace is formated such that the metadata can be
- * stripped/generated by the controller with PRACT=1.
- */
- if (ns && ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8) &&
- !blk_rq_is_passthrough(req)) {
- blk_mq_end_request(req, -EFAULT);
- return BLK_MQ_RQ_QUEUE_OK;
- }
- }
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &cmnd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
ret = nvme_init_iod(req, dev);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
goto out_free_cmd;
- if (blk_rq_nr_phys_segments(req))
+ if (blk_rq_nr_phys_segments(req)) {
ret = nvme_map_data(dev, req, &cmnd);
-
- if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out_cleanup_iod;
+ if (ret)
+ goto out_cleanup_iod;
+ }
blk_mq_start_request(req);
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_MQ_RQ_QUEUE_ERROR;
+ ret = BLK_STS_IOERR;
spin_unlock_irq(&nvmeq->q_lock);
goto out_cleanup_iod;
}
__nvme_submit_cmd(nvmeq, &cmnd);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
}
-static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
- u16 head, phase;
-
- head = nvmeq->cq_head;
- phase = nvmeq->cq_phase;
-
- while (nvme_cqe_valid(nvmeq, head, phase)) {
- struct nvme_completion cqe = nvmeq->cqes[head];
- struct request *req;
-
- if (++head == nvmeq->q_depth) {
- head = 0;
- phase = !phase;
- }
-
- if (tag && *tag == cqe.command_id)
- *tag = -1;
+ u16 head = nvmeq->cq_head;
- if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
- dev_warn(nvmeq->dev->ctrl.device,
- "invalid id %d completed on queue %d\n",
- cqe.command_id, le16_to_cpu(cqe.sq_id));
- continue;
- }
+ if (likely(nvmeq->cq_vector >= 0)) {
+ if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
+ nvmeq->dbbuf_cq_ei))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ }
+}
- /*
- * AEN requests are special as they don't time out and can
- * survive any kind of queue freeze and often don't respond to
- * aborts. We don't even bother to allocate a struct request
- * for them but rather special case them here.
- */
- if (unlikely(nvmeq->qid == 0 &&
- cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
- nvme_complete_async_event(&nvmeq->dev->ctrl,
- cqe.status, &cqe.result);
- continue;
- }
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ struct request *req;
- req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
- nvme_end_request(req, cqe.status, cqe.result);
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.device,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
}
- if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(&nvmeq->dev->ctrl,
+ cqe->status, &cqe->result);
return;
+ }
- if (likely(nvmeq->cq_vector >= 0))
- if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
- nvmeq->dbbuf_cq_ei))
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
- nvmeq->cq_head = head;
- nvmeq->cq_phase = phase;
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+ nvme_end_request(req, cqe->status, cqe->result);
+}
- nvmeq->cqe_seen = 1;
+static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
+ struct nvme_completion *cqe)
+{
+ if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
+ *cqe = nvmeq->cqes[nvmeq->cq_head];
+
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+ return true;
+ }
+ return false;
}
static void nvme_process_cq(struct nvme_queue *nvmeq)
{
- __nvme_process_cq(nvmeq, NULL);
+ struct nvme_completion cqe;
+ int consumed = 0;
+
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+ }
+
+ if (consumed) {
+ nvme_ring_cq_doorbell(nvmeq);
+ nvmeq->cqe_seen = 1;
+ }
}
static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_process_cq(nvmeq, &tag);
- spin_unlock_irq(&nvmeq->q_lock);
+ struct nvme_completion cqe;
+ int found = 0, consumed = 0;
- if (tag == -1)
- return 1;
- }
+ if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ return 0;
- return 0;
+ spin_lock_irq(&nvmeq->q_lock);
+ while (nvme_read_cqe(nvmeq, &cqe)) {
+ nvme_handle_cqe(nvmeq, &cqe);
+ consumed++;
+
+ if (tag == cqe.command_id) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (consumed)
+ nvme_ring_cq_doorbell(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+
+ return found;
}
static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-static void abort_endio(struct request *req, int error)
+static void abort_endio(struct request *req, blk_status_t error)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
blk_mq_free_request(req);
}
+static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
+{
+
+ /* If true, indicates loss of adapter communication, possibly by a
+ * NVMe Subsystem reset.
+ */
+ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
+
+ /* If there is a reset ongoing, we shouldn't reset again. */
+ if (dev->ctrl.state == NVME_CTRL_RESETTING)
+ return false;
+
+ /* We shouldn't reset unless the controller is on fatal error state
+ * _or_ if we lost the communication with it.
+ */
+ if (!(csts & NVME_CSTS_CFS) && !nssro)
+ return false;
+
+ /* If PCI error recovery process is happening, we cannot reset or
+ * the recovery mechanism will surely fail.
+ */
+ if (pci_channel_offline(to_pci_dev(dev->dev)))
+ return false;
+
+ return true;
+}
+
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+ /* Read a config register to help see what died. */
+ u16 pci_status;
+ int result;
+
+ result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+ &pci_status);
+ if (result == PCIBIOS_SUCCESSFUL)
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+ csts, pci_status);
+ else
+ dev_warn(dev->ctrl.device,
+ "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+ csts, result);
+}
+
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Reset immediately if the controller is failed
+ */
+ if (nvme_should_reset(dev, csts)) {
+ nvme_warn_reset(dev, csts);
+ nvme_dev_disable(dev, false);
+ nvme_reset_ctrl(&dev->ctrl);
+ return BLK_EH_HANDLED;
+ }
/*
* Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
/*
* Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
- .init_request = nvme_admin_init_request,
+ .init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return 0;
}
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+ return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (size <= dev->bar_mapped_size)
+ return 0;
+ if (size > pci_resource_len(pdev, 0))
+ return -ENOMEM;
+ if (dev->bar)
+ iounmap(dev->bar);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+ if (!dev->bar) {
+ dev->bar_mapped_size = 0;
+ return -ENOMEM;
+ }
+ dev->bar_mapped_size = size;
+ dev->dbs = dev->bar + NVME_REG_DBS;
+
+ return 0;
+}
+
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
+ result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+ if (result < 0)
+ return result;
+
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0;
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
-{
-
- /* If true, indicates loss of adapter communication, possibly by a
- * NVMe Subsystem reset.
- */
- bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
-
- /* If there is a reset ongoing, we shouldn't reset again. */
- if (dev->ctrl.state == NVME_CTRL_RESETTING)
- return false;
-
- /* We shouldn't reset unless the controller is on fatal error state
- * _or_ if we lost the communication with it.
- */
- if (!(csts & NVME_CSTS_CFS) && !nssro)
- return false;
-
- /* If PCI error recovery process is happening, we cannot reset or
- * the recovery mechanism will surely fail.
- */
- if (pci_channel_offline(to_pci_dev(dev->dev)))
- return false;
-
- return true;
-}
-
-static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
-{
- /* Read a config register to help see what died. */
- u16 pci_status;
- int result;
-
- result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
- &pci_status);
- if (result == PCIBIOS_SUCCESSFUL)
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
- csts, pci_status);
- else
- dev_warn(dev->ctrl.device,
- "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
- csts, result);
-}
-
-static void nvme_watchdog_timer(unsigned long data)
-{
- struct nvme_dev *dev = (struct nvme_dev *)data;
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
-
- /* Skip controllers under certain specific conditions. */
- if (nvme_should_reset(dev, csts)) {
- if (!nvme_reset(dev))
- nvme_warn_reset(dev, csts);
- return;
- }
-
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-}
-
static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
}
}
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
+ size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
+ struct nvme_command c;
+ u64 dma_addr;
+ int ret;
+
+ dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev->dev, dma_addr))
+ return -ENOMEM;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+ c.features.dword11 = cpu_to_le32(bits);
+ c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
+ ilog2(dev->ctrl.page_size));
+ c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
+ c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
+ c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
+
+ ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
+ if (ret) {
+ dev_warn(dev->ctrl.device,
+ "failed to set host mem (err %d, flags %#x).\n",
+ ret, bits);
+ }
+ dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
+ return ret;
+}
+
+static void nvme_free_host_mem(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->nr_host_mem_descs; i++) {
+ struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
+ size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr));
+ }
+
+ kfree(dev->host_mem_desc_bufs);
+ dev->host_mem_desc_bufs = NULL;
+ kfree(dev->host_mem_descs);
+ dev->host_mem_descs = NULL;
+}
+
+static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
- return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
+ struct nvme_host_mem_buf_desc *descs;
+ u32 chunk_size, max_entries, i = 0;
+ void **bufs;
+ u64 size, tmp;
+
+ /* start big and work our way down */
+ chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
+retry:
+ tmp = (preferred + chunk_size - 1);
+ do_div(tmp, chunk_size);
+ max_entries = tmp;
+ descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
+ if (!descs)
+ goto out;
+
+ bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
+ if (!bufs)
+ goto out_free_descs;
+
+ for (size = 0; size < preferred; size += chunk_size) {
+ u32 len = min_t(u64, chunk_size, preferred - size);
+ dma_addr_t dma_addr;
+
+ bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
+ if (!bufs[i])
+ break;
+
+ descs[i].addr = cpu_to_le64(dma_addr);
+ descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+ i++;
+ }
+
+ if (!size || (min && size < min)) {
+ dev_warn(dev->ctrl.device,
+ "failed to allocate host memory buffer.\n");
+ goto out_free_bufs;
+ }
+
+ dev_info(dev->ctrl.device,
+ "allocated %lld MiB host memory buffer.\n",
+ size >> ilog2(SZ_1M));
+ dev->nr_host_mem_descs = i;
+ dev->host_mem_size = size;
+ dev->host_mem_descs = descs;
+ dev->host_mem_desc_bufs = bufs;
+ return 0;
+
+out_free_bufs:
+ while (--i >= 0) {
+ size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+
+ dma_free_coherent(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr));
+ }
+
+ kfree(bufs);
+out_free_descs:
+ kfree(descs);
+out:
+ /* try a smaller chunk size if we failed early */
+ if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
+ chunk_size /= 2;
+ goto retry;
+ }
+ dev->host_mem_descs = NULL;
+ return -ENOMEM;
+}
+
+static void nvme_setup_host_mem(struct nvme_dev *dev)
+{
+ u64 max = (u64)max_host_mem_size_mb * SZ_1M;
+ u64 preferred = (u64)dev->ctrl.hmpre * 4096;
+ u64 min = (u64)dev->ctrl.hmmin * 4096;
+ u32 enable_bits = NVME_HOST_MEM_ENABLE;
+
+ preferred = min(preferred, max);
+ if (min > max) {
+ dev_warn(dev->ctrl.device,
+ "min host memory (%lld MiB) above limit (%d MiB).\n",
+ min >> ilog2(SZ_1M), max_host_mem_size_mb);
+ nvme_free_host_mem(dev);
+ return;
+ }
+
+ /*
+ * If we already have a buffer allocated check if we can reuse it.
+ */
+ if (dev->host_mem_descs) {
+ if (dev->host_mem_size >= min)
+ enable_bits |= NVME_HOST_MEM_RETURN;
+ else
+ nvme_free_host_mem(dev);
+ }
+
+ if (!dev->host_mem_descs) {
+ if (nvme_alloc_host_mem(dev, min, preferred))
+ return;
+ }
+
+ if (nvme_set_host_mem(dev, enable_bits))
+ nvme_free_host_mem(dev);
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues, size;
+ int result, nr_io_queues;
+ unsigned long size;
nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_release_cmb(dev);
}
- size = db_bar_size(dev, nr_io_queues);
- if (size > 8192) {
- iounmap(dev->bar);
- do {
- dev->bar = ioremap(pci_resource_start(pdev, 0), size);
- if (dev->bar)
- break;
- if (!--nr_io_queues)
- return -ENOMEM;
- size = db_bar_size(dev, nr_io_queues);
- } while (1);
- dev->dbs = dev->bar + 4096;
- adminq->q_db = dev->dbs;
- }
+ do {
+ size = db_bar_size(dev, nr_io_queues);
+ result = nvme_remap_bar(dev, size);
+ if (!result)
+ break;
+ if (!--nr_io_queues)
+ return -ENOMEM;
+ } while (1);
+ adminq->q_db = dev->dbs;
/* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq);
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return nvme_create_io_queues(dev);
}
-static void nvme_del_queue_end(struct request *req, int error)
+static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_del_cq_end(struct request *req, int error)
+static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
@@ -1799,8 +1964,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
- del_timer_sync(&dev->watchdog_timer);
-
mutex_lock(&dev->shutdown_lock);
if (pci_is_enabled(pdev)) {
u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -1816,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* Give the controller a chance to complete all entered requests if
* doing a safe shutdown.
*/
- if (!dead && shutdown)
- nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+ if (!dead) {
+ if (shutdown)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
+
+ /*
+ * If the controller is still alive tell it to stop using the
+ * host memory buffer. In theory the shutdown / reset should
+ * make sure that it doesn't access the host memoery anymore,
+ * but I'd rather be safe than sorry..
+ */
+ if (dev->host_mem_descs)
+ nvme_set_host_mem(dev, 0);
+
+ }
nvme_stop_queues(&dev->ctrl);
queues = dev->online_queues - 1;
@@ -1900,7 +2075,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ struct nvme_dev *dev =
+ container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result = -ENODEV;
@@ -1949,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
"unable to allocate dma for dbbuf\n");
}
+ if (dev->ctrl.hmpre)
+ nvme_setup_host_mem(dev);
+
result = nvme_setup_io_queues(dev);
if (result)
goto out;
@@ -1962,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->online_queues > 1)
nvme_queue_async_events(&dev->ctrl);
- mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
-
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
@@ -2003,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
nvme_put_ctrl(&dev->ctrl);
}
-static int nvme_reset(struct nvme_dev *dev)
-{
- if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
- return -ENODEV;
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
- if (!queue_work(nvme_workq, &dev->reset_work))
- return -EBUSY;
- return 0;
-}
-
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
*val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2032,16 +2198,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
return 0;
}
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
-{
- struct nvme_dev *dev = to_nvme_dev(ctrl);
- int ret = nvme_reset(dev);
-
- if (!ret)
- flush_work(&dev->reset_work);
- return ret;
-}
-
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
@@ -2049,7 +2205,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
- .reset_ctrl = nvme_pci_reset_ctrl,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
};
@@ -2061,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
+ if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
goto release;
return 0;
@@ -2116,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (result)
goto free;
- INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
- setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
- (unsigned long)dev);
mutex_init(&dev->shutdown_lock);
init_completion(&dev->ioq_wait);
@@ -2137,7 +2289,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- queue_work(nvme_workq, &dev->reset_work);
+ queue_work(nvme_wq, &dev->ctrl.reset_work);
return 0;
release_pools:
@@ -2158,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
if (prepare)
nvme_dev_disable(dev, false);
else
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
}
static void nvme_shutdown(struct pci_dev *pdev)
@@ -2178,7 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
- cancel_work_sync(&dev->reset_work);
+ cancel_work_sync(&dev->ctrl.reset_work);
pci_set_drvdata(pdev, NULL);
if (!pci_device_is_present(pdev)) {
@@ -2186,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_dev_disable(dev, false);
}
- flush_work(&dev->reset_work);
+ flush_work(&dev->ctrl.reset_work);
nvme_uninit_ctrl(&dev->ctrl);
nvme_dev_disable(dev, true);
+ nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0);
nvme_release_prp_pools(dev);
@@ -2229,7 +2382,7 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset(ndev);
+ nvme_reset_ctrl(&ndev->ctrl);
return 0;
}
#endif
@@ -2268,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset(dev);
+ nvme_reset_ctrl(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -2324,22 +2477,12 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
- int result;
-
- nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
- if (!nvme_workq)
- return -ENOMEM;
-
- result = pci_register_driver(&nvme_driver);
- if (result)
- destroy_workqueue(nvme_workq);
- return result;
+ return pci_register_driver(&nvme_driver);
}
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- destroy_workqueue(nvme_workq);
_nvme_check_size();
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 24397d3..6d4119d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
*/
#define NVME_RDMA_NR_AEN_COMMANDS 1
#define NVME_RDMA_AQ_BLKMQ_DEPTH \
- (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
struct nvme_rdma_device {
struct ib_device *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
};
enum nvme_rdma_queue_flags {
- NVME_RDMA_Q_CONNECTED = (1 << 0),
- NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
- NVME_RDMA_Q_DELETING = (1 << 2),
- NVME_RDMA_Q_LIVE = (1 << 3),
+ NVME_RDMA_Q_LIVE = 0,
+ NVME_RDMA_Q_DELETING = 1,
};
struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
};
struct nvme_rdma_ctrl {
- /* read and written in the hot path */
- spinlock_t lock;
-
/* read only in the hot path */
struct nvme_rdma_queue *queues;
u32 queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
/* other member variables */
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
- struct work_struct reset_work;
struct work_struct err_work;
struct nvme_rdma_qe async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(nvme_rdma_ctrl_list);
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
-static struct workqueue_struct *nvme_rdma_wq;
-
/*
* Disabling this option makes small I/O goes faster, but is fundamentally
* unsafe. With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
return ret;
}
-static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
DMA_TO_DEVICE);
}
-static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx)
-{
- return __nvme_rdma_exit_request(set->driver_data, rq, 0);
-}
-
-static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
- struct request *rq, unsigned int queue_idx)
+static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
+ struct request *rq, unsigned int hctx_idx,
+ unsigned int numa_node)
{
+ struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+ int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
return -ENOMEM;
}
-static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
-}
-
-static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
- struct request *rq, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return __nvme_rdma_init_request(set->driver_data, rq, 0);
-}
-
static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
struct nvme_rdma_device *dev;
struct ib_device *ibdev;
- if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
- return;
-
dev = queue->device;
ibdev = dev->dev;
rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
- struct nvme_rdma_device *dev)
+static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
- struct ib_device *ibdev = dev->dev;
+ struct ib_device *ibdev;
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
int comp_vector, idx = nvme_rdma_queue_idx(queue);
-
int ret;
- queue->device = dev;
+ queue->device = nvme_rdma_find_get_device(queue->cm_id);
+ if (!queue->device) {
+ dev_err(queue->cm_id->device->dev.parent,
+ "no client data found!\n");
+ return -ECONNREFUSED;
+ }
+ ibdev = queue->device->dev;
/*
* The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
/* +1 for ib_stop_cq */
- queue->ib_cq = ib_alloc_cq(dev->dev, queue,
- cq_factor * queue->queue_size + 1, comp_vector,
- IB_POLL_SOFTIRQ);
+ queue->ib_cq = ib_alloc_cq(ibdev, queue,
+ cq_factor * queue->queue_size + 1,
+ comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(queue->ib_cq)) {
ret = PTR_ERR(queue->ib_cq);
- goto out;
+ goto out_put_dev;
}
ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
ret = -ENOMEM;
goto out_destroy_qp;
}
- set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
return 0;
@@ -532,7 +503,8 @@ out_destroy_qp:
ib_destroy_qp(queue->qp);
out_destroy_ib_cq:
ib_free_cq(queue->ib_cq);
-out:
+out_put_dev:
+ nvme_rdma_dev_put(queue->device);
return ret;
}
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
}
clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
- set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
return 0;
out_destroy_cm_id:
- nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
return ret;
}
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
- queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
+ queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
- queue_work(nvme_rdma_wq, &ctrl->delete_work);
+ queue_work(nvme_wq, &ctrl->delete_work);
}
}
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
bool changed;
int ret;
- ++ctrl->ctrl.opts->nr_reconnects;
+ ++ctrl->ctrl.nr_reconnects;
if (ctrl->queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
@@ -749,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ret)
goto requeue;
- ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (ret)
goto requeue;
@@ -777,7 +747,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
- ctrl->ctrl.opts->nr_reconnects = 0;
+ ctrl->ctrl.nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_queue_scan(&ctrl->ctrl);
@@ -790,7 +760,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
- ctrl->ctrl.opts->nr_reconnects);
+ ctrl->ctrl.nr_reconnects);
nvme_rdma_reconnect_or_remove(ctrl);
}
@@ -802,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
nvme_stop_keep_alive(&ctrl->ctrl);
- for (i = 0; i < ctrl->queue_count; i++) {
- clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+ for (i = 0; i < ctrl->queue_count; i++)
clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
- }
if (ctrl->queue_count > 1)
nvme_stop_queues(&ctrl->ctrl);
@@ -833,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
return;
- queue_work(nvme_rdma_wq, &ctrl->err_work);
+ queue_work(nvme_wq, &ctrl->err_work);
}
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1278,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
{
- struct nvme_rdma_device *dev;
int ret;
- dev = nvme_rdma_find_get_device(queue->cm_id);
- if (!dev) {
- dev_err(queue->cm_id->device->dev.parent,
- "no client data found!\n");
- return -ECONNREFUSED;
- }
-
- ret = nvme_rdma_create_queue_ib(queue, dev);
- if (ret) {
- nvme_rdma_dev_put(dev);
- goto out;
- }
+ ret = nvme_rdma_create_queue_ib(queue);
+ if (ret)
+ return ret;
ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
if (ret) {
@@ -1306,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
out_destroy_queue:
nvme_rdma_destroy_queue_ib(queue);
-out:
return ret;
}
@@ -1334,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
* specified by the Fabrics standard.
*/
if (priv.qid == 0) {
- priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
- priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+ priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
+ priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
} else {
/*
* current interpretation of the fabrics spec
@@ -1383,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
complete(&queue->cm_done);
return 0;
case RDMA_CM_EVENT_REJECTED:
+ nvme_rdma_destroy_queue_ib(queue);
cm_error = nvme_rdma_conn_rejected(queue, ev);
break;
- case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
+ nvme_rdma_destroy_queue_ib(queue);
+ case RDMA_CM_EVENT_ADDR_ERROR:
dev_dbg(queue->ctrl->ctrl.device,
"CM error event %d\n", ev->event);
cm_error = -ECONNRESET;
@@ -1435,8 +1394,8 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
/*
* We cannot accept any other command until the Connect command has completed.
*/
-static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
- struct request *rq)
+static inline blk_status_t
+nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
{
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
struct nvme_command *cmd = nvme_req(rq)->cmd;
@@ -1452,16 +1411,15 @@ static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
* failover.
*/
if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
- return -EIO;
- else
- return -EAGAIN;
+ return BLK_STS_IOERR;
+ return BLK_STS_RESOURCE; /* try again later */
}
}
return 0;
}
-static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1472,28 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command *c = sqe->data;
bool flush = false;
struct ib_device *dev;
- int ret;
+ blk_status_t ret;
+ int err;
WARN_ON_ONCE(rq->tag < 0);
ret = nvme_rdma_queue_is_ready(queue, rq);
if (unlikely(ret))
- goto err;
+ return ret;
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
blk_mq_start_request(rq);
- ret = nvme_rdma_map_data(queue, rq, c);
- if (ret < 0) {
+ err = nvme_rdma_map_data(queue, rq, c);
+ if (err < 0) {
dev_err(queue->ctrl->ctrl.device,
- "Failed to map data (%d)\n", ret);
+ "Failed to map data (%d)\n", err);
nvme_cleanup_cmd(rq);
goto err;
}
@@ -1503,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (req_op(rq) == REQ_OP_FLUSH)
flush = true;
- ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
+ err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
- if (ret) {
+ if (err) {
nvme_rdma_unmap_data(queue, rq);
goto err;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
err:
- return (ret == -ENOMEM || ret == -EAGAIN) ?
- BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;
+ if (err == -ENOMEM || err == -EAGAIN)
+ return BLK_STS_RESOURCE;
+ return BLK_STS_IOERR;
}
static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1523,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
struct ib_wc wc;
int found = 0;
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
struct ib_cqe *cqe = wc.wr_cqe;
@@ -1560,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
.queue_rq = nvme_rdma_queue_rq,
.complete = nvme_rdma_complete_rq,
- .init_request = nvme_rdma_init_admin_request,
- .exit_request = nvme_rdma_exit_admin_request,
+ .init_request = nvme_rdma_init_request,
+ .exit_request = nvme_rdma_exit_request,
.reinit_request = nvme_rdma_reinit_request,
.init_hctx = nvme_rdma_init_admin_hctx,
.timeout = nvme_rdma_timeout,
@@ -1571,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
{
int error;
- error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH);
+ error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
if (error)
return error;
@@ -1672,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
nvme_rdma_free_io_queues(ctrl);
}
- if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+ if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
nvme_shutdown_ctrl(&ctrl->ctrl);
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1709,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
@@ -1743,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_rdma_ctrl *ctrl = container_of(work,
- struct nvme_rdma_ctrl, reset_work);
+ struct nvme_rdma_ctrl *ctrl =
+ container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
int ret;
bool changed;
@@ -1785,22 +1744,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
del_dead_ctrl:
/* Deleting this dead controller... */
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
- WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
+ WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1810,11 +1754,9 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_rdma_reset_ctrl,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
.get_address = nvmf_get_address,
};
@@ -1919,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
nvme_rdma_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
- spin_lock_init(&ctrl->lock);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1939,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
/* sanity check icdoff */
if (ctrl->ctrl.icdoff) {
dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
/* sanity check keyed sgls */
if (!(ctrl->ctrl.sgls & (1 << 20))) {
dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+ ret = -EINVAL;
goto out_remove_admin_queue;
}
@@ -2033,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
- flush_workqueue(nvme_rdma_wq);
+ flush_workqueue(nvme_wq);
}
static struct ib_client nvme_rdma_ib_client = {
@@ -2046,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
{
int ret;
- nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
- if (!nvme_rdma_wq)
- return -ENOMEM;
-
ret = ib_register_client(&nvme_rdma_ib_client);
if (ret)
- goto err_destroy_wq;
+ return ret;
ret = nvmf_register_transport(&nvme_rdma_transport);
if (ret)
@@ -2062,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
err_unreg_client:
ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
- destroy_workqueue(nvme_rdma_wq);
return ret;
}
@@ -2071,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
{
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
- destroy_workqueue(nvme_rdma_wq);
}
module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e..0000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
-/*
- * NVM Express device driver
- * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-/*
- * Refer to the SCSI-NVMe Translation spec for details on how
- * each command is translated.
- */
-
-#include <linux/bio.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/kdev_t.h>
-#include <linux/kthread.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/poison.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/unaligned.h>
-#include <scsi/sg.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_request.h>
-
-#include "nvme.h"
-
-static int sg_version_num = 30534; /* 2 digits for each component */
-
-/* VPD Page Codes */
-#define VPD_SUPPORTED_PAGES 0x00
-#define VPD_SERIAL_NUMBER 0x80
-#define VPD_DEVICE_IDENTIFIERS 0x83
-#define VPD_EXTENDED_INQUIRY 0x86
-#define VPD_BLOCK_LIMITS 0xB0
-#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
-
-/* format unit paramter list offsets */
-#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
-#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
-#define FORMAT_UNIT_PROT_INT_OFFSET 3
-#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
-#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
-
-/* Misc. defines */
-#define FIXED_SENSE_DATA 0x70
-#define DESC_FORMAT_SENSE_DATA 0x72
-#define FIXED_SENSE_DATA_ADD_LENGTH 10
-#define LUN_ENTRY_SIZE 8
-#define LUN_DATA_HEADER_SIZE 8
-#define ALL_LUNS_RETURNED 0x02
-#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01
-#define RESTRICTED_LUNS_RETURNED 0x00
-#define DOWNLOAD_SAVE_ACTIVATE 0x05
-#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E
-#define ACTIVATE_DEFERRED_MICROCODE 0x0F
-#define FORMAT_UNIT_IMMED_MASK 0x2
-#define FORMAT_UNIT_IMMED_OFFSET 1
-#define KELVIN_TEMP_FACTOR 273
-#define FIXED_FMT_SENSE_DATA_SIZE 18
-#define DESC_FMT_SENSE_DATA_SIZE 8
-
-/* SCSI/NVMe defines and bit masks */
-#define INQ_STANDARD_INQUIRY_PAGE 0x00
-#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00
-#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80
-#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83
-#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86
-#define INQ_BDEV_LIMITS_PAGE 0xB0
-#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1
-#define INQ_SERIAL_NUMBER_LENGTH 0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES 6
-#define VERSION_SPC_4 0x06
-#define ACA_UNSUPPORTED 0
-#define STANDARD_INQUIRY_LENGTH 36
-#define ADDITIONAL_STD_INQ_LENGTH 31
-#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
-#define RESERVED_FIELD 0
-
-/* Mode Sense/Select defines */
-#define MODE_PAGE_INFO_EXCEP 0x1C
-#define MODE_PAGE_CACHING 0x08
-#define MODE_PAGE_CONTROL 0x0A
-#define MODE_PAGE_POWER_CONDITION 0x1A
-#define MODE_PAGE_RETURN_ALL 0x3F
-#define MODE_PAGE_BLK_DES_LEN 0x08
-#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10
-#define MODE_PAGE_CACHING_LEN 0x14
-#define MODE_PAGE_CONTROL_LEN 0x0C
-#define MODE_PAGE_POW_CND_LEN 0x28
-#define MODE_PAGE_INF_EXC_LEN 0x0C
-#define MODE_PAGE_ALL_LEN 0x54
-#define MODE_SENSE6_MPH_SIZE 4
-#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
-#define MODE_SENSE_PAGE_CODE_OFFSET 2
-#define MODE_SENSE_PAGE_CODE_MASK 0x3F
-#define MODE_SENSE_LLBAA_MASK 0x10
-#define MODE_SENSE_LLBAA_SHIFT 4
-#define MODE_SENSE_DBD_MASK 8
-#define MODE_SENSE_DBD_SHIFT 3
-#define MODE_SENSE10_MPH_SIZE 8
-#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
-#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
-#define MODE_SELECT_6_BD_OFFSET 3
-#define MODE_SELECT_10_BD_OFFSET 6
-#define MODE_SELECT_10_LLBAA_OFFSET 4
-#define MODE_SELECT_10_LLBAA_MASK 1
-#define MODE_SELECT_6_MPH_SIZE 4
-#define MODE_SELECT_10_MPH_SIZE 8
-#define CACHING_MODE_PAGE_WCE_MASK 0x04
-#define MODE_SENSE_BLK_DESC_ENABLED 0
-#define MODE_SENSE_BLK_DESC_COUNT 1
-#define MODE_SELECT_PAGE_CODE_MASK 0x3F
-#define SHORT_DESC_BLOCK 8
-#define LONG_DESC_BLOCK 16
-#define MODE_PAGE_POW_CND_LEN_FIELD 0x26
-#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A
-#define MODE_PAGE_CACHING_LEN_FIELD 0x12
-#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A
-#define MODE_SENSE_PC_CURRENT_VALUES 0
-
-/* Log Sense defines */
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00
-#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
-#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
-#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
-#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
-#define LOG_SENSE_CDB_PC_MASK 0xC0
-#define LOG_SENSE_CDB_PC_SHIFT 6
-#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
-#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
-#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
-#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
-#define REMAINING_TEMP_PAGE_LENGTH 0xC
-#define LOG_TEMP_PAGE_LENGTH 0x10
-#define LOG_TEMP_UNKNOWN 0xFF
-#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3
-
-/* Read Capacity defines */
-#define READ_CAP_10_RESP_SIZE 8
-#define READ_CAP_16_RESP_SIZE 32
-
-/* NVMe Namespace and Command Defines */
-#define BYTES_TO_DWORDS 4
-#define NVME_MAX_FIRMWARE_SLOT 7
-
-/* Report LUNs defines */
-#define REPORT_LUNS_FIRST_LUN_OFFSET 8
-
-/* SCSI ADDITIONAL SENSE Codes */
-
-#define SCSI_ASC_NO_SENSE 0x00
-#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03
-#define SCSI_ASC_LUN_NOT_READY 0x04
-#define SCSI_ASC_WARNING 0x0B
-#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10
-#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11
-#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D
-#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20
-#define SCSI_ASC_ILLEGAL_COMMAND 0x20
-#define SCSI_ASC_ILLEGAL_BLOCK 0x21
-#define SCSI_ASC_INVALID_CDB 0x24
-#define SCSI_ASC_INVALID_LUN 0x25
-#define SCSI_ASC_INVALID_PARAMETER 0x26
-#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31
-#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44
-
-/* SCSI ADDITIONAL SENSE Code Qualifiers */
-
-#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00
-#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01
-#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02
-#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03
-#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04
-#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
-#define SCSI_ASCQ_INVALID_LUN_ID 0x09
-
-/* copied from drivers/usb/gadget/function/storage_common.h */
-static inline u32 get_unaligned_be24(u8 *buf)
-{
- return 0xffffff & (u32) get_unaligned_be32(buf - 1);
-}
-
-/* Struct to gather data that needs to be extracted from a SCSI CDB.
- Not conforming to any particular CDB variant, but compatible with all. */
-
-struct nvme_trans_io_cdb {
- u8 fua;
- u8 prot_info;
- u64 lba;
- u32 xfer_len;
-};
-
-
-/* Internal Helper Functions */
-
-
-/* Copy data to userspace memory */
-
-static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
- unsigned long n)
-{
- int i;
- void *index = from;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_to_user(sgl.iov_base, index, xfer_len))
- return -EFAULT;
-
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_to_user(hdr->dxferp, from, n))
- return -EFAULT;
- return 0;
-}
-
-/* Copy data from userspace memory */
-
-static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
- unsigned long n)
-{
- int i;
- void *index = to;
- size_t remaining = n;
- size_t xfer_len;
-
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- for (i = 0; i < hdr->iovec_count; i++) {
- if (copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec)))
- return -EFAULT;
- xfer_len = min(remaining, sgl.iov_len);
- if (copy_from_user(index, sgl.iov_base, xfer_len))
- return -EFAULT;
- index += xfer_len;
- remaining -= xfer_len;
- if (remaining == 0)
- break;
- }
- return 0;
- }
-
- if (copy_from_user(to, hdr->dxferp, n))
- return -EFAULT;
- return 0;
-}
-
-/* Status/Sense Buffer Writeback */
-
-static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
- u8 asc, u8 ascq)
-{
- u8 xfer_len;
- u8 resp[DESC_FMT_SENSE_DATA_SIZE];
-
- if (scsi_status_is_good(status)) {
- hdr->status = SAM_STAT_GOOD;
- hdr->masked_status = GOOD;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
- hdr->sb_len_wr = 0;
- } else {
- hdr->status = status;
- hdr->masked_status = status >> 1;
- hdr->host_status = DID_OK;
- hdr->driver_status = DRIVER_OK;
-
- memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
- resp[0] = DESC_FORMAT_SENSE_DATA;
- resp[1] = sense_key;
- resp[2] = asc;
- resp[3] = ascq;
-
- xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
- hdr->sb_len_wr = xfer_len;
- if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
- return -EFAULT;
- }
-
- return 0;
-}
-
-/*
- * Take a status code from a lowlevel routine, and if it was a positive NVMe
- * error code update the sense data based on it. In either case the passed
- * in value is returned again, unless an -EFAULT from copy_to_user overrides
- * it.
- */
-static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
-{
- u8 status, sense_key, asc, ascq;
- int res;
-
- /* For non-nvme (Linux) errors, simply return the error code */
- if (nvme_sc < 0)
- return nvme_sc;
-
- /* Mask DNR, More, and reserved fields */
- switch (nvme_sc & 0x7FF) {
- /* Generic Command Status */
- case NVME_SC_SUCCESS:
- status = SAM_STAT_GOOD;
- sense_key = NO_SENSE;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_OPCODE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_COMMAND;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_FIELD:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_DATA_XFER_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_POWER_LOSS:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_WARNING;
- ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
- break;
- case NVME_SC_INTERNAL:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = HARDWARE_ERROR;
- asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_REQ:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ABORT_QUEUE:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_FAIL:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_FUSED_MISSING:
- status = SAM_STAT_TASK_ABORTED;
- sense_key = ABORTED_COMMAND;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_INVALID_NS:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
- case NVME_SC_LBA_RANGE:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ILLEGAL_BLOCK;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_CAP_EXCEEDED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_NS_NOT_READY:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = NOT_READY;
- asc = SCSI_ASC_LUN_NOT_READY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Command Specific Status */
- case NVME_SC_INVALID_FORMAT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
- ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
- break;
- case NVME_SC_BAD_ATTRIBUTES:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_INVALID_CDB;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
-
- /* Media Errors */
- case NVME_SC_WRITE_FAULT:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_READ_ERROR:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_GUARD_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
- break;
- case NVME_SC_APPTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
- break;
- case NVME_SC_REFTAG_CHECK:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MEDIUM_ERROR;
- asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
- ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
- break;
- case NVME_SC_COMPARE_FAILED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = MISCOMPARE;
- asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- case NVME_SC_ACCESS_DENIED:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
- ascq = SCSI_ASCQ_INVALID_LUN_ID;
- break;
-
- /* Unspecified/Default */
- case NVME_SC_CMDID_CONFLICT:
- case NVME_SC_CMD_SEQ_ERROR:
- case NVME_SC_CQ_INVALID:
- case NVME_SC_QID_INVALID:
- case NVME_SC_QUEUE_SIZE:
- case NVME_SC_ABORT_LIMIT:
- case NVME_SC_ABORT_MISSING:
- case NVME_SC_ASYNC_LIMIT:
- case NVME_SC_FIRMWARE_SLOT:
- case NVME_SC_FIRMWARE_IMAGE:
- case NVME_SC_INVALID_VECTOR:
- case NVME_SC_INVALID_LOG_PAGE:
- default:
- status = SAM_STAT_CHECK_CONDITION;
- sense_key = ILLEGAL_REQUEST;
- asc = SCSI_ASC_NO_SENSE;
- ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- break;
- }
-
- res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
- return res ? res : nvme_sc;
-}
-
-/* INQUIRY Helper Functions */
-
-static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ns *id_ns;
- int res;
- int nvme_sc;
- int xfer_len;
- u8 resp_data_format = 0x02;
- u8 protect;
- u8 cmdque = 0x01 << 1;
- u8 fw_offset = sizeof(ctrl->firmware_rev);
-
- /* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[2] = VERSION_SPC_4;
- inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */
- inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
- inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
- inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
- strncpy(&inq_response[8], "NVMe ", 8);
- strncpy(&inq_response[16], ctrl->model, 16);
-
- while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
- fw_offset--;
- fw_offset -= 4;
- strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */
- inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */
- inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
- inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
- inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
- inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
- inq_response[9] = INQ_BDEV_LIMITS_PAGE;
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response,
- int alloc_len)
-{
- int xfer_len;
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
- inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
- strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
-
- xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-}
-
-static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- struct nvme_id_ns *id_ns;
- int nvme_sc, res;
- size_t len;
- void *eui;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- eui = id_ns->eui64;
- len = sizeof(id_ns->eui64);
-
- if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
- if (bitmap_empty(eui, len * 8)) {
- eui = id_ns->nguid;
- len = sizeof(id_ns->nguid);
- }
- }
-
- if (bitmap_empty(eui, len * 8)) {
- res = -EOPNOTSUPP;
- goto out_free_id;
- }
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 4 + len; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = len; /* Designator Length */
- memcpy(&inq_response[8], eui, len);
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
-out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
-{
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- int nvme_sc, res;
-
- if (alloc_len < 72) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
- inq_response[3] = 0x48; /* Page Length */
-
- /* Designation Descriptor start */
- inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
- inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 0x44; /* Designator Length */
-
- sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
- memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
- sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
- memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
-
- res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int alloc_len)
-{
- int res;
-
- if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
- res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
- if (res != -EOPNOTSUPP)
- return res;
- }
-
- return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
-}
-
-static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int nvme_sc;
- struct nvme_ctrl *ctrl = ns->ctrl;
- struct nvme_id_ctrl *id_ctrl;
- struct nvme_id_ns *id_ns;
- int xfer_len;
- u8 microcode = 0x80;
- u8 spt;
- u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
- u8 grd_chk, app_chk, ref_chk, protect;
- u8 uask_sup = 0x20;
- u8 v_sup;
- u8 luiclr = 0x01;
-
- inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL)
- return -ENOMEM;
-
- nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- spt = spt_lut[id_ns->dpc & 0x07] << 3;
- if (id_ns->dps)
- protect = 0x01;
- else
- protect = 0;
- kfree(id_ns);
-
- grd_chk = protect << 2;
- app_chk = protect << 1;
- ref_chk = protect;
-
- nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_free_inq;
-
- v_sup = id_ctrl->vwc;
- kfree(id_ctrl);
-
- memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
- inq_response[5] = uask_sup;
- inq_response[6] = v_sup;
- inq_response[7] = luiclr;
- inq_response[8] = 0;
- inq_response[9] = 0;
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free_inq:
- kfree(inq_response);
- return res;
-}
-
-static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
-{
- __be32 max_sectors = cpu_to_be32(
- nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
- __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
- __be32 discard_desc_count = cpu_to_be32(0x100);
-
- memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
- inq_response[1] = VPD_BLOCK_LIMITS;
- inq_response[3] = 0x3c; /* Page Length */
- memcpy(&inq_response[8], &max_sectors, sizeof(u32));
- memcpy(&inq_response[20], &max_discard, sizeof(u32));
-
- if (max_discard)
- memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
-
- return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
-}
-
-static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- u8 *inq_response;
- int res;
- int xfer_len;
-
- inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
- inq_response[2] = 0x00; /* Page Length MSB */
- inq_response[3] = 0x3C; /* Page Length LSB */
- inq_response[4] = 0x00; /* Medium Rotation Rate MSB */
- inq_response[5] = 0x01; /* Medium Rotation Rate LSB */
- inq_response[6] = 0x00; /* Form Factor */
-
- xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-/* LOG SENSE Helper Functions */
-
-static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
-
- log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
- log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
- log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
-
- xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- kfree(log_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u8 temp_c;
- u16 temp_k;
-
- log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
- /* Informational Exceptions Log Parameter 1 Start */
- /* Parameter Code=0x0000 bytes 4,5 */
- log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
- log_response[7] = 0x04; /* PARAMETER LENGTH */
- /* Add sense Code and qualifier = 0x00 each */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[10] = temp_c;
-
- xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- int alloc_len)
-{
- int res;
- int xfer_len;
- u8 *log_response;
- struct nvme_smart_log *smart_log;
- u32 feature_resp;
- u8 temp_c_cur, temp_c_thresh;
- u16 temp_k;
-
- log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL)
- return -ENOMEM;
-
- res = nvme_get_log_page(ns->ctrl, &smart_log);
- if (res < 0)
- goto out_free_response;
-
- if (res != NVME_SC_SUCCESS) {
- temp_c_cur = LOG_TEMP_UNKNOWN;
- } else {
- temp_k = (smart_log->temperature[1] << 8) +
- (smart_log->temperature[0]);
- temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
- }
- kfree(smart_log);
-
- /* Get Features for Temp Threshold */
- res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
- &feature_resp);
- if (res != NVME_SC_SUCCESS)
- temp_c_thresh = LOG_TEMP_UNKNOWN;
- else
- temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
-
- log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
- /* Subpage=0x00, Page Length MSB=0 */
- log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
- /* Temperature Log Parameter 1 (Temperature) Start */
- /* Parameter Code = 0x0000 */
- log_response[6] = 0x01; /* Format and Linking = 01b */
- log_response[7] = 0x02; /* Parameter Length */
- /* Use Temperature from NVMe Get Log Page, convert to C from K */
- log_response[9] = temp_c_cur;
- /* Temperature Log Parameter 2 (Reference Temperature) Start */
- log_response[11] = 0x01; /* Parameter Code = 0x0001 */
- log_response[12] = 0x01; /* Format and Linking = 01b */
- log_response[13] = 0x02; /* Parameter Length */
- /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
- log_response[15] = temp_c_thresh;
-
- xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
- res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
-
- out_free_response:
- kfree(log_response);
- return res;
-}
-
-/* MODE SENSE Helper Functions */
-
-static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
- u16 mode_data_length, u16 blk_desc_len)
-{
- /* Quick check to make sure I don't stomp on my own memory... */
- if ((cdb10 && len < 8) || (!cdb10 && len < 4))
- return -EINVAL;
-
- if (cdb10) {
- resp[0] = (mode_data_length & 0xFF00) >> 8;
- resp[1] = (mode_data_length & 0x00FF);
- resp[3] = 0x10 /* DPOFUA */;
- resp[4] = llbaa;
- resp[5] = RESERVED_FIELD;
- resp[6] = (blk_desc_len & 0xFF00) >> 8;
- resp[7] = (blk_desc_len & 0x00FF);
- } else {
- resp[0] = (mode_data_length & 0x00FF);
- resp[2] = 0x10 /* DPOFUA */;
- resp[3] = (blk_desc_len & 0x00FF);
- }
-
- return 0;
-}
-
-static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len, u8 llbaa)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 flbas;
- u32 lba_length;
-
- if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
- return -EINVAL;
- else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
-
- if (llbaa == 0) {
- __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
- /* Byte 4 is reserved */
- __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
-
- memcpy(resp, &tmp_cap, sizeof(u32));
- memcpy(&resp[4], &tmp_len, sizeof(u32));
- } else {
- __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
- __be32 tmp_len = cpu_to_be32(lba_length);
-
- memcpy(resp, &tmp_cap, sizeof(u64));
- /* Bytes 8, 9, 10, 11 are reserved */
- memcpy(&resp[12], &tmp_len, sizeof(u32));
- }
-
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_fill_control_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_CONTROL_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_CONTROL;
- resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
- resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1,
- * D_SENSE=1, GLTSD=1, RLEC=0 */
- resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
- /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */
- resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
- /* resp[6] and [7] are obsolete, thus zero */
- resp[8] = 0xFF; /* Busy timeout period = 0xffff */
- resp[9] = 0xFF;
- /* Bytes 10,11: Extended selftest completion time = 0x0000 */
-
- return 0;
-}
-
-static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res = 0;
- int nvme_sc;
- u32 feature_resp;
- u8 vwc;
-
- if (len < MODE_PAGE_CACHING_LEN)
- return -EINVAL;
-
- nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
- &feature_resp);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- vwc = feature_resp & 0x00000001;
-
- resp[0] = MODE_PAGE_CACHING;
- resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
- resp[2] = vwc << 2;
- return 0;
-}
-
-static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_POW_CND_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_POWER_CONDITION;
- resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *resp,
- int len)
-{
- if (len < MODE_PAGE_INF_EXC_LEN)
- return -EINVAL;
-
- resp[0] = MODE_PAGE_INFO_EXCEP;
- resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
- resp[2] = 0x88;
- /* All other bytes are zero */
-
- return 0;
-}
-
-static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *resp, int len)
-{
- int res;
- u16 mode_pages_offset_1 = 0;
- u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
-
- mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
- mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
- mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
-
- res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
- MODE_PAGE_CACHING_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
- MODE_PAGE_CONTROL_LEN);
- if (res)
- return res;
- res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
- MODE_PAGE_POW_CND_LEN);
- if (res)
- return res;
- return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
- MODE_PAGE_INF_EXC_LEN);
-}
-
-static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
-{
- if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
- /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
- return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
- } else {
- return 0;
- }
-}
-
-static int nvme_trans_mode_page_create(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *cmd,
- u16 alloc_len, u8 cdb10,
- int (*mode_page_fill_func)
- (struct nvme_ns *,
- struct sg_io_hdr *hdr, u8 *, int),
- u16 mode_pages_tot_len)
-{
- int res;
- int xfer_len;
- u8 *response;
- u8 dbd, llbaa;
- u16 resp_size;
- int mph_size;
- u16 mode_pages_offset_1;
- u16 blk_desc_len, blk_desc_offset, mode_data_length;
-
- dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
- llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
- mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
-
- blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
-
- resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
- /* Refer spc4r34 Table 440 for calculation of Mode data Length field */
- mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
-
- blk_desc_offset = mph_size;
- mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
- llbaa, mode_data_length, blk_desc_len);
- if (res)
- goto out_free;
- if (blk_desc_len > 0) {
- res = nvme_trans_fill_blk_desc(ns, hdr,
- &response[blk_desc_offset],
- blk_desc_len, llbaa);
- if (res)
- goto out_free;
- }
- res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
- mode_pages_tot_len);
- if (res)
- goto out_free;
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- out_free:
- kfree(response);
- out_mem:
- return res;
-}
-
-/* Read Capacity Helper Functions */
-
-static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
- u8 cdb16)
-{
- u8 flbas;
- u32 lba_length;
- u64 rlba;
- u8 prot_en;
- u8 p_type_lut[4] = {0, 0, 1, 2};
- __be64 tmp_rlba;
- __be32 tmp_rlba_32;
- __be32 tmp_len;
-
- flbas = (id_ns->flbas) & 0x0F;
- lba_length = (1 << (id_ns->lbaf[flbas].ds));
- rlba = le64_to_cpup(&id_ns->nsze) - 1;
- (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
-
- if (!cdb16) {
- if (rlba > 0xFFFFFFFF)
- rlba = 0xFFFFFFFF;
- tmp_rlba_32 = cpu_to_be32(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba_32, sizeof(u32));
- memcpy(&response[4], &tmp_len, sizeof(u32));
- } else {
- tmp_rlba = cpu_to_be64(rlba);
- tmp_len = cpu_to_be32(lba_length);
- memcpy(response, &tmp_rlba, sizeof(u64));
- memcpy(&response[8], &tmp_len, sizeof(u32));
- response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
- /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
- /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
- /* Bytes 16-31 - Reserved */
- }
-}
-
-/* Start Stop Unit Helper Functions */
-
-static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 buffer_id)
-{
- struct nvme_command c;
- int nvme_sc;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_activate_fw;
- c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 opcode, u32 tot_len, u32 offset,
- u8 buffer_id)
-{
- int nvme_sc;
- struct nvme_command c;
-
- if (hdr->iovec_count > 0) {
- /* Assuming SGL is not allowed for this command */
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_download_fw;
- c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
- c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-
- nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
- hdr->dxferp, tot_len, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-/* Mode Select Helper Functions */
-
-static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
- u16 *bd_len, u8 *llbaa)
-{
- if (cdb10) {
- /* 10 Byte CDB */
- *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
- parm_list[MODE_SELECT_10_BD_OFFSET + 1];
- *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
- MODE_SELECT_10_LLBAA_MASK;
- } else {
- /* 6 Byte CDB */
- *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
- }
-}
-
-static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
- u16 idx, u16 bd_len, u8 llbaa)
-{
- /* Store block descriptor info if a FORMAT UNIT comes later */
- /* TODO Saving 1st BD info; what to do if multiple BD received? */
- if (llbaa == 0) {
- /* Standard Block Descriptor - spc4r34 7.5.5.1 */
- ns->mode_select_num_blocks =
- (parm_list[idx + 1] << 16) +
- (parm_list[idx + 2] << 8) +
- (parm_list[idx + 3]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 5] << 16) +
- (parm_list[idx + 6] << 8) +
- (parm_list[idx + 7]);
- } else {
- /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
- ns->mode_select_num_blocks =
- (((u64)parm_list[idx + 0]) << 56) +
- (((u64)parm_list[idx + 1]) << 48) +
- (((u64)parm_list[idx + 2]) << 40) +
- (((u64)parm_list[idx + 3]) << 32) +
- (((u64)parm_list[idx + 4]) << 24) +
- (((u64)parm_list[idx + 5]) << 16) +
- (((u64)parm_list[idx + 6]) << 8) +
- ((u64)parm_list[idx + 7]);
-
- ns->mode_select_block_len =
- (parm_list[idx + 12] << 24) +
- (parm_list[idx + 13] << 16) +
- (parm_list[idx + 14] << 8) +
- (parm_list[idx + 15]);
- }
-}
-
-static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *mode_page, u8 page_code)
-{
- int res = 0;
- int nvme_sc;
- unsigned dword11;
-
- switch (page_code) {
- case MODE_PAGE_CACHING:
- dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
- nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
- dword11, NULL, 0, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- break;
- case MODE_PAGE_CONTROL:
- break;
- case MODE_PAGE_POWER_CONDITION:
- /* Verify the OS is not trying to set timers */
- if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- return res;
-}
-
-static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u16 parm_list_len, u8 pf,
- u8 sp, u8 cdb10)
-{
- int res;
- u8 *parm_list;
- u16 bd_len;
- u8 llbaa = 0;
- u16 index, saved_index;
- u8 page_code;
- u16 mp_size;
-
- /* Get parm list from data-in/out buffer */
- parm_list = kmalloc(parm_list_len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
- if (res)
- goto out_mem;
-
- nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
- index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
-
- if (bd_len != 0) {
- /* Block Descriptors present, parse */
- nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
- index += bd_len;
- }
- saved_index = index;
-
- /* Multiple mode pages may be present; iterate through all */
- /* In 1st Iteration, don't do NVME Command, only check for CDB errors */
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- if ((page_code != MODE_PAGE_CACHING) &&
- (page_code != MODE_PAGE_CONTROL) &&
- (page_code != MODE_PAGE_POWER_CONDITION)) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- index += mp_size;
- } while (index < parm_list_len);
-
- /* In 2nd Iteration, do the NVME Commands */
- index = saved_index;
- do {
- page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
- mp_size = parm_list[index + 1] + 2;
- res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
- page_code);
- if (res)
- break;
- index += mp_size;
- } while (index < parm_list_len);
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-/* Format Unit Helper Functions */
-
-static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int res = 0;
- int nvme_sc;
- u8 flbas;
-
- /*
- * SCSI Expects a MODE SELECT would have been issued prior to
- * a FORMAT UNIT, and the block size and number would be used
- * from the block descriptor in it. If a MODE SELECT had not
- * been issued, FORMAT shall use the current values for both.
- */
-
- if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
- struct nvme_id_ns *id_ns;
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- if (ns->mode_select_num_blocks == 0)
- ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
- if (ns->mode_select_block_len == 0) {
- flbas = (id_ns->flbas) & 0x0F;
- ns->mode_select_block_len =
- (1 << (id_ns->lbaf[flbas].ds));
- }
-
- kfree(id_ns);
- }
-
- return 0;
-}
-
-static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
- u8 format_prot_info, u8 *nvme_pf_code)
-{
- int res;
- u8 *parm_list;
- u8 pf_usage, pf_code;
-
- parm_list = kmalloc(len, GFP_KERNEL);
- if (parm_list == NULL) {
- res = -ENOMEM;
- goto out;
- }
- res = nvme_trans_copy_from_user(hdr, parm_list, len);
- if (res)
- goto out_mem;
-
- if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
- FORMAT_UNIT_IMMED_MASK) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
-
- if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
- (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_mem;
- }
- pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
- FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
- pf_code = (pf_usage << 2) | format_prot_info;
- switch (pf_code) {
- case 0:
- *nvme_pf_code = 0;
- break;
- case 2:
- *nvme_pf_code = 1;
- break;
- case 3:
- *nvme_pf_code = 2;
- break;
- case 7:
- *nvme_pf_code = 3;
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out_mem:
- kfree(parm_list);
- out:
- return res;
-}
-
-static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 prot_info)
-{
- int res;
- int nvme_sc;
- struct nvme_id_ns *id_ns;
- u8 i;
- u8 nlbaf;
- u8 selected_lbaf = 0xFF;
- u32 cdw10 = 0;
- struct nvme_command c;
-
- /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- nlbaf = id_ns->nlbaf;
-
- for (i = 0; i < nlbaf; i++) {
- if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
- selected_lbaf = i;
- break;
- }
- }
- if (selected_lbaf > 0x0F) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
-
- cdw10 |= prot_info << 5;
- cdw10 |= selected_lbaf & 0x0F;
- memset(&c, 0, sizeof(c));
- c.format.opcode = nvme_admin_format_nvm;
- c.format.nsid = cpu_to_le32(ns->ns_id);
- c.format.cdw10 = cpu_to_le32(cdw10);
-
- nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(id_ns);
- return res;
-}
-
-static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info,
- u32 max_blocks)
-{
- /* If using iovecs, send one nvme command per vector */
- if (hdr->iovec_count > 0)
- return hdr->iovec_count;
- else if (cdb_info->xfer_len > max_blocks)
- return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
- else
- return 1;
-}
-
-static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
- struct nvme_trans_io_cdb *cdb_info)
-{
- u16 control = 0;
-
- /* When Protection information support is added, implement here */
-
- if (cdb_info->fua > 0)
- control |= NVME_RW_FUA;
-
- return control;
-}
-
-static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- struct nvme_trans_io_cdb *cdb_info, u8 is_write)
-{
- int nvme_sc = NVME_SC_SUCCESS;
- u32 num_cmds;
- u64 unit_len;
- u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
- u32 retcode;
- u32 i = 0;
- u64 nvme_offset = 0;
- void __user *next_mapping_addr;
- struct nvme_command c;
- u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
- u16 control;
- u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
-
- num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
-
- /*
- * This loop handles two cases.
- * First, when an SGL is used in the form of an iovec list:
- * - Use iov_base as the next mapping address for the nvme command_id
- * - Use iov_len as the data transfer length for the command.
- * Second, when we have a single buffer
- * - If larger than max_blocks, split into chunks, offset
- * each nvme command accordingly.
- */
- for (i = 0; i < num_cmds; i++) {
- memset(&c, 0, sizeof(c));
- if (hdr->iovec_count > 0) {
- struct sg_iovec sgl;
-
- retcode = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (retcode)
- return -EFAULT;
- unit_len = sgl.iov_len;
- unit_num_blocks = unit_len >> ns->lba_shift;
- next_mapping_addr = sgl.iov_base;
- } else {
- unit_num_blocks = min((u64)max_blocks,
- (cdb_info->xfer_len - nvme_offset));
- unit_len = unit_num_blocks << ns->lba_shift;
- next_mapping_addr = hdr->dxferp +
- ((1 << ns->lba_shift) * nvme_offset);
- }
-
- c.rw.opcode = opcode;
- c.rw.nsid = cpu_to_le32(ns->ns_id);
- c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
- c.rw.length = cpu_to_le16(unit_num_blocks - 1);
- control = nvme_trans_io_get_control(ns, cdb_info);
- c.rw.control = cpu_to_le16(control);
-
- if (get_capacity(ns->disk) - unit_num_blocks <
- cdb_info->lba + nvme_offset) {
- nvme_sc = NVME_SC_LBA_RANGE;
- break;
- }
- nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
- next_mapping_addr, unit_len, NULL, 0);
- if (nvme_sc)
- break;
-
- nvme_offset += unit_num_blocks;
- }
-
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-
-/* SCSI Command Translation Functions */
-
-static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
- u8 *cmd)
-{
- int res = 0;
- struct nvme_trans_io_cdb cdb_info = { 0, };
- u8 opcode = cmd[0];
- u64 xfer_bytes;
- u64 sum_iov_len = 0;
- struct sg_iovec sgl;
- int i;
- size_t not_copied;
-
- /*
- * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
- * but always in the same place for all others.
- */
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- break;
- default:
- cdb_info.fua = cmd[1] & 0x8;
- cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
- if (cdb_info.prot_info && !ns->pi_type) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- }
-
- switch (opcode) {
- case WRITE_6:
- case READ_6:
- cdb_info.lba = get_unaligned_be24(&cmd[1]);
- cdb_info.xfer_len = cmd[4];
- if (cdb_info.xfer_len == 0)
- cdb_info.xfer_len = 256;
- break;
- case WRITE_10:
- case READ_10:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
- break;
- case WRITE_12:
- case READ_12:
- cdb_info.lba = get_unaligned_be32(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
- break;
- case WRITE_16:
- case READ_16:
- cdb_info.lba = get_unaligned_be64(&cmd[2]);
- cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
- break;
- default:
- /* Will never really reach here */
- res = -EIO;
- goto out;
- }
-
- /* Calculate total length of transfer (in bytes) */
- if (hdr->iovec_count > 0) {
- for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
- i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
- return -EFAULT;
- sum_iov_len += sgl.iov_len;
- /* IO vector sizes should be multiples of block size */
- if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_PARAMETER,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- }
- } else {
- sum_iov_len = hdr->dxfer_len;
- }
-
- /* As Per sg ioctl howto, if the lengths differ, use the lower one */
- xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
-
- /* If block count and actual data buffer size dont match, error out */
- if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
- res = -EINVAL;
- goto out;
- }
-
- /* Check for 0 length transfer - it is not illegal */
- if (cdb_info.xfer_len == 0)
- goto out;
-
- /* Send NVMe IO Command(s) */
- res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
- if (res)
- goto out;
-
- out:
- return res;
-}
-
-static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u8 evpd;
- u8 page_code;
- int alloc_len;
- u8 *inq_response;
-
- evpd = cmd[1] & 0x01;
- page_code = cmd[2];
- alloc_len = get_unaligned_be16(&cmd[3]);
-
- inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
- GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- if (evpd == 0) {
- if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
- res = nvme_trans_standard_inquiry_page(ns, hdr,
- inq_response, alloc_len);
- } else {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- } else {
- switch (page_code) {
- case VPD_SUPPORTED_PAGES:
- res = nvme_trans_supported_vpd_pages(ns, hdr,
- inq_response, alloc_len);
- break;
- case VPD_SERIAL_NUMBER:
- res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_DEVICE_IDENTIFIERS:
- res = nvme_trans_device_id_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_EXTENDED_INQUIRY:
- res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
- break;
- case VPD_BLOCK_LIMITS:
- res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
- alloc_len);
- break;
- case VPD_BLOCK_DEV_CHARACTERISTICS:
- res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- }
- kfree(inq_response);
- out_mem:
- return res;
-}
-
-static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u16 alloc_len;
- u8 pc;
- u8 page_code;
-
- if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
- pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
- if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- alloc_len = get_unaligned_be16(&cmd[7]);
- switch (page_code) {
- case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
- res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
- res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
- break;
- case LOG_PAGE_TEMPERATURE_PAGE:
- res = nvme_trans_log_temperature(ns, hdr, alloc_len);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- u8 cdb10 = 0;
- u16 parm_list_len;
- u8 page_format;
- u8 save_pages;
-
- page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
- save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
- if (cmd[0] == MODE_SELECT) {
- parm_list_len = cmd[4];
- } else {
- parm_list_len = cmd[7];
- cdb10 = 1;
- }
-
- if (parm_list_len != 0) {
- /*
- * According to SPC-4 r24, a paramter list length field of 0
- * shall not be considered an error
- */
- return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
- page_format, save_pages, cdb10);
- }
-
- return 0;
-}
-
-static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u16 alloc_len;
- u8 cdb10 = 0;
-
- if (cmd[0] == MODE_SENSE) {
- alloc_len = cmd[4];
- } else {
- alloc_len = get_unaligned_be16(&cmd[7]);
- cdb10 = 1;
- }
-
- if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
- MODE_SENSE_PC_CURRENT_VALUES) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
- case MODE_PAGE_CACHING:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_caching_page,
- MODE_PAGE_CACHING_LEN);
- break;
- case MODE_PAGE_CONTROL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_control_page,
- MODE_PAGE_CONTROL_LEN);
- break;
- case MODE_PAGE_POWER_CONDITION:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_pow_cnd_page,
- MODE_PAGE_POW_CND_LEN);
- break;
- case MODE_PAGE_INFO_EXCEP:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_inf_exc_page,
- MODE_PAGE_INF_EXC_LEN);
- break;
- case MODE_PAGE_RETURN_ALL:
- res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
- cdb10,
- &nvme_trans_fill_all_pages,
- MODE_PAGE_ALL_LEN);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd, u8 cdb16)
-{
- int res;
- int nvme_sc;
- u32 alloc_len;
- u32 resp_size;
- u32 xfer_len;
- struct nvme_id_ns *id_ns;
- u8 *response;
-
- if (cdb16) {
- alloc_len = get_unaligned_be32(&cmd[10]);
- resp_size = READ_CAP_16_RESP_SIZE;
- } else {
- alloc_len = READ_CAP_10_RESP_SIZE;
- resp_size = READ_CAP_10_RESP_SIZE;
- }
-
- nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
- nvme_trans_fill_read_cap(response, id_ns, cdb16);
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ns);
- return res;
-}
-
-static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- int nvme_sc;
- u32 alloc_len, xfer_len, resp_size;
- u8 *response;
- struct nvme_id_ctrl *id_ctrl;
- u32 ll_length, lun_id;
- u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
- __be32 tmp_len;
-
- switch (cmd[2]) {
- default:
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- case ALL_LUNS_RETURNED:
- case ALL_WELL_KNOWN_LUNS_RETURNED:
- case RESTRICTED_LUNS_RETURNED:
- nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
-
- ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
- resp_size = ll_length + LUN_DATA_HEADER_SIZE;
-
- alloc_len = get_unaligned_be32(&cmd[6]);
- if (alloc_len < resp_size) {
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_free_id;
- }
-
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out_free_id;
- }
-
- /* The first LUN ID will always be 0 per the SAM spec */
- for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
- /*
- * Set the LUN Id and then increment to the next LUN
- * location in the parameter data.
- */
- __be64 tmp_id = cpu_to_be64(lun_id);
- memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
- lun_id_offset += LUN_ENTRY_SIZE;
- }
- tmp_len = cpu_to_be32(ll_length);
- memcpy(response, &tmp_len, sizeof(u32));
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out_free_id:
- kfree(id_ctrl);
- return res;
-}
-
-static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 alloc_len, xfer_len, resp_size;
- u8 desc_format;
- u8 *response;
-
- desc_format = cmd[1] & 0x01;
- alloc_len = cmd[4];
-
- resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
- (FIXED_FMT_SENSE_DATA_SIZE));
- response = kzalloc(resp_size, GFP_KERNEL);
- if (response == NULL) {
- res = -ENOMEM;
- goto out;
- }
-
- if (desc_format) {
- /* Descriptor Format Sense Data */
- response[0] = DESC_FORMAT_SENSE_DATA;
- response[1] = NO_SENSE;
- /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
- response[2] = SCSI_ASC_NO_SENSE;
- response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
- } else {
- /* Fixed Format Sense Data */
- response[0] = FIXED_SENSE_DATA;
- /* Byte 1 = Obsolete */
- response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
- /* Bytes 3-6 - Information - set to zero */
- response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
- /* Bytes 8-11 - Cmd Specific Information - set to zero */
- response[12] = SCSI_ASC_NO_SENSE;
- response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
- /* Byte 14 = Field Replaceable Unit Code = 0 */
- /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
- }
-
- xfer_len = min(alloc_len, resp_size);
- res = nvme_trans_copy_to_user(hdr, response, xfer_len);
-
- kfree(response);
- out:
- return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
- struct sg_io_hdr *hdr)
-{
- int nvme_sc;
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
- return nvme_trans_status_code(hdr, nvme_sc);
-}
-
-static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res;
- u8 parm_hdr_len = 0;
- u8 nvme_pf_code = 0;
- u8 format_prot_info, long_list, format_data;
-
- format_prot_info = (cmd[1] & 0xc0) >> 6;
- long_list = cmd[1] & 0x20;
- format_data = cmd[1] & 0x10;
-
- if (format_data != 0) {
- if (format_prot_info != 0) {
- if (long_list == 0)
- parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
- else
- parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
- }
- } else if (format_data == 0 && format_prot_info != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
-
- /* Get parm header from data-in/out buffer */
- /*
- * According to the translation spec, the only fields in the parameter
- * list we are concerned with are in the header. So allocate only that.
- */
- if (parm_hdr_len > 0) {
- res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
- format_prot_info, &nvme_pf_code);
- if (res)
- goto out;
- }
-
- /* Attempt to activate any previously downloaded firmware image */
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
-
- /* Determine Block size and count and send format command */
- res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
- if (res)
- goto out;
-
- res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
-
- out:
- return res;
-}
-
-static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
- struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- if (nvme_ctrl_ready(ns->ctrl))
- return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- NOT_READY, SCSI_ASC_LUN_NOT_READY,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- else
- return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-}
-
-static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- int res = 0;
- u32 buffer_offset, parm_list_length;
- u8 buffer_id, mode;
-
- parm_list_length = get_unaligned_be24(&cmd[6]);
- if (parm_list_length % BYTES_TO_DWORDS != 0) {
- /* NVMe expects Firmware file to be a whole number of DWORDS */
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- buffer_id = cmd[2];
- if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- mode = cmd[1] & 0x1f;
- buffer_offset = get_unaligned_be24(&cmd[3]);
-
- switch (mode) {
- case DOWNLOAD_SAVE_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- if (res)
- goto out;
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- case DOWNLOAD_SAVE_DEFER_ACTIVATE:
- res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
- parm_list_length, buffer_offset,
- buffer_id);
- break;
- case ACTIVATE_DEFERRED_MICROCODE:
- res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
- break;
- default:
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
-
- out:
- return res;
-}
-
-struct scsi_unmap_blk_desc {
- __be64 slba;
- __be32 nlb;
- u32 resv;
-};
-
-struct scsi_unmap_parm_list {
- __be16 unmap_data_len;
- __be16 unmap_blk_desc_data_len;
- u32 resv;
- struct scsi_unmap_blk_desc desc[0];
-};
-
-static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
-{
- struct scsi_unmap_parm_list *plist;
- struct nvme_dsm_range *range;
- struct nvme_command c;
- int i, nvme_sc, res;
- u16 ndesc, list_len;
-
- list_len = get_unaligned_be16(&cmd[7]);
- if (!list_len)
- return -EINVAL;
-
- plist = kmalloc(list_len, GFP_KERNEL);
- if (!plist)
- return -ENOMEM;
-
- res = nvme_trans_copy_from_user(hdr, plist, list_len);
- if (res)
- goto out;
-
- ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
- if (!ndesc || ndesc > 256) {
- res = -EINVAL;
- goto out;
- }
-
- range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
- if (!range) {
- res = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < ndesc; i++) {
- range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
- range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
- range[i].cattr = 0;
- }
-
- memset(&c, 0, sizeof(c));
- c.dsm.opcode = nvme_cmd_dsm;
- c.dsm.nsid = cpu_to_le32(ns->ns_id);
- c.dsm.nr = cpu_to_le32(ndesc - 1);
- c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
-
- nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
- ndesc * sizeof(*range));
- res = nvme_trans_status_code(hdr, nvme_sc);
-
- kfree(range);
- out:
- kfree(plist);
- return res;
-}
-
-static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
-{
- u8 cmd[16];
- int retcode;
- unsigned int opcode;
-
- if (hdr->cmdp == NULL)
- return -EMSGSIZE;
- if (hdr->cmd_len > sizeof(cmd))
- return -EINVAL;
- if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
- return -EFAULT;
-
- /*
- * Prime the hdr with good status for scsi commands that don't require
- * an nvme command for translation.
- */
- retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
- if (retcode)
- return retcode;
-
- opcode = cmd[0];
-
- switch (opcode) {
- case READ_6:
- case READ_10:
- case READ_12:
- case READ_16:
- retcode = nvme_trans_io(ns, hdr, 0, cmd);
- break;
- case WRITE_6:
- case WRITE_10:
- case WRITE_12:
- case WRITE_16:
- retcode = nvme_trans_io(ns, hdr, 1, cmd);
- break;
- case INQUIRY:
- retcode = nvme_trans_inquiry(ns, hdr, cmd);
- break;
- case LOG_SENSE:
- retcode = nvme_trans_log_sense(ns, hdr, cmd);
- break;
- case MODE_SELECT:
- case MODE_SELECT_10:
- retcode = nvme_trans_mode_select(ns, hdr, cmd);
- break;
- case MODE_SENSE:
- case MODE_SENSE_10:
- retcode = nvme_trans_mode_sense(ns, hdr, cmd);
- break;
- case READ_CAPACITY:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
- break;
- case SERVICE_ACTION_IN_16:
- switch (cmd[1]) {
- case SAI_READ_CAPACITY_16:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
- break;
- default:
- goto out;
- }
- break;
- case REPORT_LUNS:
- retcode = nvme_trans_report_luns(ns, hdr, cmd);
- break;
- case REQUEST_SENSE:
- retcode = nvme_trans_request_sense(ns, hdr, cmd);
- break;
- case SYNCHRONIZE_CACHE:
- retcode = nvme_trans_synchronize_cache(ns, hdr);
- break;
- case FORMAT_UNIT:
- retcode = nvme_trans_format_unit(ns, hdr, cmd);
- break;
- case TEST_UNIT_READY:
- retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
- break;
- case WRITE_BUFFER:
- retcode = nvme_trans_write_buffer(ns, hdr, cmd);
- break;
- case UNMAP:
- retcode = nvme_trans_unmap(ns, hdr, cmd);
- break;
- default:
- out:
- retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- break;
- }
- return retcode;
-}
-
-int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
-{
- struct sg_io_hdr hdr;
- int retcode;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
- return -EFAULT;
- if (hdr.interface_id != 'S')
- return -EINVAL;
-
- /*
- * A positive return code means a NVMe status, which has been
- * translated to sense data.
- */
- retcode = nvme_scsi_translate(ns, &hdr);
- if (retcode < 0)
- return retcode;
- if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
- return -EFAULT;
- return 0;
-}
-
-int nvme_sg_get_version_num(int __user *ip)
-{
- return put_user(sg_version_num, ip);
-}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f970..35f930d 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
- static const int buf_size = 4096;
+ static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
nvmet_req_complete(req, status);
}
+static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
+ void *id, off_t *off)
+{
+ struct nvme_ns_id_desc desc = {
+ .nidt = type,
+ .nidl = len,
+ };
+ u16 status;
+
+ status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
+ if (status)
+ return status;
+ *off += sizeof(desc);
+
+ status = nvmet_copy_to_sgl(req, *off, id, len);
+ if (status)
+ return status;
+ *off += len;
+
+ return 0;
+}
+
+static void nvmet_execute_identify_desclist(struct nvmet_req *req)
+{
+ struct nvmet_ns *ns;
+ u16 status = 0;
+ off_t off = 0;
+
+ ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ if (!ns) {
+ status = NVME_SC_INVALID_NS | NVME_SC_DNR;
+ goto out;
+ }
+
+ if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
+ NVME_NIDT_UUID_LEN,
+ &ns->uuid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+ if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
+ status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
+ NVME_NIDT_NGUID_LEN,
+ &ns->nguid, &off);
+ if (status)
+ goto out_put_ns;
+ }
+
+ if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
+ off) != NVME_IDENTIFY_DATA_SIZE - off)
+ status = NVME_SC_INTERNAL | NVME_SC_DNR;
+out_put_ns:
+ nvmet_put_namespace(ns);
+out:
+ nvmet_req_complete(req, status);
+}
+
/*
* A "mimimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
}
break;
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_NS:
req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_ID_CNS_NS_ACTIVE_LIST:
req->execute = nvmet_execute_identify_nslist;
return 0;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ req->execute = nvmet_execute_identify_desclist;
+ return 0;
}
break;
case nvme_admin_abort_cmd:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800..a358ecd 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ out_unlock:
CONFIGFS_ATTR(nvmet_ns_, device_path);
+static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
+}
+
+static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ struct nvmet_subsys *subsys = ns->subsys;
+ int ret = 0;
+
+
+ mutex_lock(&subsys->lock);
+ if (ns->enabled) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+
+ if (uuid_parse(page, &ns->uuid))
+ ret = -EINVAL;
+
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return ret ? ret : count;
+}
+
static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
{
return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
}
+CONFIGFS_ATTR(nvmet_ns_, device_uuid);
+
static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
const char *page, size_t count)
{
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
+ &nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_enable,
NULL,
};
@@ -619,8 +650,45 @@ out_unlock:
CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
+static ssize_t nvmet_subsys_version_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+
+ if (NVME_TERTIARY(subsys->ver))
+ return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver),
+ (int)NVME_TERTIARY(subsys->ver));
+ else
+ return snprintf(page, PAGE_SIZE, "%d.%d\n",
+ (int)NVME_MAJOR(subsys->ver),
+ (int)NVME_MINOR(subsys->ver));
+}
+
+static ssize_t nvmet_subsys_version_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ int major, minor, tertiary = 0;
+ int ret;
+
+
+ ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
+ if (ret != 2 && ret != 3)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ subsys->ver = NVME_VS(major, minor, tertiary);
+ up_write(&nvmet_config_sem);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, version);
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
+ &nvmet_subsys_attr_version,
NULL,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index eb9399a..b5b4ac1 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
+ uuid_gen(&ns->uuid);
return ns;
}
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
if (!subsys)
return NULL;
- subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
+ subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
switch (type) {
case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597..8f3b57b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
e->portid = port->disc_addr.portid;
/* we support only dynamic controllers */
e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
- e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
+ e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
e->subtype = type;
memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
case nvme_admin_identify:
- req->data_len = 4096;
+ req->data_len = NVME_IDENTIFY_DATA_SIZE;
switch (cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae..7692a96 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
/* clear any response payload */
memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
+ fod->data_sg = NULL;
+ fod->data_sg_cnt = 0;
+
ret = nvmet_req_init(&fod->req,
&fod->queue->nvme_cq,
&fod->queue->nvme_sq,
&nvmet_fc_tgt_fcp_ops);
- if (!ret) { /* bad SQE content or invalid ctrl state */
- nvmet_fc_abort_op(tgtport, fod);
+ if (!ret) {
+ /* bad SQE content or invalid ctrl state */
+ /* nvmet layer has already called op done to send rsp. */
return;
}
/* keep a running counter of tail position */
atomic_inc(&fod->queue->sqtail);
- fod->data_sg = NULL;
- fod->data_sg_cnt = 0;
if (fod->total_length) {
ret = nvmet_fc_alloc_tgt_pgs(fod);
if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a661..1bb9d5b 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *tgt_fcpreq)
{
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
- int active;
/*
* mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
* after the abort request
*/
spin_lock(&tfcp_req->reqlock);
- active = tfcp_req->active;
tfcp_req->aborted = true;
spin_unlock(&tfcp_req->reqlock);
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d..4012879 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
struct nvmet_req *req = bio->bi_private;
nvmet_req_complete(req,
- bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+ bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
if (bio != &req->inline_bio)
bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
if (status) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
} else {
submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e503cff..5f55c68 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
#include "../host/nvme.h"
#include "../host/fabrics.h"
-#define NVME_LOOP_AQ_DEPTH 256
-
#define NVME_LOOP_MAX_SEGMENTS 256
/*
@@ -31,7 +29,7 @@
*/
#define NVME_LOOP_NR_AEN_COMMANDS 1
#define NVME_LOOP_AQ_BLKMQ_DEPTH \
- (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
+ (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
struct nvme_loop_iod {
struct nvme_request nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
};
struct nvme_loop_ctrl {
- spinlock_t lock;
struct nvme_loop_queue *queues;
u32 queue_count;
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
struct nvmet_ctrl *target_ctrl;
struct work_struct delete_work;
- struct work_struct reset_work;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
/* queue error recovery */
- schedule_work(&iod->queue->ctrl->reset_work);
+ nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
return BLK_EH_HANDLED;
}
-static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
ret = nvme_setup_cmd(ns, req, &iod->cmd);
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret)
return ret;
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_cleanup_cmd(req);
blk_mq_start_request(req);
nvme_loop_queue_response(&iod->req);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
if (blk_rq_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
- ret = sg_alloc_table_chained(&iod->sg_table,
+ if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl);
- if (ret)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ iod->sg_table.sgl))
+ return BLK_STS_RESOURCE;
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
schedule_work(&iod->work);
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
}
static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req),
- hctx_idx + 1);
-}
+ struct nvme_loop_ctrl *ctrl = set->driver_data;
-static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set,
- struct request *req, unsigned int hctx_idx,
- unsigned int numa_node)
-{
- return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
+ return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
+ (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
}
static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
.queue_rq = nvme_loop_queue_rq,
.complete = nvme_loop_complete_rq,
- .init_request = nvme_loop_init_admin_request,
+ .init_request = nvme_loop_init_request,
.init_hctx = nvme_loop_init_admin_hctx,
.timeout = nvme_loop_timeout,
};
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!schedule_work(&ctrl->delete_work))
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
static void nvme_loop_reset_ctrl_work(struct work_struct *work)
{
- struct nvme_loop_ctrl *ctrl = container_of(work,
- struct nvme_loop_ctrl, reset_work);
+ struct nvme_loop_ctrl *ctrl =
+ container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
bool changed;
int ret;
@@ -540,21 +530,6 @@ out_disable:
nvme_put_ctrl(&ctrl->ctrl);
}
-static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
-{
- struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
-
- if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
- return -EBUSY;
-
- if (!schedule_work(&ctrl->reset_work))
- return -EBUSY;
-
- flush_work(&ctrl->reset_work);
-
- return 0;
-}
-
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.name = "loop",
.module = THIS_MODULE,
@@ -562,11 +537,9 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .reset_ctrl = nvme_loop_reset_ctrl,
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_del_ctrl,
- .get_subsysnqn = nvmf_get_subsysnqn,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
INIT_LIST_HEAD(&ctrl->list);
INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
- INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
+ INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
goto out_put_ctrl;
- spin_lock_init(&ctrl->lock);
-
ret = -ENOMEM;
ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
__nvme_loop_del_ctrl(ctrl);
mutex_unlock(&nvme_loop_ctrl_mutex);
- flush_scheduled_work();
+ flush_workqueue(nvme_wq);
}
module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8ff6e43..747bbdb 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -47,6 +47,7 @@ struct nvmet_ns {
u32 blksize_shift;
loff_t size;
u8 nguid[16];
+ uuid_t uuid;
bool enabled;
struct nvmet_subsys *subsys;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde..56a4cba 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
queue->send_queue_size = le16_to_cpu(req->hrqsize);
- if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
+ if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
return NVME_RDMA_CM_INVALID_HSQSIZE;
/* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
/**
* nvme_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id: rdma_cm id, used for nvmet port
* @queue: nvmet rdma queue (cm id qp_context)
- * @addr: nvmet address (cm_id context)
*
* DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
+ * to unplug. Note that this event can be generated on a normal
+ * queue cm_id and/or a device bound listener cm_id (where in this
+ * case queue will be null).
*
- * Note that this event can be generated on a normal queue cm_id
- * and/or a device bound listener cm_id (where in this case
- * queue will be null).
- *
- * we claim ownership on destroying the cm_id. For queues we move
- * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * We registered an ib_client to handle device removal for queues,
+ * so we only need to handle the listening port cm_ids. In this case
* we nullify the priv to prevent double cm_id destruction and destroying
* the cm_id implicitely by returning a non-zero rc to the callout.
*/
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue)
{
- unsigned long flags;
-
- if (!queue) {
- struct nvmet_port *port = cm_id->context;
+ struct nvmet_port *port;
+ if (queue) {
/*
- * This is a listener cm_id. Make sure that
- * future remove_port won't invoke a double
- * cm_id destroy. use atomic xchg to make sure
- * we don't compete with remove_port.
- */
- if (xchg(&port->priv, NULL) != cm_id)
- return 0;
- } else {
- /*
- * This is a queue cm_id. Make sure that
- * release queue will not destroy the cm_id
- * and schedule all ctrl queues removal (only
- * if the queue is not disconnecting already).
+ * This is a queue cm_id. we have registered
+ * an ib_client to handle queues removal
+ * so don't interfear and just return.
*/
- spin_lock_irqsave(&queue->state_lock, flags);
- if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
- queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
- spin_unlock_irqrestore(&queue->state_lock, flags);
- nvmet_rdma_queue_disconnect(queue);
- flush_scheduled_work();
+ return 0;
}
+ port = cm_id->context;
+
+ /*
+ * This is a listener cm_id. Make sure that
+ * future remove_port won't invoke a double
+ * cm_id destroy. use atomic xchg to make sure
+ * we don't compete with remove_port.
+ */
+ if (xchg(&port->priv, NULL) != cm_id)
+ return 0;
+
/*
* We need to return 1 so that the core will destroy
* it's own ID. What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
.delete_ctrl = nvmet_rdma_delete_ctrl,
};
+static void nvmet_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+ struct nvmet_rdma_queue *queue;
+
+ /* Device is being removed, delete all queues using this device */
+ mutex_lock(&nvmet_rdma_queue_mutex);
+ list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
+ if (queue->dev->device != ib_device)
+ continue;
+
+ pr_info("Removing queue %d\n", queue->idx);
+ __nvmet_rdma_queue_disconnect(queue);
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+
+ flush_scheduled_work();
+}
+
+static struct ib_client nvmet_rdma_ib_client = {
+ .name = "nvmet_rdma",
+ .add = nvmet_rdma_add_one,
+ .remove = nvmet_rdma_remove_one
+};
+
static int __init nvmet_rdma_init(void)
{
- return nvmet_register_transport(&nvmet_rdma_ops);
+ int ret;
+
+ ret = ib_register_client(&nvmet_rdma_ib_client);
+ if (ret)
+ return ret;
+
+ ret = nvmet_register_transport(&nvmet_rdma_ops);
+ if (ret)
+ goto err_ib_client;
+
+ return 0;
+
+err_ib_client:
+ ib_unregister_client(&nvmet_rdma_ib_client);
+ return ret;
}
static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
mutex_unlock(&nvmet_rdma_queue_mutex);
flush_scheduled_work();
+ ib_unregister_client(&nvmet_rdma_ib_client);
ida_destroy(&nvmet_rdma_queue_ida);
}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5..b7cbd5d 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
*/
if (basedev->state < DASD_STATE_READY) {
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
return;
}
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"Rejecting write request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"Rejecting failfast request %p",
req);
blk_start_request(req);
- __blk_end_request_all(req, -ETIMEDOUT);
+ __blk_end_request_all(req, BLK_STS_TIMEOUT);
continue;
}
cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
"on request %p",
PTR_ERR(cqr), req);
blk_start_request(req);
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
/*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
int status;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
+
status = cqr->block->base->discipline->free_cp(cqr, req);
if (status < 0)
- error = status;
+ error = errno_to_blk_status(status);
else if (status == 0) {
- if (cqr->intrc == -EPERM)
- error = -EBADE;
- else if (cqr->intrc == -ENOLINK ||
- cqr->intrc == -ETIMEDOUT)
- error = cqr->intrc;
- else
- error = -EIO;
+ switch (cqr->intrc) {
+ case -EPERM:
+ error = BLK_STS_NEXUS;
+ break;
+ case -ENOLINK:
+ error = BLK_STS_TRANSPORT;
+ break;
+ case -ETIMEDOUT:
+ error = BLK_STS_TIMEOUT;
+ break;
+ default:
+ error = BLK_STS_IOERR;
+ break;
+ }
}
__blk_end_request_all(req, error);
}
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
spin_lock_irq(&block->request_queue_lock);
while ((req = blk_fetch_request(block->request_queue)))
- __blk_end_request_all(req, -EIO);
+ __blk_end_request_all(req, BLK_STS_IOERR);
spin_unlock_irq(&block->request_queue_lock);
}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280..06eb1de 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
unsigned long source_addr;
unsigned long bytes_done;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de68..3c2c84b 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
aob->request.data = (u64) aobrq;
scmrq->bdev = bdev;
scmrq->retries = 4;
- scmrq->error = 0;
+ scmrq->error = BLK_STS_OK;
/* We don't use all msbs - place aidaws at the end of the aob page. */
scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
{
struct aob *aob = scmrq->aob;
- if (scmrq->error == -ETIMEDOUT)
+ if (scmrq->error == BLK_STS_TIMEOUT)
SCM_LOG(1, "Request timeout");
else {
SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
scmrq->error);
}
-void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
+void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
struct scm_request *scmrq = data;
struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
struct scm_blk_dev *bdev = scmrq->bdev;
unsigned long flags;
- if (scmrq->error != -EIO)
+ if (scmrq->error != BLK_STS_IOERR)
goto restart;
/* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cd..cd598d1 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
struct aob *aob;
struct list_head list;
u8 retries;
- int error;
+ blk_status_t error;
#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
struct {
enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
void scm_blk_dev_cleanup(struct scm_blk_dev *);
void scm_blk_set_available(struct scm_blk_dev *);
-void scm_blk_irq(struct scm_device *, void *, int);
+void scm_blk_irq(struct scm_device *, void *, blk_status_t);
void scm_request_finish(struct scm_request *);
void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e75..a48f0d4 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
unsigned long page_addr;
unsigned long bytes;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if ((bio->bi_iter.bi_sector & 7) != 0 ||
(bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc..0f11f3b 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
struct eadm_private *private = get_eadm_private(sch);
struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
struct irb *irb = this_cpu_ptr(&cio_irb);
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
EADM_LOG(6, "irq");
EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
&& scsw->eswf == 1 && irb->esw.eadm.erw.r)
- error = -EIO;
+ error = BLK_STS_IOERR;
if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
- error = -ETIMEDOUT;
+ error = BLK_STS_TIMEOUT;
eadm_subchannel_set_timeout(sch, 0);
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268ed..1fa53ec 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
}
EXPORT_SYMBOL_GPL(scm_driver_unregister);
-void scm_irq_handler(struct aob *aob, int error)
+void scm_irq_handler(struct aob *aob, blk_status_t error)
{
struct aob_rq_header *aobrq = (void *) aob->request.data;
struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9d..14f377a 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
struct jsfd_part *jdp = req->rq_disk->private_data;
unsigned long offset = blk_rq_pos(req) << 9;
size_t len = blk_rq_cur_bytes(req);
- int err = -EIO;
+ blk_status_t err = BLK_STS_IOERR;
if ((offset + len) > jdp->dsize)
goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
}
jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
- err = 0;
+ err = BLK_STS_OK;
end:
if (!__blk_end_request_cur(req, err))
req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
put_disk(disk);
goto out;
}
+ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
jsfd_disk[i] = disk;
}
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b948..a4f28b7 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
* code paths.
*/
if (unlikely(rq->bio))
- blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq));
+ blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
else
blk_put_request(rq);
}
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
EXPORT_SYMBOL(osd_end_request);
static void _set_error_resid(struct osd_request *or, struct request *req,
- int error)
+ blk_status_t error)
{
or->async_error = error;
- or->req_errors = scsi_req(req)->result ? : error;
+ or->req_errors = scsi_req(req)->result;
or->sense_len = scsi_req(req)->sense_len;
if (or->sense_len)
memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
int osd_execute_request(struct osd_request *or)
{
- int error;
-
blk_execute_rq(or->request->q, NULL, or->request, 0);
- error = scsi_req(or->request)->result ? -EIO : 0;
- _set_error_resid(or, or->request, error);
- return error;
+ if (scsi_req(or->request)->result) {
+ _set_error_resid(or, or->request, BLK_STS_IOERR);
+ return -EIO;
+ }
+
+ _set_error_resid(or, or->request, BLK_STS_OK);
+ return 0;
}
EXPORT_SYMBOL(osd_execute_request);
-static void osd_request_async_done(struct request *req, int error)
+static void osd_request_async_done(struct request *req, blk_status_t error)
{
struct osd_request *or = req->end_io_data;
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
flags);
if (IS_ERR(req))
return req;
- scsi_req_init(req);
for_each_bio(bio) {
- struct bio *bounce_bio = bio;
-
- blk_queue_bounce(req->q, &bounce_bio);
- ret = blk_rq_append_bio(req, bounce_bio);
+ ret = blk_rq_append_bio(req, bio);
if (ret)
return ERR_PTR(ret);
}
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
ret = PTR_ERR(req);
goto out;
}
- scsi_req_init(req);
or->in.req = or->request->next_rq = req;
}
} else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
/* scsi sense is Empty, the request was never issued to target
* linux return code might tell us what happened.
*/
- if (or->async_error == -ENOMEM)
+ if (or->async_error == BLK_STS_RESOURCE)
osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
else
osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed9..929ee7e 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
/* Wakeup from interrupt */
-static void osst_end_async(struct request *req, int update)
+static void osst_end_async(struct request *req, blk_status_t status)
{
struct scsi_request *rq = scsi_req(req);
struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07da..304a715 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
}
}
-static void eh_lock_door_done(struct request *req, int uptodate)
+static void eh_lock_door_done(struct request *req, blk_status_t status)
{
__blk_put_request(req->q, req);
}
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
if (IS_ERR(req))
return;
rq = scsi_req(req);
- scsi_req_init(req);
rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 99e16ac..550e29f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
- scsi_req_init(req);
if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
cmd->request->next_rq->special = NULL;
}
-static bool scsi_end_request(struct request *req, int error,
+static bool scsi_end_request(struct request *req, blk_status_t error,
unsigned int bytes, unsigned int bidi_bytes)
{
struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
* @cmd: SCSI command (unused)
* @result: scsi error code
*
- * Translate SCSI error code into standard UNIX errno.
- * Return values:
- * -ENOLINK temporary transport failure
- * -EREMOTEIO permanent target failure, do not retry
- * -EBADE permanent nexus failure, retry on other path
- * -ENOSPC No write space available
- * -ENODATA Medium error
- * -EIO unspecified I/O error
+ * Translate SCSI error code into block errors.
*/
-static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result)
+static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
+ int result)
{
- int error = 0;
-
- switch(host_byte(result)) {
+ switch (host_byte(result)) {
case DID_TRANSPORT_FAILFAST:
- error = -ENOLINK;
- break;
+ return BLK_STS_TRANSPORT;
case DID_TARGET_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -EREMOTEIO;
- break;
+ return BLK_STS_TARGET;
case DID_NEXUS_FAILURE:
- set_host_byte(cmd, DID_OK);
- error = -EBADE;
- break;
+ return BLK_STS_NEXUS;
case DID_ALLOC_FAILURE:
set_host_byte(cmd, DID_OK);
- error = -ENOSPC;
- break;
+ return BLK_STS_NOSPC;
case DID_MEDIUM_ERROR:
set_host_byte(cmd, DID_OK);
- error = -ENODATA;
- break;
+ return BLK_STS_MEDIUM;
default:
- error = -EIO;
- break;
+ return BLK_STS_IOERR;
}
-
- return error;
}
/*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
int result = cmd->result;
struct request_queue *q = cmd->device->request_queue;
struct request *req = cmd->request;
- int error = 0;
+ blk_status_t error = BLK_STS_OK;
struct scsi_sense_hdr sshdr;
bool sense_valid = false;
int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
* both sides at once.
*/
scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
- if (scsi_end_request(req, 0, blk_rq_bytes(req),
+ if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
blk_rq_bytes(req->next_rq)))
BUG();
return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
scsi_print_sense(cmd);
result = 0;
/* for passthrough error may be set */
- error = 0;
+ error = BLK_STS_OK;
}
/*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
action = ACTION_REPREP;
} else if (sshdr.asc == 0x10) /* DIX */ {
action = ACTION_FAIL;
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
action = ACTION_FAIL;
- error = -EREMOTEIO;
+ error = BLK_STS_TARGET;
} else
action = ACTION_FAIL;
break;
case ABORTED_COMMAND:
action = ACTION_FAIL;
if (sshdr.asc == 0x10) /* DIF */
- error = -EILSEQ;
+ error = BLK_STS_PROTECTION;
break;
case NOT_READY:
/* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ err_exit:
}
EXPORT_SYMBOL(scsi_init_io);
+/**
+ * scsi_initialize_rq - initialize struct scsi_cmnd.req
+ *
+ * Called from inside blk_get_request().
+ */
+void scsi_initialize_rq(struct request *rq)
+{
+ struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+ scsi_req_init(&cmd->req);
+}
+EXPORT_SYMBOL(scsi_initialize_rq);
+
+/* Called after a request has been started. */
void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
{
void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ out_delay:
blk_delay_queue(q, SCSI_QUEUE_DELAY);
}
-static inline int prep_to_mq(int ret)
+static inline blk_status_t prep_to_mq(int ret)
{
switch (ret) {
case BLKPREP_OK:
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
case BLKPREP_DEFER:
- return BLK_MQ_RQ_QUEUE_BUSY;
+ return BLK_STS_RESOURCE;
default:
- return BLK_MQ_RQ_QUEUE_ERROR;
+ return BLK_STS_IOERR;
}
}
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
blk_mq_complete_request(cmd->request);
}
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost = sdev->host;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
- int ret;
+ blk_status_t ret;
int reason;
ret = prep_to_mq(scsi_prep_state_check(sdev, req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out;
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
if (!get_device(&sdev->sdev_gendev))
goto out;
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!(req->rq_flags & RQF_DONTPREP)) {
ret = prep_to_mq(scsi_mq_prep_fn(req));
- if (ret != BLK_MQ_RQ_QUEUE_OK)
+ if (ret != BLK_STS_OK)
goto out_dec_host_busy;
req->rq_flags |= RQF_DONTPREP;
} else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
reason = scsi_dispatch_cmd(cmd);
if (reason) {
scsi_set_blocked(cmd, reason);
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ ret = BLK_STS_RESOURCE;
goto out_dec_host_busy;
}
- return BLK_MQ_RQ_QUEUE_OK;
+ return BLK_STS_OK;
out_dec_host_busy:
atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ out_put_device:
put_device(&sdev->sdev_gendev);
out:
switch (ret) {
- case BLK_MQ_RQ_QUEUE_BUSY:
+ case BLK_STS_OK:
+ break;
+ case BLK_STS_RESOURCE:
if (atomic_read(&sdev->device_busy) == 0 &&
!scsi_device_blocked(sdev))
blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
break;
- case BLK_MQ_RQ_QUEUE_ERROR:
+ default:
/*
* Make sure to release all allocated ressources when
* we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ out:
if (req->rq_flags & RQF_DONTPREP)
scsi_mq_uninit_cmd(cmd);
break;
- default:
- break;
}
return ret;
}
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
{
struct device *dev = shost->dma_dev;
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
+
/*
* this limit is imposed by hardware restrictions
*/
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
q->request_fn = scsi_request_fn;
q->init_rq_fn = scsi_init_rq;
q->exit_rq_fn = scsi_exit_rq;
+ q->initialize_rq_fn = scsi_initialize_rq;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
#endif
.init_request = scsi_init_request,
.exit_request = scsi_exit_request,
+ .initialize_rq_fn = scsi_initialize_rq,
.map_queues = scsi_map_queues,
};
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
if (wait)
blk_mq_quiesce_queue(q);
else
- blk_mq_stop_hw_queues(q);
+ blk_mq_quiesce_queue_nowait(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
return -EINVAL;
if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
+ blk_mq_unquiesce_queue(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_start_queue(q);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1..5006a65 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
#include <linux/bsg.h>
#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_request.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
struct sas_rphy *rphy)
{
struct request *req;
- int ret;
+ blk_status_t ret;
int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
+ q->initialize_rq_fn = scsi_initialize_rq;
q->cmd_size = sizeof(struct scsi_request);
if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
if (error)
goto out_cleanup_queue;
+ /*
+ * by default assume old behaviour and bounce for any highmem page
+ */
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
error = bsg_register_queue(q, dev, name, release);
if (error)
goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
q->queuedata = shost;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
return 0;
out_cleanup_queue:
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82c33a6..21225d6 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
} Sg_device;
/* tasklet or soft irq callback */
-static void sg_rq_end_io(struct request *rq, int uptodate);
+static void sg_rq_end_io(struct request *rq, blk_status_t status);
static int sg_start_req(Sg_request *srp, unsigned char *cmd);
static int sg_finish_rem_req(Sg_request * srp);
static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
if (atomic_read(&sdp->detaching)) {
if (srp->bio) {
scsi_req_free_cmd(scsi_req(srp->rq));
- blk_end_request_all(srp->rq, -EIO);
+ blk_end_request_all(srp->rq, BLK_STS_IOERR);
srp->rq = NULL;
}
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
* level when a command is completed (or has failed).
*/
static void
-sg_rq_end_io(struct request *rq, int uptodate)
+sg_rq_end_io(struct request *rq, blk_status_t status)
{
struct sg_request *srp = rq->end_io_data;
struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
}
req = scsi_req(rq);
- scsi_req_init(rq);
-
if (hp->cmd_len > BLK_MAX_CDB)
req->cmd = long_cmdp;
memcpy(req->cmd, cmd, hp->cmd_len);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6..8e5013d 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
atomic64_dec(&STp->stats->in_flight);
}
-static void st_scsi_execute_end(struct request *req, int uptodate)
+static void st_scsi_execute_end(struct request *req, blk_status_t status)
{
struct st_request *SRpnt = req->end_io_data;
struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
if (IS_ERR(req))
return DRIVER_ERROR << 24;
rq = scsi_req(req);
- scsi_req_init(req);
req->rq_flags |= RQF_QUIET;
mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069eb..c05d3801 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
return -EINVAL;
}
- ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
+ ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!ib_dev->ibd_bio_set) {
pr_err("IBLOCK: Unable to create bioset\n");
goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
struct se_cmd *cmd = bio->bi_private;
struct iblock_req *ibr = cmd->priv;
- if (bio->bi_error) {
- pr_err("bio error: %p, err: %d\n", bio, bio->bi_error);
+ if (bio->bi_status) {
+ pr_err("bio error: %p, err: %d\n", bio, bio->bi_status);
/*
* Bump the ib_bio_err_cnt and release bio.
*/
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
{
struct se_cmd *cmd = bio->bi_private;
- if (bio->bi_error)
- pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
if (cmd) {
- if (bio->bi_error)
+ if (bio->bi_status)
target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
else
target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb1..ceec021 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
}
static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
-static void pscsi_req_done(struct request *, int);
+static void pscsi_req_done(struct request *, blk_status_t);
/* pscsi_attach_hba():
*
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
goto fail;
}
- scsi_req_init(req);
-
if (sgl) {
ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
return 0;
}
-static void pscsi_req_done(struct request *req, int uptodate)
+static void pscsi_req_done(struct request *req, blk_status_t status)
{
struct se_cmd *cmd = req->end_io_data;
struct pscsi_plugin_task *pt = cmd->priv;
diff --git a/fs/aio.c b/fs/aio.c
index f52d925..dcad3a6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = iocb_flags(req->common.ki_filp);
+ req->common.ki_hint = file_write_hint(file);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
+ ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+ if (unlikely(ret)) {
+ pr_debug("EINVAL: aio_rw_flags\n");
+ goto out_put_req;
+ }
+
+ if ((req->common.ki_flags & IOCB_NOWAIT) &&
+ !(req->common.ki_flags & IOCB_DIRECT)) {
+ ret = -EOPNOTSUPP;
+ goto out_put_req;
+ }
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0a7404e..a7df151 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_init(&bio, vecs, nr_pages);
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
@@ -262,8 +263,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
if (vecs != inline_vecs)
kfree(vecs);
- if (unlikely(bio.bi_error))
- ret = bio.bi_error;
+ if (unlikely(bio.bi_status))
+ ret = blk_status_to_errno(bio.bi_status);
bio_uninit(&bio);
@@ -291,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
bool should_dirty = dio->should_dirty;
if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_error && !dio->bio.bi_error)
- dio->bio.bi_error = bio->bi_error;
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
} else {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
- ssize_t ret = dio->bio.bi_error;
+ ssize_t ret;
- if (likely(!ret)) {
+ if (likely(!dio->bio.bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
}
dio->iocb->ki_complete(iocb, ret, 0);
@@ -337,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- int ret;
+ int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -361,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
for (;;) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_error = ret;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
}
@@ -415,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
}
__set_current_state(TASK_RUNNING);
- ret = dio->bio.bi_error;
+ if (!ret)
+ ret = blk_status_to_errno(dio->bio.bi_status);
if (likely(!ret))
ret = dio->size;
@@ -439,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+ blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
if (!blkdev_dio_pool)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4..d87ac27 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
- int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+ blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+ blk_status_t);
};
/*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e..4ded1c3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
/* mutex is not held! This is not save if IO is not yet completed
* on umount */
iodone_w_error = 0;
- if (bp->bi_error)
+ if (bp->bi_status)
iodone_w_error = 1;
BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bp->bi_error,
+ bp->bi_status,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b28..a2fad39 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
unsigned long index;
int ret;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
struct page *page;
unsigned long index;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ bio->bi_status ? 0 : 1);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
- int ret;
+ blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_SIZE - 1));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
if (!bio) {
kfree(cb);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+ int submit = 0;
+
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(page, 0,
+ submit = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
@@ -569,7 +569,7 @@ next:
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret = -ENOMEM;
+ blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
u32 *sums;
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em)
- return -EIO;
+ return BLK_STS_IOERR;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
__GFP_HIGHMEM);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
- ret = -ENOMEM;
+ ret = BLK_STS_RESOURCE;
goto fail2;
}
}
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+ int submit = 0;
+
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(page, 0,
+ submit = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43a..680d426 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4f8f75d..a0d0c79 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dc..6036d15 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
- int error;
+ blk_status_t status;
enum btrfs_wq_endio_type metadata;
struct list_head list;
struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
*/
u64 bio_offset;
struct btrfs_work work;
- int error;
+ blk_status_t status;
};
/*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = bio->bi_error;
+ end_io_wq->status = bio->bi_status;
if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_queue_work(wq, &end_io_wq->work);
}
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
- end_io_wq->error = 0;
+ end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
- int ret;
+ blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
ret = async->submit_bio_start(async->inode, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
- async->error = ret;
+ async->status = ret;
}
static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
/* If an error occurred we just want to clean up the bio and move on */
- if (async->error) {
- async->bio->bi_error = async->error;
+ if (async->status) {
+ async->bio->bi_status = async->status;
bio_endio(async->bio);
return;
}
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
kfree(async);
}
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done)
{
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
async->inode = inode;
async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
- async->error = 0;
+ async->status = 0;
atomic_inc(&fs_info->nr_async_submits);
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
return 0;
}
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
{
struct bio_vec *bvec;
struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
break;
}
- return ret;
+ return errno_to_blk_status(ret);
}
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret;
+ blk_status_t ret;
/*
* when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
*/
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
- int ret;
+ blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
/*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
return 0;
out_w_error:
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
return ret;
}
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- bio->bi_error = end_io_wq->error;
+ bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3497,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
* any device where the flush fails with eopnotsupp are flagged as not-barrier
* capable
*/
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
{
struct request_queue *q = bdev_get_queue(device->bdev);
struct bio *bio;
- int ret = 0;
+ blk_status_t ret = 0;
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
return 0;
@@ -3513,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
- if (bio->bi_error) {
- ret = bio->bi_error;
+ if (bio->bi_status) {
+ ret = bio->bi_status;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
@@ -3533,7 +3532,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
device->flush_bio = NULL;
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
@@ -3558,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
struct btrfs_device *dev;
int errors_send = 0;
int errors_wait = 0;
- int ret;
+ blk_status_t ret;
/* send down all the barriers */
head = &info->fs_devices->devices;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb..c581927 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d3619e0..d1cd601 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
goto free_state_cache;
btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio));
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS);
if (!btrfs_bioset)
goto free_buffer_cache;
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct bio *bio;
int read_mode = 0;
+ blk_status_t status;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+ status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
- if (ret) {
+ if (status) {
free_io_failure(BTRFS_I(inode), failrec);
bio_put(bio);
+ ret = blk_status_to_errno(status);
}
return ret;
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio)
{
+ int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
u64 start;
u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- end_extent_writepage(page, bio->bi_error, start, end);
+ end_extent_writepage(page, error, start, end);
end_page_writeback(page);
}
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
- (u64)bio->bi_iter.bi_sector, bio->bi_error,
+ (u64)bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
ret = bio_readpage_error(bio, offset, page,
start, end, mirror);
if (ret == 0) {
- uptodate = !bio->bi_error;
+ uptodate = !bio->bi_status;
offset += len;
continue;
}
@@ -2673,7 +2677,7 @@ readpage_ok:
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, bio->bi_error);
+ io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
- int ret = 0;
+ blk_status_t ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
btrfsic_submit_bio(bio);
bio_put(bio);
- return ret;
+ return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio->bi_write_hint = page->mapping->host->i_write_hint;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (bio->bi_error ||
+ if (bio->bi_status ||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f..487ca02 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset);
+typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset);
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31..5b1c709 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
kfree(bio->csum_allocated);
}
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
csum_size, GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
btrfs_bio->csum = btrfs_bio->csum_allocated;
btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
return 0;
}
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
}
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
{
return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
}
@@ -433,7 +433,7 @@ fail:
return ret;
}
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096e..59e2dcc 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
- loff_t pos;
- size_t count;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ /* Don't sleep on inode rwsem */
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ } else
+ inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ out:
return offset;
}
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+ return generic_file_open(inode, filp);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
- .open = generic_file_open,
+ .open = btrfs_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
.fallocate = btrfs_fallocate,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ef3c98c..556c930 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ retry:
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
- ret = btrfs_submit_compressed_write(inode,
+ if (btrfs_submit_compressed_write(inode,
async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
- async_extent->nr_pages);
- if (ret) {
+ async_extent->nr_pages)) {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret = 0;
+ blk_status_t ret = 0;
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
+ blk_status_t ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
- int ret = 0;
+ blk_status_t ret = 0;
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -1991,8 +1990,8 @@ mapit:
ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
out:
- if (ret < 0) {
- bio->bi_error = ret;
+ if (ret) {
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
struct bio_vec *bvec;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
int ret;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
bio_put(bio);
}
-static int __btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
io_bio->mirror_num,
btrfs_retry_endio, &done);
if (ret) {
- err = ret;
+ err = errno_to_blk_status(ret);
goto next;
}
@@ -8211,8 +8210,8 @@ next:
return err;
}
-static int btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
struct inode *inode = dip->inode;
struct bio *dio_bio;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
if (io_bio->end_io)
- io_bio->end_io(io_bio, err);
+ io_bio->end_io(io_bio, blk_status_to_errno(err));
bio_put(bio);
}
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
struct bio *dio_bio = dip->dio_bio;
__endio_write_update_ordered(dip->inode, dip->logical_offset,
- dip->bytes, !bio->bi_error);
+ dip->bytes, !bio->bi_status);
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
- int ret;
+ blk_status_t ret;
ret = btrfs_csum_one_bio(inode, bio, offset, 1);
BUG_ON(ret); /* -ENOMEM */
return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (dip->errors) {
bio_io_error(dip->orig_bio);
} else {
- dip->dio_bio->bi_error = 0;
+ dip->dio_bio->bi_status = 0;
bio_endio(dip->orig_bio);
}
out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
return bio;
}
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
struct btrfs_dio_private *dip,
struct bio *bio,
u64 file_offset)
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
- int ret;
+ blk_status_t ret;
/*
* We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
bool write = bio_op(bio) == REQ_OP_WRITE;
- int ret;
+ blk_status_t ret;
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
* callbacks - they require an allocated dip and a clone of dio_bio.
*/
if (io_bio && dip) {
- io_bio->bi_error = -EIO;
+ io_bio->bi_status = BLK_STS_IOERR;
bio_endio(io_bio);
/*
* The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ free_ordered:
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- dio_bio->bi_error = -EIO;
+ dio_bio->bi_status = BLK_STS_IOERR;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
* nor bio_endio()/bio_io_error() against dio_bio.
*/
- dio_end_io(dio_bio, ret);
+ dio_end_io(dio_bio);
}
if (io_bio)
bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb..f3d30d9e 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_error = err;
+ cur->bi_status = err;
bio_endio(cur);
cur = next;
}
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
int max_errors;
if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0 : rbio->bbio->max_errors;
if (atomic_read(&rbio->error) > max_errors)
- err = -EIO;
+ err = BLK_STS_IOERR;
rbio_orig_end_io(rbio, err);
}
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
- !last->bi_error &&
+ !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
* we only read stripe pages off the disk, set them
* up to date if there were no errors
*/
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb..ba5595d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
struct scrub_ctx *sctx;
struct btrfs_device *dev;
struct bio *bio;
- int err;
+ blk_status_t status;
u64 logical;
u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
struct scrub_bio_ret {
struct completion event;
- int error;
+ blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->status = bio->bi_status;
complete(&ret->event);
}
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
int ret;
init_completion(&done.event);
- done.error = 0;
+ done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
return ret;
wait_for_completion(&done.event);
- if (done.error)
+ if (done.status)
return -EIO;
return 0;
@@ -1937,7 +1937,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
int i;
WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
- if (sbio->err) {
+ if (sbio->status) {
struct btrfs_dev_replace *dev_replace =
&sbio->sctx->fs_info->dev_replace;
@@ -2341,7 +2341,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
sblock->no_io_error_seen = 0;
bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
int i;
BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
- if (sbio->err) {
+ if (sbio->status) {
for (i = 0; i < sbio->page_count; i++) {
struct scrub_page *spage = sbio->pagev[i];
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67d..84a4959 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
- if (bio->bi_error) {
+ if (bio->bi_status) {
atomic_inc(&bbio->error);
- if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+ if (bio->bi_status == BLK_STS_IOERR ||
+ bio->bi_status == BLK_STS_TARGET) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
* beyond the tolerance of the btrfs bio
*/
if (atomic_read(&bbio->error) > bbio->max_errors) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
} else {
/*
* this bio is actually up to date, we didn't
* go over the max number of errors
*/
- bio->bi_error = 0;
+ bio->bi_status = 0;
}
btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_iter.bi_sector = logical >> 9;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
btrfs_end_bbio(bbio, bio);
}
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58..5c2cba8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -1883,7 +1884,8 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc)
+ enum rw_hint write_hint, struct writeback_control *wbc)
{
struct bio *bio;
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
+ bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
- return submit_bh_wbc(op, op_flags, bh, NULL);
+ return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84..6181e95 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
err = submit_bio_wait(bio);
- if ((err == 0) && bio->bi_error)
+ if (err == 0 && bio->bi_status)
err = -EIO;
bio_put(bio);
if (err)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea..08cf278 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
dio_complete(dio, 0, true);
}
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
- * @error: Error if there was one
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
+ bio->bi_write_hint = dio->iocb->ki_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
/*
* Process one completed BIO. No locks are held.
*/
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
{
struct bio_vec *bvec;
unsigned i;
- int err;
+ blk_status_t err = bio->bi_status;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (err) {
+ if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
- err = bio->bi_error;
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
set_page_dirty_lock(page);
put_page(page);
}
- err = bio->bi_error;
bio_put(bio);
}
return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
bio = dio->bio_list;
dio->bio_list = bio->bi_private;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- ret2 = dio_bio_complete(dio, bio);
+ ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
if (ret == 0)
ret = ret2;
}
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
if (iov_iter_rw(iter) == WRITE) {
dio->op = REQ_OP_WRITE;
dio->op_flags = REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ dio->op_flags |= REQ_NOWAIT;
} else {
dio->op = REQ_OP_READ;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 02ce7e7..58e2eea 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock_shared(inode);
+ if (!inode_trylock_shared(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock_shared(inode);
+ }
/*
* Recheck under inode lock - at this point we are sure it cannot
* change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ if (ext4_should_dioread_nolock(inode))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret < 0)
return ret;
}
+
+ /* Set the flags to support nowait AIO */
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138..c2fce44 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
}
#endif
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
mapping_set_error(page->mapping, -EIO);
}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
- if (bio->bi_error)
+ if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
- bio->bi_error)) {
+ bio->bi_status)) {
ext4_finish_bio(bio);
bio_put(bio);
return;
}
bio->bi_end_io = NULL;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- bio->bi_error, inode->i_ino,
+ bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
- mapping_set_error(inode->i_mapping, bio->bi_error);
+ mapping_set_error(inode->i_mapping,
+ blk_status_to_errno(bio->bi_status));
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
+ io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
@@ -396,6 +398,7 @@ submit_and_retry:
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
+ io->io_bio->bi_write_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829..40a5497 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
int i;
if (ext4_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bd..36fe820 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
}
#endif
if (f2fs_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
if (!PageUptodate(page))
SetPageUptodate(page);
} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
unlock_page(page);
mempool_free(page, sbi->write_io_dummy);
- if (unlikely(bio->bi_error))
+ if (unlikely(bio->bi_status))
f2fs_stop_checkpoint(sbi, true);
continue;
}
fscrypt_pullback_bio_page(&page, true);
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9684585..ea9f455 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
- dc->error = bio->bi_error;
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
complete(&dc->wait);
bio_put(bio);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267..ed051f8 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
+static bool rw_hint_valid(enum rw_hint hint)
+{
+ switch (hint) {
+ case RWF_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NONE:
+ case RWH_WRITE_LIFE_SHORT:
+ case RWH_WRITE_LIFE_MEDIUM:
+ case RWH_WRITE_LIFE_LONG:
+ case RWH_WRITE_LIFE_EXTREME:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file_inode(file);
+ u64 *argp = (u64 __user *)arg;
+ enum rw_hint hint;
+ u64 h;
+
+ switch (cmd) {
+ case F_GET_FILE_RW_HINT:
+ h = file_write_hint(file);
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_FILE_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ spin_lock(&file->f_lock);
+ file->f_write_hint = hint;
+ spin_unlock(&file->f_lock);
+ return 0;
+ case F_GET_RW_HINT:
+ h = inode->i_write_hint;
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ inode_lock(inode);
+ inode->i_write_hint = hint;
+ inode_unlock(inode);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GET_SEALS:
err = shmem_fcntl(filp, cmd, arg);
break;
+ case F_GET_RW_HINT:
+ case F_SET_RW_HINT:
+ case F_GET_FILE_RW_HINT:
+ case F_SET_FILE_RW_HINT:
+ err = fcntl_rw_hint(filp, cmd, arg);
+ break;
default:
break;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d..aa3d445 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
- int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144..885d36e 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
*/
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
- int error)
+ blk_status_t error)
{
struct buffer_head *bh, *next;
struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
struct page *page;
int i;
- if (bio->bi_error) {
- sdp->sd_log_error = bio->bi_error;
- fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
- }
+ if (bio->bi_status)
+ fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
- gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+ gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
else
mempool_free(page, gfs2_page_pool);
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc1..fabe1614f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
do {
struct buffer_head *next = bh->b_this_page;
len -= bh->b_size;
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bh = next;
} while (bh && len);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b92135c..e76058d 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
SetPageUptodate(page);
else
- pr_warn("error %d reading superblock\n", bio->bi_error);
+ pr_warn("error %d reading superblock\n", bio->bi_status);
unlock_page(page);
}
diff --git a/fs/inode.c b/fs/inode.c
index db59147..f0e5fc7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_write_hint = WRITE_LIFE_NOT_SET;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892..fa6cd5b 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
- if (bio->bi_error)
- iomap_dio_set_error(dio, bio->bi_error);
+ if (bio->bi_status)
+ iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
+ bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
flags |= IOMAP_WRITE;
}
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, start, end)) {
+ ret = -EAGAIN;
+ goto out_free_dio;
+ }
+ flags |= IOMAP_NOWAIT;
+ }
+
ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1f..a21f0e9 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
bp->l_flag |= lbmDONE;
- if (bio->bi_error) {
+ if (bio->bi_status) {
bp->l_flag |= lbmERROR;
jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1..ce93db3 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
BUG_ON(!PagePrivate(page));
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f8..d6d1486 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
}
bio_put(bio);
@@ -614,6 +615,7 @@ alloc_new:
goto confused;
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d..d8863a8 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
{
struct parallel_io *par = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
struct parallel_io *par = bio->bi_private;
struct nfs_pgio_header *header = par->data;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213a..c862c24 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
u8 *buf, *d, type, assoc;
int error;
+ if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+ return -EINVAL;
+
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
goto out_free_buf;
}
req = scsi_req(rq);
- scsi_req_init(rq);
error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
if (error)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2a..e73c86d 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
{
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&segbuf->sb_err);
bio_put(bio);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332..ffe0039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (bio->bi_error) {
- mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
- wc->wc_error = bio->bi_error;
+ if (bio->bi_status) {
+ mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+ wc->wc_error = blk_status_to_errno(bio->bi_status);
}
o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be..3fe0c4a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/read_write.c b/fs/read_write.c
index 19d4d88..d591eee 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
struct kiocb kiocb;
ssize_t ret;
- if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
- return -EOPNOTSUPP;
-
init_sync_kiocb(&kiocb, filp);
- if (flags & RWF_HIPRI)
- kiocb.ki_flags |= IOCB_HIPRI;
- if (flags & RWF_DSYNC)
- kiocb.ki_flags |= IOCB_DSYNC;
- if (flags & RWF_SYNC)
- kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ ret = kiocb_set_rw_flags(&kiocb, flags);
+ if (ret)
+ return ret;
kiocb.ki_pos = *ppos;
if (type == READ)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3b91faa..d20c29b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
- int error = ioend->io_bio->bi_error;
+ int error;
/*
* Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
/*
* Clean up any COW blocks on an I/O error.
*/
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
if (unlikely(error)) {
switch (ioend->io_type) {
case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
- xfs_destroy_ioend(ioend, bio->bi_error);
+ xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}
STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
* time.
*/
if (status) {
- ioend->io_bio->bi_error = status;
+ ioend->io_bio->bi_status = errno_to_blk_status(status);
bio_endio(ioend->io_bio);
return status;
}
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -564,6 +566,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 16d6a57..438505f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a09..17f27a2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
@@ -585,7 +599,12 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
+ file->f_mode |= FMODE_AIO_NOWAIT;
return 0;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf..05dc87e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
lockmode = xfs_ilock_data_map_shared(ip);
}
+ if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575..97df4db 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
xfs_init_zones(void)
{
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio));
+ offsetof(struct xfs_ioend, io_inline_bio),
+ BIOSET_NEED_BVECS);
if (!xfs_ioend_bioset)
goto out;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a7e29fa..664a27d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
/*
* will die
*/
-#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
/*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
return bio_split(bio, sectors, gfp, bs);
}
-extern struct bio_set *bioset_create(unsigned int, unsigned int);
-extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
+enum {
+ BIOSET_NEED_BVECS = BIT(0),
+ BIOSET_NEED_RESCUER = BIT(1),
+};
extern void bioset_free(struct bio_set *);
extern mempool_t *biovec_create_pool(int pool_entries);
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
}
-static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
-{
- return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
-}
-
static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
static inline void bio_io_error(struct bio *bio)
{
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+}
+
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+ bio->bi_status = BLK_STS_AGAIN;
bio_endio(bio);
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd6410..23d32ff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
struct blk_mq_tags *tags;
struct blk_mq_tags *sched_tags;
- struct srcu_struct queue_rq_srcu;
-
unsigned long queued;
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
struct dentry *debugfs_dir;
struct dentry *sched_debugfs_dir;
#endif
+
+ /* Must be the last member - see also blk_mq_hw_ctx_size(). */
+ struct srcu_struct queue_rq_srcu[0];
};
struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
bool last;
};
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
+ const struct blk_mq_queue_data *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
init_request_fn *init_request;
exit_request_fn *exit_request;
reinit_request_fn *reinit_request;
+ /* Called from inside blk_get_request() */
+ void (*initialize_rq_fn)(struct request *rq);
map_queues_fn *map_queues;
@@ -155,10 +159,6 @@ struct blk_mq_ops {
};
enum {
- BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
- BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
- BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
-
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
BLK_MQ_F_TAG_SHARED = 1 << 1,
BLK_MQ_F_SG_MERGE = 1 << 2,
@@ -204,10 +204,10 @@ enum {
BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
};
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
unsigned int flags);
-struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op,
- unsigned int flags, unsigned int hctx_idx);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
+ unsigned int op, unsigned int flags, unsigned int hctx_idx);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
enum {
@@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
int blk_mq_request_started(struct request *rq);
void blk_mq_start_request(struct request *rq);
-void blk_mq_end_request(struct request *rq, int error);
-void __blk_mq_end_request(struct request *rq, int error);
+void blk_mq_end_request(struct request *rq, blk_status_t error);
+void __blk_mq_end_request(struct request *rq, blk_status_t error);
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
@@ -247,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -264,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
+void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+
/*
* Driver command data is immediately after the request. So subtract request
* size to get back to the original request, add request size to get the PDU.
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc..d2eb87c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,27 @@ struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
+/*
+ * Block error status values. See block/blk-core:blk_errors for the details.
+ */
+typedef u8 __bitwise blk_status_t;
+#define BLK_STS_OK 0
+#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
+#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
+#define BLK_STS_NOSPC ((__force blk_status_t)3)
+#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
+#define BLK_STS_TARGET ((__force blk_status_t)5)
+#define BLK_STS_NEXUS ((__force blk_status_t)6)
+#define BLK_STS_MEDIUM ((__force blk_status_t)7)
+#define BLK_STS_PROTECTION ((__force blk_status_t)8)
+#define BLK_STS_RESOURCE ((__force blk_status_t)9)
+#define BLK_STS_IOERR ((__force blk_status_t)10)
+
+/* hack for device mapper, don't use elsewhere: */
+#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
+
+#define BLK_STS_AGAIN ((__force blk_status_t)12)
+
struct blk_issue_stat {
u64 stat;
};
@@ -28,13 +49,14 @@ struct blk_issue_stat {
struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
- int bi_error;
+ blk_status_t bi_status;
unsigned int bi_opf; /* bottom bits req flags,
* top bits REQ_OP. Use
* accessors.
*/
unsigned short bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
+ unsigned short bi_write_hint;
struct bvec_iter bi_iter;
@@ -205,6 +227,7 @@ enum req_flag_bits {
/* command specific flags for REQ_OP_WRITE_ZEROES: */
__REQ_NOUNMAP, /* do not free blocks when zeroing */
+ __REQ_NOWAIT, /* Don't wait if request will block */
__REQ_NR_BITS, /* stops here */
};
@@ -223,6 +246,7 @@ enum req_flag_bits {
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
+#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ddd36b..25f6a0c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
*/
#define BLKCG_MAX_POLS 3
-typedef void (rq_end_io_fn)(struct request *, int);
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
#define BLK_RL_SYNCFULL (1U << 0)
#define BLK_RL_ASYNCFULL (1U << 1)
@@ -225,6 +225,8 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
+ unsigned short write_hint;
+
unsigned long deadline;
struct list_head timeout_list;
@@ -412,8 +414,12 @@ struct request_queue {
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
+ /* Called just after a request is allocated */
init_rq_fn *init_rq_fn;
+ /* Called just before a request is freed */
exit_rq_fn *exit_rq_fn;
+ /* Called from inside blk_get_request() */
+ void (*initialize_rq_fn)(struct request *rq);
const struct blk_mq_ops *mq_ops;
@@ -590,6 +596,9 @@ struct request_queue {
void *rq_alloc_data;
struct work_struct release_work;
+
+#define BLK_MAX_WRITE_HINTS 5
+ u64 write_hints[BLK_MAX_WRITE_HINTS];
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -622,6 +631,8 @@ struct request_queue {
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -633,6 +644,13 @@ struct request_queue {
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_POLL))
+/*
+ * @q->queue_lock is set while a queue is being initialized. Since we know
+ * that no other threads access the queue object before @q->queue_lock has
+ * been set, it is safe to manipulate queue flags without holding the
+ * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
+ * blk_init_allocated_queue().
+ */
static inline void queue_lockdep_assert_held(struct request_queue *q)
{
if (q->queue_lock)
@@ -712,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_scsi_passthrough(q) \
+ test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
static inline bool blk_account_rq(struct request *rq)
{
@@ -814,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
- if (bio_data(a) == bio_data(b))
+ if (bio_page(a) == bio_page(b) &&
+ bio_offset(a) == bio_offset(b))
return true;
return false;
@@ -862,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
#define BLK_MIN_SG_TIMEOUT (7 * HZ)
-#ifdef CONFIG_BOUNCE
-extern int init_emergency_isa_pool(void);
-extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
-#else
-static inline int init_emergency_isa_pool(void)
-{
- return 0;
-}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
-{
-}
-#endif /* CONFIG_MMU */
-
struct rq_map_data {
struct page **pages;
int page_order;
@@ -933,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
extern void blk_put_request(struct request *);
extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
+extern struct request *blk_get_request(struct request_queue *, unsigned int op,
+ gfp_t gfp_mask);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -941,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
int (*bio_ctr)(struct bio *, struct bio *, void *),
void *data);
extern void blk_rq_unprep_clone(struct request *rq);
-extern int blk_insert_cloned_request(struct request_queue *q,
+extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq);
extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
extern void blk_delay_queue(struct request_queue *, unsigned long);
-extern void blk_queue_split(struct request_queue *, struct bio **,
- struct bio_set *);
+extern void blk_queue_split(struct request_queue *, struct bio **);
extern void blk_recount_segments(struct request_queue *, struct bio *);
extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -967,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
extern void __blk_run_queue_uncond(struct request_queue *q);
extern void blk_run_queue(struct request_queue *);
extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
@@ -981,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
struct request *, int, rq_end_io_fn *);
+int blk_status_to_errno(blk_status_t status);
+blk_status_t errno_to_blk_status(int errno);
+
bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1113,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
* blk_end_request() for parts of the original function.
* This prevents code duplication in drivers.
*/
-extern bool blk_update_request(struct request *rq, int error,
+extern bool blk_update_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_finish_request(struct request *rq, int error);
-extern bool blk_end_request(struct request *rq, int error,
+extern void blk_finish_request(struct request *rq, blk_status_t error);
+extern bool blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request(struct request *rq, int error,
+extern void blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request(struct request *rq, blk_status_t error,
unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
-extern bool __blk_end_request_cur(struct request *rq, int error);
+extern void __blk_end_request_all(struct request *rq, blk_status_t error);
+extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
@@ -1374,11 +1385,6 @@ enum blk_default_limits {
#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-static inline unsigned long queue_bounce_pfn(struct request_queue *q)
-{
- return q->limits.bounce_pfn;
-}
-
static inline unsigned long queue_segment_boundary(struct request_queue *q)
{
return q->limits.seg_boundary_mask;
@@ -1780,7 +1786,7 @@ struct blk_integrity_iter {
const char *disk_name;
};
-typedef int (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
struct blk_integrity_profile {
integrity_processing_fn *generate_fn;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c..456da50 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
* 2 : The target wants to push back the io
*/
typedef int (*dm_endio_fn) (struct dm_target *ti,
- struct bio *bio, int error);
+ struct bio *bio, blk_status_t *error);
typedef int (*dm_request_endio_fn) (struct dm_target *ti,
- struct request *clone, int error,
+ struct request *clone, blk_status_t error,
union map_info *map_context);
typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 0e306c5..5bc8f86 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
- struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
- void (*put_request)(struct request *);
+ void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+ void (*prepare_request)(struct request *, struct bio *bio);
+ void (*finish_request)(struct request *);
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
void (*requeue_request)(struct request *);
struct request *(*former_request)(struct request_queue *, struct request *);
struct request *(*next_request)(struct request_queue *, struct request *);
- int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
- void (*put_rq_priv)(struct request_queue *, struct request *);
void (*init_icq)(struct io_cq *);
void (*exit_icq)(struct io_cq *);
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e68cab..65adbdd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -20,6 +20,7 @@
#include <linux/rwsem.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
+#include <linux/fcntl.h>
#include <linux/fiemap.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
@@ -143,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
+/* File is capable of returning -EAGAIN if AIO will block */
+#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
@@ -262,6 +266,18 @@ struct page;
struct address_space;
struct writeback_control;
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+ WRITE_LIFE_NOT_SET = 0,
+ WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
+ WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
+ WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
+ WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
+ WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+};
+
#define IOCB_EVENTFD (1 << 0)
#define IOCB_APPEND (1 << 1)
#define IOCB_DIRECT (1 << 2)
@@ -269,6 +285,7 @@ struct writeback_control;
#define IOCB_DSYNC (1 << 4)
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
+#define IOCB_NOWAIT (1 << 7)
struct kiocb {
struct file *ki_filp;
@@ -276,6 +293,7 @@ struct kiocb {
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
+ enum rw_hint ki_hint;
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -283,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
return kiocb->ki_complete == NULL;
}
-static inline int iocb_flags(struct file *file);
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
- *kiocb = (struct kiocb) {
- .ki_filp = filp,
- .ki_flags = iocb_flags(filp),
- };
-}
-
/*
* "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
@@ -593,6 +601,7 @@ struct inode {
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
unsigned int i_blkbits;
+ enum rw_hint i_write_hint;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
@@ -847,6 +856,7 @@ struct file {
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
+ enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@@ -1022,8 +1032,6 @@ struct file_lock_context {
#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
#endif
-#include <linux/fcntl.h>
-
extern void send_sigio(struct fown_struct *fown, int fd, int band);
/*
@@ -1874,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
}
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+ if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+ return file->f_write_hint;
+
+ return file_inode(file)->i_write_hint;
+}
+
+static inline int iocb_flags(struct file *file);
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+ *kiocb = (struct kiocb) {
+ .ki_filp = filp,
+ .ki_flags = iocb_flags(filp),
+ .ki_hint = file_write_hint(filp),
+ };
+}
+
/*
* Inode state bits. Protected by inode->i_lock
*
@@ -2518,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
extern void filemap_fdatawait_keep_errors(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
+ loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
@@ -2844,7 +2873,7 @@ enum {
DIO_SKIP_DIO_COUNT = 0x08,
};
-void dio_end_io(struct bio *bio, int error);
+void dio_end_io(struct bio *bio);
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
@@ -3057,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
return res;
}
+static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
+{
+ if (unlikely(flags & ~RWF_SUPPORTED))
+ return -EOPNOTSUPP;
+
+ if (flags & RWF_NOWAIT) {
+ if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
+ return -EOPNOTSUPP;
+ ki->ki_flags |= IOCB_NOWAIT;
+ }
+ if (flags & RWF_HIPRI)
+ ki->ki_flags |= IOCB_HIPRI;
+ if (flags & RWF_DSYNC)
+ ki->ki_flags |= IOCB_DSYNC;
+ if (flags & RWF_SYNC)
+ ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ return 0;
+}
+
static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca3..dc152e4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
void (*init_dev)(ide_drive_t *);
void (*set_pio_mode)(struct hwif_s *, ide_drive_t *);
void (*set_dma_mode)(struct hwif_s *, ide_drive_t *);
- int (*reset_poll)(ide_drive_t *);
+ blk_status_t (*reset_poll)(ide_drive_t *);
void (*pre_reset)(ide_drive_t *);
void (*resetproc)(ide_drive_t *);
void (*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
extern int ide_vlb_clk;
extern int ide_pci_clk;
-int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int);
+int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
void ide_kill_rq(ide_drive_t *, struct request *);
void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
const struct ide_devset *setting, int arg);
void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, int, unsigned int);
+int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e78..69f4e94 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
struct iomap_ops {
/*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e400a69..6b8ee9e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -87,7 +87,7 @@ enum {
NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
};
-#define NVMF_AQ_DEPTH 32
+#define NVME_AQ_DEPTH 32
enum {
NVME_REG_CAP = 0x0000, /* Controller Capabilities */
@@ -102,6 +102,7 @@ enum {
NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
};
#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
@@ -208,9 +209,15 @@ struct nvme_id_ctrl {
__u8 tnvmcap[16];
__u8 unvmcap[16];
__le32 rpmbs;
- __u8 rsvd316[4];
+ __le16 edstt;
+ __u8 dsto;
+ __u8 fwug;
__le16 kas;
- __u8 rsvd322[190];
+ __le16 hctma;
+ __le16 mntmt;
+ __le16 mxtmt;
+ __le32 sanicap;
+ __u8 rsvd332[180];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
@@ -246,6 +253,7 @@ enum {
NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
NVME_CTRL_VWC_PRESENT = 1 << 0,
NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7,
};
@@ -275,7 +283,7 @@ struct nvme_id_ns {
__le16 nabsn;
__le16 nabo;
__le16 nabspf;
- __u16 rsvd46;
+ __le16 noiob;
__u8 nvmcap[16];
__u8 rsvd64[40];
__u8 nguid[16];
@@ -289,6 +297,7 @@ enum {
NVME_ID_CNS_NS = 0x00,
NVME_ID_CNS_CTRL = 0x01,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
+ NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@@ -296,6 +305,19 @@ enum {
};
enum {
+ NVME_DIR_IDENTIFY = 0x00,
+ NVME_DIR_STREAMS = 0x01,
+ NVME_DIR_SND_ID_OP_ENABLE = 0x01,
+ NVME_DIR_SND_ST_OP_REL_ID = 0x01,
+ NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
+ NVME_DIR_RCV_ID_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_STATUS = 0x02,
+ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
+ NVME_DIR_ENDIR = 0x01,
+};
+
+enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
@@ -315,6 +337,22 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
+struct nvme_ns_id_desc {
+ __u8 nidt;
+ __u8 nidl;
+ __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN 8
+#define NVME_NIDT_NGUID_LEN 16
+#define NVME_NIDT_UUID_LEN 16
+
+enum {
+ NVME_NIDT_EUI64 = 0x01,
+ NVME_NIDT_NGUID = 0x02,
+ NVME_NIDT_UUID = 0x03,
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -536,6 +574,7 @@ enum {
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
+ NVME_RW_DTYPE_STREAMS = 1 << 4,
};
struct nvme_dsm_cmd {
@@ -587,6 +626,11 @@ struct nvme_feat_auto_pst {
__le64 entries[32];
};
+enum {
+ NVME_HOST_MEM_ENABLE = (1 << 0),
+ NVME_HOST_MEM_RETURN = (1 << 1),
+};
+
/* Admin commands */
enum nvme_admin_opcode {
@@ -605,6 +649,8 @@ enum nvme_admin_opcode {
nvme_admin_download_fw = 0x11,
nvme_admin_ns_attach = 0x15,
nvme_admin_keep_alive = 0x18,
+ nvme_admin_directive_send = 0x19,
+ nvme_admin_directive_recv = 0x1a,
nvme_admin_dbbuf = 0x7C,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
@@ -659,6 +705,8 @@ struct nvme_identify {
__u32 rsvd11[5];
};
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
struct nvme_features {
__u8 opcode;
__u8 flags;
@@ -668,7 +716,16 @@ struct nvme_features {
union nvme_data_ptr dptr;
__le32 fid;
__le32 dword11;
- __u32 rsvd12[4];
+ __le32 dword12;
+ __le32 dword13;
+ __le32 dword14;
+ __le32 dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+ __le64 addr;
+ __le32 size;
+ __u32 rsvd;
};
struct nvme_create_cq {
@@ -757,6 +814,24 @@ struct nvme_get_log_page_command {
__u32 rsvd14[2];
};
+struct nvme_directive_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __u8 doper;
+ __u8 dtype;
+ __le16 dspec;
+ __u8 endir;
+ __u8 tdtype;
+ __u16 rsvd15;
+
+ __u32 rsvd16[3];
+};
+
/*
* Fabrics subcommands.
*/
@@ -887,6 +962,18 @@ struct nvme_dbbuf {
__u32 rsvd12[6];
};
+struct streams_directive_params {
+ __u16 msl;
+ __u16 nssa;
+ __u16 nsso;
+ __u8 rsvd[10];
+ __u32 sws;
+ __u16 sgs;
+ __u16 nsa;
+ __u16 nso;
+ __u8 rsvd2[6];
+};
+
struct nvme_command {
union {
struct nvme_common_command common;
@@ -907,6 +994,7 @@ struct nvme_command {
struct nvmf_property_set_command prop_set;
struct nvmf_property_get_command prop_get;
struct nvme_dbbuf dbbuf;
+ struct nvme_directive_cmd directive;
};
};
@@ -1051,4 +1139,8 @@ struct nvme_completion {
#define NVME_VS(major, minor, tertiary) \
(((major) << 16) | ((minor) << 8) | (tertiary))
+#define NVME_MAJOR(ver) ((ver) >> 16)
+#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver) ((ver) & 0xff)
+
#endif /* _LINUX_NVME_H */
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe..4b3286a 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
const void *buf, size_t buflen, off_t skip);
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
void *buf, size_t buflen, off_t skip);
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+ size_t buflen, off_t skip);
/*
* Maximum number of entries that will be allocated in one piece, if
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca8..a29d308 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
osd_req_done_fn *async_done;
void *async_private;
- int async_error;
+ blk_status_t async_error;
int req_errors;
};
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index b379f93..da9bf2b 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
extern void scsi_kunmap_atomic_sg(void *virt);
extern int scsi_init_io(struct scsi_cmnd *cmd);
+extern void scsi_initialize_rq(struct request *rq);
extern int scsi_dma_map(struct scsi_cmnd *cmd);
extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index f0c76f9..e0afa44 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
kfree(req->cmd);
}
-void scsi_req_init(struct request *);
+void scsi_req_init(struct scsi_request *req);
#endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f..a2d4a8a 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
struct iocb {
/* these are internal to the kernel/libc. */
__u64 aio_data; /* data to be returned in event's data */
- __u32 PADDED(aio_key, aio_reserved1);
+ __u32 PADDED(aio_key, aio_rw_flags);
/* the kernel sets aio_key to the req # */
/* common fields */
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1e..2f6c77a 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 35
+#define DM_VERSION_MINOR 36
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2016-06-23)"
+#define DM_VERSION_EXTRA "-ioctl (2017-06-09)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6..ec69d55 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
/* (1U << 31) is reserved for signed error codes */
/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NONE 1
+#define RWH_WRITE_LIFE_SHORT 2
+#define RWH_WRITE_LIFE_MEDIUM 3
+#define RWH_WRITE_LIFE_LONG 4
+#define RWH_WRITE_LIFE_EXTREME 5
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a5..27d8c36 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,5 +360,9 @@ struct fscrypt_key {
#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */
#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */
#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */
+
+#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
+ RWF_NOWAIT)
#endif /* _UAPI_LINUX_FS_H */
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec..a3960f9 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
LO_FLAGS_DIRECT_IO = 16,
+ LO_FLAGS_BLOCKSIZE = 32,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
__u64 lo_init[2];
};
+#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
+
/*
* Loop filter types
*/
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 155e33f..a50527e 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -41,10 +41,14 @@ enum {
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */
#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */
#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */
+#define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */
/* there is a gap here to match userspace */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */
#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */
+/* values for cmd flags in the upper 16 bits of request type */
+#define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */
+
/* These are client behavior specific flags. */
#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on
disconnect. */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33..57d2257 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
struct hib_bio_batch {
atomic_t count;
wait_queue_head_t wait;
- int error;
+ blk_status_t error;
};
static void hib_init_batch(struct hib_bio_batch *hb)
{
atomic_set(&hb->count, 0);
init_waitqueue_head(&hb->wait);
- hb->error = 0;
+ hb->error = BLK_STS_OK;
}
static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
struct hib_bio_batch *hb = bio->bi_private;
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
imajor(bio->bi_bdev->bd_inode),
iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
flush_icache_range((unsigned long)page_address(page),
(unsigned long)page_address(page) + PAGE_SIZE);
- if (bio->bi_error && !hb->error)
- hb->error = bio->bi_error;
+ if (bio->bi_status && !hb->error)
+ hb->error = bio->bi_status;
if (atomic_dec_and_test(&hb->count))
wake_up(&hb->wait);
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
return error;
}
-static int hib_wait_io(struct hib_bio_batch *hb)
+static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
{
wait_event(hb->wait, atomic_read(&hb->count) == 0);
- return hb->error;
+ return blk_status_to_errno(hb->error);
}
/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 193c5f5..bc364f8 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
__blk_add_trace(bt, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
- BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
+ BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
&rpdu);
}
}
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
+ bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
sizeof(r), &r);
}
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf822..be7b4dd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+/**
+ * sg_zero_buffer - Zero-out a part of a SG list
+ * @sgl: The SG list
+ * @nents: Number of SG entries
+ * @buflen: The number of bytes to zero out
+ * @skip: Number of bytes to skip before zeroing
+ *
+ * Returns the number of bytes zeroed.
+ **/
+size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
+ size_t buflen, off_t skip)
+{
+ unsigned int offset = 0;
+ struct sg_mapping_iter miter;
+ unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
+
+ sg_miter_start(&miter, sgl, nents, sg_flags);
+
+ if (!sg_miter_skip(&miter, skip))
+ return false;
+
+ while (offset < buflen && sg_miter_next(&miter)) {
+ unsigned int len;
+
+ len = min(miter.length, buflen - offset);
+ memset(miter.addr, 0, len);
+
+ offset += len;
+ }
+
+ sg_miter_stop(&miter);
+ return offset;
+}
+EXPORT_SYMBOL(sg_zero_buffer);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be57..742034e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
}
EXPORT_SYMBOL(filemap_flush);
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping: address space within which to check
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+bool filemap_range_has_page(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ pgoff_t index = start_byte >> PAGE_SHIFT;
+ pgoff_t end = end_byte >> PAGE_SHIFT;
+ struct pagevec pvec;
+ bool ret;
+
+ if (end_byte < start_byte)
+ return false;
+
+ if (mapping->nrpages == 0)
+ return false;
+
+ pagevec_init(&pvec, 0);
+ if (!pagevec_lookup(&pvec, mapping, index, 1))
+ return false;
+ ret = (pvec.pages[0]->index <= end);
+ pagevec_release(&pvec);
+ return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
static int __filemap_fdatawait_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
loff_t size;
size = i_size_read(inode);
- retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count - 1);
- if (retval < 0)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, iocb->ki_pos,
+ iocb->ki_pos + count - 1))
+ return -EAGAIN;
+ } else {
+ retval = filemap_write_and_wait_range(mapping,
+ iocb->ki_pos,
+ iocb->ki_pos + count - 1);
+ if (retval < 0)
+ goto out;
+ }
file_accessed(file);
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
pos = iocb->ki_pos;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
- written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* If there are pages to writeback, return */
+ if (filemap_range_has_page(inode->i_mapping, pos,
+ pos + iov_iter_count(from)))
+ return -EAGAIN;
+ } else {
+ written = filemap_write_and_wait_range(mapping, pos,
+ pos + write_len - 1);
+ if (written)
+ goto out;
+ }
/*
* After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d..2da71e6 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
{
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
/*
* We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
{
struct page *page = bio->bi_io_vec[0].bv_page;
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
ClearPageUptodate(page);
pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
OpenPOWER on IntegriCloud