diff options
203 files changed, 9729 insertions, 5577 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex index c06233f..8f85b0e 100644 --- a/Documentation/cdrom/cdrom-standard.tex +++ b/Documentation/cdrom/cdrom-standard.tex @@ -249,7 +249,6 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr unsigned\ long);\cr \noalign{\medskip} &const\ int& capability;& capability flags \cr - &int& n_minors;& number of active minor devices \cr \};\cr } $$ @@ -258,13 +257,7 @@ it should add a function pointer to this $struct$. When a particular function is not implemented, however, this $struct$ should contain a NULL instead. The $capability$ flags specify the capabilities of the \cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive -is registered with the \UCD. The value $n_minors$ should be a positive -value indicating the number of minor devices that are supported by -the low-level device driver, normally~1. Although these two variables -are `informative' rather than `operational,' they are included in -$cdrom_device_ops$ because they describe the capability of the {\em -driver\/} rather than the {\em drive}. Nomenclature has always been -difficult in computer programming. +is registered with the \UCD. Note that most functions have fewer parameters than their $blkdev_fops$ counterparts. This is because very little of the diff --git a/MAINTAINERS b/MAINTAINERS index 80f1a58..a0fb98c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8620,10 +8620,10 @@ S: Maintained F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) -M: Markus Pargmann <mpa@pengutronix.de> +M: Josef Bacik <jbacik@fb.com> S: Maintained +L: linux-block@vger.kernel.org L: nbd-general@lists.sourceforge.net -T: git git://git.pengutronix.de/git/mpa/linux-nbd.git F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c F: include/uapi/linux/nbd.h @@ -11097,6 +11097,17 @@ L: linux-mmc@vger.kernel.org S: Maintained F: drivers/mmc/host/sdhci-spear.c +SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER +M: Scott Bauer <scott.bauer@intel.com> +M: Jonathan Derrick <jonathan.derrick@intel.com> +M: Rafael Antognolli <rafael.antognolli@intel.com> +L: linux-block@vger.kernel.org +S: Supported +F: block/sed* +F: block/opal_proto.h +F: include/linux/sed* +F: include/uapi/linux/sed* + SECURITY SUBSYSTEM M: James Morris <james.l.morris@oracle.com> M: "Serge E. Hallyn" <serge@hallyn.com> diff --git a/block/Kconfig b/block/Kconfig index 8bf114a..a2a92e5 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -49,9 +49,13 @@ config LBDAF If unsure, say Y. +config BLK_SCSI_REQUEST + bool + config BLK_DEV_BSG bool "Block layer SG support v4" default y + select BLK_SCSI_REQUEST help Saying Y here will enable generic SG (SCSI generic) v4 support for any block device. @@ -71,6 +75,7 @@ config BLK_DEV_BSGLIB bool "Block layer SG support v4 helper lib" default n select BLK_DEV_BSG + select BLK_SCSI_REQUEST help Subsystems will normally enable this if needed. Users will not normally need to manually enable this. @@ -147,6 +152,25 @@ config BLK_WBT_MQ Multiqueue currently doesn't have support for IO scheduling, enabling this option is recommended. +config BLK_DEBUG_FS + bool "Block layer debugging information in debugfs" + default y + depends on DEBUG_FS + ---help--- + Include block layer debugging information in debugfs. This information + is mostly useful for kernel developers, but it doesn't incur any cost + at runtime. + + Unless you are building a kernel for a tiny system, you should + say Y here. + +config BLK_SED_OPAL + bool "Logic for interfacing with Opal enabled SEDs" + ---help--- + Builds Logic for interfacing with Opal enabled controllers. + Enabling this option enables users to setup/unlock/lock + Locking ranges for SED devices using the Opal protocol. + menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 421bef9..0715ce9 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -63,6 +63,56 @@ config DEFAULT_IOSCHED default "cfq" if DEFAULT_CFQ default "noop" if DEFAULT_NOOP +config MQ_IOSCHED_DEADLINE + tristate "MQ deadline I/O scheduler" + default y + ---help--- + MQ version of the deadline IO scheduler. + +config MQ_IOSCHED_NONE + bool + default y + +choice + prompt "Default single-queue blk-mq I/O scheduler" + default DEFAULT_SQ_NONE + help + Select the I/O scheduler which will be used by default for blk-mq + managed block devices with a single queue. + + config DEFAULT_SQ_DEADLINE + bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y + + config DEFAULT_SQ_NONE + bool "None" + +endchoice + +config DEFAULT_SQ_IOSCHED + string + default "mq-deadline" if DEFAULT_SQ_DEADLINE + default "none" if DEFAULT_SQ_NONE + +choice + prompt "Default multi-queue blk-mq I/O scheduler" + default DEFAULT_MQ_NONE + help + Select the I/O scheduler which will be used by default for blk-mq + managed block devices with multiple queues. + + config DEFAULT_MQ_DEADLINE + bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y + + config DEFAULT_MQ_NONE + bool "None" + +endchoice + +config DEFAULT_MQ_IOSCHED + string + default "mq-deadline" if DEFAULT_MQ_DEADLINE + default "none" if DEFAULT_MQ_NONE + endmenu endif diff --git a/block/Makefile b/block/Makefile index a827f98..2ad7c30 100644 --- a/block/Makefile +++ b/block/Makefile @@ -6,11 +6,12 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ - blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ - genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ + blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ + genhd.o partition-generic.o ioprio.o \ badblocks.o partitions/ -obj-$(CONFIG_BOUNCE) += bounce.o +obj-$(CONFIG_BOUNCE) += bounce.o +obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o @@ -18,6 +19,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o +obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o @@ -25,3 +27,5 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o +obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o +obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o diff --git a/block/bio.c b/block/bio.c index 2b37502..4b564d0 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1227,9 +1227,6 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (!bio) goto out_bmd; - if (iter->type & WRITE) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = 0; if (map_data) { @@ -1394,16 +1391,10 @@ struct bio *bio_map_user_iov(struct request_queue *q, kfree(pages); - /* - * set data direction, and check if mapped pages need bouncing - */ - if (iter->type & WRITE) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio_set_flag(bio, BIO_USER_MAPPED); /* - * subtle -- if __bio_map_user() ended up bouncing a bio, + * subtle -- if bio_map_user_iov() ended up bouncing a bio, * it would normally disappear when its bi_end_io is run. * however, we need it for the unmap, so grab an extra * reference to it @@ -1445,8 +1436,8 @@ static void __bio_unmap_user(struct bio *bio) * bio_unmap_user - unmap a bio * @bio: the bio being unmapped * - * Unmap a bio previously mapped by bio_map_user(). Must be called with - * a process context. + * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from + * process context. * * bio_unmap_user() may sleep. */ @@ -1590,7 +1581,6 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, bio->bi_private = data; } else { bio->bi_end_io = bio_copy_kern_endio; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); } return bio; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8ba0af7..295e98c2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -184,7 +184,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, goto err_free_blkg; } - wb_congested = wb_congested_get_create(&q->backing_dev_info, + wb_congested = wb_congested_get_create(q->backing_dev_info, blkcg->css.id, GFP_NOWAIT | __GFP_NOWARN); if (!wb_congested) { @@ -469,8 +469,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ - if (blkg->q->backing_dev_info.dev) - return dev_name(blkg->q->backing_dev_info.dev); + if (blkg->q->backing_dev_info->dev) + return dev_name(blkg->q->backing_dev_info->dev); return NULL; } EXPORT_SYMBOL_GPL(blkg_dev_name); @@ -1079,10 +1079,8 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); - if (IS_ERR(blkg)) { - blkg_free(new_blkg); + if (IS_ERR(blkg)) return PTR_ERR(blkg); - } q->root_blkg = blkg; q->root_rl.blkg = blkg; @@ -1223,7 +1221,10 @@ int blkcg_activate_policy(struct request_queue *q, if (blkcg_policy_enabled(q, pol)) return 0; - blk_queue_bypass_start(q); + if (q->mq_ops) + blk_mq_freeze_queue(q); + else + blk_queue_bypass_start(q); pd_prealloc: if (!pd_prealloc) { pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); @@ -1261,7 +1262,10 @@ pd_prealloc: spin_unlock_irq(q->queue_lock); out_bypass_end: - blk_queue_bypass_end(q); + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); if (pd_prealloc) pol->pd_free_fn(pd_prealloc); return ret; @@ -1284,7 +1288,11 @@ void blkcg_deactivate_policy(struct request_queue *q, if (!blkcg_policy_enabled(q, pol)) return; - blk_queue_bypass_start(q); + if (q->mq_ops) + blk_mq_freeze_queue(q); + else + blk_queue_bypass_start(q); + spin_lock_irq(q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); @@ -1304,7 +1312,11 @@ void blkcg_deactivate_policy(struct request_queue *q, } spin_unlock_irq(q->queue_lock); - blk_queue_bypass_end(q); + + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 61ba08c..b9e857f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -33,14 +33,20 @@ #include <linux/ratelimit.h> #include <linux/pm_runtime.h> #include <linux/blk-cgroup.h> +#include <linux/debugfs.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> #include "blk.h" #include "blk-mq.h" +#include "blk-mq-sched.h" #include "blk-wbt.h" +#ifdef CONFIG_DEBUG_FS +struct dentry *blk_debugfs_root; +#endif + EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); @@ -74,7 +80,7 @@ static void blk_clear_congested(struct request_list *rl, int sync) * flip its congestion state for events on other blkcgs. */ if (rl == &rl->q->root_rl) - clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -85,7 +91,7 @@ static void blk_set_congested(struct request_list *rl, int sync) #else /* see blk_clear_congested() */ if (rl == &rl->q->root_rl) - set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); + set_wb_congested(rl->q->backing_dev_info->wb.congested, sync); #endif } @@ -104,22 +110,6 @@ void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } -/** - * blk_get_backing_dev_info - get the address of a queue's backing_dev_info - * @bdev: device - * - * Locates the passed device's request queue and returns the address of its - * backing_dev_info. This function can only be called if @bdev is opened - * and the return value is never NULL. - */ -struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - return &q->backing_dev_info; -} -EXPORT_SYMBOL(blk_get_backing_dev_info); - void blk_rq_init(struct request_queue *q, struct request *rq) { memset(rq, 0, sizeof(*rq)); @@ -131,9 +121,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); - rq->cmd = rq->__cmd; - rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; + rq->internal_tag = -1; rq->start_time = jiffies; set_start_time_ns(rq); rq->part = NULL; @@ -158,10 +147,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio, void blk_dump_rq_flags(struct request *rq, char *msg) { - int bit; - - printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg, - rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, + printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, + rq->rq_disk ? rq->rq_disk->disk_name : "?", (unsigned long long) rq->cmd_flags); printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", @@ -169,13 +156,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg) blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); printk(KERN_INFO " bio %p, biotail %p, len %u\n", rq->bio, rq->biotail, blk_rq_bytes(rq)); - - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - printk(KERN_INFO " cdb: "); - for (bit = 0; bit < BLK_MAX_CDB; bit++) - printk("%02x ", rq->cmd[bit]); - printk("\n"); - } } EXPORT_SYMBOL(blk_dump_rq_flags); @@ -525,12 +505,14 @@ void blk_set_queue_dying(struct request_queue *q) else { struct request_list *rl; + spin_lock_irq(q->queue_lock); blk_queue_for_each_rl(rl, q) { if (rl->rq_pool) { wake_up(&rl->wait[BLK_RW_SYNC]); wake_up(&rl->wait[BLK_RW_ASYNC]); } } + spin_unlock_irq(q->queue_lock); } } EXPORT_SYMBOL_GPL(blk_set_queue_dying); @@ -584,7 +566,7 @@ void blk_cleanup_queue(struct request_queue *q) blk_flush_integrity(); /* @q won't process any more request, flush async actions */ - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); if (q->mq_ops) @@ -596,7 +578,8 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_unregister(&q->backing_dev_info); + bdi_unregister(q->backing_dev_info); + put_disk_devt(q->disk_devt); /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); @@ -604,17 +587,41 @@ void blk_cleanup_queue(struct request_queue *q) EXPORT_SYMBOL(blk_cleanup_queue); /* Allocate memory local to the request queue */ -static void *alloc_request_struct(gfp_t gfp_mask, void *data) +static void *alloc_request_simple(gfp_t gfp_mask, void *data) { - int nid = (int)(long)data; - return kmem_cache_alloc_node(request_cachep, gfp_mask, nid); + struct request_queue *q = data; + + return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node); } -static void free_request_struct(void *element, void *unused) +static void free_request_simple(void *element, void *data) { kmem_cache_free(request_cachep, element); } +static void *alloc_request_size(gfp_t gfp_mask, void *data) +{ + struct request_queue *q = data; + struct request *rq; + + rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask, + q->node); + if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) { + kfree(rq); + rq = NULL; + } + return rq; +} + +static void free_request_size(void *element, void *data) +{ + struct request_queue *q = data; + + if (q->exit_rq_fn) + q->exit_rq_fn(q, element); + kfree(element); +} + int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask) { @@ -627,10 +634,15 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); - rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct, - free_request_struct, - (void *)(long)q->node, gfp_mask, - q->node); + if (q->cmd_size) { + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, + alloc_request_size, free_request_size, + q, gfp_mask, q->node); + } else { + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, + alloc_request_simple, free_request_simple, + q, gfp_mask, q->node); + } if (!rl->rq_pool) return -ENOMEM; @@ -693,7 +705,6 @@ static void blk_rq_timed_out_timer(unsigned long data) struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { struct request_queue *q; - int err; q = kmem_cache_alloc_node(blk_requestq_cachep, gfp_mask | __GFP_ZERO, node_id); @@ -708,17 +719,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q->bio_split) goto fail_id; - q->backing_dev_info.ra_pages = + q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); + if (!q->backing_dev_info) + goto fail_split; + + q->backing_dev_info->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; - q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; - q->backing_dev_info.name = "block"; + q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; + q->backing_dev_info->name = "block"; q->node = node_id; - err = bdi_init(&q->backing_dev_info); - if (err) - goto fail_split; - - setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, + setup_timer(&q->backing_dev_info->laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->queue_head); @@ -768,7 +779,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) fail_ref: percpu_ref_exit(&q->q_usage_counter); fail_bdi: - bdi_destroy(&q->backing_dev_info); + bdi_put(q->backing_dev_info); fail_split: bioset_free(q->bio_split); fail_id: @@ -821,15 +832,19 @@ EXPORT_SYMBOL(blk_init_queue); struct request_queue * blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { - struct request_queue *uninit_q, *q; + struct request_queue *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); - if (!uninit_q) + q = blk_alloc_queue_node(GFP_KERNEL, node_id); + if (!q) return NULL; - q = blk_init_allocated_queue(uninit_q, rfn, lock); - if (!q) - blk_cleanup_queue(uninit_q); + q->request_fn = rfn; + if (lock) + q->queue_lock = lock; + if (blk_init_allocated_queue(q) < 0) { + blk_cleanup_queue(q); + return NULL; + } return q; } @@ -837,30 +852,22 @@ EXPORT_SYMBOL(blk_init_queue_node); static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); -struct request_queue * -blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, - spinlock_t *lock) -{ - if (!q) - return NULL; - q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0); +int blk_init_allocated_queue(struct request_queue *q) +{ + q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); if (!q->fq) - return NULL; + return -ENOMEM; + + if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL)) + goto out_free_flush_queue; if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) - goto fail; + goto out_exit_flush_rq; INIT_WORK(&q->timeout_work, blk_timeout_work); - q->request_fn = rfn; - q->prep_rq_fn = NULL; - q->unprep_rq_fn = NULL; q->queue_flags |= QUEUE_FLAG_DEFAULT; - /* Override internal queue lock with supplied lock pointer */ - if (lock) - q->queue_lock = lock; - /* * This also sets hw/phys segments, boundary and size */ @@ -874,17 +881,19 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) { mutex_unlock(&q->sysfs_lock); - goto fail; + goto out_exit_flush_rq; } mutex_unlock(&q->sysfs_lock); + return 0; - return q; - -fail: +out_exit_flush_rq: + if (q->exit_rq_fn) + q->exit_rq_fn(q, q->fq->flush_rq); +out_free_flush_queue: blk_free_flush_queue(q->fq); wbt_exit(q); - return NULL; + return -ENOMEM; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1020,41 +1029,6 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr) return 0; } -/* - * Determine if elevator data should be initialized when allocating the - * request associated with @bio. - */ -static bool blk_rq_should_init_elevator(struct bio *bio) -{ - if (!bio) - return true; - - /* - * Flush requests do not use the elevator so skip initialization. - * This allows a request to share the flush and elevator data. - */ - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) - return false; - - return true; -} - -/** - * rq_ioc - determine io_context for request allocation - * @bio: request being allocated is for this bio (can be %NULL) - * - * Determine io_context to use for request allocation for @bio. May return - * %NULL if %current->io_context doesn't exist. - */ -static struct io_context *rq_ioc(struct bio *bio) -{ -#ifdef CONFIG_BLK_CGROUP - if (bio && bio->bi_ioc) - return bio->bi_ioc; -#endif - return current->io_context; -} - /** * __get_request - get a free request * @rl: request list to allocate from @@ -1133,10 +1107,13 @@ static struct request *__get_request(struct request_list *rl, unsigned int op, * request is freed. This guarantees icq's won't be destroyed and * makes creating new ones safe. * + * Flush requests do not use the elevator so skip initialization. + * This allows a request to share the flush and elevator data. + * * Also, lookup icq while holding queue_lock. If it doesn't exist, * it will be created after releasing queue_lock. */ - if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { + if (!op_is_flush(op) && !blk_queue_bypass(q)) { rq_flags |= RQF_ELVPRIV; q->nr_rqs_elvpriv++; if (et->icq_cache && ioc) @@ -1196,7 +1173,7 @@ fail_elvpriv: * disturb iosched and blkcg but weird is bettern than dead. */ printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", - __func__, dev_name(q->backing_dev_info.dev)); + __func__, dev_name(q->backing_dev_info->dev)); rq->rq_flags &= ~RQF_ELVPRIV; rq->elv.icq = NULL; @@ -1290,8 +1267,6 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, { struct request *rq; - BUG_ON(rw != READ && rw != WRITE); - /* create ioc upfront */ create_io_context(gfp_mask, q->node); @@ -1321,18 +1296,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_rq_set_block_pc - initialize a request to type BLOCK_PC - * @rq: request to be initialized - * - */ -void blk_rq_set_block_pc(struct request *rq) -{ - rq->cmd_type = REQ_TYPE_BLOCK_PC; - memset(rq->__cmd, 0, sizeof(rq->__cmd)); -} -EXPORT_SYMBOL(blk_rq_set_block_pc); - -/** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted * @rq: request to be inserted @@ -1522,6 +1485,30 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, return true; } +bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, + struct bio *bio) +{ + unsigned short segments = blk_rq_nr_discard_segments(req); + + if (segments >= queue_max_discard_segments(q)) + goto no_merge; + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req, blk_rq_pos(req))) + goto no_merge; + + req->biotail->bi_next = bio; + req->biotail = bio; + req->__data_len += bio->bi_iter.bi_size; + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); + req->nr_phys_segments = segments + 1; + + blk_account_io_start(req, false); + return true; +no_merge: + req_set_nomerge(q, req); + return false; +} + /** * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at @@ -1550,12 +1537,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, { struct blk_plug *plug; struct request *rq; - bool ret = false; struct list_head *plug_list; plug = current->plug; if (!plug) - goto out; + return false; *request_count = 0; if (q->mq_ops) @@ -1564,7 +1550,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, plug_list = &plug->list; list_for_each_entry_reverse(rq, plug_list, queuelist) { - int el_ret; + bool merged = false; if (rq->q == q) { (*request_count)++; @@ -1580,19 +1566,25 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (rq->q != q || !blk_rq_merge_ok(rq, bio)) continue; - el_ret = blk_try_merge(rq, bio); - if (el_ret == ELEVATOR_BACK_MERGE) { - ret = bio_attempt_back_merge(q, rq, bio); - if (ret) - break; - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - ret = bio_attempt_front_merge(q, rq, bio); - if (ret) - break; + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + merged = bio_attempt_back_merge(q, rq, bio); + break; + case ELEVATOR_FRONT_MERGE: + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); + break; + default: + break; } + + if (merged) + return true; } -out: - return ret; + + return false; } unsigned int blk_plug_queued_count(struct request_queue *q) @@ -1621,7 +1613,6 @@ out: void init_request_from_bio(struct request *req, struct bio *bio) { - req->cmd_type = REQ_TYPE_FS; if (bio->bi_opf & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; @@ -1635,8 +1626,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { struct blk_plug *plug; - int el_ret, where = ELEVATOR_INSERT_SORT; - struct request *req; + int where = ELEVATOR_INSERT_SORT; + struct request *req, *free; unsigned int request_count = 0; unsigned int wb_acct; @@ -1655,7 +1646,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { + if (op_is_flush(bio->bi_opf)) { spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; goto get_rq; @@ -1673,21 +1664,29 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - el_ret = elv_merge(q, &req, bio); - if (el_ret == ELEVATOR_BACK_MERGE) { - if (bio_attempt_back_merge(q, req, bio)) { - elv_bio_merged(q, req, bio); - if (!attempt_back_merge(q, req)) - elv_merged_request(q, req, el_ret); - goto out_unlock; - } - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - if (bio_attempt_front_merge(q, req, bio)) { - elv_bio_merged(q, req, bio); - if (!attempt_front_merge(q, req)) - elv_merged_request(q, req, el_ret); - goto out_unlock; - } + switch (elv_merge(q, &req, bio)) { + case ELEVATOR_BACK_MERGE: + if (!bio_attempt_back_merge(q, req, bio)) + break; + elv_bio_merged(q, req, bio); + free = attempt_back_merge(q, req); + if (free) + __blk_put_request(q, free); + else + elv_merged_request(q, req, ELEVATOR_BACK_MERGE); + goto out_unlock; + case ELEVATOR_FRONT_MERGE: + if (!bio_attempt_front_merge(q, req, bio)) + break; + elv_bio_merged(q, req, bio); + free = attempt_front_merge(q, req); + if (free) + __blk_put_request(q, free); + else + elv_merged_request(q, req, ELEVATOR_FRONT_MERGE); + goto out_unlock; + default: + break; } get_rq: @@ -1894,7 +1893,7 @@ generic_make_request_checks(struct bio *bio) * drivers without flush support don't have to worry * about them. */ - if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && + if (op_is_flush(bio->bi_opf) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); if (!nr_sectors) { @@ -2143,7 +2142,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); - blk_mq_insert_request(rq, false, true, false); + blk_mq_sched_insert_request(rq, false, true, false, false); return 0; } @@ -2159,7 +2158,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) + if (op_is_flush(rq->cmd_flags)) where = ELEVATOR_INSERT_FLUSH; add_acct_request(q, rq, where); @@ -2464,14 +2463,6 @@ void blk_start_request(struct request *req) wbt_issue(req->q->rq_wb, &req->issue_stat); } - /* - * We are now handing the request to the hardware, initialize - * resid_len to full count and add the timeout handler. - */ - req->resid_len = blk_rq_bytes(req); - if (unlikely(blk_bidi_rq(req))) - req->next_rq->resid_len = blk_rq_bytes(req->next_rq); - BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); blk_add_timer(req); } @@ -2542,10 +2533,10 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * TODO: tj: This is too subtle. It would be better to let * low level drivers do what they see fit. */ - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) req->errors = 0; - if (error && req->cmd_type == REQ_TYPE_FS && + if (error && !blk_rq_is_passthrough(req) && !(req->rq_flags & RQF_QUIET)) { char *error_type; @@ -2617,7 +2608,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->__data_len -= total_bytes; /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ @@ -2695,8 +2686,8 @@ void blk_finish_request(struct request *req, int error) BUG_ON(blk_queued_rq(req)); - if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) - laptop_io_completion(&req->q->backing_dev_info); + if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req)) + laptop_io_completion(req->q->backing_dev_info); blk_delete_timer(req); @@ -3019,8 +3010,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = src->cmd_flags | REQ_NOMERGE; - dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); dst->nr_phys_segments = src->nr_phys_segments; @@ -3270,7 +3259,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) /* * rq is already accounted, so use raw insert */ - if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) + if (op_is_flush(rq->cmd_flags)) __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); else __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); @@ -3496,5 +3485,9 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("request_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); +#ifdef CONFIG_DEBUG_FS + blk_debugfs_root = debugfs_create_dir("block", NULL); +#endif + return 0; } diff --git a/block/blk-exec.c b/block/blk-exec.c index 3ecb00a..8cd0e9b 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -9,11 +9,7 @@ #include <linux/sched/sysctl.h> #include "blk.h" - -/* - * for max sense size - */ -#include <scsi/scsi_cmnd.h> +#include "blk-mq-sched.h" /** * blk_end_sync_rq - executes a completion event on a request @@ -55,7 +51,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; WARN_ON(irqs_disabled()); - WARN_ON(rq->cmd_type == REQ_TYPE_FS); + WARN_ON(!blk_rq_is_passthrough(rq)); rq->rq_disk = bd_disk; rq->end_io = done; @@ -65,7 +61,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be reused after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(rq, at_head, true, false); + blk_mq_sched_insert_request(rq, at_head, true, false, false); return; } @@ -100,16 +96,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, struct request *rq, int at_head) { DECLARE_COMPLETION_ONSTACK(wait); - char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; unsigned long hang_check; - if (!rq->sense) { - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; - } - rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); @@ -123,11 +112,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, if (rq->errors) err = -EIO; - if (rq->sense == sense) { - rq->sense = NULL; - rq->sense_len = 0; - } - return err; } EXPORT_SYMBOL(blk_execute_rq); diff --git a/block/blk-flush.c b/block/blk-flush.c index 20b7c7a..0d5a9c1 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -74,6 +74,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-mq-sched.h" /* FLUSH/FUA sequences */ enum { @@ -296,8 +297,14 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) return false; - /* C2 and C3 */ + /* C2 and C3 + * + * For blk-mq + scheduling, we can risk having all driver tags + * assigned to empty flushes, and we deadlock if we are expecting + * other requests to make progress. Don't defer for that case. + */ if (!list_empty(&fq->flush_data_in_flight) && + !(q->mq_ops && q->elevator) && time_before(jiffies, fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) return false; @@ -326,7 +333,6 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); } - flush_rq->cmd_type = REQ_TYPE_FS; flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; @@ -391,9 +397,10 @@ static void mq_flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). */ spin_lock_irqsave(&fq->mq_flush_lock, flags); - if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) - blk_mq_run_hw_queue(hctx, true); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + + blk_mq_run_hw_queue(hctx, true); } /** @@ -453,9 +460,9 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - if (q->mq_ops) { - blk_mq_insert_request(rq, false, true, false); - } else + if (q->mq_ops) + blk_mq_sched_insert_request(rq, false, true, false, false); + else list_add_tail(&rq->queuelist, &q->queue_head); return; } @@ -545,11 +552,10 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, if (!fq) goto fail; - if (q->mq_ops) { + if (q->mq_ops) spin_lock_init(&fq->mq_flush_lock); - rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); - } + rq_sz = round_up(rq_sz + cmd_size, cache_line_size()); fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); if (!fq->flush_rq) goto fail_rq; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index d69c5c7..9f0ff5b 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -443,10 +443,10 @@ void blk_integrity_revalidate(struct gendisk *disk) return; if (bi->profile) - disk->queue->backing_dev_info.capabilities |= + disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else - disk->queue->backing_dev_info.capabilities &= + disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 381cb50..b12f9c8 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -35,7 +35,10 @@ static void icq_free_icq_rcu(struct rcu_head *head) kmem_cache_free(icq->__rcu_icq_cache, icq); } -/* Exit an icq. Called with both ioc and q locked. */ +/* + * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for + * mq. + */ static void ioc_exit_icq(struct io_cq *icq) { struct elevator_type *et = icq->q->elevator->type; @@ -43,8 +46,10 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->ops.elevator_exit_icq_fn) - et->ops.elevator_exit_icq_fn(icq); + if (et->uses_mq && et->ops.mq.exit_icq) + et->ops.mq.exit_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn) + et->ops.sq.elevator_exit_icq_fn(icq); icq->flags |= ICQ_EXITED; } @@ -164,6 +169,7 @@ EXPORT_SYMBOL(put_io_context); */ void put_io_context_active(struct io_context *ioc) { + struct elevator_type *et; unsigned long flags; struct io_cq *icq; @@ -182,13 +188,19 @@ retry: hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { if (icq->flags & ICQ_EXITED) continue; - if (spin_trylock(icq->q->queue_lock)) { + + et = icq->q->elevator->type; + if (et->uses_mq) { ioc_exit_icq(icq); - spin_unlock(icq->q->queue_lock); } else { - spin_unlock_irqrestore(&ioc->lock, flags); - cpu_relax(); - goto retry; + if (spin_trylock(icq->q->queue_lock)) { + ioc_exit_icq(icq); + spin_unlock(icq->q->queue_lock); + } else { + spin_unlock_irqrestore(&ioc->lock, flags); + cpu_relax(); + goto retry; + } } } spin_unlock_irqrestore(&ioc->lock, flags); @@ -383,8 +395,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->ops.elevator_init_icq_fn) - et->ops.elevator_init_icq_fn(icq); + if (et->uses_mq && et->ops.mq.init_icq) + et->ops.mq.init_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn) + et->ops.sq.elevator_init_icq_fn(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(ioc, q); diff --git a/block/blk-map.c b/block/blk-map.c index 0acb664..2f18c2a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -16,8 +16,6 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) { if (!rq->bio) { - rq->cmd_flags &= REQ_OP_MASK; - rq->cmd_flags |= (bio->bi_opf & REQ_OP_MASK); blk_rq_bio_prep(rq->q, rq, bio); } else { if (!ll_back_merge_fn(rq->q, rq, bio)) @@ -62,6 +60,9 @@ static int __blk_rq_map_user_iov(struct request *rq, if (IS_ERR(bio)) return PTR_ERR(bio); + bio->bi_opf &= ~REQ_OP_MASK; + bio->bi_opf |= req_op(rq); + if (map_data && map_data->null_mapped) bio_set_flag(bio, BIO_NULL_MAPPED); @@ -90,7 +91,7 @@ static int __blk_rq_map_user_iov(struct request *rq, } /** - * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage + * blk_rq_map_user_iov - map user data to a request, for passthrough requests * @q: request queue where request should be inserted * @rq: request to map data to * @map_data: pointer to the rq_map_data holding pages (if necessary) @@ -199,7 +200,7 @@ int blk_rq_unmap_user(struct bio *bio) EXPORT_SYMBOL(blk_rq_unmap_user); /** - * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage + * blk_rq_map_kern - map kernel data to a request, for passthrough requests * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer @@ -234,8 +235,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (IS_ERR(bio)) return PTR_ERR(bio); - if (!reading) - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf &= ~REQ_OP_MASK; + bio->bi_opf |= req_op(rq); if (do_copy) rq->rq_flags |= RQF_COPY_USER; diff --git a/block/blk-merge.c b/block/blk-merge.c index 182398c..2afa262 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -482,13 +482,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -static void req_set_nomerge(struct request_queue *q, struct request *req) -{ - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; -} - static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) @@ -659,31 +652,32 @@ static void blk_account_io_merge(struct request *req) } /* - * Has to be called with the request spinlock acquired + * For non-mq, this has to be called with the request spinlock acquired. + * For mq with scheduling, the appropriate queue wide lock should be held. */ -static int attempt_merge(struct request_queue *q, struct request *req, - struct request *next) +static struct request *attempt_merge(struct request_queue *q, + struct request *req, struct request *next) { if (!rq_mergeable(req) || !rq_mergeable(next)) - return 0; + return NULL; if (req_op(req) != req_op(next)) - return 0; + return NULL; /* * not contiguous */ if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) - return 0; + return NULL; if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || req_no_special_merge(next)) - return 0; + return NULL; if (req_op(req) == REQ_OP_WRITE_SAME && !blk_write_same_mergeable(req->bio, next->bio)) - return 0; + return NULL; /* * If we are allowed to merge, then append bio list @@ -692,7 +686,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, * counts here. */ if (!ll_merge_requests_fn(q, req, next)) - return 0; + return NULL; /* * If failfast settings disagree or any of the two is already @@ -732,42 +726,51 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (blk_rq_cpu_valid(next)) req->cpu = next->cpu; - /* owner-ship of bio passed from next to req */ + /* + * ownership of bio passed from next to req, return 'next' for + * the caller to free + */ next->bio = NULL; - __blk_put_request(q, next); - return 1; + return next; } -int attempt_back_merge(struct request_queue *q, struct request *rq) +struct request *attempt_back_merge(struct request_queue *q, struct request *rq) { struct request *next = elv_latter_request(q, rq); if (next) return attempt_merge(q, rq, next); - return 0; + return NULL; } -int attempt_front_merge(struct request_queue *q, struct request *rq) +struct request *attempt_front_merge(struct request_queue *q, struct request *rq) { struct request *prev = elv_former_request(q, rq); if (prev) return attempt_merge(q, prev, rq); - return 0; + return NULL; } int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next) { struct elevator_queue *e = q->elevator; + struct request *free; - if (e->type->ops.elevator_allow_rq_merge_fn) - if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) + if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) + if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) return 0; - return attempt_merge(q, rq, next); + free = attempt_merge(q, rq, next); + if (free) { + __blk_put_request(q, free); + return 1; + } + + return 0; } bool blk_rq_merge_ok(struct request *rq, struct bio *bio) @@ -798,9 +801,12 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return true; } -int blk_try_merge(struct request *rq, struct bio *bio) +enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) + if (req_op(rq) == REQ_OP_DISCARD && + queue_max_discard_segments(rq->q) > 1) + return ELEVATOR_DISCARD_MERGE; + else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) return ELEVATOR_FRONT_MERGE; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c new file mode 100644 index 0000000..f6d9179 --- /dev/null +++ b/block/blk-mq-debugfs.c @@ -0,0 +1,772 @@ +/* + * Copyright (C) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/debugfs.h> + +#include <linux/blk-mq.h> +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-tag.h" + +struct blk_mq_debugfs_attr { + const char *name; + umode_t mode; + const struct file_operations *fops; +}; + +static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) +{ + struct seq_file *m; + int ret; + + ret = seq_open(file, ops); + if (!ret) { + m = file->private_data; + m->private = inode->i_private; + } + return ret; +} + +static int hctx_state_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "0x%lx\n", hctx->state); + return 0; +} + +static int hctx_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_state_show, inode->i_private); +} + +static const struct file_operations hctx_state_fops = { + .open = hctx_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_flags_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "0x%lx\n", hctx->flags); + return 0; +} + +static int hctx_flags_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_flags_show, inode->i_private); +} + +static const struct file_operations hctx_flags_fops = { + .open = hctx_flags_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +{ + struct request *rq = list_entry_rq(v); + + seq_printf(m, "%p {.cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n", + rq, rq->cmd_flags, (__force unsigned int)rq->rq_flags, + rq->tag, rq->internal_tag); + return 0; +} + +static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) + __acquires(&hctx->lock) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + spin_lock(&hctx->lock); + return seq_list_start(&hctx->dispatch, *pos); +} + +static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + return seq_list_next(v, &hctx->dispatch, pos); +} + +static void hctx_dispatch_stop(struct seq_file *m, void *v) + __releases(&hctx->lock) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + spin_unlock(&hctx->lock); +} + +static const struct seq_operations hctx_dispatch_seq_ops = { + .start = hctx_dispatch_start, + .next = hctx_dispatch_next, + .stop = hctx_dispatch_stop, + .show = blk_mq_debugfs_rq_show, +}; + +static int hctx_dispatch_open(struct inode *inode, struct file *file) +{ + return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops); +} + +static const struct file_operations hctx_dispatch_fops = { + .open = hctx_dispatch_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int hctx_ctx_map_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + sbitmap_bitmap_show(&hctx->ctx_map, m); + return 0; +} + +static int hctx_ctx_map_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_ctx_map_show, inode->i_private); +} + +static const struct file_operations hctx_ctx_map_fops = { + .open = hctx_ctx_map_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void blk_mq_debugfs_tags_show(struct seq_file *m, + struct blk_mq_tags *tags) +{ + seq_printf(m, "nr_tags=%u\n", tags->nr_tags); + seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags); + seq_printf(m, "active_queues=%d\n", + atomic_read(&tags->active_queues)); + + seq_puts(m, "\nbitmap_tags:\n"); + sbitmap_queue_show(&tags->bitmap_tags, m); + + if (tags->nr_reserved_tags) { + seq_puts(m, "\nbreserved_tags:\n"); + sbitmap_queue_show(&tags->breserved_tags, m); + } +} + +static int hctx_tags_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + struct request_queue *q = hctx->queue; + int res; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->tags) + blk_mq_debugfs_tags_show(m, hctx->tags); + mutex_unlock(&q->sysfs_lock); + +out: + return res; +} + +static int hctx_tags_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_tags_show, inode->i_private); +} + +static const struct file_operations hctx_tags_fops = { + .open = hctx_tags_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_tags_bitmap_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + struct request_queue *q = hctx->queue; + int res; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->tags) + sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); + mutex_unlock(&q->sysfs_lock); + +out: + return res; +} + +static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_tags_bitmap_show, inode->i_private); +} + +static const struct file_operations hctx_tags_bitmap_fops = { + .open = hctx_tags_bitmap_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_sched_tags_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + struct request_queue *q = hctx->queue; + int res; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->sched_tags) + blk_mq_debugfs_tags_show(m, hctx->sched_tags); + mutex_unlock(&q->sysfs_lock); + +out: + return res; +} + +static int hctx_sched_tags_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_sched_tags_show, inode->i_private); +} + +static const struct file_operations hctx_sched_tags_fops = { + .open = hctx_sched_tags_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + struct request_queue *q = hctx->queue; + int res; + + res = mutex_lock_interruptible(&q->sysfs_lock); + if (res) + goto out; + if (hctx->sched_tags) + sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); + mutex_unlock(&q->sysfs_lock); + +out: + return res; +} + +static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private); +} + +static const struct file_operations hctx_sched_tags_bitmap_fops = { + .open = hctx_sched_tags_bitmap_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_io_poll_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "considered=%lu\n", hctx->poll_considered); + seq_printf(m, "invoked=%lu\n", hctx->poll_invoked); + seq_printf(m, "success=%lu\n", hctx->poll_success); + return 0; +} + +static int hctx_io_poll_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_io_poll_show, inode->i_private); +} + +static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_hw_ctx *hctx = m->private; + + hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; + return count; +} + +static const struct file_operations hctx_io_poll_fops = { + .open = hctx_io_poll_open, + .read = seq_read, + .write = hctx_io_poll_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) +{ + seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu", + stat->nr_samples, stat->mean, stat->min, stat->max); +} + +static int hctx_stats_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + struct blk_rq_stat stat[2]; + + blk_stat_init(&stat[BLK_STAT_READ]); + blk_stat_init(&stat[BLK_STAT_WRITE]); + + blk_hctx_stat_get(hctx, stat); + + seq_puts(m, "read: "); + print_stat(m, &stat[BLK_STAT_READ]); + seq_puts(m, "\n"); + + seq_puts(m, "write: "); + print_stat(m, &stat[BLK_STAT_WRITE]); + seq_puts(m, "\n"); + return 0; +} + +static int hctx_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_stats_show, inode->i_private); +} + +static ssize_t hctx_stats_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_ctx *ctx; + int i; + + hctx_for_each_ctx(hctx, ctx, i) { + blk_stat_init(&ctx->stat[BLK_STAT_READ]); + blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); + } + return count; +} + +static const struct file_operations hctx_stats_fops = { + .open = hctx_stats_open, + .read = seq_read, + .write = hctx_stats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_dispatched_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + int i; + + seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]); + + for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) { + unsigned int d = 1U << (i - 1); + + seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]); + } + + seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]); + return 0; +} + +static int hctx_dispatched_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_dispatched_show, inode->i_private); +} + +static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_hw_ctx *hctx = m->private; + int i; + + for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) + hctx->dispatched[i] = 0; + return count; +} + +static const struct file_operations hctx_dispatched_fops = { + .open = hctx_dispatched_open, + .read = seq_read, + .write = hctx_dispatched_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_queued_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "%lu\n", hctx->queued); + return 0; +} + +static int hctx_queued_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_queued_show, inode->i_private); +} + +static ssize_t hctx_queued_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_hw_ctx *hctx = m->private; + + hctx->queued = 0; + return count; +} + +static const struct file_operations hctx_queued_fops = { + .open = hctx_queued_open, + .read = seq_read, + .write = hctx_queued_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_run_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "%lu\n", hctx->run); + return 0; +} + +static int hctx_run_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_run_show, inode->i_private); +} + +static ssize_t hctx_run_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_hw_ctx *hctx = m->private; + + hctx->run = 0; + return count; +} + +static const struct file_operations hctx_run_fops = { + .open = hctx_run_open, + .read = seq_read, + .write = hctx_run_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hctx_active_show(struct seq_file *m, void *v) +{ + struct blk_mq_hw_ctx *hctx = m->private; + + seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); + return 0; +} + +static int hctx_active_open(struct inode *inode, struct file *file) +{ + return single_open(file, hctx_active_show, inode->i_private); +} + +static const struct file_operations hctx_active_fops = { + .open = hctx_active_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) + __acquires(&ctx->lock) +{ + struct blk_mq_ctx *ctx = m->private; + + spin_lock(&ctx->lock); + return seq_list_start(&ctx->rq_list, *pos); +} + +static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct blk_mq_ctx *ctx = m->private; + + return seq_list_next(v, &ctx->rq_list, pos); +} + +static void ctx_rq_list_stop(struct seq_file *m, void *v) + __releases(&ctx->lock) +{ + struct blk_mq_ctx *ctx = m->private; + + spin_unlock(&ctx->lock); +} + +static const struct seq_operations ctx_rq_list_seq_ops = { + .start = ctx_rq_list_start, + .next = ctx_rq_list_next, + .stop = ctx_rq_list_stop, + .show = blk_mq_debugfs_rq_show, +}; + +static int ctx_rq_list_open(struct inode *inode, struct file *file) +{ + return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops); +} + +static const struct file_operations ctx_rq_list_fops = { + .open = ctx_rq_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int ctx_dispatched_show(struct seq_file *m, void *v) +{ + struct blk_mq_ctx *ctx = m->private; + + seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); + return 0; +} + +static int ctx_dispatched_open(struct inode *inode, struct file *file) +{ + return single_open(file, ctx_dispatched_show, inode->i_private); +} + +static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_ctx *ctx = m->private; + + ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; + return count; +} + +static const struct file_operations ctx_dispatched_fops = { + .open = ctx_dispatched_open, + .read = seq_read, + .write = ctx_dispatched_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ctx_merged_show(struct seq_file *m, void *v) +{ + struct blk_mq_ctx *ctx = m->private; + + seq_printf(m, "%lu\n", ctx->rq_merged); + return 0; +} + +static int ctx_merged_open(struct inode *inode, struct file *file) +{ + return single_open(file, ctx_merged_show, inode->i_private); +} + +static ssize_t ctx_merged_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_ctx *ctx = m->private; + + ctx->rq_merged = 0; + return count; +} + +static const struct file_operations ctx_merged_fops = { + .open = ctx_merged_open, + .read = seq_read, + .write = ctx_merged_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ctx_completed_show(struct seq_file *m, void *v) +{ + struct blk_mq_ctx *ctx = m->private; + + seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); + return 0; +} + +static int ctx_completed_open(struct inode *inode, struct file *file) +{ + return single_open(file, ctx_completed_show, inode->i_private); +} + +static ssize_t ctx_completed_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *m = file->private_data; + struct blk_mq_ctx *ctx = m->private; + + ctx->rq_completed[0] = ctx->rq_completed[1] = 0; + return count; +} + +static const struct file_operations ctx_completed_fops = { + .open = ctx_completed_open, + .read = seq_read, + .write = ctx_completed_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { + {"state", 0400, &hctx_state_fops}, + {"flags", 0400, &hctx_flags_fops}, + {"dispatch", 0400, &hctx_dispatch_fops}, + {"ctx_map", 0400, &hctx_ctx_map_fops}, + {"tags", 0400, &hctx_tags_fops}, + {"tags_bitmap", 0400, &hctx_tags_bitmap_fops}, + {"sched_tags", 0400, &hctx_sched_tags_fops}, + {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops}, + {"io_poll", 0600, &hctx_io_poll_fops}, + {"stats", 0600, &hctx_stats_fops}, + {"dispatched", 0600, &hctx_dispatched_fops}, + {"queued", 0600, &hctx_queued_fops}, + {"run", 0600, &hctx_run_fops}, + {"active", 0400, &hctx_active_fops}, + {}, +}; + +static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { + {"rq_list", 0400, &ctx_rq_list_fops}, + {"dispatched", 0600, &ctx_dispatched_fops}, + {"merged", 0600, &ctx_merged_fops}, + {"completed", 0600, &ctx_completed_fops}, + {}, +}; + +int blk_mq_debugfs_register(struct request_queue *q, const char *name) +{ + if (!blk_debugfs_root) + return -ENOENT; + + q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root); + if (!q->debugfs_dir) + goto err; + + if (blk_mq_debugfs_register_hctxs(q)) + goto err; + + return 0; + +err: + blk_mq_debugfs_unregister(q); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + debugfs_remove_recursive(q->debugfs_dir); + q->mq_debugfs_dir = NULL; + q->debugfs_dir = NULL; +} + +static bool debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr) +{ + for (; attr->name; attr++) { + if (!debugfs_create_file(attr->name, attr->mode, parent, + data, attr->fops)) + return false; + } + return true; +} + +static int blk_mq_debugfs_register_ctx(struct request_queue *q, + struct blk_mq_ctx *ctx, + struct dentry *hctx_dir) +{ + struct dentry *ctx_dir; + char name[20]; + + snprintf(name, sizeof(name), "cpu%u", ctx->cpu); + ctx_dir = debugfs_create_dir(name, hctx_dir); + if (!ctx_dir) + return -ENOMEM; + + if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs)) + return -ENOMEM; + + return 0; +} + +static int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_ctx *ctx; + struct dentry *hctx_dir; + char name[20]; + int i; + + snprintf(name, sizeof(name), "%u", hctx->queue_num); + hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); + if (!hctx_dir) + return -ENOMEM; + + if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs)) + return -ENOMEM; + + hctx_for_each_ctx(hctx, ctx, i) { + if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) + return -ENOMEM; + } + + return 0; +} + +int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + if (!q->debugfs_dir) + return -ENOENT; + + q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir); + if (!q->mq_debugfs_dir) + goto err; + + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_debugfs_register_hctx(q, hctx)) + goto err; + } + + return 0; + +err: + blk_mq_debugfs_unregister_hctxs(q); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) +{ + debugfs_remove_recursive(q->mq_debugfs_dir); + q->mq_debugfs_dir = NULL; +} diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c new file mode 100644 index 0000000..9e8d679 --- /dev/null +++ b/block/blk-mq-sched.c @@ -0,0 +1,515 @@ +/* + * blk-mq scheduling framework + * + * Copyright (C) 2016 Jens Axboe + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/blk-mq.h> + +#include <trace/events/block.h> + +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-sched.h" +#include "blk-mq-tag.h" +#include "blk-wbt.h" + +void blk_mq_sched_free_hctx_data(struct request_queue *q, + void (*exit)(struct blk_mq_hw_ctx *)) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (exit && hctx->sched_data) + exit(hctx); + kfree(hctx->sched_data); + hctx->sched_data = NULL; + } +} +EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); + +int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, + int (*init)(struct blk_mq_hw_ctx *), + void (*exit)(struct blk_mq_hw_ctx *)) +{ + struct blk_mq_hw_ctx *hctx; + int ret; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node); + if (!hctx->sched_data) { + ret = -ENOMEM; + goto error; + } + + if (init) { + ret = init(hctx); + if (ret) { + /* + * We don't want to give exit() a partially + * initialized sched_data. init() must clean up + * if it fails. + */ + kfree(hctx->sched_data); + hctx->sched_data = NULL; + goto error; + } + } + } + + return 0; +error: + blk_mq_sched_free_hctx_data(q, exit); + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data); + +static void __blk_mq_sched_assign_ioc(struct request_queue *q, + struct request *rq, + struct bio *bio, + struct io_context *ioc) +{ + struct io_cq *icq; + + spin_lock_irq(q->queue_lock); + icq = ioc_lookup_icq(ioc, q); + spin_unlock_irq(q->queue_lock); + + if (!icq) { + icq = ioc_create_icq(ioc, q, GFP_ATOMIC); + if (!icq) + return; + } + + rq->elv.icq = icq; + if (!blk_mq_sched_get_rq_priv(q, rq, bio)) { + rq->rq_flags |= RQF_ELVPRIV; + get_io_context(icq->ioc); + return; + } + + rq->elv.icq = NULL; +} + +static void blk_mq_sched_assign_ioc(struct request_queue *q, + struct request *rq, struct bio *bio) +{ + struct io_context *ioc; + + ioc = rq_ioc(bio); + if (ioc) + __blk_mq_sched_assign_ioc(q, rq, bio, ioc); +} + +struct request *blk_mq_sched_get_request(struct request_queue *q, + struct bio *bio, + unsigned int op, + struct blk_mq_alloc_data *data) +{ + struct elevator_queue *e = q->elevator; + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + + blk_queue_enter_live(q); + ctx = blk_mq_get_ctx(q); + hctx = blk_mq_map_queue(q, ctx->cpu); + + blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx); + + if (e) { + data->flags |= BLK_MQ_REQ_INTERNAL; + + /* + * Flush requests are special and go directly to the + * dispatch list. + */ + if (!op_is_flush(op) && e->type->ops.mq.get_request) { + rq = e->type->ops.mq.get_request(q, op, data); + if (rq) + rq->rq_flags |= RQF_QUEUED; + } else + rq = __blk_mq_alloc_request(data, op); + } else { + rq = __blk_mq_alloc_request(data, op); + if (rq) + data->hctx->tags->rqs[rq->tag] = rq; + } + + if (rq) { + if (!op_is_flush(op)) { + rq->elv.icq = NULL; + if (e && e->type->icq_cache) + blk_mq_sched_assign_ioc(q, rq, bio); + } + data->hctx->queued++; + return rq; + } + + blk_queue_exit(q); + return NULL; +} + +void blk_mq_sched_put_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + + if (rq->rq_flags & RQF_ELVPRIV) { + blk_mq_sched_put_rq_priv(rq->q, rq); + if (rq->elv.icq) { + put_io_context(rq->elv.icq->ioc); + rq->elv.icq = NULL; + } + } + + if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) + e->type->ops.mq.put_request(rq); + else + blk_mq_finish_request(rq); +} + +void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) +{ + struct elevator_queue *e = hctx->queue->elevator; + const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; + bool did_work = false; + LIST_HEAD(rq_list); + + if (unlikely(blk_mq_hctx_stopped(hctx))) + return; + + hctx->run++; + + /* + * If we have previous entries on our dispatch list, grab them first for + * more fair dispatch. + */ + if (!list_empty_careful(&hctx->dispatch)) { + spin_lock(&hctx->lock); + if (!list_empty(&hctx->dispatch)) + list_splice_init(&hctx->dispatch, &rq_list); + spin_unlock(&hctx->lock); + } + + /* + * Only ask the scheduler for requests, if we didn't have residual + * requests from the dispatch list. This is to avoid the case where + * we only ever dispatch a fraction of the requests available because + * of low device queue depth. Once we pull requests out of the IO + * scheduler, we can no longer merge or sort them. So it's best to + * leave them there for as long as we can. Mark the hw queue as + * needing a restart in that case. + */ + if (!list_empty(&rq_list)) { + blk_mq_sched_mark_restart(hctx); + did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + } else if (!has_sched_dispatch) { + blk_mq_flush_busy_ctxs(hctx, &rq_list); + blk_mq_dispatch_rq_list(hctx, &rq_list); + } + + /* + * We want to dispatch from the scheduler if we had no work left + * on the dispatch list, OR if we did have work but weren't able + * to make progress. + */ + if (!did_work && has_sched_dispatch) { + do { + struct request *rq; + + rq = e->type->ops.mq.dispatch_request(hctx); + if (!rq) + break; + list_add(&rq->queuelist, &rq_list); + } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); + } +} + +void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, + struct list_head *rq_list, + struct request *(*get_rq)(struct blk_mq_hw_ctx *)) +{ + do { + struct request *rq; + + rq = get_rq(hctx); + if (!rq) + break; + + list_add_tail(&rq->queuelist, rq_list); + } while (1); +} +EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch); + +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, + struct request **merged_request) +{ + struct request *rq; + + switch (elv_merge(q, &rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (!blk_mq_sched_allow_merge(q, rq, bio)) + return false; + if (!bio_attempt_back_merge(q, rq, bio)) + return false; + *merged_request = attempt_back_merge(q, rq); + if (!*merged_request) + elv_merged_request(q, rq, ELEVATOR_BACK_MERGE); + return true; + case ELEVATOR_FRONT_MERGE: + if (!blk_mq_sched_allow_merge(q, rq, bio)) + return false; + if (!bio_attempt_front_merge(q, rq, bio)) + return false; + *merged_request = attempt_front_merge(q, rq); + if (!*merged_request) + elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); + return true; + default: + return false; + } +} +EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); + +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (e->type->ops.mq.bio_merge) { + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + + blk_mq_put_ctx(ctx); + return e->type->ops.mq.bio_merge(hctx, bio); + } + + return false; +} + +bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) +{ + return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq); +} +EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); + +void blk_mq_sched_request_inserted(struct request *rq) +{ + trace_block_rq_insert(rq->q, rq); +} +EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); + +static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag == -1) { + rq->rq_flags |= RQF_SORTED; + return false; + } + + /* + * If we already have a real request tag, send directly to + * the dispatch list. + */ + spin_lock(&hctx->lock); + list_add(&rq->queuelist, &hctx->dispatch); + spin_unlock(&hctx->lock); + return true; +} + +static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +{ + if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { + clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + if (blk_mq_hctx_has_pending(hctx)) + blk_mq_run_hw_queue(hctx, true); + } +} + +void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) +{ + unsigned int i; + + if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + blk_mq_sched_restart_hctx(hctx); + else { + struct request_queue *q = hctx->queue; + + if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) + return; + + clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags); + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_restart_hctx(hctx); + } +} + +/* + * Add flush/fua to the queue. If we fail getting a driver tag, then + * punt to the requeue list. Requeue will re-invoke us from a context + * that's safe to block from. + */ +static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx, + struct request *rq, bool can_block) +{ + if (blk_mq_get_driver_tag(rq, &hctx, can_block)) { + blk_insert_flush(rq); + blk_mq_run_hw_queue(hctx, true); + } else + blk_mq_add_to_requeue_list(rq, false, true); +} + +void blk_mq_sched_insert_request(struct request *rq, bool at_head, + bool run_queue, bool async, bool can_block) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + + if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) { + blk_mq_sched_insert_flush(hctx, rq, can_block); + return; + } + + if (e && blk_mq_sched_bypass_insert(hctx, rq)) + goto run; + + if (e && e->type->ops.mq.insert_requests) { + LIST_HEAD(list); + + list_add(&rq->queuelist, &list); + e->type->ops.mq.insert_requests(hctx, &list, at_head); + } else { + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, at_head); + spin_unlock(&ctx->lock); + } + +run: + if (run_queue) + blk_mq_run_hw_queue(hctx, async); +} + +void blk_mq_sched_insert_requests(struct request_queue *q, + struct blk_mq_ctx *ctx, + struct list_head *list, bool run_queue_async) +{ + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + struct elevator_queue *e = hctx->queue->elevator; + + if (e) { + struct request *rq, *next; + + /* + * We bypass requests that already have a driver tag assigned, + * which should only be flushes. Flushes are only ever inserted + * as single requests, so we shouldn't ever hit the + * WARN_ON_ONCE() below (but let's handle it just in case). + */ + list_for_each_entry_safe(rq, next, list, queuelist) { + if (WARN_ON_ONCE(rq->tag != -1)) { + list_del_init(&rq->queuelist); + blk_mq_sched_bypass_insert(hctx, rq); + } + } + } + + if (e && e->type->ops.mq.insert_requests) + e->type->ops.mq.insert_requests(hctx, list, false); + else + blk_mq_insert_requests(hctx, ctx, list); + + blk_mq_run_hw_queue(hctx, run_queue_async); +} + +static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + if (hctx->sched_tags) { + blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); + blk_mq_free_rq_map(hctx->sched_tags); + hctx->sched_tags = NULL; + } +} + +int blk_mq_sched_setup(struct request_queue *q) +{ + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_hw_ctx *hctx; + int ret, i; + + /* + * Default to 256, since we don't split into sync/async like the + * old code did. Additionally, this is a per-hw queue depth. + */ + q->nr_requests = 2 * BLKDEV_MAX_RQ; + + /* + * We're switching to using an IO scheduler, so setup the hctx + * scheduler tags and switch the request map from the regular + * tags to scheduler tags. First allocate what we need, so we + * can safely fail and fallback, if needed. + */ + ret = 0; + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); + if (!hctx->sched_tags) { + ret = -ENOMEM; + break; + } + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + if (ret) + break; + } + + /* + * If we failed, free what we did allocate + */ + if (ret) { + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->sched_tags) + continue; + blk_mq_sched_free_tags(set, hctx, i); + } + + return ret; + } + + return 0; +} + +void blk_mq_sched_teardown(struct request_queue *q) +{ + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} + +int blk_mq_sched_init(struct request_queue *q) +{ + int ret; + +#if defined(CONFIG_DEFAULT_SQ_NONE) + if (q->nr_hw_queues == 1) + return 0; +#endif +#if defined(CONFIG_DEFAULT_MQ_NONE) + if (q->nr_hw_queues > 1) + return 0; +#endif + + mutex_lock(&q->sysfs_lock); + ret = elevator_init(q, NULL); + mutex_unlock(&q->sysfs_lock); + + return ret; +} diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h new file mode 100644 index 0000000..7b5f3b9 --- /dev/null +++ b/block/blk-mq-sched.h @@ -0,0 +1,143 @@ +#ifndef BLK_MQ_SCHED_H +#define BLK_MQ_SCHED_H + +#include "blk-mq.h" +#include "blk-mq-tag.h" + +int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, + int (*init)(struct blk_mq_hw_ctx *), + void (*exit)(struct blk_mq_hw_ctx *)); + +void blk_mq_sched_free_hctx_data(struct request_queue *q, + void (*exit)(struct blk_mq_hw_ctx *)); + +struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); +void blk_mq_sched_put_request(struct request *rq); + +void blk_mq_sched_request_inserted(struct request *rq); +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, + struct request **merged_request); +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); +bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); +void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); + +void blk_mq_sched_insert_request(struct request *rq, bool at_head, + bool run_queue, bool async, bool can_block); +void blk_mq_sched_insert_requests(struct request_queue *q, + struct blk_mq_ctx *ctx, + struct list_head *list, bool run_queue_async); + +void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, + struct list_head *rq_list, + struct request *(*get_rq)(struct blk_mq_hw_ctx *)); + +int blk_mq_sched_setup(struct request_queue *q); +void blk_mq_sched_teardown(struct request_queue *q); + +int blk_mq_sched_init(struct request_queue *q); + +static inline bool +blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio)) + return false; + + return __blk_mq_sched_bio_merge(q, bio); +} + +static inline int blk_mq_sched_get_rq_priv(struct request_queue *q, + struct request *rq, + struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.get_rq_priv) + return e->type->ops.mq.get_rq_priv(q, rq, bio); + + return 0; +} + +static inline void blk_mq_sched_put_rq_priv(struct request_queue *q, + struct request *rq) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.put_rq_priv) + e->type->ops.mq.put_rq_priv(q, rq); +} + +static inline bool +blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.allow_merge) + return e->type->ops.mq.allow_merge(q, rq, bio); + + return true; +} + +static inline void +blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + struct elevator_queue *e = hctx->queue->elevator; + + if (e && e->type->ops.mq.completed_request) + e->type->ops.mq.completed_request(hctx, rq); + + BUG_ON(rq->internal_tag == -1); + + blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag); +} + +static inline void blk_mq_sched_started_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.started_request) + e->type->ops.mq.started_request(rq); +} + +static inline void blk_mq_sched_requeue_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.requeue_request) + e->type->ops.mq.requeue_request(rq); +} + +static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) +{ + struct elevator_queue *e = hctx->queue->elevator; + + if (e && e->type->ops.mq.has_work) + return e->type->ops.mq.has_work(hctx); + + return false; +} + +static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx) +{ + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + if (hctx->flags & BLK_MQ_F_TAG_SHARED) { + struct request_queue *q = hctx->queue; + + if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) + set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); + } + } +} + +static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) +{ + return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); +} + +#endif diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index eacd3af..295e696 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj, return res; } -static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) -{ - return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1], - ctx->rq_dispatched[0]); -} - -static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page) -{ - return sprintf(page, "%lu\n", ctx->rq_merged); -} - -static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) -{ - return sprintf(page, "%lu %lu\n", ctx->rq_completed[1], - ctx->rq_completed[0]); -} - -static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) -{ - struct request *rq; - int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg); - - list_for_each_entry(rq, list, queuelist) { - const int rq_len = 2 * sizeof(rq) + 2; - - /* if the output will be truncated */ - if (PAGE_SIZE - 1 < len + rq_len) { - /* backspacing if it can't hold '\t...\n' */ - if (PAGE_SIZE - 1 < len + 5) - len -= rq_len; - len += snprintf(page + len, PAGE_SIZE - 1 - len, - "\t...\n"); - break; - } - len += snprintf(page + len, PAGE_SIZE - 1 - len, - "\t%p\n", rq); - } - - return len; -} - -static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) -{ - ssize_t ret; - - spin_lock(&ctx->lock); - ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending"); - spin_unlock(&ctx->lock); - - return ret; -} - -static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page) -{ - return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n", - hctx->poll_considered, hctx->poll_invoked, - hctx->poll_success); -} - -static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx, - const char *page, size_t size) -{ - hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; - - return size; -} - -static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, - char *page) -{ - return sprintf(page, "%lu\n", hctx->queued); -} - -static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page) -{ - return sprintf(page, "%lu\n", hctx->run); -} - -static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx, - char *page) -{ - char *start_page = page; - int i; - - page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]); - - for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) { - unsigned int d = 1U << (i - 1); - - page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]); - } - - page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1), - hctx->dispatched[i]); - return page - start_page; -} - -static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, +static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx, char *page) { - ssize_t ret; - - spin_lock(&hctx->lock); - ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending"); - spin_unlock(&hctx->lock); - - return ret; + return sprintf(page, "%u\n", hctx->tags->nr_tags); } -static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) +static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, + char *page) { - return blk_mq_tag_sysfs_show(hctx->tags, page); -} - -static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page) -{ - return sprintf(page, "%u\n", atomic_read(&hctx->nr_active)); + return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags); } static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) @@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } -static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx) -{ - struct blk_mq_ctx *ctx; - unsigned int i; - - hctx_for_each_ctx(hctx, ctx, i) { - blk_stat_init(&ctx->stat[BLK_STAT_READ]); - blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); - } -} - -static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, - const char *page, size_t count) -{ - blk_mq_stat_clear(hctx); - return count; -} - -static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) -{ - return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", - pre, (long long) stat->nr_samples, - (long long) stat->mean, (long long) stat->min, - (long long) stat->max); -} - -static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page) -{ - struct blk_rq_stat stat[2]; - ssize_t ret; - - blk_stat_init(&stat[BLK_STAT_READ]); - blk_stat_init(&stat[BLK_STAT_WRITE]); - - blk_hctx_stat_get(hctx, stat); - - ret = print_stat(page, &stat[BLK_STAT_READ], "read :"); - ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:"); - return ret; -} - -static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { - .attr = {.name = "dispatched", .mode = S_IRUGO }, - .show = blk_mq_sysfs_dispatched_show, -}; -static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = { - .attr = {.name = "merged", .mode = S_IRUGO }, - .show = blk_mq_sysfs_merged_show, -}; -static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = { - .attr = {.name = "completed", .mode = S_IRUGO }, - .show = blk_mq_sysfs_completed_show, -}; -static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = { - .attr = {.name = "rq_list", .mode = S_IRUGO }, - .show = blk_mq_sysfs_rq_list_show, -}; - static struct attribute *default_ctx_attrs[] = { - &blk_mq_sysfs_dispatched.attr, - &blk_mq_sysfs_merged.attr, - &blk_mq_sysfs_completed.attr, - &blk_mq_sysfs_rq_list.attr, NULL, }; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { - .attr = {.name = "queued", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_queued_show, +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { + .attr = {.name = "nr_tags", .mode = S_IRUGO }, + .show = blk_mq_hw_sysfs_nr_tags_show, }; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { - .attr = {.name = "run", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_run_show, -}; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = { - .attr = {.name = "dispatched", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_dispatched_show, -}; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = { - .attr = {.name = "active", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_active_show, -}; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { - .attr = {.name = "pending", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_rq_list_show, -}; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { - .attr = {.name = "tags", .mode = S_IRUGO }, - .show = blk_mq_hw_sysfs_tags_show, +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = { + .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO }, + .show = blk_mq_hw_sysfs_nr_reserved_tags_show, }; static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { .attr = {.name = "cpu_list", .mode = S_IRUGO }, .show = blk_mq_hw_sysfs_cpus_show, }; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { - .attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO }, - .show = blk_mq_hw_sysfs_poll_show, - .store = blk_mq_hw_sysfs_poll_store, -}; -static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = { - .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR }, - .show = blk_mq_hw_sysfs_stat_show, - .store = blk_mq_hw_sysfs_stat_store, -}; static struct attribute *default_hw_ctx_attrs[] = { - &blk_mq_hw_sysfs_queued.attr, - &blk_mq_hw_sysfs_run.attr, - &blk_mq_hw_sysfs_dispatched.attr, - &blk_mq_hw_sysfs_pending.attr, - &blk_mq_hw_sysfs_tags.attr, + &blk_mq_hw_sysfs_nr_tags.attr, + &blk_mq_hw_sysfs_nr_reserved_tags.attr, &blk_mq_hw_sysfs_cpus.attr, - &blk_mq_hw_sysfs_active.attr, - &blk_mq_hw_sysfs_poll.attr, - &blk_mq_hw_sysfs_stat.attr, NULL, }; @@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_put(&hctx->kobj); } + blk_mq_debugfs_unregister_hctxs(q); + kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&q->mq_kobj); @@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_ADD); + blk_mq_debugfs_register(q, kobject_name(&dev->kobj)); + queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) @@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q) if (!q->mq_sysfs_init_done) return; + blk_mq_debugfs_unregister_hctxs(q); + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); } @@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q) if (!q->mq_sysfs_init_done) return ret; + blk_mq_debugfs_register_hctxs(q); + queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dcf5ce3..54c8436 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } -static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) +static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, + struct sbitmap_queue *bt) { - if (!hctx_may_queue(hctx, bt)) + if (!(data->flags & BLK_MQ_REQ_INTERNAL) && + !hctx_may_queue(data->hctx, bt)) return -1; return __sbitmap_queue_get(bt); } -static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, - struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags) +unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) { + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct sbitmap_queue *bt; struct sbq_wait_state *ws; DEFINE_WAIT(wait); + unsigned int tag_offset; + bool drop_ctx; int tag; - tag = __bt_get(hctx, bt); + if (data->flags & BLK_MQ_REQ_RESERVED) { + if (unlikely(!tags->nr_reserved_tags)) { + WARN_ON_ONCE(1); + return BLK_MQ_TAG_FAIL; + } + bt = &tags->breserved_tags; + tag_offset = 0; + } else { + bt = &tags->bitmap_tags; + tag_offset = tags->nr_reserved_tags; + } + + tag = __blk_mq_get_tag(data, bt); if (tag != -1) - return tag; + goto found_tag; if (data->flags & BLK_MQ_REQ_NOWAIT) - return -1; + return BLK_MQ_TAG_FAIL; - ws = bt_wait_ptr(bt, hctx); + ws = bt_wait_ptr(bt, data->hctx); + drop_ctx = data->ctx == NULL; do { prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); - tag = __bt_get(hctx, bt); + tag = __blk_mq_get_tag(data, bt); if (tag != -1) break; /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for - * some to complete. Note that hctx can be NULL here for - * reserved tag allocation. + * some to complete. */ - if (hctx) - blk_mq_run_hw_queue(hctx, false); + blk_mq_run_hw_queue(data->hctx, false); /* * Retry tag allocation after running the hardware queue, * as running the queue may also have found completions. */ - tag = __bt_get(hctx, bt); + tag = __blk_mq_get_tag(data, bt); if (tag != -1) break; - blk_mq_put_ctx(data->ctx); + if (data->ctx) + blk_mq_put_ctx(data->ctx); io_schedule(); data->ctx = blk_mq_get_ctx(data->q); data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); - if (data->flags & BLK_MQ_REQ_RESERVED) { - bt = &data->hctx->tags->breserved_tags; - } else { - hctx = data->hctx; - bt = &hctx->tags->bitmap_tags; - } + tags = blk_mq_tags_from_data(data); + if (data->flags & BLK_MQ_REQ_RESERVED) + bt = &tags->breserved_tags; + else + bt = &tags->bitmap_tags; + finish_wait(&ws->wait, &wait); - ws = bt_wait_ptr(bt, hctx); + ws = bt_wait_ptr(bt, data->hctx); } while (1); - finish_wait(&ws->wait, &wait); - return tag; -} - -static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data) -{ - int tag; - - tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, - data->hctx->tags); - if (tag >= 0) - return tag + data->hctx->tags->nr_reserved_tags; - - return BLK_MQ_TAG_FAIL; -} - -static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) -{ - int tag; - - if (unlikely(!data->hctx->tags->nr_reserved_tags)) { - WARN_ON_ONCE(1); - return BLK_MQ_TAG_FAIL; - } - - tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, - data->hctx->tags); - if (tag < 0) - return BLK_MQ_TAG_FAIL; + if (drop_ctx && data->ctx) + blk_mq_put_ctx(data->ctx); - return tag; -} + finish_wait(&ws->wait, &wait); -unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) -{ - if (data->flags & BLK_MQ_REQ_RESERVED) - return __blk_mq_get_reserved_tag(data); - return __blk_mq_get_tag(data); +found_tag: + return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - unsigned int tag) +void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, + struct blk_mq_ctx *ctx, unsigned int tag) { - struct blk_mq_tags *tags = hctx->tags; - if (tag >= tags->nr_reserved_tags) { const int real_tag = tag - tags->nr_reserved_tags; @@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) struct blk_mq_tags *tags = set->tags[i]; for (j = 0; j < tags->nr_tags; j++) { - if (!tags->rqs[j]) + if (!tags->static_rqs[j]) continue; ret = set->ops->reinit_request(set->driver_data, - tags->rqs[j]); + tags->static_rqs[j]); if (ret) goto out; } @@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, } -static unsigned int bt_unused_tags(const struct sbitmap_queue *bt) -{ - return bt->sb.depth - sbitmap_weight(&bt->sb); -} - static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, bool round_robin, int node) { @@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) kfree(tags); } -int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) +int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, + struct blk_mq_tags **tagsptr, unsigned int tdepth, + bool can_grow) { - tdepth -= tags->nr_reserved_tags; - if (tdepth > tags->nr_tags) + struct blk_mq_tags *tags = *tagsptr; + + if (tdepth <= tags->nr_reserved_tags) return -EINVAL; + tdepth -= tags->nr_reserved_tags; + /* - * Don't need (or can't) update reserved tags here, they remain - * static and should never need resizing. + * If we are allowed to grow beyond the original size, allocate + * a new set of tags before freeing the old one. */ - sbitmap_queue_resize(&tags->bitmap_tags, tdepth); + if (tdepth > tags->nr_tags) { + struct blk_mq_tag_set *set = hctx->queue->tag_set; + struct blk_mq_tags *new; + bool ret; + + if (!can_grow) + return -EINVAL; + + /* + * We need some sort of upper limit, set it high enough that + * no valid use cases should require more. + */ + if (tdepth > 16 * BLKDEV_MAX_RQ) + return -EINVAL; + + new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0); + if (!new) + return -ENOMEM; + ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); + if (ret) { + blk_mq_free_rq_map(new); + return -ENOMEM; + } + + blk_mq_free_rqs(set, *tagsptr, hctx->queue_num); + blk_mq_free_rq_map(*tagsptr); + *tagsptr = new; + } else { + /* + * Don't need (or can't) update reserved tags here, they + * remain static and should never need resizing. + */ + sbitmap_queue_resize(&tags->bitmap_tags, tdepth); + } - blk_mq_tag_wakeup_all(tags, false); return 0; } @@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq) (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); } EXPORT_SYMBOL(blk_mq_unique_tag); - -ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) -{ - char *orig_page = page; - unsigned int free, res; - - if (!tags) - return 0; - - page += sprintf(page, "nr_tags=%u, reserved_tags=%u, " - "bits_per_word=%u\n", - tags->nr_tags, tags->nr_reserved_tags, - 1U << tags->bitmap_tags.sb.shift); - - free = bt_unused_tags(&tags->bitmap_tags); - res = bt_unused_tags(&tags->breserved_tags); - - page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res); - page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues)); - - return page - orig_page; -} diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d166273..6349742 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -16,6 +16,7 @@ struct blk_mq_tags { struct sbitmap_queue breserved_tags; struct request **rqs; + struct request **static_rqs; struct list_head page_list; }; @@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, + struct blk_mq_ctx *ctx, unsigned int tag); extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); -extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); -extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); +extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, + struct blk_mq_tags **tags, + unsigned int depth, bool can_grow); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv); diff --git a/block/blk-mq.c b/block/blk-mq.c index c3400b5..b29e7dc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -32,6 +32,7 @@ #include "blk-mq-tag.h" #include "blk-stat.h" #include "blk-wbt.h" +#include "blk-mq-sched.h" static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -39,9 +40,11 @@ static LIST_HEAD(all_q_list); /* * Check if any of the ctx's have pending work in this hardware queue */ -static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) +bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { - return sbitmap_any_bit_set(&hctx->ctx_map); + return sbitmap_any_bit_set(&hctx->ctx_map) || + !list_empty_careful(&hctx->dispatch) || + blk_mq_sched_has_work(hctx); } /* @@ -167,8 +170,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) } EXPORT_SYMBOL(blk_mq_can_queue); -static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int op) +void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, + struct request *rq, unsigned int op) { INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ @@ -196,13 +199,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->special = NULL; /* tag was already set */ rq->errors = 0; - - rq->cmd = rq->__cmd; - rq->extra_len = 0; - rq->sense_len = 0; - rq->resid_len = 0; - rq->sense = NULL; INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; @@ -213,53 +210,58 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, ctx->rq_dispatched[op_is_sync(op)]++; } +EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init); -static struct request * -__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) +struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, + unsigned int op) { struct request *rq; unsigned int tag; tag = blk_mq_get_tag(data); if (tag != BLK_MQ_TAG_FAIL) { - rq = data->hctx->tags->rqs[tag]; + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); - if (blk_mq_tag_busy(data->hctx)) { - rq->rq_flags = RQF_MQ_INFLIGHT; - atomic_inc(&data->hctx->nr_active); + rq = tags->static_rqs[tag]; + + if (data->flags & BLK_MQ_REQ_INTERNAL) { + rq->tag = -1; + rq->internal_tag = tag; + } else { + if (blk_mq_tag_busy(data->hctx)) { + rq->rq_flags = RQF_MQ_INFLIGHT; + atomic_inc(&data->hctx->nr_active); + } + rq->tag = tag; + rq->internal_tag = -1; } - rq->tag = tag; blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); return rq; } return NULL; } +EXPORT_SYMBOL_GPL(__blk_mq_alloc_request); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, unsigned int flags) { - struct blk_mq_ctx *ctx; - struct blk_mq_hw_ctx *hctx; + struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; - struct blk_mq_alloc_data alloc_data; int ret; ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); if (ret) return ERR_PTR(ret); - ctx = blk_mq_get_ctx(q); - hctx = blk_mq_map_queue(q, ctx->cpu); - blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); - blk_mq_put_ctx(ctx); + rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); - if (!rq) { - blk_queue_exit(q); + blk_mq_put_ctx(alloc_data.ctx); + blk_queue_exit(q); + + if (!rq) return ERR_PTR(-EWOULDBLOCK); - } rq->__data_len = 0; rq->__sector = (sector_t) -1; @@ -319,10 +321,10 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct request *rq) +void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct request *rq) { - const int tag = rq->tag; + const int sched_tag = rq->internal_tag; struct request_queue *q = rq->q; if (rq->rq_flags & RQF_MQ_INFLIGHT) @@ -333,23 +335,31 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); - blk_mq_put_tag(hctx, ctx, tag); + if (rq->tag != -1) + blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + if (sched_tag != -1) + blk_mq_sched_completed_request(hctx, rq); + blk_mq_sched_restart_queues(hctx); blk_queue_exit(q); } -void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx, + struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; ctx->rq_completed[rq_is_sync(rq)]++; - __blk_mq_free_request(hctx, ctx, rq); + __blk_mq_finish_request(hctx, ctx, rq); +} +void blk_mq_finish_request(struct request *rq) +{ + blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); } -EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request); void blk_mq_free_request(struct request *rq) { - blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); + blk_mq_sched_put_request(rq); } EXPORT_SYMBOL_GPL(blk_mq_free_request); @@ -467,11 +477,9 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - trace_block_rq_issue(q, rq); + blk_mq_sched_started_request(rq); - rq->resid_len = blk_rq_bytes(rq); - if (unlikely(blk_bidi_rq(rq))) - rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); + trace_block_rq_issue(q, rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { blk_stat_set_issue_time(&rq->issue_stat); @@ -515,6 +523,7 @@ static void __blk_mq_requeue_request(struct request *rq) trace_block_rq_requeue(q, rq); wbt_requeue(q->rq_wb, &rq->issue_stat); + blk_mq_sched_requeue_request(rq); if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { if (q->dma_drain_size && blk_rq_bytes(rq)) @@ -549,13 +558,13 @@ static void blk_mq_requeue_work(struct work_struct *work) rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, true, false, false); + blk_mq_sched_insert_request(rq, true, false, false, true); } while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_insert_request(rq, false, false, false); + blk_mq_sched_insert_request(rq, false, false, false, true); } blk_mq_run_hw_queues(q, false); @@ -639,7 +648,7 @@ struct blk_mq_timeout_data { void blk_mq_rq_timed_out(struct request *req, bool reserved) { - struct blk_mq_ops *ops = req->q->mq_ops; + const struct blk_mq_ops *ops = req->q->mq_ops; enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; /* @@ -754,7 +763,7 @@ static bool blk_mq_attempt_merge(struct request_queue *q, int checked = 8; list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { - int el_ret; + bool merged = false; if (!checked--) break; @@ -762,20 +771,25 @@ static bool blk_mq_attempt_merge(struct request_queue *q, if (!blk_rq_merge_ok(rq, bio)) continue; - el_ret = blk_try_merge(rq, bio); - if (el_ret == ELEVATOR_BACK_MERGE) { - if (bio_attempt_back_merge(q, rq, bio)) { - ctx->rq_merged++; - return true; - } + switch (blk_try_merge(rq, bio)) { + case ELEVATOR_BACK_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_back_merge(q, rq, bio); break; - } else if (el_ret == ELEVATOR_FRONT_MERGE) { - if (bio_attempt_front_merge(q, rq, bio)) { - ctx->rq_merged++; - return true; - } + case ELEVATOR_FRONT_MERGE: + if (blk_mq_sched_allow_merge(q, rq, bio)) + merged = bio_attempt_front_merge(q, rq, bio); + break; + case ELEVATOR_DISCARD_MERGE: + merged = bio_attempt_discard_merge(q, rq, bio); break; + default: + continue; } + + if (merged) + ctx->rq_merged++; + return merged; } return false; @@ -803,7 +817,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) * Process software queues that have been marked busy, splicing them * to the for-dispatch */ -static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) +void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct flush_busy_ctx_data data = { .hctx = hctx, @@ -812,6 +826,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); } +EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); static inline unsigned int queued_to_index(unsigned int queued) { @@ -821,6 +836,74 @@ static inline unsigned int queued_to_index(unsigned int queued) return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); } +bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, + bool wait) +{ + struct blk_mq_alloc_data data = { + .q = rq->q, + .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), + .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, + }; + + if (rq->tag != -1) { +done: + if (hctx) + *hctx = data.hctx; + return true; + } + + rq->tag = blk_mq_get_tag(&data); + if (rq->tag >= 0) { + if (blk_mq_tag_busy(data.hctx)) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&data.hctx->nr_active); + } + data.hctx->tags->rqs[rq->tag] = rq; + goto done; + } + + return false; +} + +static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + rq->tag = -1; + + if (rq->rq_flags & RQF_MQ_INFLIGHT) { + rq->rq_flags &= ~RQF_MQ_INFLIGHT; + atomic_dec(&hctx->nr_active); + } +} + +/* + * If we fail getting a driver tag because all the driver tags are already + * assigned and on the dispatch list, BUT the first entry does not have a + * tag, then we could deadlock. For that case, move entries with assigned + * driver tags to the front, leaving the set of tagged requests in the + * same order, and the untagged set in the same order. + */ +static bool reorder_tags_to_front(struct list_head *list) +{ + struct request *rq, *tmp, *first = NULL; + + list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) { + if (rq == first) + break; + if (rq->tag != -1) { + list_move(&rq->queuelist, list); + if (!first) + first = rq; + } + } + + return first != NULL; +} + bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct request_queue *q = hctx->queue; @@ -843,6 +926,20 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); + if (!blk_mq_get_driver_tag(rq, &hctx, false)) { + if (!queued && reorder_tags_to_front(list)) + continue; + + /* + * We failed getting a driver tag. Mark the queue(s) + * as needing a restart. Retry getting a tag again, + * in case the needed IO completed right before we + * marked the queue as needing a restart. + */ + blk_mq_sched_mark_restart(hctx); + if (!blk_mq_get_driver_tag(rq, &hctx, false)) + break; + } list_del_init(&rq->queuelist); bd.rq = rq; @@ -855,6 +952,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) queued++; break; case BLK_MQ_RQ_QUEUE_BUSY: + blk_mq_put_driver_tag(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; @@ -885,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) */ if (!list_empty(list)) { spin_lock(&hctx->lock); - list_splice(list, &hctx->dispatch); + list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -896,45 +994,15 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * the requests in rq_list might get lost. * * blk_mq_run_hw_queue() already checks the STOPPED bit - **/ - blk_mq_run_hw_queue(hctx, true); - } - - return ret != BLK_MQ_RQ_QUEUE_BUSY; -} - -/* - * Run this hardware queue, pulling any software queues mapped to it in. - * Note that this function currently has various problems around ordering - * of IO. In particular, we'd like FIFO behaviour on handling existing - * items on the hctx->dispatch list. Ignore that for now. - */ -static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx) -{ - LIST_HEAD(rq_list); - - if (unlikely(blk_mq_hctx_stopped(hctx))) - return; - - hctx->run++; - - /* - * Touch any software queue that has pending entries. - */ - flush_busy_ctxs(hctx, &rq_list); - - /* - * If we have previous entries on our dispatch list, grab them - * and stuff them at the front for more fair dispatch. - */ - if (!list_empty_careful(&hctx->dispatch)) { - spin_lock(&hctx->lock); - if (!list_empty(&hctx->dispatch)) - list_splice_init(&hctx->dispatch, &rq_list); - spin_unlock(&hctx->lock); + * + * If RESTART is set, then let completion restart the queue + * instead of potentially looping here. + */ + if (!blk_mq_sched_needs_restart(hctx)) + blk_mq_run_hw_queue(hctx, true); } - blk_mq_dispatch_rq_list(hctx, &rq_list); + return queued != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) @@ -946,11 +1014,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_process_rq_list(hctx); + blk_mq_sched_dispatch_requests(hctx); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); - blk_mq_process_rq_list(hctx); + blk_mq_sched_dispatch_requests(hctx); srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); } } @@ -1006,8 +1074,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) int i; queue_for_each_hw_ctx(q, hctx, i) { - if ((!blk_mq_hctx_has_pending(hctx) && - list_empty_careful(&hctx->dispatch)) || + if (!blk_mq_hctx_has_pending(hctx) || blk_mq_hctx_stopped(hctx)) continue; @@ -1116,6 +1183,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) if (unlikely(!blk_mq_hw_queue_mapped(hctx))) return; + blk_mq_stop_hw_queue(hctx); kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->delay_work, msecs_to_jiffies(msecs)); } @@ -1135,8 +1203,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, list_add_tail(&rq->queuelist, &ctx->rq_list); } -static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, - struct request *rq, bool at_head) +void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head) { struct blk_mq_ctx *ctx = rq->mq_ctx; @@ -1144,32 +1212,10 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, blk_mq_hctx_mark_pending(hctx, ctx); } -void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, - bool async) -{ - struct blk_mq_ctx *ctx = rq->mq_ctx; - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, at_head); - spin_unlock(&ctx->lock); - - if (run_queue) - blk_mq_run_hw_queue(hctx, async); -} - -static void blk_mq_insert_requests(struct request_queue *q, - struct blk_mq_ctx *ctx, - struct list_head *list, - int depth, - bool from_schedule) +void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct list_head *list) { - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); - - trace_block_unplug(q, depth, !from_schedule); - /* * preemption doesn't flush plug list, so it's possible ctx->cpu is * offline now @@ -1185,8 +1231,6 @@ static void blk_mq_insert_requests(struct request_queue *q, } blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); - - blk_mq_run_hw_queue(hctx, from_schedule); } static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) @@ -1222,9 +1266,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(!rq->q); if (rq->mq_ctx != this_ctx) { if (this_ctx) { - blk_mq_insert_requests(this_q, this_ctx, - &ctx_list, depth, - from_schedule); + trace_block_unplug(this_q, depth, from_schedule); + blk_mq_sched_insert_requests(this_q, this_ctx, + &ctx_list, + from_schedule); } this_ctx = rq->mq_ctx; @@ -1241,8 +1286,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) * on 'ctx_list'. Do those. */ if (this_ctx) { - blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth, - from_schedule); + trace_block_unplug(this_q, depth, from_schedule); + blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, + from_schedule); } } @@ -1280,46 +1326,39 @@ insert_rq: } spin_unlock(&ctx->lock); - __blk_mq_free_request(hctx, ctx, rq); + __blk_mq_finish_request(hctx, ctx, rq); return true; } } -static struct request *blk_mq_map_request(struct request_queue *q, - struct bio *bio, - struct blk_mq_alloc_data *data) +static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) { - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - struct request *rq; + if (rq->tag != -1) + return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false); - blk_queue_enter_live(q); - ctx = blk_mq_get_ctx(q); - hctx = blk_mq_map_queue(q, ctx->cpu); - - trace_block_getrq(q, bio, bio->bi_opf); - blk_mq_set_alloc_data(data, q, 0, ctx, hctx); - rq = __blk_mq_alloc_request(data, bio->bi_opf); - - data->hctx->queued++; - return rq; + return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) { - int ret; struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); struct blk_mq_queue_data bd = { .rq = rq, .list = NULL, .last = 1 }; - blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); + struct blk_mq_hw_ctx *hctx; + blk_qc_t new_cookie; + int ret; - if (blk_mq_hctx_stopped(hctx)) + if (q->elevator) goto insert; + if (!blk_mq_get_driver_tag(rq, &hctx, false)) + goto insert; + + new_cookie = request_to_qc_t(hctx, rq); + /* * For OK queue, we are done. For error, kill it. Any other * error (busy), just add it to our list as we previously @@ -1341,7 +1380,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_insert_request(rq, false, true, true); + blk_mq_sched_insert_request(rq, false, true, true, false); } /* @@ -1352,8 +1391,8 @@ insert: static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); - const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); - struct blk_mq_alloc_data data; + const int is_flush_fua = op_is_flush(bio->bi_opf); + struct blk_mq_alloc_data data = { .flags = 0 }; struct request *rq; unsigned int request_count = 0, srcu_idx; struct blk_plug *plug; @@ -1374,9 +1413,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; + if (blk_mq_sched_bio_merge(q, bio)) + return BLK_QC_T_NONE; + wb_acct = wbt_wait(q->rq_wb, bio, NULL); - rq = blk_mq_map_request(q, bio, &data); + trace_block_getrq(q, bio, bio->bi_opf); + + rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { __wbt_done(q->rq_wb, wb_acct); return BLK_QC_T_NONE; @@ -1384,9 +1428,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) wbt_track(&rq->issue_stat, wb_acct); - cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { + if (q->elevator) + goto elv_insert; blk_mq_bio_to_request(rq, bio); blk_insert_flush(rq); goto run_queue; @@ -1438,6 +1484,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) goto done; } + if (q->elevator) { +elv_insert: + blk_mq_put_ctx(data.ctx); + blk_mq_bio_to_request(rq, bio); + blk_mq_sched_insert_request(rq, false, true, + !is_sync || is_flush_fua, true); + goto done; + } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { /* * For a SYNC request, send it to the hardware immediately. For @@ -1460,10 +1514,10 @@ done: static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); - const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); + const int is_flush_fua = op_is_flush(bio->bi_opf); struct blk_plug *plug; unsigned int request_count = 0; - struct blk_mq_alloc_data data; + struct blk_mq_alloc_data data = { .flags = 0 }; struct request *rq; blk_qc_t cookie; unsigned int wb_acct; @@ -1483,9 +1537,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) } else request_count = blk_plug_queued_count(q); + if (blk_mq_sched_bio_merge(q, bio)) + return BLK_QC_T_NONE; + wb_acct = wbt_wait(q->rq_wb, bio, NULL); - rq = blk_mq_map_request(q, bio, &data); + trace_block_getrq(q, bio, bio->bi_opf); + + rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); if (unlikely(!rq)) { __wbt_done(q->rq_wb, wb_acct); return BLK_QC_T_NONE; @@ -1493,9 +1552,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) wbt_track(&rq->issue_stat, wb_acct); - cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { + if (q->elevator) + goto elv_insert; blk_mq_bio_to_request(rq, bio); blk_insert_flush(rq); goto run_queue; @@ -1535,6 +1596,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) return cookie; } + if (q->elevator) { +elv_insert: + blk_mq_put_ctx(data.ctx); + blk_mq_bio_to_request(rq, bio); + blk_mq_sched_insert_request(rq, false, true, + !is_sync || is_flush_fua, true); + goto done; + } if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { /* * For a SYNC request, send it to the hardware immediately. For @@ -1547,11 +1616,12 @@ run_queue: } blk_mq_put_ctx(data.ctx); +done: return cookie; } -static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, - struct blk_mq_tags *tags, unsigned int hctx_idx) +void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx) { struct page *page; @@ -1559,11 +1629,13 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, int i; for (i = 0; i < tags->nr_tags; i++) { - if (!tags->rqs[i]) + struct request *rq = tags->static_rqs[i]; + + if (!rq) continue; - set->ops->exit_request(set->driver_data, tags->rqs[i], + set->ops->exit_request(set->driver_data, rq, hctx_idx, i); - tags->rqs[i] = NULL; + tags->static_rqs[i] = NULL; } } @@ -1577,33 +1649,32 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, kmemleak_free(page_address(page)); __free_pages(page, page->private); } +} +void blk_mq_free_rq_map(struct blk_mq_tags *tags) +{ kfree(tags->rqs); + tags->rqs = NULL; + kfree(tags->static_rqs); + tags->static_rqs = NULL; blk_mq_free_tags(tags); } -static size_t order_to_size(unsigned int order) -{ - return (size_t)PAGE_SIZE << order; -} - -static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, - unsigned int hctx_idx) +struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx, + unsigned int nr_tags, + unsigned int reserved_tags) { struct blk_mq_tags *tags; - unsigned int i, j, entries_per_page, max_order = 4; - size_t rq_size, left; - tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, + tags = blk_mq_init_tags(nr_tags, reserved_tags, set->numa_node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; - INIT_LIST_HEAD(&tags->page_list); - - tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), + tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, set->numa_node); if (!tags->rqs) { @@ -1611,15 +1682,40 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, return NULL; } + tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); + if (!tags->static_rqs) { + kfree(tags->rqs); + blk_mq_free_tags(tags); + return NULL; + } + + return tags; +} + +static size_t order_to_size(unsigned int order) +{ + return (size_t)PAGE_SIZE << order; +} + +int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx, unsigned int depth) +{ + unsigned int i, j, entries_per_page, max_order = 4; + size_t rq_size, left; + + INIT_LIST_HEAD(&tags->page_list); + /* * rq_size is the size of the request plus driver payload, rounded * to the cacheline size */ rq_size = round_up(sizeof(struct request) + set->cmd_size, cache_line_size()); - left = rq_size * set->queue_depth; + left = rq_size * depth; - for (i = 0; i < set->queue_depth; ) { + for (i = 0; i < depth; ) { int this_order = max_order; struct page *page; int to_do; @@ -1653,15 +1749,17 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, */ kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); entries_per_page = order_to_size(this_order) / rq_size; - to_do = min(entries_per_page, set->queue_depth - i); + to_do = min(entries_per_page, depth - i); left -= to_do * rq_size; for (j = 0; j < to_do; j++) { - tags->rqs[i] = p; + struct request *rq = p; + + tags->static_rqs[i] = rq; if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, - tags->rqs[i], hctx_idx, i, + rq, hctx_idx, i, set->numa_node)) { - tags->rqs[i] = NULL; + tags->static_rqs[i] = NULL; goto fail; } } @@ -1670,11 +1768,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, i++; } } - return tags; + return 0; fail: - blk_mq_free_rq_map(set, tags, hctx_idx); - return NULL; + blk_mq_free_rqs(set, tags, hctx_idx); + return -ENOMEM; } /* @@ -1866,6 +1964,35 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, } } +static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx) +{ + int ret = 0; + + set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, + set->queue_depth, set->reserved_tags); + if (!set->tags[hctx_idx]) + return false; + + ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx, + set->queue_depth); + if (!ret) + return true; + + blk_mq_free_rq_map(set->tags[hctx_idx]); + set->tags[hctx_idx] = NULL; + return false; +} + +static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, + unsigned int hctx_idx) +{ + if (set->tags[hctx_idx]) { + blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); + blk_mq_free_rq_map(set->tags[hctx_idx]); + set->tags[hctx_idx] = NULL; + } +} + static void blk_mq_map_swqueue(struct request_queue *q, const struct cpumask *online_mask) { @@ -1894,17 +2021,15 @@ static void blk_mq_map_swqueue(struct request_queue *q, hctx_idx = q->mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ - if (!set->tags[hctx_idx]) { - set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); - + if (!set->tags[hctx_idx] && + !__blk_mq_alloc_rq_map(set, hctx_idx)) { /* * If tags initialization fail for some hctx, * that hctx won't be brought online. In this * case, remap the current ctx to hctx[0] which * is guaranteed to always have tags allocated */ - if (!set->tags[hctx_idx]) - q->mq_map[i] = 0; + q->mq_map[i] = 0; } ctx = per_cpu_ptr(q->queue_ctx, i); @@ -1927,10 +2052,9 @@ static void blk_mq_map_swqueue(struct request_queue *q, * fallback in case of a new remap fails * allocation */ - if (i && set->tags[i]) { - blk_mq_free_rq_map(set, set->tags[i], i); - set->tags[i] = NULL; - } + if (i && set->tags[i]) + blk_mq_free_map_and_requests(set, i); + hctx->tags = NULL; continue; } @@ -2023,6 +2147,8 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; + blk_mq_sched_teardown(q); + /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) @@ -2097,10 +2223,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { - if (hctx->tags) { - blk_mq_free_rq_map(set, hctx->tags, j); - set->tags[j] = NULL; - } + if (hctx->tags) + blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); @@ -2181,6 +2305,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, mutex_unlock(&all_q_mutex); put_online_cpus(); + if (!(set->flags & BLK_MQ_F_NO_SCHED)) { + int ret; + + ret = blk_mq_sched_init(q); + if (ret) + return ERR_PTR(ret); + } + return q; err_hctxs: @@ -2279,10 +2411,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu) * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list * and set bit0 in pending bitmap as ctx1->index_hw is still zero. * - * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in - * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. - * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list - * is ignored. + * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set + * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. + * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is + * ignored. */ static int blk_mq_queue_reinit_prepare(unsigned int cpu) { @@ -2296,17 +2428,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < set->nr_hw_queues; i++) { - set->tags[i] = blk_mq_init_rq_map(set, i); - if (!set->tags[i]) + for (i = 0; i < set->nr_hw_queues; i++) + if (!__blk_mq_alloc_rq_map(set, i)) goto out_unwind; - } return 0; out_unwind: while (--i >= 0) - blk_mq_free_rq_map(set, set->tags[i], i); + blk_mq_free_rq_map(set->tags[i]); return -ENOMEM; } @@ -2430,10 +2560,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < nr_cpu_ids; i++) { - if (set->tags[i]) - blk_mq_free_rq_map(set, set->tags[i], i); - } + for (i = 0; i < nr_cpu_ids; i++) + blk_mq_free_map_and_requests(set, i); kfree(set->mq_map); set->mq_map = NULL; @@ -2449,14 +2577,28 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) struct blk_mq_hw_ctx *hctx; int i, ret; - if (!set || nr > set->queue_depth) + if (!set) return -EINVAL; + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + ret = 0; queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->tags) continue; - ret = blk_mq_tag_update_depth(hctx->tags, nr); + /* + * If we're using an MQ scheduler, just update the scheduler + * queue depth. This is similar to what the old code would do. + */ + if (!hctx->sched_tags) { + ret = blk_mq_tag_update_depth(hctx, &hctx->tags, + min(nr, set->queue_depth), + false); + } else { + ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, + nr, true); + } if (ret) break; } @@ -2464,6 +2606,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!ret) q->nr_requests = nr; + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + return ret; } @@ -2483,10 +2628,14 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); + /* + * Manually set the make_request_fn as blk_queue_make_request + * resets a lot of the queue settings. + */ if (q->nr_hw_queues > 1) - blk_queue_make_request(q, blk_mq_make_request); + q->make_request_fn = blk_mq_make_request; else - blk_queue_make_request(q, blk_sq_make_request); + q->make_request_fn = blk_sq_make_request; blk_mq_queue_reinit(q, cpu_online_mask); } @@ -2649,7 +2798,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) blk_flush_plug_list(plug, false); hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; - rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); + if (!blk_qc_t_is_internal(cookie)) + rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); + else + rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); return __blk_mq_poll(hctx, rq); } diff --git a/block/blk-mq.h b/block/blk-mq.h index 63e9116..24b2256 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -32,8 +32,32 @@ void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); +bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); +bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, + bool wait); /* + * Internal helpers for allocating/freeing the request map + */ +void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx); +void blk_mq_free_rq_map(struct blk_mq_tags *tags); +struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx, + unsigned int nr_tags, + unsigned int reserved_tags); +int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx, unsigned int depth); + +/* + * Internal helpers for request insertion into sw queues + */ +void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head); +void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct list_head *list); +/* * CPU hotplug helpers */ void blk_mq_enable_hotplug(void); @@ -57,6 +81,35 @@ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); +/* + * debugfs helpers + */ +#ifdef CONFIG_BLK_DEBUG_FS +int blk_mq_debugfs_register(struct request_queue *q, const char *name); +void blk_mq_debugfs_unregister(struct request_queue *q); +int blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); +#else +static inline int blk_mq_debugfs_register(struct request_queue *q, + const char *name) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) +{ +} +#endif + extern void blk_mq_rq_timed_out(struct request *req, bool reserved); void blk_mq_release(struct request_queue *q); @@ -103,6 +156,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, data->hctx = hctx; } +static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) +{ + if (data->flags & BLK_MQ_REQ_INTERNAL) + return data->hctx->sched_tags; + + return data->hctx->tags; +} + +/* + * Internal helpers for request allocation/init/free + */ +void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, + struct request *rq, unsigned int op); +void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct request *rq); +void blk_mq_finish_request(struct request *rq); +struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, + unsigned int op); + static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_STOPPED, &hctx->state); diff --git a/block/blk-settings.c b/block/blk-settings.c index 529e55f..1e7174f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_discard_segments = 1; lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->virt_boundary_mask = 0; @@ -128,6 +129,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) /* Inherit limits from component devices */ lim->discard_zeroes_data = 1; lim->max_segments = USHRT_MAX; + lim->max_discard_segments = 1; lim->max_hw_sectors = UINT_MAX; lim->max_segment_size = UINT_MAX; lim->max_sectors = UINT_MAX; @@ -253,7 +255,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); limits->max_sectors = max_sectors; - q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9); + q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -337,6 +339,22 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments EXPORT_SYMBOL(blk_queue_max_segments); /** + * blk_queue_max_discard_segments - set max segments for discard requests + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * segments in a discard request. + **/ +void blk_queue_max_discard_segments(struct request_queue *q, + unsigned short max_segments) +{ + q->limits.max_discard_segments = max_segments; +} +EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments); + +/** * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg * @q: the request queue for the device * @max_size: max size of segment in bytes @@ -553,6 +571,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->virt_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_discard_segments = min_not_zero(t->max_discard_segments, + b->max_discard_segments); t->max_integrity_segments = min_not_zero(t->max_integrity_segments, b->max_integrity_segments); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 1dbce05..002af83 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -89,7 +89,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) static ssize_t queue_ra_show(struct request_queue *q, char *page) { - unsigned long ra_kb = q->backing_dev_info.ra_pages << + unsigned long ra_kb = q->backing_dev_info->ra_pages << (PAGE_SHIFT - 10); return queue_var_show(ra_kb, (page)); @@ -104,7 +104,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; - q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10); + q->backing_dev_info->ra_pages = ra_kb >> (PAGE_SHIFT - 10); return ret; } @@ -121,6 +121,12 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_discard_segments_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_max_discard_segments(q), (page)); +} + static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) { return queue_var_show(q->limits.max_integrity_segments, (page)); @@ -236,7 +242,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) spin_lock_irq(q->queue_lock); q->limits.max_sectors = max_sectors_kb << 1; - q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); + q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); spin_unlock_irq(q->queue_lock); return ret; @@ -545,6 +551,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_discard_segments_entry = { + .attr = {.name = "max_discard_segments", .mode = S_IRUGO }, + .show = queue_max_discard_segments_show, +}; + static struct queue_sysfs_entry queue_max_integrity_segments_entry = { .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, .show = queue_max_integrity_segments_show, @@ -697,6 +708,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_discard_segments_entry.attr, &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, @@ -799,7 +811,7 @@ static void blk_release_queue(struct kobject *kobj) container_of(kobj, struct request_queue, kobj); wbt_exit(q); - bdi_exit(&q->backing_dev_info); + bdi_put(q->backing_dev_info); blkcg_exit_queue(q); if (q->elevator) { @@ -814,13 +826,19 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); - if (!q->mq_ops) + if (!q->mq_ops) { + if (q->exit_rq_fn) + q->exit_rq_fn(q, q->fq->flush_rq); blk_free_flush_queue(q->fq); - else + } else { blk_mq_release(q); + } blk_trace_shutdown(q); + if (q->mq_ops) + blk_mq_debugfs_unregister(q); + if (q->bio_split) bioset_free(q->bio_split); @@ -884,32 +902,36 @@ int blk_register_queue(struct gendisk *disk) if (ret) return ret; + if (q->mq_ops) + blk_mq_register_dev(dev, q); + + /* Prevent changes through sysfs until registration is completed. */ + mutex_lock(&q->sysfs_lock); + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) { blk_trace_remove_sysfs(dev); - return ret; + goto unlock; } kobject_uevent(&q->kobj, KOBJ_ADD); - if (q->mq_ops) - blk_mq_register_dev(dev, q); - blk_wb_init(q); - if (!q->request_fn) - return 0; - - ret = elv_register_queue(q); - if (ret) { - kobject_uevent(&q->kobj, KOBJ_REMOVE); - kobject_del(&q->kobj); - blk_trace_remove_sysfs(dev); - kobject_put(&dev->kobj); - return ret; + if (q->request_fn || (q->mq_ops && q->elevator)) { + ret = elv_register_queue(q); + if (ret) { + kobject_uevent(&q->kobj, KOBJ_REMOVE); + kobject_del(&q->kobj); + blk_trace_remove_sysfs(dev); + kobject_put(&dev->kobj); + goto unlock; + } } - - return 0; + ret = 0; +unlock: + mutex_unlock(&q->sysfs_lock); + return ret; } void blk_unregister_queue(struct gendisk *disk) @@ -922,7 +944,7 @@ void blk_unregister_queue(struct gendisk *disk) if (q->mq_ops) blk_mq_unregister_dev(disk_to_dev(disk), q); - if (q->request_fn) + if (q->request_fn || (q->mq_ops && q->elevator)) elv_unregister_queue(q); kobject_uevent(&q->kobj, KOBJ_REMOVE); diff --git a/block/blk-tag.c b/block/blk-tag.c index bae1dec..07cc329 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) list_del_init(&rq->queuelist); rq->rq_flags &= ~RQF_QUEUED; rq->tag = -1; + rq->internal_tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) printk(KERN_ERR "%s: tag %d is missing\n", diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a6bb4fe..82fd0cc 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg) unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; struct bio *bio; - if ((bio = throtl_peek_queued(&sq->queued[READ]))) + bio = throtl_peek_queued(&sq->queued[READ]); + if (bio) tg_may_dispatch(tg, bio, &read_wait); - if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) + bio = throtl_peek_queued(&sq->queued[WRITE]); + if (bio) tg_may_dispatch(tg, bio, &write_wait); min_wait = min(read_wait, write_wait); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index f0a9c07..1aedb1f 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -96,7 +96,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->queue->backing_dev_info.wb; + struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb; return time_before(jiffies, wb->dirty_sleep + HZ); } @@ -279,7 +279,7 @@ enum { static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) { - struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->queue->backing_dev_info; u64 thislat; /* @@ -339,7 +339,7 @@ static int latency_exceeded(struct rq_wb *rwb) static void rwb_trace_step(struct rq_wb *rwb, const char *msg) { - struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; + struct backing_dev_info *bdi = rwb->queue->backing_dev_info; trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, rwb->wb_background, rwb->wb_normal, rwb->wb_max); @@ -423,7 +423,7 @@ static void wb_timer_fn(unsigned long data) status = latency_exceeded(rwb); - trace_wbt_timer(&rwb->queue->backing_dev_info, status, rwb->scale_step, + trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step, inflight); /* diff --git a/block/blk.h b/block/blk.h index 041185e..d1ea4bd9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -14,6 +14,10 @@ /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) +#ifdef CONFIG_DEBUG_FS +extern struct dentry *blk_debugfs_root; +#endif + struct blk_flush_queue { unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; @@ -96,6 +100,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, struct bio *bio); bool bio_attempt_back_merge(struct request_queue *q, struct request *req, struct bio *bio); +bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, + struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int *request_count, struct request **same_queue_rq); @@ -167,7 +173,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) return NULL; } if (unlikely(blk_queue_bypass(q)) || - !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) + !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) return NULL; } } @@ -176,16 +182,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_activate_req_fn) - e->type->ops.elevator_activate_req_fn(q, rq); + if (e->type->ops.sq.elevator_activate_req_fn) + e->type->ops.sq.elevator_activate_req_fn(q, rq); } static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_deactivate_req_fn) - e->type->ops.elevator_deactivate_req_fn(q, rq); + if (e->type->ops.sq.elevator_deactivate_req_fn) + e->type->ops.sq.elevator_deactivate_req_fn(q, rq); } #ifdef CONFIG_FAIL_IO_TIMEOUT @@ -204,14 +210,14 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio); int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio); -int attempt_back_merge(struct request_queue *q, struct request *rq); -int attempt_front_merge(struct request_queue *q, struct request *rq); +struct request *attempt_back_merge(struct request_queue *q, struct request *rq); +struct request *attempt_front_merge(struct request_queue *q, struct request *rq); int blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); void blk_recalc_rq_segments(struct request *rq); void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); -int blk_try_merge(struct request *rq, struct bio *bio); +enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); @@ -249,7 +255,14 @@ static inline int blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS); + !blk_rq_is_passthrough(rq); +} + +static inline void req_set_nomerge(struct request_queue *q, struct request *req) +{ + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; } /* @@ -264,6 +277,22 @@ void ioc_clear_queue(struct request_queue *q); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); /** + * rq_ioc - determine io_context for request allocation + * @bio: request being allocated is for this bio (can be %NULL) + * + * Determine io_context to use for request allocation for @bio. May return + * %NULL if %current->io_context doesn't exist. + */ +static inline struct io_context *rq_ioc(struct bio *bio) +{ +#ifdef CONFIG_BLK_CGROUP + if (bio && bio->bi_ioc) + return bio->bi_ioc; +#endif + return current->io_context; +} + +/** * create_io_context - try to create task->io_context * @gfp_mask: allocation mask * @node: allocation node diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 9d652a9..cd15f9d 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -71,22 +71,24 @@ void bsg_job_done(struct bsg_job *job, int result, { struct request *req = job->req; struct request *rsp = req->next_rq; + struct scsi_request *rq = scsi_req(req); int err; err = job->req->errors = result; if (err < 0) /* we're only returning the result field in the reply */ - job->req->sense_len = sizeof(u32); + rq->sense_len = sizeof(u32); else - job->req->sense_len = job->reply_len; + rq->sense_len = job->reply_len; /* we assume all request payload was transferred, residual == 0 */ - req->resid_len = 0; + rq->resid_len = 0; if (rsp) { - WARN_ON(reply_payload_rcv_len > rsp->resid_len); + WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len); /* set reply (bidi) residual */ - rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + scsi_req(rsp)->resid_len -= + min(reply_payload_rcv_len, scsi_req(rsp)->resid_len); } blk_complete_request(req); } @@ -113,6 +115,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) if (!buf->sg_list) return -ENOMEM; sg_init_table(buf->sg_list, req->nr_phys_segments); + scsi_req(req)->resid_len = blk_rq_bytes(req); buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); buf->payload_len = blk_rq_bytes(req); return 0; @@ -127,6 +130,7 @@ static int bsg_create_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; struct request_queue *q = req->q; + struct scsi_request *rq = scsi_req(req); struct bsg_job *job; int ret; @@ -140,9 +144,9 @@ static int bsg_create_job(struct device *dev, struct request *req) job->req = req; if (q->bsg_job_size) job->dd_data = (void *)&job[1]; - job->request = req->cmd; - job->request_len = req->cmd_len; - job->reply = req->sense; + job->request = rq->cmd; + job->request_len = rq->cmd_len; + job->reply = rq->sense; job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer * allocated */ if (req->bio) { @@ -177,7 +181,7 @@ failjob_rls_job: * * Drivers/subsys should pass this to the queue init function. */ -void bsg_request_fn(struct request_queue *q) +static void bsg_request_fn(struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { @@ -214,24 +218,30 @@ void bsg_request_fn(struct request_queue *q) put_device(dev); spin_lock_irq(q->queue_lock); } -EXPORT_SYMBOL_GPL(bsg_request_fn); /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to - * @q: request queue setup by caller * @name: device to give bsg device * @job_fn: bsg job handler * @dd_job_size: size of LLD data needed for each job - * - * The caller should have setup the reuqest queue with bsg_request_fn - * as the request_fn. */ -int bsg_setup_queue(struct device *dev, struct request_queue *q, - char *name, bsg_job_fn *job_fn, int dd_job_size) +struct request_queue *bsg_setup_queue(struct device *dev, char *name, + bsg_job_fn *job_fn, int dd_job_size) { + struct request_queue *q; int ret; + q = blk_alloc_queue(GFP_KERNEL); + if (!q) + return ERR_PTR(-ENOMEM); + q->cmd_size = sizeof(struct scsi_request); + q->request_fn = bsg_request_fn; + + ret = blk_init_allocated_queue(q); + if (ret) + goto out_cleanup_queue; + q->queuedata = dev; q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; @@ -243,9 +253,12 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q, if (ret) { printk(KERN_ERR "%s: bsg interface failed to " "initialize - register queue\n", dev->kobj.name); - return ret; + goto out_cleanup_queue; } - return 0; + return q; +out_cleanup_queue: + blk_cleanup_queue(q); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(bsg_setup_queue); diff --git a/block/bsg.c b/block/bsg.c index a57046d..a9a8b8e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -85,7 +85,6 @@ struct bsg_command { struct bio *bidi_bio; int err; struct sg_io_v4 hdr; - char sense[SCSI_SENSE_BUFFERSIZE]; }; static void bsg_free_command(struct bsg_command *bc) @@ -140,18 +139,20 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, fmode_t has_write_perm) { + struct scsi_request *req = scsi_req(rq); + if (hdr->request_len > BLK_MAX_CDB) { - rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); - if (!rq->cmd) + req->cmd = kzalloc(hdr->request_len, GFP_KERNEL); + if (!req->cmd) return -ENOMEM; } - if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request, + if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request, hdr->request_len)) return -EFAULT; if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(rq->cmd, has_write_perm)) + if (blk_verify_command(req->cmd, has_write_perm)) return -EPERM; } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -159,7 +160,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, /* * fill in request structure */ - rq->cmd_len = hdr->request_len; + req->cmd_len = hdr->request_len; rq->timeout = msecs_to_jiffies(hdr->timeout); if (!rq->timeout) @@ -176,7 +177,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, * Check if sg_io_v4 from user is allowed and valid */ static int -bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) +bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op) { int ret = 0; @@ -197,7 +198,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) ret = -EINVAL; } - *rw = hdr->dout_xfer_len ? WRITE : READ; + *op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN; return ret; } @@ -205,13 +206,12 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, - u8 *sense) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; - int ret, rw; - unsigned int dxfer_len; + int ret; + unsigned int op, dxfer_len; void __user *dxferp = NULL; struct bsg_class_device *bcd = &q->bsg_dev; @@ -226,36 +226,35 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, hdr->din_xfer_len); - ret = bsg_validate_sgv4_hdr(hdr, &rw); + ret = bsg_validate_sgv4_hdr(hdr, &op); if (ret) return ERR_PTR(ret); /* * map scatter-gather elements separately and string them to request */ - rq = blk_get_request(q, rw, GFP_KERNEL); + rq = blk_get_request(q, op, GFP_KERNEL); if (IS_ERR(rq)) return rq; - blk_rq_set_block_pc(rq); + scsi_req_init(rq); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); if (ret) goto out; - if (rw == WRITE && hdr->din_xfer_len) { + if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) { if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { ret = -EOPNOTSUPP; goto out; } - next_rq = blk_get_request(q, READ, GFP_KERNEL); + next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(next_rq)) { ret = PTR_ERR(next_rq); next_rq = NULL; goto out; } rq->next_rq = next_rq; - next_rq->cmd_type = rq->cmd_type; dxferp = (void __user *)(unsigned long)hdr->din_xferp; ret = blk_rq_map_user(q, next_rq, NULL, dxferp, @@ -280,13 +279,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, goto out; } - rq->sense = sense; - rq->sense_len = 0; - return rq; out: - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(scsi_req(rq)); blk_put_request(rq); if (next_rq) { blk_rq_unmap_user(next_rq->bio); @@ -393,6 +388,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, struct bio *bio, struct bio *bidi_bio) { + struct scsi_request *req = scsi_req(rq); int ret = 0; dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors); @@ -407,12 +403,12 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, hdr->info |= SG_INFO_CHECK; hdr->response_len = 0; - if (rq->sense_len && hdr->response) { + if (req->sense_len && hdr->response) { int len = min_t(unsigned int, hdr->max_response_len, - rq->sense_len); + req->sense_len); ret = copy_to_user((void __user *)(unsigned long)hdr->response, - rq->sense, len); + req->sense, len); if (!ret) hdr->response_len = len; else @@ -420,14 +416,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->resid_len; - hdr->din_resid = rq->next_rq->resid_len; + hdr->dout_resid = req->resid_len; + hdr->din_resid = scsi_req(rq->next_rq)->resid_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->resid_len; + hdr->din_resid = req->resid_len; else - hdr->dout_resid = rq->resid_len; + hdr->dout_resid = req->resid_len; /* * If the request generated a negative error number, return it @@ -439,8 +435,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, ret = rq->errors; blk_rq_unmap_user(bio); - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(req); blk_put_request(rq); return ret; @@ -625,7 +620,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense); + rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -911,12 +906,11 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; int at_head; - u8 sense[SCSI_SENSE_BUFFERSIZE]; if (copy_from_user(&hdr, uarg, sizeof(hdr))) return -EFAULT; - rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense); + rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 838f07e..1379447 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2528,7 +2528,7 @@ static void cfq_remove_request(struct request *rq) } } -static int cfq_merge(struct request_queue *q, struct request **req, +static enum elv_merge cfq_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct cfq_data *cfqd = q->elevator->elevator_data; @@ -2544,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req, } static void cfq_merged_request(struct request_queue *q, struct request *req, - int type) + enum elv_merge type) { if (type == ELEVATOR_FRONT_MERGE) { struct cfq_queue *cfqq = RQ_CFQQ(req); @@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) if (!cfqg) return NULL; - for_each_cfqg_st(cfqg, i, j, st) - if ((cfqq = cfq_rb_first(st)) != NULL) + for_each_cfqg_st(cfqg, i, j, st) { + cfqq = cfq_rb_first(st); + if (cfqq) return cfqq; + } return NULL; } @@ -3860,6 +3862,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, goto out; } + /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */ + cfqq->ioprio_class = IOPRIO_CLASS_NONE; cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); cfq_init_prio_data(cfqq, cic); cfq_link_cfqq_cfqg(cfqq, cfqg); @@ -4838,7 +4842,7 @@ static struct elv_fs_entry cfq_attrs[] = { }; static struct elevator_type iosched_cfq = { - .ops = { + .ops.sq = { .elevator_merge_fn = cfq_merge, .elevator_merged_fn = cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 556826a..570021a 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -661,7 +661,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct block_device *bdev = inode->i_bdev; struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; - struct backing_dev_info *bdi; loff_t size; unsigned int max_sectors; @@ -708,9 +707,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKFRAGET: if (!arg) return -EINVAL; - bdi = blk_get_backing_dev_info(bdev); return compat_put_long(arg, - (bdi->ra_pages * PAGE_SIZE) / 512); + (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512); case BLKROGET: /* compatible */ return compat_put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ @@ -728,8 +726,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKFRASET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_SIZE; + bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKGETSIZE: size = i_size_read(bdev->bd_inode); diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 55e0bb6..c68f6bb 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -120,12 +120,11 @@ static void deadline_remove_request(struct request_queue *q, struct request *rq) deadline_del_rq_rb(dd, rq); } -static int +static enum elv_merge deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) { struct deadline_data *dd = q->elevator->elevator_data; struct request *__rq; - int ret; /* * check for front merge @@ -138,20 +137,17 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) BUG_ON(sector != blk_rq_pos(__rq)); if (elv_bio_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; + *req = __rq; + return ELEVATOR_FRONT_MERGE; } } } return ELEVATOR_NO_MERGE; -out: - *req = __rq; - return ret; } static void deadline_merged_request(struct request_queue *q, - struct request *req, int type) + struct request *req, enum elv_merge type) { struct deadline_data *dd = q->elevator->elevator_data; @@ -439,7 +435,7 @@ static struct elv_fs_entry deadline_attrs[] = { }; static struct elevator_type iosched_deadline = { - .ops = { + .ops.sq = { .elevator_merge_fn = deadline_merge, .elevator_merged_fn = deadline_merged_request, .elevator_merge_req_fn = deadline_merged_requests, diff --git a/block/elevator.c b/block/elevator.c index 40f0c04..699d10f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -40,6 +40,7 @@ #include <trace/events/block.h> #include "blk.h" +#include "blk-mq-sched.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -58,8 +59,10 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_bio_merge_fn) - return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); + if (e->uses_mq && e->type->ops.mq.allow_merge) + return e->type->ops.mq.allow_merge(q, rq, bio); + else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn) + return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); return 1; } @@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); hash_init(eq->hash); + eq->uses_mq = e->uses_mq; return eq; } @@ -203,11 +207,12 @@ int elevator_init(struct request_queue *q, char *name) } /* - * Use the default elevator specified by config boot param or - * config option. Don't try to load modules as we could be running - * off async and request_module() isn't allowed from async. + * Use the default elevator specified by config boot param for + * non-mq devices, or by config option. Don't try to load modules + * as we could be running off async and request_module() isn't + * allowed from async. */ - if (!e && *chosen_elevator) { + if (!e && !q->mq_ops && *chosen_elevator) { e = elevator_get(chosen_elevator, false); if (!e) printk(KERN_ERR "I/O scheduler %s not found\n", @@ -215,18 +220,32 @@ int elevator_init(struct request_queue *q, char *name) } if (!e) { - e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); + if (q->mq_ops && q->nr_hw_queues == 1) + e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false); + else if (q->mq_ops) + e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false); + else + e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); + if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ - "Using noop.\n"); + "Using noop/none.\n"); e = elevator_get("noop", false); } } - err = e->ops.elevator_init_fn(q, e); - if (err) + if (e->uses_mq) { + err = blk_mq_sched_setup(q); + if (!err) + err = e->ops.mq.init_sched(q, e); + } else + err = e->ops.sq.elevator_init_fn(q, e); + if (err) { + if (e->uses_mq) + blk_mq_sched_teardown(q); elevator_put(e); + } return err; } EXPORT_SYMBOL(elevator_init); @@ -234,8 +253,10 @@ EXPORT_SYMBOL(elevator_init); void elevator_exit(struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); - if (e->type->ops.elevator_exit_fn) - e->type->ops.elevator_exit_fn(e); + if (e->uses_mq && e->type->ops.mq.exit_sched) + e->type->ops.mq.exit_sched(e); + else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) + e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); kobject_put(&e->kobj); @@ -253,6 +274,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq) if (ELV_ON_HASH(rq)) __elv_rqhash_del(rq); } +EXPORT_SYMBOL_GPL(elv_rqhash_del); void elv_rqhash_add(struct request_queue *q, struct request *rq) { @@ -262,6 +284,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq) hash_add(e->hash, &rq->hash, rq_hash_key(rq)); rq->rq_flags |= RQF_HASHED; } +EXPORT_SYMBOL_GPL(elv_rqhash_add); void elv_rqhash_reposition(struct request_queue *q, struct request *rq) { @@ -405,11 +428,11 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(elv_dispatch_add_tail); -int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) +enum elv_merge elv_merge(struct request_queue *q, struct request **req, + struct bio *bio) { struct elevator_queue *e = q->elevator; struct request *__rq; - int ret; /* * Levels of merges: @@ -424,7 +447,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * First try one-hit cache. */ if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { - ret = blk_try_merge(q->last_merge, bio); + enum elv_merge ret = blk_try_merge(q->last_merge, bio); + if (ret != ELEVATOR_NO_MERGE) { *req = q->last_merge; return ret; @@ -443,8 +467,10 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) return ELEVATOR_BACK_MERGE; } - if (e->type->ops.elevator_merge_fn) - return e->type->ops.elevator_merge_fn(q, req, bio); + if (e->uses_mq && e->type->ops.mq.request_merge) + return e->type->ops.mq.request_merge(q, req, bio); + else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn) + return e->type->ops.sq.elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } @@ -456,8 +482,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) * * Returns true if we merged, false otherwise */ -static bool elv_attempt_insert_merge(struct request_queue *q, - struct request *rq) +bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) { struct request *__rq; bool ret; @@ -491,12 +516,15 @@ static bool elv_attempt_insert_merge(struct request_queue *q, return ret; } -void elv_merged_request(struct request_queue *q, struct request *rq, int type) +void elv_merged_request(struct request_queue *q, struct request *rq, + enum elv_merge type) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_merged_fn) - e->type->ops.elevator_merged_fn(q, rq, type); + if (e->uses_mq && e->type->ops.mq.request_merged) + e->type->ops.mq.request_merged(q, rq, type); + else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn) + e->type->ops.sq.elevator_merged_fn(q, rq, type); if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); @@ -508,10 +536,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct request *next) { struct elevator_queue *e = q->elevator; - const int next_sorted = next->rq_flags & RQF_SORTED; - - if (next_sorted && e->type->ops.elevator_merge_req_fn) - e->type->ops.elevator_merge_req_fn(q, rq, next); + bool next_sorted = false; + + if (e->uses_mq && e->type->ops.mq.requests_merged) + e->type->ops.mq.requests_merged(q, rq, next); + else if (e->type->ops.sq.elevator_merge_req_fn) { + next_sorted = (__force bool)(next->rq_flags & RQF_SORTED); + if (next_sorted) + e->type->ops.sq.elevator_merge_req_fn(q, rq, next); + } elv_rqhash_reposition(q, rq); @@ -528,8 +561,11 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_bio_merged_fn) - e->type->ops.elevator_bio_merged_fn(q, rq, bio); + if (WARN_ON_ONCE(e->uses_mq)) + return; + + if (e->type->ops.sq.elevator_bio_merged_fn) + e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } #ifdef CONFIG_PM @@ -574,11 +610,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) void elv_drain_elevator(struct request_queue *q) { + struct elevator_queue *e = q->elevator; static int printed; + if (WARN_ON_ONCE(e->uses_mq)) + return; + lockdep_assert_held(q->queue_lock); - while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) + while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) ; if (q->nr_sorted && printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " @@ -597,7 +637,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (rq->rq_flags & RQF_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ - if (rq->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -639,7 +679,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (elv_attempt_insert_merge(q, rq)) break; case ELEVATOR_INSERT_SORT: - BUG_ON(rq->cmd_type != REQ_TYPE_FS); + BUG_ON(blk_rq_is_passthrough(rq)); rq->rq_flags |= RQF_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { @@ -653,7 +693,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * rq cannot be accessed after calling * elevator_add_req_fn. */ - q->elevator->type->ops.elevator_add_req_fn(q, rq); + q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); break; case ELEVATOR_INSERT_FLUSH: @@ -682,8 +722,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_latter_req_fn) - return e->type->ops.elevator_latter_req_fn(q, rq); + if (e->uses_mq && e->type->ops.mq.next_request) + return e->type->ops.mq.next_request(q, rq); + else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn) + return e->type->ops.sq.elevator_latter_req_fn(q, rq); + return NULL; } @@ -691,8 +734,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_former_req_fn) - return e->type->ops.elevator_former_req_fn(q, rq); + if (e->uses_mq && e->type->ops.mq.former_request) + return e->type->ops.mq.former_request(q, rq); + if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn) + return e->type->ops.sq.elevator_former_req_fn(q, rq); return NULL; } @@ -701,8 +746,11 @@ int elv_set_request(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_set_req_fn) - return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); + if (WARN_ON_ONCE(e->uses_mq)) + return 0; + + if (e->type->ops.sq.elevator_set_req_fn) + return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); return 0; } @@ -710,16 +758,22 @@ void elv_put_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_put_req_fn) - e->type->ops.elevator_put_req_fn(rq); + if (WARN_ON_ONCE(e->uses_mq)) + return; + + if (e->type->ops.sq.elevator_put_req_fn) + e->type->ops.sq.elevator_put_req_fn(rq); } int elv_may_queue(struct request_queue *q, unsigned int op) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_may_queue_fn) - return e->type->ops.elevator_may_queue_fn(q, op); + if (WARN_ON_ONCE(e->uses_mq)) + return 0; + + if (e->type->ops.sq.elevator_may_queue_fn) + return e->type->ops.sq.elevator_may_queue_fn(q, op); return ELV_MQUEUE_MAY; } @@ -728,14 +782,17 @@ void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; + if (WARN_ON_ONCE(e->uses_mq)) + return; + /* * request is released from the driver, io must be done */ if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; if ((rq->rq_flags & RQF_SORTED) && - e->type->ops.elevator_completed_req_fn) - e->type->ops.elevator_completed_req_fn(q, rq); + e->type->ops.sq.elevator_completed_req_fn) + e->type->ops.sq.elevator_completed_req_fn(q, rq); } } @@ -803,8 +860,8 @@ int elv_register_queue(struct request_queue *q) } kobject_uevent(&e->kobj, KOBJ_ADD); e->registered = 1; - if (e->type->ops.elevator_registered_fn) - e->type->ops.elevator_registered_fn(q); + if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) + e->type->ops.sq.elevator_registered_fn(q); } return error; } @@ -891,9 +948,14 @@ EXPORT_SYMBOL_GPL(elv_unregister); static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old = q->elevator; - bool registered = old->registered; + bool old_registered = false; int err; + if (q->mq_ops) { + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + } + /* * Turn on BYPASS and drain all requests w/ elevator private data. * Block layer doesn't call into a quiesced elevator - all requests @@ -901,42 +963,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * using INSERT_BACK. All requests have SOFTBARRIER set and no * merge happens either. */ - blk_queue_bypass_start(q); + if (old) { + old_registered = old->registered; + + if (old->uses_mq) + blk_mq_sched_teardown(q); + + if (!q->mq_ops) + blk_queue_bypass_start(q); - /* unregister and clear all auxiliary data of the old elevator */ - if (registered) - elv_unregister_queue(q); + /* unregister and clear all auxiliary data of the old elevator */ + if (old_registered) + elv_unregister_queue(q); - spin_lock_irq(q->queue_lock); - ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); + spin_lock_irq(q->queue_lock); + ioc_clear_queue(q); + spin_unlock_irq(q->queue_lock); + } /* allocate, init and register new elevator */ - err = new_e->ops.elevator_init_fn(q, new_e); - if (err) - goto fail_init; + if (new_e) { + if (new_e->uses_mq) { + err = blk_mq_sched_setup(q); + if (!err) + err = new_e->ops.mq.init_sched(q, new_e); + } else + err = new_e->ops.sq.elevator_init_fn(q, new_e); + if (err) + goto fail_init; - if (registered) { err = elv_register_queue(q); if (err) goto fail_register; - } + } else + q->elevator = NULL; /* done, kill the old one and finish */ - elevator_exit(old); - blk_queue_bypass_end(q); + if (old) { + elevator_exit(old); + if (!q->mq_ops) + blk_queue_bypass_end(q); + } - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + if (q->mq_ops) { + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + } + + if (new_e) + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + else + blk_add_trace_msg(q, "elv switch: none"); return 0; fail_register: + if (q->mq_ops) + blk_mq_sched_teardown(q); elevator_exit(q->elevator); fail_init: /* switch failed, restore and re-register old elevator */ - q->elevator = old; - elv_register_queue(q); - blk_queue_bypass_end(q); + if (old) { + q->elevator = old; + elv_register_queue(q); + if (!q->mq_ops) + blk_queue_bypass_end(q); + } + if (q->mq_ops) { + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + } return err; } @@ -949,8 +1045,11 @@ static int __elevator_change(struct request_queue *q, const char *name) char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; - if (!q->elevator) - return -ENXIO; + /* + * Special case for mq, turn off scheduling + */ + if (q->mq_ops && !strncmp(name, "none", 4)) + return elevator_switch(q, NULL); strlcpy(elevator_name, name, sizeof(elevator_name)); e = elevator_get(strstrip(elevator_name), true); @@ -959,11 +1058,21 @@ static int __elevator_change(struct request_queue *q, const char *name) return -EINVAL; } - if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { + if (q->elevator && + !strcmp(elevator_name, q->elevator->type->elevator_name)) { elevator_put(e); return 0; } + if (!e->uses_mq && q->mq_ops) { + elevator_put(e); + return -EINVAL; + } + if (e->uses_mq && !q->mq_ops) { + elevator_put(e); + return -EINVAL; + } + return elevator_switch(q, e); } @@ -985,7 +1094,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, { int ret; - if (!q->elevator) + if (!(q->mq_ops || q->request_fn)) return count; ret = __elevator_change(q, name); @@ -999,24 +1108,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, ssize_t elv_iosched_show(struct request_queue *q, char *name) { struct elevator_queue *e = q->elevator; - struct elevator_type *elv; + struct elevator_type *elv = NULL; struct elevator_type *__e; int len = 0; - if (!q->elevator || !blk_queue_stackable(q)) + if (!blk_queue_stackable(q)) return sprintf(name, "none\n"); - elv = e->type; + if (!q->elevator) + len += sprintf(name+len, "[none] "); + else + elv = e->type; spin_lock(&elv_list_lock); list_for_each_entry(__e, &elv_list, list) { - if (!strcmp(elv->elevator_name, __e->elevator_name)) + if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) { len += sprintf(name+len, "[%s] ", elv->elevator_name); - else + continue; + } + if (__e->uses_mq && q->mq_ops) + len += sprintf(name+len, "%s ", __e->elevator_name); + else if (!__e->uses_mq && !q->mq_ops) len += sprintf(name+len, "%s ", __e->elevator_name); } spin_unlock(&elv_list_lock); + if (q->mq_ops && q->elevator) + len += sprintf(name+len, "none"); + len += sprintf(len+name, "\n"); return len; } diff --git a/block/genhd.c b/block/genhd.c index fcd6d4f..3631cd4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,6 +572,20 @@ exit: disk_part_iter_exit(&piter); } +void put_disk_devt(struct disk_devt *disk_devt) +{ + if (disk_devt && atomic_dec_and_test(&disk_devt->count)) + disk_devt->release(disk_devt); +} +EXPORT_SYMBOL(put_disk_devt); + +void get_disk_devt(struct disk_devt *disk_devt) +{ + if (disk_devt) + atomic_inc(&disk_devt->count); +} +EXPORT_SYMBOL(get_disk_devt); + /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -612,8 +626,15 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); + /* + * Take a reference on the devt and assign it to queue since it + * must not be reallocated while the bdi is registered + */ + disk->queue->disk_devt = disk->disk_devt; + get_disk_devt(disk->disk_devt); + /* Register BDI before referencing it from bdev */ - bdi = &disk->queue->backing_dev_info; + bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); blk_register_region(disk_devt(disk), disk->minors, NULL, @@ -648,6 +669,8 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { + bdev_unhash_inode(MKDEV(disk->major, + disk->first_minor + part->partno)); invalidate_partition(disk, part->partno); delete_partition(disk, part->partno); } diff --git a/block/ioctl.c b/block/ioctl.c index be7f4de..7b88820 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -505,7 +505,6 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - struct backing_dev_info *bdi; void __user *argp = (void __user *)arg; loff_t size; unsigned int max_sectors; @@ -532,8 +531,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKFRAGET: if (!arg) return -EINVAL; - bdi = blk_get_backing_dev_info(bdev); - return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512); + return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ @@ -560,8 +558,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - bdi = blk_get_backing_dev_info(bdev); - bdi->ra_pages = (arg * 512) / PAGE_SIZE; + bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKBSZSET: return blkdev_bszset(bdev, mode, argp); diff --git a/block/mq-deadline.c b/block/mq-deadline.c new file mode 100644 index 0000000..23612163 --- /dev/null +++ b/block/mq-deadline.c @@ -0,0 +1,556 @@ +/* + * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, + * for the blk-mq scheduling framework + * + * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk> + */ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/elevator.h> +#include <linux/bio.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/compiler.h> +#include <linux/rbtree.h> +#include <linux/sbitmap.h> + +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-tag.h" +#include "blk-mq-sched.h" + +/* + * See Documentation/block/deadline-iosched.txt + */ +static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 16; /* # of sequential requests treated as one + by the above parameters. For throughput. */ + +struct deadline_data { + /* + * run time data + */ + + /* + * requests (deadline_rq s) are present on both sort_list and fifo_list + */ + struct rb_root sort_list[2]; + struct list_head fifo_list[2]; + + /* + * next in sort order. read, write or both are NULL + */ + struct request *next_rq[2]; + unsigned int batching; /* number of sequential requests made */ + unsigned int starved; /* times reads have starved writes */ + + /* + * settings that change how the i/o scheduler behaves + */ + int fifo_expire[2]; + int fifo_batch; + int writes_starved; + int front_merges; + + spinlock_t lock; + struct list_head dispatch; +}; + +static inline struct rb_root * +deadline_rb_root(struct deadline_data *dd, struct request *rq) +{ + return &dd->sort_list[rq_data_dir(rq)]; +} + +/* + * get the request after `rq' in sector-sorted order + */ +static inline struct request * +deadline_latter_request(struct request *rq) +{ + struct rb_node *node = rb_next(&rq->rb_node); + + if (node) + return rb_entry_rq(node); + + return NULL; +} + +static void +deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) +{ + struct rb_root *root = deadline_rb_root(dd, rq); + + elv_rb_add(root, rq); +} + +static inline void +deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) +{ + const int data_dir = rq_data_dir(rq); + + if (dd->next_rq[data_dir] == rq) + dd->next_rq[data_dir] = deadline_latter_request(rq); + + elv_rb_del(deadline_rb_root(dd, rq), rq); +} + +/* + * remove rq from rbtree and fifo. + */ +static void deadline_remove_request(struct request_queue *q, struct request *rq) +{ + struct deadline_data *dd = q->elevator->elevator_data; + + list_del_init(&rq->queuelist); + + /* + * We might not be on the rbtree, if we are doing an insert merge + */ + if (!RB_EMPTY_NODE(&rq->rb_node)) + deadline_del_rq_rb(dd, rq); + + elv_rqhash_del(q, rq); + if (q->last_merge == rq) + q->last_merge = NULL; +} + +static void dd_request_merged(struct request_queue *q, struct request *req, + enum elv_merge type) +{ + struct deadline_data *dd = q->elevator->elevator_data; + + /* + * if the merge was a front merge, we need to reposition request + */ + if (type == ELEVATOR_FRONT_MERGE) { + elv_rb_del(deadline_rb_root(dd, req), req); + deadline_add_rq_rb(dd, req); + } +} + +static void dd_merged_requests(struct request_queue *q, struct request *req, + struct request *next) +{ + /* + * if next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo + */ + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before((unsigned long)next->fifo_time, + (unsigned long)req->fifo_time)) { + list_move(&req->queuelist, &next->queuelist); + req->fifo_time = next->fifo_time; + } + } + + /* + * kill knowledge of next, this one is a goner + */ + deadline_remove_request(q, next); +} + +/* + * move an entry to dispatch queue + */ +static void +deadline_move_request(struct deadline_data *dd, struct request *rq) +{ + const int data_dir = rq_data_dir(rq); + + dd->next_rq[READ] = NULL; + dd->next_rq[WRITE] = NULL; + dd->next_rq[data_dir] = deadline_latter_request(rq); + + /* + * take it off the sort and fifo list + */ + deadline_remove_request(rq->q, rq); +} + +/* + * deadline_check_fifo returns 0 if there are no expired requests on the fifo, + * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) + */ +static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) +{ + struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next); + + /* + * rq is expired! + */ + if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) + return 1; + + return 0; +} + +/* + * deadline_dispatch_requests selects the best request according to + * read/write expire, fifo_batch, etc + */ +static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + struct request *rq; + bool reads, writes; + int data_dir; + + if (!list_empty(&dd->dispatch)) { + rq = list_first_entry(&dd->dispatch, struct request, queuelist); + list_del_init(&rq->queuelist); + goto done; + } + + reads = !list_empty(&dd->fifo_list[READ]); + writes = !list_empty(&dd->fifo_list[WRITE]); + + /* + * batches are currently reads XOR writes + */ + if (dd->next_rq[WRITE]) + rq = dd->next_rq[WRITE]; + else + rq = dd->next_rq[READ]; + + if (rq && dd->batching < dd->fifo_batch) + /* we have a next request are still entitled to batch */ + goto dispatch_request; + + /* + * at this point we are not running a batch. select the appropriate + * data direction (read / write) + */ + + if (reads) { + BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ])); + + if (writes && (dd->starved++ >= dd->writes_starved)) + goto dispatch_writes; + + data_dir = READ; + + goto dispatch_find_request; + } + + /* + * there are either no reads or writes have been starved + */ + + if (writes) { +dispatch_writes: + BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE])); + + dd->starved = 0; + + data_dir = WRITE; + + goto dispatch_find_request; + } + + return NULL; + +dispatch_find_request: + /* + * we are not running a batch, find best request for selected data_dir + */ + if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) { + /* + * A deadline has expired, the last request was in the other + * direction, or we have run out of higher-sectored requests. + * Start again from the request with the earliest expiry time. + */ + rq = rq_entry_fifo(dd->fifo_list[data_dir].next); + } else { + /* + * The last req was the same dir and we have a next request in + * sort order. No expired requests so continue on from here. + */ + rq = dd->next_rq[data_dir]; + } + + dd->batching = 0; + +dispatch_request: + /* + * rq is the selected appropriate request. + */ + dd->batching++; + deadline_move_request(dd, rq); +done: + rq->rq_flags |= RQF_STARTED; + return rq; +} + +static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + struct request *rq; + + spin_lock(&dd->lock); + rq = __dd_dispatch_request(hctx); + spin_unlock(&dd->lock); + + return rq; +} + +static void dd_exit_queue(struct elevator_queue *e) +{ + struct deadline_data *dd = e->elevator_data; + + BUG_ON(!list_empty(&dd->fifo_list[READ])); + BUG_ON(!list_empty(&dd->fifo_list[WRITE])); + + kfree(dd); +} + +/* + * initialize elevator private data (deadline_data). + */ +static int dd_init_queue(struct request_queue *q, struct elevator_type *e) +{ + struct deadline_data *dd; + struct elevator_queue *eq; + + eq = elevator_alloc(q, e); + if (!eq) + return -ENOMEM; + + dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); + if (!dd) { + kobject_put(&eq->kobj); + return -ENOMEM; + } + eq->elevator_data = dd; + + INIT_LIST_HEAD(&dd->fifo_list[READ]); + INIT_LIST_HEAD(&dd->fifo_list[WRITE]); + dd->sort_list[READ] = RB_ROOT; + dd->sort_list[WRITE] = RB_ROOT; + dd->fifo_expire[READ] = read_expire; + dd->fifo_expire[WRITE] = write_expire; + dd->writes_starved = writes_starved; + dd->front_merges = 1; + dd->fifo_batch = fifo_batch; + spin_lock_init(&dd->lock); + INIT_LIST_HEAD(&dd->dispatch); + + q->elevator = eq; + return 0; +} + +static int dd_request_merge(struct request_queue *q, struct request **rq, + struct bio *bio) +{ + struct deadline_data *dd = q->elevator->elevator_data; + sector_t sector = bio_end_sector(bio); + struct request *__rq; + + if (!dd->front_merges) + return ELEVATOR_NO_MERGE; + + __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); + if (__rq) { + BUG_ON(sector != blk_rq_pos(__rq)); + + if (elv_bio_merge_ok(__rq, bio)) { + *rq = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + + return ELEVATOR_NO_MERGE; +} + +static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + struct request *free = NULL; + bool ret; + + spin_lock(&dd->lock); + ret = blk_mq_sched_try_merge(q, bio, &free); + spin_unlock(&dd->lock); + + if (free) + blk_mq_free_request(free); + + return ret; +} + +/* + * add rq to rbtree and fifo + */ +static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + const int data_dir = rq_data_dir(rq); + + if (blk_mq_sched_try_insert_merge(q, rq)) + return; + + blk_mq_sched_request_inserted(rq); + + if (at_head || blk_rq_is_passthrough(rq)) { + if (at_head) + list_add(&rq->queuelist, &dd->dispatch); + else + list_add_tail(&rq->queuelist, &dd->dispatch); + } else { + deadline_add_rq_rb(dd, rq); + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + + /* + * set expire time and add to fifo list + */ + rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; + list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); + } +} + +static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, + struct list_head *list, bool at_head) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_lock(&dd->lock); + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + dd_insert_request(hctx, rq, at_head); + } + spin_unlock(&dd->lock); +} + +static bool dd_has_work(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + + return !list_empty_careful(&dd->dispatch) || + !list_empty_careful(&dd->fifo_list[0]) || + !list_empty_careful(&dd->fifo_list[1]); +} + +/* + * sysfs parts below + */ +static ssize_t +deadline_var_show(int var, char *page) +{ + return sprintf(page, "%d\n", var); +} + +static ssize_t +deadline_var_store(int *var, const char *page, size_t count) +{ + char *p = (char *) page; + + *var = simple_strtol(p, &p, 10); + return count; +} + +#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct deadline_data *dd = e->elevator_data; \ + int __data = __VAR; \ + if (__CONV) \ + __data = jiffies_to_msecs(__data); \ + return deadline_var_show(__data, (page)); \ +} +SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); +SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); +SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); +SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); +SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); +#undef SHOW_FUNCTION + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct deadline_data *dd = e->elevator_data; \ + int __data; \ + int ret = deadline_var_store(&__data, (page), count); \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + if (__CONV) \ + *(__PTR) = msecs_to_jiffies(__data); \ + else \ + *(__PTR) = __data; \ + return ret; \ +} +STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); +STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); +STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); +STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); +#undef STORE_FUNCTION + +#define DD_ATTR(name) \ + __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \ + deadline_##name##_store) + +static struct elv_fs_entry deadline_attrs[] = { + DD_ATTR(read_expire), + DD_ATTR(write_expire), + DD_ATTR(writes_starved), + DD_ATTR(front_merges), + DD_ATTR(fifo_batch), + __ATTR_NULL +}; + +static struct elevator_type mq_deadline = { + .ops.mq = { + .insert_requests = dd_insert_requests, + .dispatch_request = dd_dispatch_request, + .next_request = elv_rb_latter_request, + .former_request = elv_rb_former_request, + .bio_merge = dd_bio_merge, + .request_merge = dd_request_merge, + .requests_merged = dd_merged_requests, + .request_merged = dd_request_merged, + .has_work = dd_has_work, + .init_sched = dd_init_queue, + .exit_sched = dd_exit_queue, + }, + + .uses_mq = true, + .elevator_attrs = deadline_attrs, + .elevator_name = "mq-deadline", + .elevator_owner = THIS_MODULE, +}; + +static int __init deadline_init(void) +{ + return elv_register(&mq_deadline); +} + +static void __exit deadline_exit(void) +{ + elv_unregister(&mq_deadline); +} + +module_init(deadline_init); +module_exit(deadline_exit); + +MODULE_AUTHOR("Jens Axboe"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MQ deadline IO scheduler"); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index a163c48..2d1b15d 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e) } static struct elevator_type elevator_noop = { - .ops = { + .ops.sq = { .elevator_merge_req_fn = noop_merged_requests, .elevator_dispatch_fn = noop_dispatch, .elevator_add_req_fn = noop_add_request, diff --git a/block/opal_proto.h b/block/opal_proto.h new file mode 100644 index 0000000..f40c9ac --- /dev/null +++ b/block/opal_proto.h @@ -0,0 +1,452 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Authors: + * Rafael Antognolli <rafael.antognolli@intel.com> + * Scott Bauer <scott.bauer@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/types.h> + +#ifndef _OPAL_PROTO_H +#define _OPAL_PROTO_H + +/* + * These constant values come from: + * SPC-4 section + * 6.30 SECURITY PROTOCOL IN command / table 265. + */ +enum { + TCG_SECP_00 = 0, + TCG_SECP_01, +}; + +/* + * Token defs derived from: + * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * 3.2.2 Data Stream Encoding + */ +enum opal_response_token { + OPAL_DTA_TOKENID_BYTESTRING = 0xe0, + OPAL_DTA_TOKENID_SINT = 0xe1, + OPAL_DTA_TOKENID_UINT = 0xe2, + OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */ + OPAL_DTA_TOKENID_INVALID = 0X0 +}; + +#define DTAERROR_NO_METHOD_STATUS 0x89 +#define GENERIC_HOST_SESSION_NUM 0x41 + +#define TPER_SYNC_SUPPORTED 0x01 + +#define TINY_ATOM_DATA_MASK 0x3F +#define TINY_ATOM_SIGNED 0x40 + +#define SHORT_ATOM_ID 0x80 +#define SHORT_ATOM_BYTESTRING 0x20 +#define SHORT_ATOM_SIGNED 0x10 +#define SHORT_ATOM_LEN_MASK 0xF + +#define MEDIUM_ATOM_ID 0xC0 +#define MEDIUM_ATOM_BYTESTRING 0x10 +#define MEDIUM_ATOM_SIGNED 0x8 +#define MEDIUM_ATOM_LEN_MASK 0x7 + +#define LONG_ATOM_ID 0xe0 +#define LONG_ATOM_BYTESTRING 0x2 +#define LONG_ATOM_SIGNED 0x1 + +/* Derived from TCG Core spec 2.01 Section: + * 3.2.2.1 + * Data Type + */ +#define TINY_ATOM_BYTE 0x7F +#define SHORT_ATOM_BYTE 0xBF +#define MEDIUM_ATOM_BYTE 0xDF +#define LONG_ATOM_BYTE 0xE3 + +#define OPAL_INVAL_PARAM 12 +#define OPAL_MANUFACTURED_INACTIVE 0x08 +#define OPAL_DISCOVERY_COMID 0x0001 + +#define LOCKING_RANGE_NON_GLOBAL 0x03 +/* + * User IDs used in the TCG storage SSCs + * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * Section: 6.3 Assigned UIDs + */ +#define OPAL_UID_LENGTH 8 +#define OPAL_METHOD_LENGTH 8 +#define OPAL_MSID_KEYLEN 15 +#define OPAL_UID_LENGTH_HALF 4 + +/* Enum to index OPALUID array */ +enum opal_uid { + /* users */ + OPAL_SMUID_UID, + OPAL_THISSP_UID, + OPAL_ADMINSP_UID, + OPAL_LOCKINGSP_UID, + OPAL_ENTERPRISE_LOCKINGSP_UID, + OPAL_ANYBODY_UID, + OPAL_SID_UID, + OPAL_ADMIN1_UID, + OPAL_USER1_UID, + OPAL_USER2_UID, + OPAL_PSID_UID, + OPAL_ENTERPRISE_BANDMASTER0_UID, + OPAL_ENTERPRISE_ERASEMASTER_UID, + /* tables */ + OPAL_LOCKINGRANGE_GLOBAL, + OPAL_LOCKINGRANGE_ACE_RDLOCKED, + OPAL_LOCKINGRANGE_ACE_WRLOCKED, + OPAL_MBRCONTROL, + OPAL_MBR, + OPAL_AUTHORITY_TABLE, + OPAL_C_PIN_TABLE, + OPAL_LOCKING_INFO_TABLE, + OPAL_ENTERPRISE_LOCKING_INFO_TABLE, + /* C_PIN_TABLE object ID's */ + OPAL_C_PIN_MSID, + OPAL_C_PIN_SID, + OPAL_C_PIN_ADMIN1, + /* half UID's (only first 4 bytes used) */ + OPAL_HALF_UID_AUTHORITY_OBJ_REF, + OPAL_HALF_UID_BOOLEAN_ACE, + /* omitted optional parameter */ + OPAL_UID_HEXFF, +}; + +#define OPAL_METHOD_LENGTH 8 + +/* Enum for indexing the OPALMETHOD array */ +enum opal_method { + OPAL_PROPERTIES, + OPAL_STARTSESSION, + OPAL_REVERT, + OPAL_ACTIVATE, + OPAL_EGET, + OPAL_ESET, + OPAL_NEXT, + OPAL_EAUTHENTICATE, + OPAL_GETACL, + OPAL_GENKEY, + OPAL_REVERTSP, + OPAL_GET, + OPAL_SET, + OPAL_AUTHENTICATE, + OPAL_RANDOM, + OPAL_ERASE, +}; + +enum opal_token { + /* Boolean */ + OPAL_TRUE = 0x01, + OPAL_FALSE = 0x00, + OPAL_BOOLEAN_EXPR = 0x03, + /* cellblocks */ + OPAL_TABLE = 0x00, + OPAL_STARTROW = 0x01, + OPAL_ENDROW = 0x02, + OPAL_STARTCOLUMN = 0x03, + OPAL_ENDCOLUMN = 0x04, + OPAL_VALUES = 0x01, + /* authority table */ + OPAL_PIN = 0x03, + /* locking tokens */ + OPAL_RANGESTART = 0x03, + OPAL_RANGELENGTH = 0x04, + OPAL_READLOCKENABLED = 0x05, + OPAL_WRITELOCKENABLED = 0x06, + OPAL_READLOCKED = 0x07, + OPAL_WRITELOCKED = 0x08, + OPAL_ACTIVEKEY = 0x0A, + /* locking info table */ + OPAL_MAXRANGES = 0x04, + /* mbr control */ + OPAL_MBRENABLE = 0x01, + OPAL_MBRDONE = 0x02, + /* properties */ + OPAL_HOSTPROPERTIES = 0x00, + /* atoms */ + OPAL_STARTLIST = 0xf0, + OPAL_ENDLIST = 0xf1, + OPAL_STARTNAME = 0xf2, + OPAL_ENDNAME = 0xf3, + OPAL_CALL = 0xf8, + OPAL_ENDOFDATA = 0xf9, + OPAL_ENDOFSESSION = 0xfa, + OPAL_STARTTRANSACTON = 0xfb, + OPAL_ENDTRANSACTON = 0xfC, + OPAL_EMPTYATOM = 0xff, + OPAL_WHERE = 0x00, +}; + +/* Locking state for a locking range */ +enum opal_lockingstate { + OPAL_LOCKING_READWRITE = 0x01, + OPAL_LOCKING_READONLY = 0x02, + OPAL_LOCKING_LOCKED = 0x03, +}; + +/* Packets derived from: + * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * Secion: 3.2.3 ComPackets, Packets & Subpackets + */ + +/* Comm Packet (header) for transmissions. */ +struct opal_compacket { + __be32 reserved0; + u8 extendedComID[4]; + __be32 outstandingData; + __be32 minTransfer; + __be32 length; +}; + +/* Packet structure. */ +struct opal_packet { + __be32 tsn; + __be32 hsn; + __be32 seq_number; + __be16 reserved0; + __be16 ack_type; + __be32 acknowledgment; + __be32 length; +}; + +/* Data sub packet header */ +struct opal_data_subpacket { + u8 reserved0[6]; + __be16 kind; + __be32 length; +}; + +/* header of a response */ +struct opal_header { + struct opal_compacket cp; + struct opal_packet pkt; + struct opal_data_subpacket subpkt; +}; + +#define FC_TPER 0x0001 +#define FC_LOCKING 0x0002 +#define FC_GEOMETRY 0x0003 +#define FC_ENTERPRISE 0x0100 +#define FC_DATASTORE 0x0202 +#define FC_SINGLEUSER 0x0201 +#define FC_OPALV100 0x0200 +#define FC_OPALV200 0x0203 + +/* + * The Discovery 0 Header. As defined in + * Opal SSC Documentation + * Section: 3.3.5 Capability Discovery + */ +struct d0_header { + __be32 length; /* the length of the header 48 in 2.00.100 */ + __be32 revision; /**< revision of the header 1 in 2.00.100 */ + __be32 reserved01; + __be32 reserved02; + /* + * the remainder of the structure is vendor specific and will not be + * addressed now + */ + u8 ignored[32]; +}; + +/* + * TPer Feature Descriptor. Contains flags indicating support for the + * TPer features described in the OPAL specification. The names match the + * OPAL terminology + * + * code == 0x001 in 2.00.100 + */ +struct d0_tper_features { + /* + * supported_features bits: + * bit 7: reserved + * bit 6: com ID management + * bit 5: reserved + * bit 4: streaming support + * bit 3: buffer management + * bit 2: ACK/NACK + * bit 1: async + * bit 0: sync + */ + u8 supported_features; + /* + * bytes 5 through 15 are reserved, but we represent the first 3 as + * u8 to keep the other two 32bits integers aligned. + */ + u8 reserved01[3]; + __be32 reserved02; + __be32 reserved03; +}; + +/* + * Locking Feature Descriptor. Contains flags indicating support for the + * locking features described in the OPAL specification. The names match the + * OPAL terminology + * + * code == 0x0002 in 2.00.100 + */ +struct d0_locking_features { + /* + * supported_features bits: + * bits 6-7: reserved + * bit 5: MBR done + * bit 4: MBR enabled + * bit 3: media encryption + * bit 2: locked + * bit 1: locking enabled + * bit 0: locking supported + */ + u8 supported_features; + /* + * bytes 5 through 15 are reserved, but we represent the first 3 as + * u8 to keep the other two 32bits integers aligned. + */ + u8 reserved01[3]; + __be32 reserved02; + __be32 reserved03; +}; + +/* + * Geometry Feature Descriptor. Contains flags indicating support for the + * geometry features described in the OPAL specification. The names match the + * OPAL terminology + * + * code == 0x0003 in 2.00.100 + */ +struct d0_geometry_features { + /* + * skip 32 bits from header, needed to align the struct to 64 bits. + */ + u8 header[4]; + /* + * reserved01: + * bits 1-6: reserved + * bit 0: align + */ + u8 reserved01; + u8 reserved02[7]; + __be32 logical_block_size; + __be64 alignment_granularity; + __be64 lowest_aligned_lba; +}; + +/* + * Enterprise SSC Feature + * + * code == 0x0100 + */ +struct d0_enterprise_ssc { + __be16 baseComID; + __be16 numComIDs; + /* range_crossing: + * bits 1-6: reserved + * bit 0: range crossing + */ + u8 range_crossing; + u8 reserved01; + __be16 reserved02; + __be32 reserved03; + __be32 reserved04; +}; + +/* + * Opal V1 feature + * + * code == 0x0200 + */ +struct d0_opal_v100 { + __be16 baseComID; + __be16 numComIDs; +}; + +/* + * Single User Mode feature + * + * code == 0x0201 + */ +struct d0_single_user_mode { + __be32 num_locking_objects; + /* reserved01: + * bit 0: any + * bit 1: all + * bit 2: policy + * bits 3-7: reserved + */ + u8 reserved01; + u8 reserved02; + __be16 reserved03; + __be32 reserved04; +}; + +/* + * Additonal Datastores feature + * + * code == 0x0202 + */ +struct d0_datastore_table { + __be16 reserved01; + __be16 max_tables; + __be32 max_size_tables; + __be32 table_size_alignment; +}; + +/* + * OPAL 2.0 feature + * + * code == 0x0203 + */ +struct d0_opal_v200 { + __be16 baseComID; + __be16 numComIDs; + /* range_crossing: + * bits 1-6: reserved + * bit 0: range crossing + */ + u8 range_crossing; + /* num_locking_admin_auth: + * not aligned to 16 bits, so use two u8. + * stored in big endian: + * 0: MSB + * 1: LSB + */ + u8 num_locking_admin_auth[2]; + /* num_locking_user_auth: + * not aligned to 16 bits, so use two u8. + * stored in big endian: + * 0: MSB + * 1: LSB + */ + u8 num_locking_user_auth[2]; + u8 initialPIN; + u8 revertedPIN; + u8 reserved01; + __be32 reserved02; +}; + +/* Union of features used to parse the discovery 0 response */ +struct d0_features { + __be16 code; + /* + * r_version bits: + * bits 4-7: version + * bits 0-3: reserved + */ + u8 r_version; + u8 length; + u8 features[]; +}; + +#endif /* _OPAL_PROTO_H */ diff --git a/block/partitions/efi.c b/block/partitions/efi.c index bcd86e5..39f70d9 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, if (!gpt) return NULL; - count = le32_to_cpu(gpt->num_partition_entries) * + count = (size_t)le32_to_cpu(gpt->num_partition_entries) * le32_to_cpu(gpt->sizeof_partition_entry); if (!count) return NULL; @@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, gpt_header **gpt, gpt_entry **ptes) { u32 crc, origcrc; - u64 lastlba; + u64 lastlba, pt_size; if (!ptes) return 0; @@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, goto fail; } + /* Sanity check partition table size */ + pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) * + le32_to_cpu((*gpt)->sizeof_partition_entry); + if (pt_size > KMALLOC_MAX_SIZE) { + pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n", + (unsigned long long)pt_size, KMALLOC_MAX_SIZE); + goto fail; + } + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) goto fail; /* Check the GUID Partition Entry Array CRC */ - crc = efi_crc32((const unsigned char *) (*ptes), - le32_to_cpu((*gpt)->num_partition_entries) * - le32_to_cpu((*gpt)->sizeof_partition_entry)); + crc = efi_crc32((const unsigned char *) (*ptes), pt_size); if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { pr_debug("GUID Partition Entry Array CRC check failed.\n"); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index c2b6492..2a2fc76 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -230,15 +230,17 @@ EXPORT_SYMBOL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, fmode_t mode) { - if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) + struct scsi_request *req = scsi_req(rq); + + if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (blk_verify_command(rq->cmd, mode & FMODE_WRITE)) + if (blk_verify_command(req->cmd, mode & FMODE_WRITE)) return -EPERM; /* * fill in request structure */ - rq->cmd_len = hdr->cmd_len; + req->cmd_len = hdr->cmd_len; rq->timeout = msecs_to_jiffies(hdr->timeout); if (!rq->timeout) @@ -254,6 +256,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { + struct scsi_request *req = scsi_req(rq); int r, ret = 0; /* @@ -267,13 +270,13 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->resid_len; + hdr->resid = req->resid_len; hdr->sb_len_wr = 0; - if (rq->sense_len && hdr->sbp) { - int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len); + if (req->sense_len && hdr->sbp) { + int len = min((unsigned int) hdr->mx_sb_len, req->sense_len); - if (!copy_to_user(hdr->sbp, rq->sense, len)) + if (!copy_to_user(hdr->sbp, req->sense, len)) hdr->sb_len_wr = len; else ret = -EFAULT; @@ -294,7 +297,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, int writing = 0; int at_head = 0; struct request *rq; - char sense[SCSI_SENSE_BUFFERSIZE]; + struct scsi_request *req; struct bio *bio; if (hdr->interface_id != 'S') @@ -318,14 +321,16 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, at_head = 1; ret = -ENOMEM; - rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); + rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + GFP_KERNEL); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); if (hdr->cmd_len > BLK_MAX_CDB) { - rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); - if (!rq->cmd) + req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); + if (!req->cmd) goto out_put_request; } @@ -357,9 +362,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, goto out_free_cdb; bio = rq->bio; - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; rq->retries = 0; start_time = jiffies; @@ -375,8 +377,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, ret = blk_complete_sghdr_rq(rq, hdr, bio); out_free_cdb: - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(req); out_put_request: blk_put_request(rq); return ret; @@ -420,9 +421,10 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, struct scsi_ioctl_command __user *sic) { struct request *rq; + struct scsi_request *req; int err; unsigned int in_len, out_len, bytes, opcode, cmdlen; - char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; + char *buffer = NULL; if (!sic) return -EINVAL; @@ -447,12 +449,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM); + rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + __GFP_RECLAIM); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; } - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); cmdlen = COMMAND_SIZE(opcode); @@ -460,14 +464,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, * get command and data to send to device, if any */ err = -EFAULT; - rq->cmd_len = cmdlen; - if (copy_from_user(rq->cmd, sic->data, cmdlen)) + req->cmd_len = cmdlen; + if (copy_from_user(req->cmd, sic->data, cmdlen)) goto error; if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = blk_verify_command(rq->cmd, mode & FMODE_WRITE); + err = blk_verify_command(req->cmd, mode & FMODE_WRITE); if (err) goto error; @@ -503,18 +507,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, goto error; } - memset(sense, 0, sizeof(sense)); - rq->sense = sense; - rq->sense_len = 0; - blk_execute_rq(q, disk, rq, 0); err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { - if (rq->sense_len && rq->sense) { - bytes = (OMAX_SB_LEN > rq->sense_len) ? - rq->sense_len : OMAX_SB_LEN; - if (copy_to_user(sic->data, rq->sense, bytes)) + if (req->sense_len && req->sense) { + bytes = (OMAX_SB_LEN > req->sense_len) ? + req->sense_len : OMAX_SB_LEN; + if (copy_to_user(sic->data, req->sense, bytes)) err = -EFAULT; } } else { @@ -539,14 +539,14 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, WRITE, __GFP_RECLAIM); + rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + scsi_req_init(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; - rq->cmd[0] = cmd; - rq->cmd[4] = data; - rq->cmd_len = 6; + scsi_req(rq)->cmd[0] = cmd; + scsi_req(rq)->cmd[4] = data; + scsi_req(rq)->cmd_len = 6; err = blk_execute_rq(q, bd_disk, rq, 0); blk_put_request(rq); @@ -743,6 +743,17 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode, } EXPORT_SYMBOL(scsi_cmd_blk_ioctl); +void scsi_req_init(struct request *rq) +{ + struct scsi_request *req = scsi_req(rq); + + memset(req->__cmd, 0, sizeof(req->__cmd)); + req->cmd = req->__cmd; + req->cmd_len = BLK_MAX_CDB; + req->sense_len = 0; +} +EXPORT_SYMBOL(scsi_req_init); + static int __init blk_scsi_ioctl_init(void) { blk_set_cmd_filter_defaults(&blk_default_cmd_filter); diff --git a/block/sed-opal.c b/block/sed-opal.c new file mode 100644 index 0000000..d1c52ba --- /dev/null +++ b/block/sed-opal.c @@ -0,0 +1,2488 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Authors: + * Scott Bauer <scott.bauer@intel.com> + * Rafael Antognolli <rafael.antognolli@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/genhd.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <uapi/linux/sed-opal.h> +#include <linux/sed-opal.h> +#include <linux/string.h> +#include <linux/kdev_t.h> + +#include "opal_proto.h" + +#define IO_BUFFER_LENGTH 2048 +#define MAX_TOKS 64 + +typedef int (*opal_step)(struct opal_dev *dev); + +enum opal_atom_width { + OPAL_WIDTH_TINY, + OPAL_WIDTH_SHORT, + OPAL_WIDTH_MEDIUM, + OPAL_WIDTH_LONG, + OPAL_WIDTH_TOKEN +}; + +/* + * On the parsed response, we don't store again the toks that are already + * stored in the response buffer. Instead, for each token, we just store a + * pointer to the position in the buffer where the token starts, and the size + * of the token in bytes. + */ +struct opal_resp_tok { + const u8 *pos; + size_t len; + enum opal_response_token type; + enum opal_atom_width width; + union { + u64 u; + s64 s; + } stored; +}; + +/* + * From the response header it's not possible to know how many tokens there are + * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later + * if we start dealing with messages that have more than that, we can increase + * this number. This is done to avoid having to make two passes through the + * response, the first one counting how many tokens we have and the second one + * actually storing the positions. + */ +struct parsed_resp { + int num; + struct opal_resp_tok toks[MAX_TOKS]; +}; + +struct opal_dev { + bool supported; + + void *data; + sec_send_recv *send_recv; + + const opal_step *funcs; + void **func_data; + int state; + struct mutex dev_lock; + u16 comid; + u32 hsn; + u32 tsn; + u64 align; + u64 lowest_lba; + + size_t pos; + u8 cmd[IO_BUFFER_LENGTH]; + u8 resp[IO_BUFFER_LENGTH]; + + struct parsed_resp parsed; + size_t prev_d_len; + void *prev_data; + + struct list_head unlk_lst; +}; + + +static const u8 opaluid[][OPAL_UID_LENGTH] = { + /* users */ + [OPAL_SMUID_UID] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff }, + [OPAL_THISSP_UID] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_ADMINSP_UID] = + { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_LOCKINGSP_UID] = + { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x02 }, + [OPAL_ENTERPRISE_LOCKINGSP_UID] = + { 0x00, 0x00, 0x02, 0x05, 0x00, 0x01, 0x00, 0x01 }, + [OPAL_ANYBODY_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_SID_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06 }, + [OPAL_ADMIN1_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x01 }, + [OPAL_USER1_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x01 }, + [OPAL_USER2_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x02 }, + [OPAL_PSID_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0xff, 0x01 }, + [OPAL_ENTERPRISE_BANDMASTER0_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x80, 0x01 }, + [OPAL_ENTERPRISE_ERASEMASTER_UID] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 }, + + /* tables */ + + [OPAL_LOCKINGRANGE_GLOBAL] = + { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_LOCKINGRANGE_ACE_RDLOCKED] = + { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 }, + [OPAL_LOCKINGRANGE_ACE_WRLOCKED] = + { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE8, 0x01 }, + [OPAL_MBRCONTROL] = + { 0x00, 0x00, 0x08, 0x03, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_MBR] = + { 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00 }, + [OPAL_AUTHORITY_TABLE] = + { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00}, + [OPAL_C_PIN_TABLE] = + { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00}, + [OPAL_LOCKING_INFO_TABLE] = + { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 }, + [OPAL_ENTERPRISE_LOCKING_INFO_TABLE] = + { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 }, + + /* C_PIN_TABLE object ID's */ + + [OPAL_C_PIN_MSID] = + { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02}, + [OPAL_C_PIN_SID] = + { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01}, + [OPAL_C_PIN_ADMIN1] = + { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x01, 0x00, 0x01}, + + /* half UID's (only first 4 bytes used) */ + + [OPAL_HALF_UID_AUTHORITY_OBJ_REF] = + { 0x00, 0x00, 0x0C, 0x05, 0xff, 0xff, 0xff, 0xff }, + [OPAL_HALF_UID_BOOLEAN_ACE] = + { 0x00, 0x00, 0x04, 0x0E, 0xff, 0xff, 0xff, 0xff }, + + /* special value for omitted optional parameter */ + [OPAL_UID_HEXFF] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, +}; + +/* + * TCG Storage SSC Methods. + * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * Section: 6.3 Assigned UIDs + */ +static const u8 opalmethod[][OPAL_UID_LENGTH] = { + [OPAL_PROPERTIES] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 }, + [OPAL_STARTSESSION] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x02 }, + [OPAL_REVERT] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x02 }, + [OPAL_ACTIVATE] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x03 }, + [OPAL_EGET] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06 }, + [OPAL_ESET] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07 }, + [OPAL_NEXT] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08 }, + [OPAL_EAUTHENTICATE] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c }, + [OPAL_GETACL] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0d }, + [OPAL_GENKEY] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10 }, + [OPAL_REVERTSP] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11 }, + [OPAL_GET] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16 }, + [OPAL_SET] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17 }, + [OPAL_AUTHENTICATE] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1c }, + [OPAL_RANDOM] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 }, + [OPAL_ERASE] = + { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 }, +}; + +typedef int (cont_fn)(struct opal_dev *dev); + +static int end_opal_session_error(struct opal_dev *dev); + +struct opal_suspend_data { + struct opal_lock_unlock unlk; + u8 lr; + struct list_head node; +}; + +/* + * Derived from: + * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * Section: 5.1.5 Method Status Codes + */ +static const char * const opal_errors[] = { + "Success", + "Not Authorized", + "Unknown Error", + "SP Busy", + "SP Failed", + "SP Disabled", + "SP Frozen", + "No Sessions Available", + "Uniqueness Conflict", + "Insufficient Space", + "Insufficient Rows", + "Invalid Function", + "Invalid Parameter", + "Invalid Reference", + "Unknown Error", + "TPER Malfunction", + "Transaction Failure", + "Response Overflow", + "Authority Locked Out", +}; + +static const char *opal_error_to_human(int error) +{ + if (error == 0x3f) + return "Failed"; + + if (error >= ARRAY_SIZE(opal_errors) || error < 0) + return "Unknown Error"; + + return opal_errors[error]; +} + +static void print_buffer(const u8 *ptr, u32 length) +{ +#ifdef DEBUG + print_hex_dump_bytes("OPAL: ", DUMP_PREFIX_OFFSET, ptr, length); + pr_debug("\n"); +#endif +} + +static bool check_tper(const void *data) +{ + const struct d0_tper_features *tper = data; + u8 flags = tper->supported_features; + + if (!(flags & TPER_SYNC_SUPPORTED)) { + pr_err("TPer sync not supported. flags = %d\n", + tper->supported_features); + return false; + } + + return true; +} + +static bool check_sum(const void *data) +{ + const struct d0_single_user_mode *sum = data; + u32 nlo = be32_to_cpu(sum->num_locking_objects); + + if (nlo == 0) { + pr_err("Need at least one locking object.\n"); + return false; + } + + pr_debug("Number of locking objects: %d\n", nlo); + + return true; +} + +static u16 get_comid_v100(const void *data) +{ + const struct d0_opal_v100 *v100 = data; + + return be16_to_cpu(v100->baseComID); +} + +static u16 get_comid_v200(const void *data) +{ + const struct d0_opal_v200 *v200 = data; + + return be16_to_cpu(v200->baseComID); +} + +static int opal_send_cmd(struct opal_dev *dev) +{ + return dev->send_recv(dev->data, dev->comid, TCG_SECP_01, + dev->cmd, IO_BUFFER_LENGTH, + true); +} + +static int opal_recv_cmd(struct opal_dev *dev) +{ + return dev->send_recv(dev->data, dev->comid, TCG_SECP_01, + dev->resp, IO_BUFFER_LENGTH, + false); +} + +static int opal_recv_check(struct opal_dev *dev) +{ + size_t buflen = IO_BUFFER_LENGTH; + void *buffer = dev->resp; + struct opal_header *hdr = buffer; + int ret; + + do { + pr_debug("Sent OPAL command: outstanding=%d, minTransfer=%d\n", + hdr->cp.outstandingData, + hdr->cp.minTransfer); + + if (hdr->cp.outstandingData == 0 || + hdr->cp.minTransfer != 0) + return 0; + + memset(buffer, 0, buflen); + ret = opal_recv_cmd(dev); + } while (!ret); + + return ret; +} + +static int opal_send_recv(struct opal_dev *dev, cont_fn *cont) +{ + int ret; + + ret = opal_send_cmd(dev); + if (ret) + return ret; + ret = opal_recv_cmd(dev); + if (ret) + return ret; + ret = opal_recv_check(dev); + if (ret) + return ret; + return cont(dev); +} + +static void check_geometry(struct opal_dev *dev, const void *data) +{ + const struct d0_geometry_features *geo = data; + + dev->align = geo->alignment_granularity; + dev->lowest_lba = geo->lowest_aligned_lba; +} + +static int next(struct opal_dev *dev) +{ + opal_step func; + int error = 0; + + do { + func = dev->funcs[dev->state]; + if (!func) + break; + + error = func(dev); + if (error) { + pr_err("Error on step function: %d with error %d: %s\n", + dev->state, error, + opal_error_to_human(error)); + + /* For each OPAL command we do a discovery0 then we + * start some sort of session. + * If we haven't passed state 1 then there was an error + * on discovery0 or during the attempt to start a + * session. Therefore we shouldn't attempt to terminate + * a session, as one has not yet been created. + */ + if (dev->state > 1) + return end_opal_session_error(dev); + } + dev->state++; + } while (!error); + + return error; +} + +static int opal_discovery0_end(struct opal_dev *dev) +{ + bool found_com_id = false, supported = true, single_user = false; + const struct d0_header *hdr = (struct d0_header *)dev->resp; + const u8 *epos = dev->resp, *cpos = dev->resp; + u16 comid = 0; + + print_buffer(dev->resp, be32_to_cpu(hdr->length)); + + epos += be32_to_cpu(hdr->length); /* end of buffer */ + cpos += sizeof(*hdr); /* current position on buffer */ + + while (cpos < epos && supported) { + const struct d0_features *body = + (const struct d0_features *)cpos; + + switch (be16_to_cpu(body->code)) { + case FC_TPER: + supported = check_tper(body->features); + break; + case FC_SINGLEUSER: + single_user = check_sum(body->features); + break; + case FC_GEOMETRY: + check_geometry(dev, body); + break; + case FC_LOCKING: + case FC_ENTERPRISE: + case FC_DATASTORE: + /* some ignored properties */ + pr_debug("Found OPAL feature description: %d\n", + be16_to_cpu(body->code)); + break; + case FC_OPALV100: + comid = get_comid_v100(body->features); + found_com_id = true; + break; + case FC_OPALV200: + comid = get_comid_v200(body->features); + found_com_id = true; + break; + case 0xbfff ... 0xffff: + /* vendor specific, just ignore */ + break; + default: + pr_debug("OPAL Unknown feature: %d\n", + be16_to_cpu(body->code)); + + } + cpos += body->length + 4; + } + + if (!supported) { + pr_debug("This device is not Opal enabled. Not Supported!\n"); + return -EOPNOTSUPP; + } + + if (!single_user) + pr_debug("Device doesn't support single user mode\n"); + + + if (!found_com_id) { + pr_debug("Could not find OPAL comid for device. Returning early\n"); + return -EOPNOTSUPP;; + } + + dev->comid = comid; + + return 0; +} + +static int opal_discovery0(struct opal_dev *dev) +{ + int ret; + + memset(dev->resp, 0, IO_BUFFER_LENGTH); + dev->comid = OPAL_DISCOVERY_COMID; + ret = opal_recv_cmd(dev); + if (ret) + return ret; + return opal_discovery0_end(dev); +} + +static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok) +{ + if (*err) + return; + if (cmd->pos >= IO_BUFFER_LENGTH - 1) { + pr_err("Error adding u8: end of buffer.\n"); + *err = -ERANGE; + return; + } + cmd->cmd[cmd->pos++] = tok; +} + +static void add_short_atom_header(struct opal_dev *cmd, bool bytestring, + bool has_sign, int len) +{ + u8 atom; + int err = 0; + + atom = SHORT_ATOM_ID; + atom |= bytestring ? SHORT_ATOM_BYTESTRING : 0; + atom |= has_sign ? SHORT_ATOM_SIGNED : 0; + atom |= len & SHORT_ATOM_LEN_MASK; + + add_token_u8(&err, cmd, atom); +} + +static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring, + bool has_sign, int len) +{ + u8 header0; + + header0 = MEDIUM_ATOM_ID; + header0 |= bytestring ? MEDIUM_ATOM_BYTESTRING : 0; + header0 |= has_sign ? MEDIUM_ATOM_SIGNED : 0; + header0 |= (len >> 8) & MEDIUM_ATOM_LEN_MASK; + cmd->cmd[cmd->pos++] = header0; + cmd->cmd[cmd->pos++] = len; +} + +static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) +{ + + size_t len; + int msb; + u8 n; + + if (!(number & ~TINY_ATOM_DATA_MASK)) { + add_token_u8(err, cmd, number); + return; + } + + msb = fls(number); + len = DIV_ROUND_UP(msb, 4); + + if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) { + pr_err("Error adding u64: end of buffer.\n"); + *err = -ERANGE; + return; + } + add_short_atom_header(cmd, false, false, len); + while (len--) { + n = number >> (len * 8); + add_token_u8(err, cmd, n); + } +} + +static void add_token_bytestring(int *err, struct opal_dev *cmd, + const u8 *bytestring, size_t len) +{ + size_t header_len = 1; + bool is_short_atom = true; + + if (*err) + return; + + if (len & ~SHORT_ATOM_LEN_MASK) { + header_len = 2; + is_short_atom = false; + } + + if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) { + pr_err("Error adding bytestring: end of buffer.\n"); + *err = -ERANGE; + return; + } + + if (is_short_atom) + add_short_atom_header(cmd, true, false, len); + else + add_medium_atom_header(cmd, true, false, len); + + memcpy(&cmd->cmd[cmd->pos], bytestring, len); + cmd->pos += len; + +} + +static int build_locking_range(u8 *buffer, size_t length, u8 lr) +{ + if (length > OPAL_UID_LENGTH) { + pr_err("Can't build locking range. Length OOB\n"); + return -ERANGE; + } + + memcpy(buffer, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH); + + if (lr == 0) + return 0; + buffer[5] = LOCKING_RANGE_NON_GLOBAL; + buffer[7] = lr; + + return 0; +} + +static int build_locking_user(u8 *buffer, size_t length, u8 lr) +{ + if (length > OPAL_UID_LENGTH) { + pr_err("Can't build locking range user, Length OOB\n"); + return -ERANGE; + } + + memcpy(buffer, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); + + buffer[7] = lr + 1; + + return 0; +} + +static void set_comid(struct opal_dev *cmd, u16 comid) +{ + struct opal_header *hdr = (struct opal_header *)cmd->cmd; + + hdr->cp.extendedComID[0] = comid >> 8; + hdr->cp.extendedComID[1] = comid; + hdr->cp.extendedComID[2] = 0; + hdr->cp.extendedComID[3] = 0; +} + +static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) +{ + struct opal_header *hdr; + int err = 0; + + add_token_u8(&err, cmd, OPAL_ENDOFDATA); + add_token_u8(&err, cmd, OPAL_STARTLIST); + add_token_u8(&err, cmd, 0); + add_token_u8(&err, cmd, 0); + add_token_u8(&err, cmd, 0); + add_token_u8(&err, cmd, OPAL_ENDLIST); + + if (err) { + pr_err("Error finalizing command.\n"); + return -EFAULT; + } + + hdr = (struct opal_header *) cmd->cmd; + + hdr->pkt.tsn = cpu_to_be32(tsn); + hdr->pkt.hsn = cpu_to_be32(hsn); + + hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr)); + while (cmd->pos % 4) { + if (cmd->pos >= IO_BUFFER_LENGTH) { + pr_err("Error: Buffer overrun\n"); + return -ERANGE; + } + cmd->cmd[cmd->pos++] = 0; + } + hdr->pkt.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp) - + sizeof(hdr->pkt)); + hdr->cp.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp)); + + return 0; +} + +static enum opal_response_token token_type(const struct parsed_resp *resp, + int n) +{ + const struct opal_resp_tok *tok; + + if (n >= resp->num) { + pr_err("Token number doesn't exist: %d, resp: %d\n", + n, resp->num); + return OPAL_DTA_TOKENID_INVALID; + } + + tok = &resp->toks[n]; + if (tok->len == 0) { + pr_err("Token length must be non-zero\n"); + return OPAL_DTA_TOKENID_INVALID; + } + + return tok->type; +} + +/* + * This function returns 0 in case of invalid token. One should call + * token_type() first to find out if the token is valid or not. + */ +static enum opal_token response_get_token(const struct parsed_resp *resp, + int n) +{ + const struct opal_resp_tok *tok; + + if (n >= resp->num) { + pr_err("Token number doesn't exist: %d, resp: %d\n", + n, resp->num); + return 0; + } + + tok = &resp->toks[n]; + if (tok->len == 0) { + pr_err("Token length must be non-zero\n"); + return 0; + } + + return tok->pos[0]; +} + +static size_t response_parse_tiny(struct opal_resp_tok *tok, + const u8 *pos) +{ + tok->pos = pos; + tok->len = 1; + tok->width = OPAL_WIDTH_TINY; + + if (pos[0] & TINY_ATOM_SIGNED) { + tok->type = OPAL_DTA_TOKENID_SINT; + } else { + tok->type = OPAL_DTA_TOKENID_UINT; + tok->stored.u = pos[0] & 0x3f; + } + + return tok->len; +} + +static size_t response_parse_short(struct opal_resp_tok *tok, + const u8 *pos) +{ + tok->pos = pos; + tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1; + tok->width = OPAL_WIDTH_SHORT; + + if (pos[0] & SHORT_ATOM_BYTESTRING) { + tok->type = OPAL_DTA_TOKENID_BYTESTRING; + } else if (pos[0] & SHORT_ATOM_SIGNED) { + tok->type = OPAL_DTA_TOKENID_SINT; + } else { + u64 u_integer = 0; + int i, b = 0; + + tok->type = OPAL_DTA_TOKENID_UINT; + if (tok->len > 9) { + pr_warn("uint64 with more than 8 bytes\n"); + return -EINVAL; + } + for (i = tok->len - 1; i > 0; i--) { + u_integer |= ((u64)pos[i] << (8 * b)); + b++; + } + tok->stored.u = u_integer; + } + + return tok->len; +} + +static size_t response_parse_medium(struct opal_resp_tok *tok, + const u8 *pos) +{ + tok->pos = pos; + tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2; + tok->width = OPAL_WIDTH_MEDIUM; + + if (pos[0] & MEDIUM_ATOM_BYTESTRING) + tok->type = OPAL_DTA_TOKENID_BYTESTRING; + else if (pos[0] & MEDIUM_ATOM_SIGNED) + tok->type = OPAL_DTA_TOKENID_SINT; + else + tok->type = OPAL_DTA_TOKENID_UINT; + + return tok->len; +} + +static size_t response_parse_long(struct opal_resp_tok *tok, + const u8 *pos) +{ + tok->pos = pos; + tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4; + tok->width = OPAL_WIDTH_LONG; + + if (pos[0] & LONG_ATOM_BYTESTRING) + tok->type = OPAL_DTA_TOKENID_BYTESTRING; + else if (pos[0] & LONG_ATOM_SIGNED) + tok->type = OPAL_DTA_TOKENID_SINT; + else + tok->type = OPAL_DTA_TOKENID_UINT; + + return tok->len; +} + +static size_t response_parse_token(struct opal_resp_tok *tok, + const u8 *pos) +{ + tok->pos = pos; + tok->len = 1; + tok->type = OPAL_DTA_TOKENID_TOKEN; + tok->width = OPAL_WIDTH_TOKEN; + + return tok->len; +} + +static int response_parse(const u8 *buf, size_t length, + struct parsed_resp *resp) +{ + const struct opal_header *hdr; + struct opal_resp_tok *iter; + int num_entries = 0; + int total; + size_t token_length; + const u8 *pos; + + if (!buf) + return -EFAULT; + + if (!resp) + return -EFAULT; + + hdr = (struct opal_header *)buf; + pos = buf; + pos += sizeof(*hdr); + + pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n", + be32_to_cpu(hdr->cp.length), + be32_to_cpu(hdr->pkt.length), + be32_to_cpu(hdr->subpkt.length)); + + if (hdr->cp.length == 0 || hdr->pkt.length == 0 || + hdr->subpkt.length == 0) { + pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n", + be32_to_cpu(hdr->cp.length), + be32_to_cpu(hdr->pkt.length), + be32_to_cpu(hdr->subpkt.length)); + print_buffer(pos, sizeof(*hdr)); + return -EINVAL; + } + + if (pos > buf + length) + return -EFAULT; + + iter = resp->toks; + total = be32_to_cpu(hdr->subpkt.length); + print_buffer(pos, total); + while (total > 0) { + if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */ + token_length = response_parse_tiny(iter, pos); + else if (pos[0] <= SHORT_ATOM_BYTE) /* short atom */ + token_length = response_parse_short(iter, pos); + else if (pos[0] <= MEDIUM_ATOM_BYTE) /* medium atom */ + token_length = response_parse_medium(iter, pos); + else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ + token_length = response_parse_long(iter, pos); + else /* TOKEN */ + token_length = response_parse_token(iter, pos); + + if (token_length == -EINVAL) + return -EINVAL; + + pos += token_length; + total -= token_length; + iter++; + num_entries++; + } + + if (num_entries == 0) { + pr_err("Couldn't parse response.\n"); + return -EINVAL; + } + resp->num = num_entries; + + return 0; +} + +static size_t response_get_string(const struct parsed_resp *resp, int n, + const char **store) +{ + *store = NULL; + if (!resp) { + pr_err("Response is NULL\n"); + return 0; + } + + if (n > resp->num) { + pr_err("Response has %d tokens. Can't access %d\n", + resp->num, n); + return 0; + } + + if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) { + pr_err("Token is not a byte string!\n"); + return 0; + } + + *store = resp->toks[n].pos + 1; + return resp->toks[n].len - 1; +} + +static u64 response_get_u64(const struct parsed_resp *resp, int n) +{ + if (!resp) { + pr_err("Response is NULL\n"); + return 0; + } + + if (n > resp->num) { + pr_err("Response has %d tokens. Can't access %d\n", + resp->num, n); + return 0; + } + + if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) { + pr_err("Token is not unsigned it: %d\n", + resp->toks[n].type); + return 0; + } + + if (!(resp->toks[n].width == OPAL_WIDTH_TINY || + resp->toks[n].width == OPAL_WIDTH_SHORT)) { + pr_err("Atom is not short or tiny: %d\n", + resp->toks[n].width); + return 0; + } + + return resp->toks[n].stored.u; +} + +static u8 response_status(const struct parsed_resp *resp) +{ + if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN && + response_get_token(resp, 0) == OPAL_ENDOFSESSION) { + return 0; + } + + if (resp->num < 5) + return DTAERROR_NO_METHOD_STATUS; + + if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN || + token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN || + response_get_token(resp, resp->num - 1) != OPAL_ENDLIST || + response_get_token(resp, resp->num - 5) != OPAL_STARTLIST) + return DTAERROR_NO_METHOD_STATUS; + + return response_get_u64(resp, resp->num - 4); +} + +/* Parses and checks for errors */ +static int parse_and_check_status(struct opal_dev *dev) +{ + int error; + + print_buffer(dev->cmd, dev->pos); + + error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed); + if (error) { + pr_err("Couldn't parse response.\n"); + return error; + } + + return response_status(&dev->parsed); +} + +static void clear_opal_cmd(struct opal_dev *dev) +{ + dev->pos = sizeof(struct opal_header); + memset(dev->cmd, 0, IO_BUFFER_LENGTH); +} + +static int start_opal_session_cont(struct opal_dev *dev) +{ + u32 hsn, tsn; + int error = 0; + + error = parse_and_check_status(dev); + if (error) + return error; + + hsn = response_get_u64(&dev->parsed, 4); + tsn = response_get_u64(&dev->parsed, 5); + + if (hsn == 0 && tsn == 0) { + pr_err("Couldn't authenticate session\n"); + return -EPERM; + } + + dev->hsn = hsn; + dev->tsn = tsn; + return 0; +} + +static void add_suspend_info(struct opal_dev *dev, + struct opal_suspend_data *sus) +{ + struct opal_suspend_data *iter; + + list_for_each_entry(iter, &dev->unlk_lst, node) { + if (iter->lr == sus->lr) { + list_del(&iter->node); + kfree(iter); + break; + } + } + list_add_tail(&sus->node, &dev->unlk_lst); +} + +static int end_session_cont(struct opal_dev *dev) +{ + dev->hsn = 0; + dev->tsn = 0; + return parse_and_check_status(dev); +} + +static int finalize_and_send(struct opal_dev *dev, cont_fn cont) +{ + int ret; + + ret = cmd_finalize(dev, dev->hsn, dev->tsn); + if (ret) { + pr_err("Error finalizing command buffer: %d\n", ret); + return ret; + } + + print_buffer(dev->cmd, dev->pos); + + return opal_send_recv(dev, cont); +} + +static int gen_key(struct opal_dev *dev) +{ + const u8 *method; + u8 uid[OPAL_UID_LENGTH]; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); + method = opalmethod[OPAL_GENKEY]; + kfree(dev->prev_data); + dev->prev_data = NULL; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_GENKEY], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building gen key command\n"); + return err; + + } + return finalize_and_send(dev, parse_and_check_status); +} + +static int get_active_key_cont(struct opal_dev *dev) +{ + const char *activekey; + size_t keylen; + int error = 0; + + error = parse_and_check_status(dev); + if (error) + return error; + keylen = response_get_string(&dev->parsed, 4, &activekey); + if (!activekey) { + pr_err("%s: Couldn't extract the Activekey from the response\n", + __func__); + return OPAL_INVAL_PARAM; + } + dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL); + + if (!dev->prev_data) + return -ENOMEM; + + dev->prev_d_len = keylen; + + return 0; +} + +static int get_active_key(struct opal_dev *dev) +{ + u8 uid[OPAL_UID_LENGTH]; + int err = 0; + u8 *lr; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + lr = dev->func_data[dev->state]; + + err = build_locking_range(uid, sizeof(uid), *lr); + if (err) + return err; + + err = 0; + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* startCloumn */ + add_token_u8(&err, dev, 10); /* ActiveKey */ + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 4); /* endColumn */ + add_token_u8(&err, dev, 10); /* ActiveKey */ + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + if (err) { + pr_err("Error building get active key command\n"); + return err; + } + + return finalize_and_send(dev, get_active_key_cont); +} + +static int generic_lr_enable_disable(struct opal_dev *dev, + u8 *uid, bool rle, bool wle, + bool rl, bool wl) +{ + int err = 0; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 5); /* ReadLockEnabled */ + add_token_u8(&err, dev, rle); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 6); /* WriteLockEnabled */ + add_token_u8(&err, dev, wle); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_READLOCKED); + add_token_u8(&err, dev, rl); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_WRITELOCKED); + add_token_u8(&err, dev, wl); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + return err; +} + +static inline int enable_global_lr(struct opal_dev *dev, u8 *uid, + struct opal_user_lr_setup *setup) +{ + int err; + + err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE, + 0, 0); + if (err) + pr_err("Failed to create enable global lr command\n"); + return err; +} + +static int setup_locking_range(struct opal_dev *dev) +{ + u8 uid[OPAL_UID_LENGTH]; + struct opal_user_lr_setup *setup; + u8 lr; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + setup = dev->func_data[dev->state]; + lr = setup->session.opal_key.lr; + err = build_locking_range(uid, sizeof(uid), lr); + if (err) + return err; + + if (lr == 0) + err = enable_global_lr(dev, uid, setup); + else { + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], + OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* Ranges Start */ + add_token_u64(&err, dev, setup->range_start); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 4); /* Ranges length */ + add_token_u64(&err, dev, setup->range_length); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 5); /*ReadLockEnabled */ + add_token_u64(&err, dev, !!setup->RLE); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 6); /*WriteLockEnabled*/ + add_token_u64(&err, dev, !!setup->WLE); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + } + if (err) { + pr_err("Error building Setup Locking range command.\n"); + return err; + + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int start_generic_opal_session(struct opal_dev *dev, + enum opal_uid auth, + enum opal_uid sp_type, + const char *key, + u8 key_len) +{ + u32 hsn; + int err = 0; + + if (key == NULL && auth != OPAL_ANYBODY_UID) { + pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \ + "Challenge, and not as the Anybody UID\n", __func__); + return OPAL_INVAL_PARAM; + } + + clear_opal_cmd(dev); + + set_comid(dev, dev->comid); + hsn = GENERIC_HOST_SESSION_NUM; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u64(&err, dev, hsn); + add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH); + add_token_u8(&err, dev, 1); + + switch (auth) { + case OPAL_ANYBODY_UID: + add_token_u8(&err, dev, OPAL_ENDLIST); + break; + case OPAL_ADMIN1_UID: + case OPAL_SID_UID: + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 0); /* HostChallenge */ + add_token_bytestring(&err, dev, key, key_len); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* HostSignAuth */ + add_token_bytestring(&err, dev, opaluid[auth], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + break; + default: + pr_err("Cannot start Admin SP session with auth %d\n", auth); + return OPAL_INVAL_PARAM; + } + + if (err) { + pr_err("Error building start adminsp session command.\n"); + return err; + } + + return finalize_and_send(dev, start_opal_session_cont); +} + +static int start_anybodyASP_opal_session(struct opal_dev *dev) +{ + return start_generic_opal_session(dev, OPAL_ANYBODY_UID, + OPAL_ADMINSP_UID, NULL, 0); +} + +static int start_SIDASP_opal_session(struct opal_dev *dev) +{ + int ret; + const u8 *key = dev->prev_data; + struct opal_key *okey; + + if (!key) { + okey = dev->func_data[dev->state]; + ret = start_generic_opal_session(dev, OPAL_SID_UID, + OPAL_ADMINSP_UID, + okey->key, + okey->key_len); + } else { + ret = start_generic_opal_session(dev, OPAL_SID_UID, + OPAL_ADMINSP_UID, + key, dev->prev_d_len); + kfree(key); + dev->prev_data = NULL; + } + return ret; +} + +static inline int start_admin1LSP_opal_session(struct opal_dev *dev) +{ + struct opal_key *key = dev->func_data[dev->state]; + + return start_generic_opal_session(dev, OPAL_ADMIN1_UID, + OPAL_LOCKINGSP_UID, + key->key, key->key_len); +} + +static int start_auth_opal_session(struct opal_dev *dev) +{ + u8 lk_ul_user[OPAL_UID_LENGTH]; + int err = 0; + + struct opal_session_info *session = dev->func_data[dev->state]; + size_t keylen = session->opal_key.key_len; + u8 *key = session->opal_key.key; + u32 hsn = GENERIC_HOST_SESSION_NUM; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + if (session->sum) { + err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), + session->opal_key.lr); + if (err) + return err; + + } else if (session->who != OPAL_ADMIN1 && !session->sum) { + err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), + session->who - 1); + if (err) + return err; + } else + memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], + OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u64(&err, dev, hsn); + add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, 1); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 0); + add_token_bytestring(&err, dev, key, keylen); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); + add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building STARTSESSION command.\n"); + return err; + } + + return finalize_and_send(dev, start_opal_session_cont); +} + +static int revert_tper(struct opal_dev *dev) +{ + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_ADMINSP_UID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_REVERT], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + if (err) { + pr_err("Error building REVERT TPER command.\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int internal_activate_user(struct opal_dev *dev) +{ + struct opal_session_info *session = dev->func_data[dev->state]; + u8 uid[OPAL_UID_LENGTH]; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); + uid[7] = session->who; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 5); /* Enabled */ + add_token_u8(&err, dev, OPAL_TRUE); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building Activate UserN command.\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int erase_locking_range(struct opal_dev *dev) +{ + struct opal_session_info *session; + u8 uid[OPAL_UID_LENGTH]; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + session = dev->func_data[dev->state]; + + if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) + return -ERANGE; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_ERASE], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building Erase Locking Range Command.\n"); + return err; + } + return finalize_and_send(dev, parse_and_check_status); +} + +static int set_mbr_done(struct opal_dev *dev) +{ + u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state]; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 2); /* Done */ + add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */ + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error Building set MBR Done command\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int set_mbr_enable_disable(struct opal_dev *dev) +{ + u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state]; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 1); + add_token_u8(&err, dev, mbr_en_dis); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error Building set MBR done command\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, + struct opal_dev *dev) +{ + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, cpin_uid, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], + OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* PIN */ + add_token_bytestring(&err, dev, key, key_len); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + return err; +} + +static int set_new_pw(struct opal_dev *dev) +{ + u8 cpin_uid[OPAL_UID_LENGTH]; + struct opal_session_info *usr = dev->func_data[dev->state]; + + + memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH); + + if (usr->who != OPAL_ADMIN1) { + cpin_uid[5] = 0x03; + if (usr->sum) + cpin_uid[7] = usr->opal_key.lr + 1; + else + cpin_uid[7] = usr->who; + } + + if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len, + cpin_uid, dev)) { + pr_err("Error building set password command.\n"); + return -ERANGE; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int set_sid_cpin_pin(struct opal_dev *dev) +{ + u8 cpin_uid[OPAL_UID_LENGTH]; + struct opal_key *key = dev->func_data[dev->state]; + + memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH); + + if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) { + pr_err("Error building Set SID cpin\n"); + return -ERANGE; + } + return finalize_and_send(dev, parse_and_check_status); +} + +static int add_user_to_lr(struct opal_dev *dev) +{ + u8 lr_buffer[OPAL_UID_LENGTH]; + u8 user_uid[OPAL_UID_LENGTH]; + struct opal_lock_unlock *lkul; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + lkul = dev->func_data[dev->state]; + + memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], + OPAL_UID_LENGTH); + + if (lkul->l_state == OPAL_RW) + memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED], + OPAL_UID_LENGTH); + + lr_buffer[7] = lkul->session.opal_key.lr; + + memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); + + user_uid[7] = lkul->session.who; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], + OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); + + add_token_u8(&err, dev, OPAL_STARTLIST); + + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_bytestring(&err, dev, + opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF], + OPAL_UID_LENGTH/2); + add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_ENDNAME); + + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_bytestring(&err, dev, + opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF], + OPAL_UID_LENGTH/2); + add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_ENDNAME); + + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE], + OPAL_UID_LENGTH/2); + add_token_u8(&err, dev, 1); + add_token_u8(&err, dev, OPAL_ENDNAME); + + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building add user to locking range command.\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int lock_unlock_locking_range(struct opal_dev *dev) +{ + u8 lr_buffer[OPAL_UID_LENGTH]; + const u8 *method; + struct opal_lock_unlock *lkul; + u8 read_locked = 1, write_locked = 1; + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + method = opalmethod[OPAL_SET]; + lkul = dev->func_data[dev->state]; + if (build_locking_range(lr_buffer, sizeof(lr_buffer), + lkul->session.opal_key.lr) < 0) + return -ERANGE; + + switch (lkul->l_state) { + case OPAL_RO: + read_locked = 0; + write_locked = 1; + break; + case OPAL_RW: + read_locked = 0; + write_locked = 0; + break; + case OPAL_LK: + /* vars are initalized to locked */ + break; + default: + pr_err("Tried to set an invalid locking state... returning to uland\n"); + return OPAL_INVAL_PARAM; + } + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + add_token_u8(&err, dev, OPAL_STARTLIST); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_READLOCKED); + add_token_u8(&err, dev, read_locked); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_WRITELOCKED); + add_token_u8(&err, dev, write_locked); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building SET command.\n"); + return err; + } + return finalize_and_send(dev, parse_and_check_status); +} + + +static int lock_unlock_locking_range_sum(struct opal_dev *dev) +{ + u8 lr_buffer[OPAL_UID_LENGTH]; + u8 read_locked = 1, write_locked = 1; + const u8 *method; + struct opal_lock_unlock *lkul; + int ret; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + method = opalmethod[OPAL_SET]; + lkul = dev->func_data[dev->state]; + if (build_locking_range(lr_buffer, sizeof(lr_buffer), + lkul->session.opal_key.lr) < 0) + return -ERANGE; + + switch (lkul->l_state) { + case OPAL_RO: + read_locked = 0; + write_locked = 1; + break; + case OPAL_RW: + read_locked = 0; + write_locked = 0; + break; + case OPAL_LK: + /* vars are initalized to locked */ + break; + default: + pr_err("Tried to set an invalid locking state.\n"); + return OPAL_INVAL_PARAM; + } + ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1, + read_locked, write_locked); + + if (ret < 0) { + pr_err("Error building SET command.\n"); + return ret; + } + return finalize_and_send(dev, parse_and_check_status); +} + +static int activate_lsp(struct opal_dev *dev) +{ + struct opal_lr_act *opal_act; + u8 user_lr[OPAL_UID_LENGTH]; + u8 uint_3 = 0x83; + int err = 0, i; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + opal_act = dev->func_data[dev->state]; + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_ACTIVATE], + OPAL_UID_LENGTH); + + + if (opal_act->sum) { + err = build_locking_range(user_lr, sizeof(user_lr), + opal_act->lr[0]); + if (err) + return err; + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, uint_3); + add_token_u8(&err, dev, 6); + add_token_u8(&err, dev, 0); + add_token_u8(&err, dev, 0); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH); + for (i = 1; i < opal_act->num_lrs; i++) { + user_lr[7] = opal_act->lr[i]; + add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH); + } + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + } else { + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + } + + if (err) { + pr_err("Error building Activate LockingSP command.\n"); + return err; + } + + return finalize_and_send(dev, parse_and_check_status); +} + +static int get_lsp_lifecycle_cont(struct opal_dev *dev) +{ + u8 lc_status; + int error = 0; + + error = parse_and_check_status(dev); + if (error) + return error; + + lc_status = response_get_u64(&dev->parsed, 4); + /* 0x08 is Manufacured Inactive */ + /* 0x09 is Manufactured */ + if (lc_status != OPAL_MANUFACTURED_INACTIVE) { + pr_err("Couldn't determine the status of the Lifcycle state\n"); + return -ENODEV; + } + + return 0; +} + +/* Determine if we're in the Manufactured Inactive or Active state */ +static int get_lsp_lifecycle(struct opal_dev *dev) +{ + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTLIST); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* Start Column */ + add_token_u8(&err, dev, 6); /* Lifecycle Column */ + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 4); /* End Column */ + add_token_u8(&err, dev, 6); /* Lifecycle Column */ + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error Building GET Lifecycle Status command\n"); + return err; + } + + return finalize_and_send(dev, get_lsp_lifecycle_cont); +} + +static int get_msid_cpin_pin_cont(struct opal_dev *dev) +{ + const char *msid_pin; + size_t strlen; + int error = 0; + + error = parse_and_check_status(dev); + if (error) + return error; + + strlen = response_get_string(&dev->parsed, 4, &msid_pin); + if (!msid_pin) { + pr_err("%s: Couldn't extract PIN from response\n", __func__); + return OPAL_INVAL_PARAM; + } + + dev->prev_data = kmemdup(msid_pin, strlen, GFP_KERNEL); + if (!dev->prev_data) + return -ENOMEM; + + dev->prev_d_len = strlen; + + return 0; +} + +static int get_msid_cpin_pin(struct opal_dev *dev) +{ + int err = 0; + + clear_opal_cmd(dev); + set_comid(dev, dev->comid); + + + add_token_u8(&err, dev, OPAL_CALL); + add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID], + OPAL_UID_LENGTH); + add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTLIST); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 3); /* Start Column */ + add_token_u8(&err, dev, 3); /* PIN */ + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, 4); /* End Column */ + add_token_u8(&err, dev, 3); /* Lifecycle Column */ + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_ENDLIST); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_err("Error building Get MSID CPIN PIN command.\n"); + return err; + } + + return finalize_and_send(dev, get_msid_cpin_pin_cont); +} + +static int build_end_opal_session(struct opal_dev *dev) +{ + int err = 0; + + clear_opal_cmd(dev); + + set_comid(dev, dev->comid); + add_token_u8(&err, dev, OPAL_ENDOFSESSION); + return err; +} + +static int end_opal_session(struct opal_dev *dev) +{ + int ret = build_end_opal_session(dev); + + if (ret < 0) + return ret; + return finalize_and_send(dev, end_session_cont); +} + +static int end_opal_session_error(struct opal_dev *dev) +{ + const opal_step error_end_session[] = { + end_opal_session, + NULL, + }; + dev->funcs = error_end_session; + dev->state = 0; + return next(dev); +} + +static inline void setup_opal_dev(struct opal_dev *dev, + const opal_step *funcs) +{ + dev->state = 0; + dev->funcs = funcs; + dev->tsn = 0; + dev->hsn = 0; + dev->func_data = NULL; + dev->prev_data = NULL; +} + +static int check_opal_support(struct opal_dev *dev) +{ + static const opal_step funcs[] = { + opal_discovery0, + NULL + }; + int ret; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, funcs); + ret = next(dev); + dev->supported = !ret; + mutex_unlock(&dev->dev_lock); + return ret; +} + +struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) +{ + struct opal_dev *dev; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + + INIT_LIST_HEAD(&dev->unlk_lst); + mutex_init(&dev->dev_lock); + dev->data = data; + dev->send_recv = send_recv; + if (check_opal_support(dev) != 0) { + pr_debug("Opal is not supported on this device\n"); + kfree(dev); + return NULL; + } + return dev; +} +EXPORT_SYMBOL(init_opal_dev); + +static int opal_secure_erase_locking_range(struct opal_dev *dev, + struct opal_session_info *opal_session) +{ + void *data[3] = { NULL }; + static const opal_step erase_funcs[] = { + opal_discovery0, + start_auth_opal_session, + get_active_key, + gen_key, + end_opal_session, + NULL, + }; + int ret; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, erase_funcs); + + dev->func_data = data; + dev->func_data[1] = opal_session; + dev->func_data[2] = &opal_session->opal_key.lr; + + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_erase_locking_range(struct opal_dev *dev, + struct opal_session_info *opal_session) +{ + void *data[3] = { NULL }; + static const opal_step erase_funcs[] = { + opal_discovery0, + start_auth_opal_session, + erase_locking_range, + end_opal_session, + NULL, + }; + int ret; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, erase_funcs); + + dev->func_data = data; + dev->func_data[1] = opal_session; + dev->func_data[2] = opal_session; + + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, + struct opal_mbr_data *opal_mbr) +{ + void *func_data[6] = { NULL }; + static const opal_step mbr_funcs[] = { + opal_discovery0, + start_admin1LSP_opal_session, + set_mbr_done, + end_opal_session, + start_admin1LSP_opal_session, + set_mbr_enable_disable, + end_opal_session, + NULL, + }; + int ret; + + if (opal_mbr->enable_disable != OPAL_MBR_ENABLE && + opal_mbr->enable_disable != OPAL_MBR_DISABLE) + return -EINVAL; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, mbr_funcs); + dev->func_data = func_data; + dev->func_data[1] = &opal_mbr->key; + dev->func_data[2] = &opal_mbr->enable_disable; + dev->func_data[4] = &opal_mbr->key; + dev->func_data[5] = &opal_mbr->enable_disable; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) +{ + struct opal_suspend_data *suspend; + + suspend = kzalloc(sizeof(*suspend), GFP_KERNEL); + if (!suspend) + return -ENOMEM; + + suspend->unlk = *lk_unlk; + suspend->lr = lk_unlk->session.opal_key.lr; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, NULL); + add_suspend_info(dev, suspend); + mutex_unlock(&dev->dev_lock); + return 0; +} + +static int opal_add_user_to_lr(struct opal_dev *dev, + struct opal_lock_unlock *lk_unlk) +{ + void *func_data[3] = { NULL }; + static const opal_step funcs[] = { + opal_discovery0, + start_admin1LSP_opal_session, + add_user_to_lr, + end_opal_session, + NULL + }; + int ret; + + if (lk_unlk->l_state != OPAL_RO && + lk_unlk->l_state != OPAL_RW) { + pr_err("Locking state was not RO or RW\n"); + return -EINVAL; + } + if (lk_unlk->session.who < OPAL_USER1 && + lk_unlk->session.who > OPAL_USER9) { + pr_err("Authority was not within the range of users: %d\n", + lk_unlk->session.who); + return -EINVAL; + } + if (lk_unlk->session.sum) { + pr_err("%s not supported in sum. Use setup locking range\n", + __func__); + return -EINVAL; + } + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, funcs); + dev->func_data = func_data; + dev->func_data[1] = &lk_unlk->session.opal_key; + dev->func_data[2] = lk_unlk; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) +{ + void *data[2] = { NULL }; + static const opal_step revert_funcs[] = { + opal_discovery0, + start_SIDASP_opal_session, + revert_tper, /* controller will terminate session */ + NULL, + }; + int ret; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, revert_funcs); + dev->func_data = data; + dev->func_data[1] = opal; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int __opal_lock_unlock_sum(struct opal_dev *dev) +{ + static const opal_step ulk_funcs_sum[] = { + opal_discovery0, + start_auth_opal_session, + lock_unlock_locking_range_sum, + end_opal_session, + NULL + }; + + dev->funcs = ulk_funcs_sum; + return next(dev); +} + +static int __opal_lock_unlock(struct opal_dev *dev) +{ + static const opal_step _unlock_funcs[] = { + opal_discovery0, + start_auth_opal_session, + lock_unlock_locking_range, + end_opal_session, + NULL + }; + + dev->funcs = _unlock_funcs; + return next(dev); +} + +static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) +{ + void *func_data[3] = { NULL }; + int ret; + + if (lk_unlk->session.who < OPAL_ADMIN1 || + lk_unlk->session.who > OPAL_USER9) + return -EINVAL; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, NULL); + dev->func_data = func_data; + dev->func_data[1] = &lk_unlk->session; + dev->func_data[2] = lk_unlk; + + if (lk_unlk->session.sum) + ret = __opal_lock_unlock_sum(dev); + else + ret = __opal_lock_unlock(dev); + + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) +{ + static const opal_step owner_funcs[] = { + opal_discovery0, + start_anybodyASP_opal_session, + get_msid_cpin_pin, + end_opal_session, + start_SIDASP_opal_session, + set_sid_cpin_pin, + end_opal_session, + NULL + }; + void *data[6] = { NULL }; + int ret; + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, owner_funcs); + dev->func_data = data; + dev->func_data[4] = opal; + dev->func_data[5] = opal; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act) +{ + void *data[4] = { NULL }; + static const opal_step active_funcs[] = { + opal_discovery0, + start_SIDASP_opal_session, /* Open session as SID auth */ + get_lsp_lifecycle, + activate_lsp, + end_opal_session, + NULL + }; + int ret; + + if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS) + return -EINVAL; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, active_funcs); + dev->func_data = data; + dev->func_data[1] = &opal_lr_act->key; + dev->func_data[3] = opal_lr_act; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_setup_locking_range(struct opal_dev *dev, + struct opal_user_lr_setup *opal_lrs) +{ + void *data[3] = { NULL }; + static const opal_step lr_funcs[] = { + opal_discovery0, + start_auth_opal_session, + setup_locking_range, + end_opal_session, + NULL, + }; + int ret; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, lr_funcs); + dev->func_data = data; + dev->func_data[1] = &opal_lrs->session; + dev->func_data[2] = opal_lrs; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) +{ + static const opal_step pw_funcs[] = { + opal_discovery0, + start_auth_opal_session, + set_new_pw, + end_opal_session, + NULL + }; + void *data[3] = { NULL }; + int ret; + + if (opal_pw->session.who < OPAL_ADMIN1 || + opal_pw->session.who > OPAL_USER9 || + opal_pw->new_user_pw.who < OPAL_ADMIN1 || + opal_pw->new_user_pw.who > OPAL_USER9) + return -EINVAL; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, pw_funcs); + dev->func_data = data; + dev->func_data[1] = (void *) &opal_pw->session; + dev->func_data[2] = (void *) &opal_pw->new_user_pw; + + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +static int opal_activate_user(struct opal_dev *dev, + struct opal_session_info *opal_session) +{ + static const opal_step act_funcs[] = { + opal_discovery0, + start_admin1LSP_opal_session, + internal_activate_user, + end_opal_session, + NULL + }; + void *data[3] = { NULL }; + int ret; + + /* We can't activate Admin1 it's active as manufactured */ + if (opal_session->who < OPAL_USER1 && + opal_session->who > OPAL_USER9) { + pr_err("Who was not a valid user: %d\n", opal_session->who); + return -EINVAL; + } + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, act_funcs); + dev->func_data = data; + dev->func_data[1] = &opal_session->opal_key; + dev->func_data[2] = opal_session; + ret = next(dev); + mutex_unlock(&dev->dev_lock); + return ret; +} + +bool opal_unlock_from_suspend(struct opal_dev *dev) +{ + struct opal_suspend_data *suspend; + void *func_data[3] = { NULL }; + bool was_failure = false; + int ret = 0; + + if (!dev) + return false; + if (!dev->supported) + return false; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev, NULL); + dev->func_data = func_data; + + list_for_each_entry(suspend, &dev->unlk_lst, node) { + dev->state = 0; + dev->func_data[1] = &suspend->unlk.session; + dev->func_data[2] = &suspend->unlk; + dev->tsn = 0; + dev->hsn = 0; + + if (suspend->unlk.session.sum) + ret = __opal_lock_unlock_sum(dev); + else + ret = __opal_lock_unlock(dev); + if (ret) { + pr_warn("Failed to unlock LR %hhu with sum %d\n", + suspend->unlk.session.opal_key.lr, + suspend->unlk.session.sum); + was_failure = true; + } + } + mutex_unlock(&dev->dev_lock); + return was_failure; +} +EXPORT_SYMBOL(opal_unlock_from_suspend); + +int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) +{ + void *p; + int ret = -ENOTTY; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!dev) + return -ENOTSUPP; + if (!dev->supported) { + pr_err("Not supported\n"); + return -ENOTSUPP; + } + + p = memdup_user(arg, _IOC_SIZE(cmd)); + if (IS_ERR(p)) + return PTR_ERR(p); + + switch (cmd) { + case IOC_OPAL_SAVE: + ret = opal_save(dev, p); + break; + case IOC_OPAL_LOCK_UNLOCK: + ret = opal_lock_unlock(dev, p); + break; + case IOC_OPAL_TAKE_OWNERSHIP: + ret = opal_take_ownership(dev, p); + break; + case IOC_OPAL_ACTIVATE_LSP: + ret = opal_activate_lsp(dev, p); + break; + case IOC_OPAL_SET_PW: + ret = opal_set_new_pw(dev, p); + break; + case IOC_OPAL_ACTIVATE_USR: + ret = opal_activate_user(dev, p); + break; + case IOC_OPAL_REVERT_TPR: + ret = opal_reverttper(dev, p); + break; + case IOC_OPAL_LR_SETUP: + ret = opal_setup_locking_range(dev, p); + break; + case IOC_OPAL_ADD_USR_TO_LR: + ret = opal_add_user_to_lr(dev, p); + break; + case IOC_OPAL_ENABLE_DISABLE_MBR: + ret = opal_enable_disable_shadow_mbr(dev, p); + break; + case IOC_OPAL_ERASE_LR: + ret = opal_erase_locking_range(dev, p); + break; + case IOC_OPAL_SECURE_ERASE_LR: + ret = opal_secure_erase_locking_range(dev, p); + break; + default: + pr_warn("No such Opal Ioctl %u\n", cmd); + } + + kfree(p); + return ret; +} +EXPORT_SYMBOL_GPL(sed_ioctl); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1f863e7..c771d4c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1265,13 +1265,13 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev) */ static int atapi_drain_needed(struct request *rq) { - if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC)) + if (likely(!blk_rq_is_passthrough(rq))) return 0; if (!blk_rq_bytes(rq) || op_is_write(req_op(rq))) return 0; - return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC; + return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC; } static int ata_scsi_dev_config(struct scsi_device *sdev, diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 223ff2f..f744de7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -69,6 +69,7 @@ config AMIGA_Z2RAM config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST + select BLK_SCSI_REQUEST # only for the generic cdrom code help A standard SEGA Dreamcast comes with a modified CD ROM drive called a "GD-ROM" by SEGA to signify it is capable of reading special disks @@ -114,6 +115,7 @@ config BLK_CPQ_CISS_DA tristate "Compaq Smart Array 5xxx support" depends on PCI select CHECK_SIGNATURE + select BLK_SCSI_REQUEST help This is the driver for Compaq Smart Array 5xxx controllers. Everyone using these boards should say Y here. @@ -386,6 +388,7 @@ config BLK_DEV_RAM_DAX config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML + select BLK_SCSI_REQUEST help Note: This driver is deprecated and will be removed from the kernel in the near future! @@ -501,6 +504,16 @@ config VIRTIO_BLK This is the virtual block driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. +config VIRTIO_BLK_SCSI + bool "SCSI passthrough request for the Virtio block driver" + depends on VIRTIO_BLK + select BLK_SCSI_REQUEST + ---help--- + Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on + virtio-blk devices. This is only supported for the legacy + virtio protocol and not enabled by default by any hypervisor. + Your probably want to virtio-scsi instead. + config BLK_DEV_HD bool "Very old hard disk (MFM/RLL/IDE) driver" depends on HAVE_IDE diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index ec9d861..027b876 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -396,8 +396,8 @@ aoeblk_gdalloc(void *vp) WARN_ON(d->gd); WARN_ON(d->flags & DEVFL_UP); blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); - q->backing_dev_info.name = "aoe"; - q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_SIZE; + q->backing_dev_info->name = "aoe"; + q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE; d->bufpool = mp; d->blkq = gd->queue = q; q->queuedata = d; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e5c5b8e..27d6137 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -52,6 +52,7 @@ #include <scsi/scsi.h> #include <scsi/sg.h> #include <scsi/scsi_ioctl.h> +#include <scsi/scsi_request.h> #include <linux/cdrom.h> #include <linux/scatterlist.h> #include <linux/kthread.h> @@ -1853,8 +1854,8 @@ static void cciss_softirq_done(struct request *rq) dev_dbg(&h->pdev->dev, "Done with %p\n", rq); /* set the residual count for pc requests */ - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) - rq->resid_len = c->err_info->ResidualCnt; + if (blk_rq_is_passthrough(rq)) + scsi_req(rq)->resid_len = c->err_info->ResidualCnt; blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); @@ -1941,9 +1942,16 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol, static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, int drv_index) { - disk->queue = blk_init_queue(do_cciss_request, &h->lock); + disk->queue = blk_alloc_queue(GFP_KERNEL); if (!disk->queue) goto init_queue_failure; + + disk->queue->cmd_size = sizeof(struct scsi_request); + disk->queue->request_fn = do_cciss_request; + disk->queue->queue_lock = &h->lock; + if (blk_init_allocated_queue(disk->queue) < 0) + goto cleanup_queue; + sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index); disk->major = h->major; disk->first_minor = drv_index << NWD_SHIFT; @@ -3075,7 +3083,7 @@ static inline int evaluate_target_status(ctlr_info_t *h, driver_byte = DRIVER_OK; msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */ - if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) + if (blk_rq_is_passthrough(cmd->rq)) host_byte = DID_PASSTHROUGH; else host_byte = DID_OK; @@ -3084,7 +3092,7 @@ static inline int evaluate_target_status(ctlr_info_t *h, host_byte, driver_byte); if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) { - if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) + if (!blk_rq_is_passthrough(cmd->rq)) dev_warn(&h->pdev->dev, "cmd %p " "has SCSI Status 0x%x\n", cmd, cmd->err_info->ScsiStatus); @@ -3095,31 +3103,23 @@ static inline int evaluate_target_status(ctlr_info_t *h, sense_key = 0xf & cmd->err_info->SenseInfo[2]; /* no status or recovered error */ if (((sense_key == 0x0) || (sense_key == 0x1)) && - (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)) + !blk_rq_is_passthrough(cmd->rq)) error_value = 0; if (check_for_unit_attention(h, cmd)) { - *retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC); + *retry_cmd = !blk_rq_is_passthrough(cmd->rq); return 0; } /* Not SG_IO or similar? */ - if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) { + if (!blk_rq_is_passthrough(cmd->rq)) { if (error_value != 0) dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION" " sense key = 0x%x\n", cmd, sense_key); return error_value; } - /* SG_IO or similar, copy sense data back */ - if (cmd->rq->sense) { - if (cmd->rq->sense_len > cmd->err_info->SenseLen) - cmd->rq->sense_len = cmd->err_info->SenseLen; - memcpy(cmd->rq->sense, cmd->err_info->SenseInfo, - cmd->rq->sense_len); - } else - cmd->rq->sense_len = 0; - + scsi_req(cmd->rq)->sense_len = cmd->err_info->SenseLen; return error_value; } @@ -3146,15 +3146,14 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, rq->errors = evaluate_target_status(h, cmd, &retry_cmd); break; case CMD_DATA_UNDERRUN: - if (cmd->rq->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(cmd->rq)) { dev_warn(&h->pdev->dev, "cmd %p has" " completed with data underrun " "reported\n", cmd); - cmd->rq->resid_len = cmd->err_info->ResidualCnt; } break; case CMD_DATA_OVERRUN: - if (cmd->rq->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(cmd->rq)) dev_warn(&h->pdev->dev, "cciss: cmd %p has" " completed with data overrun " "reported\n", cmd); @@ -3164,7 +3163,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "reported invalid\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_PROTOCOL_ERR: @@ -3172,7 +3171,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "protocol error\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_HARDWARE_ERR: @@ -3180,7 +3179,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, " hardware error\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_CONNECTION_LOST: @@ -3188,7 +3187,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "connection lost\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_ABORTED: @@ -3196,7 +3195,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "aborted\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); break; case CMD_ABORT_FAILED: @@ -3204,7 +3203,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "abort failed\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNSOLICITED_ABORT: @@ -3219,21 +3218,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, "%p retried too many times\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); break; case CMD_TIMEOUT: dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNABORTABLE: dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; default: @@ -3242,7 +3241,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, cmd->err_info->CommandStatus); rq->errors = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, - (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); } @@ -3395,7 +3394,9 @@ static void do_cciss_request(struct request_queue *q) c->Header.SGList = h->max_cmd_sgentries; set_performant_mode(h, c); - if (likely(creq->cmd_type == REQ_TYPE_FS)) { + switch (req_op(creq)) { + case REQ_OP_READ: + case REQ_OP_WRITE: if(h->cciss_read == CCISS_READ_10) { c->Request.CDB[1] = 0; c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */ @@ -3425,12 +3426,16 @@ static void do_cciss_request(struct request_queue *q) c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff; c->Request.CDB[14] = c->Request.CDB[15] = 0; } - } else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) { - c->Request.CDBLen = creq->cmd_len; - memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB); - } else { + break; + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + c->Request.CDBLen = scsi_req(creq)->cmd_len; + memcpy(c->Request.CDB, scsi_req(creq)->cmd, BLK_MAX_CDB); + scsi_req(creq)->sense = c->err_info->SenseInfo; + break; + default: dev_warn(&h->pdev->dev, "bad request type %d\n", - creq->cmd_type); + creq->cmd_flags); BUG(); } @@ -4074,41 +4079,27 @@ clean_up: static void cciss_interrupt_mode(ctlr_info_t *h) { -#ifdef CONFIG_PCI_MSI - int err; - struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1}, - {0, 2}, {0, 3} - }; + int ret; /* Some boards advertise MSI but don't really support it */ if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) goto default_int_mode; - if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { - err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); - if (!err) { - h->intr[0] = cciss_msix_entries[0].vector; - h->intr[1] = cciss_msix_entries[1].vector; - h->intr[2] = cciss_msix_entries[2].vector; - h->intr[3] = cciss_msix_entries[3].vector; - h->msix_vector = 1; - return; - } else { - dev_warn(&h->pdev->dev, - "MSI-X init failed %d\n", err); - } - } - if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) { - if (!pci_enable_msi(h->pdev)) - h->msi_vector = 1; - else - dev_warn(&h->pdev->dev, "MSI init failed\n"); + ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX); + if (ret >= 0) { + h->intr[0] = pci_irq_vector(h->pdev, 0); + h->intr[1] = pci_irq_vector(h->pdev, 1); + h->intr[2] = pci_irq_vector(h->pdev, 2); + h->intr[3] = pci_irq_vector(h->pdev, 3); + return; } + + ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI); + default_int_mode: -#endif /* CONFIG_PCI_MSI */ /* if we get here we're going to use the default interrupt mode */ - h->intr[h->intr_mode] = h->pdev->irq; + h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0); return; } @@ -4888,7 +4879,7 @@ static int cciss_request_irq(ctlr_info_t *h, irqreturn_t (*msixhandler)(int, void *), irqreturn_t (*intxhandler)(int, void *)) { - if (h->msix_vector || h->msi_vector) { + if (h->pdev->msi_enabled || h->pdev->msix_enabled) { if (!request_irq(h->intr[h->intr_mode], msixhandler, 0, h->devname, h)) return 0; @@ -4934,12 +4925,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) int ctlr = h->ctlr; free_irq(h->intr[h->intr_mode], h); -#ifdef CONFIG_PCI_MSI - if (h->msix_vector) - pci_disable_msix(h->pdev); - else if (h->msi_vector) - pci_disable_msi(h->pdev); -#endif /* CONFIG_PCI_MSI */ + pci_free_irq_vectors(h->pdev); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); cciss_free_scatterlists(h); cciss_free_cmd_pool(h); @@ -5295,12 +5281,7 @@ static void cciss_remove_one(struct pci_dev *pdev) cciss_shutdown(pdev); -#ifdef CONFIG_PCI_MSI - if (h->msix_vector) - pci_disable_msix(h->pdev); - else if (h->msi_vector) - pci_disable_msi(h->pdev); -#endif /* CONFIG_PCI_MSI */ + pci_free_irq_vectors(h->pdev); iounmap(h->transtable); iounmap(h->cfgtable); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 7fda30e..4affa94 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -90,8 +90,6 @@ struct ctlr_info # define SIMPLE_MODE_INT 2 # define MEMQ_MODE_INT 3 unsigned int intr[4]; - unsigned int msix_vector; - unsigned int msi_vector; int intr_mode; int cciss_max_sectors; BYTE cciss_read; @@ -333,7 +331,7 @@ static unsigned long SA5_performant_completed(ctlr_info_t *h) */ register_value = readl(h->vaddr + SA5_OUTDB_STATUS); /* msi auto clears the interrupt pending bit. */ - if (!(h->msi_vector || h->msix_vector)) { + if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) { writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); /* Do a read in order to flush the write to the controller * (as per spec.) @@ -393,7 +391,7 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h) if (!register_value) return false; - if (h->msi_vector || h->msix_vector) + if (h->pdev->msi_enabled || h->pdev->msix_enabled) return true; /* Read outbound doorbell to flush */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c3ff60c..615e5b5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2462,7 +2462,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) if (get_ldev(device)) { q = bdev_get_queue(device->ldev->backing_bdev); - r = bdi_congested(&q->backing_dev_info, bdi_bits); + r = bdi_congested(q->backing_dev_info, bdi_bits); put_ldev(device); if (r) reason = 'b'; @@ -2834,8 +2834,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* we have no partitions. we contain only ourselves. */ device->this_bdev->bd_contains = device->this_bdev; - q->backing_dev_info.congested_fn = drbd_congested; - q->backing_dev_info.congested_data = device; + q->backing_dev_info->congested_fn = drbd_congested; + q->backing_dev_info->congested_data = device; blk_queue_make_request(q, drbd_make_request); blk_queue_write_cache(q, true, true); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f35db29..908c704 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1328,11 +1328,13 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi if (b) { blk_queue_stack_limits(q, b); - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + if (q->backing_dev_info->ra_pages != + b->backing_dev_info->ra_pages) { drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + q->backing_dev_info->ra_pages, + b->backing_dev_info->ra_pages); + q->backing_dev_info->ra_pages = + b->backing_dev_info->ra_pages; } } fixup_discard_if_not_supported(q); @@ -3345,7 +3347,7 @@ static void device_to_statistics(struct device_statistics *s, s->dev_disk_flags = md->flags; q = bdev_get_queue(device->ldev->backing_bdev); s->dev_lower_blocked = - bdi_congested(&q->backing_dev_info, + bdi_congested(q->backing_dev_info, (1 << WB_async_congested) | (1 << WB_sync_congested)); put_ldev(device); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index be2b93f..8378142 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -288,7 +288,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { /* reset device->congestion_reason */ - bdi_rw_congested(&device->rq_queue->backing_dev_info); + bdi_rw_congested(device->rq_queue->backing_dev_info); nc = rcu_dereference(first_peer_device(device)->connection->net_conf); wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b489ac2..652114a 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -927,7 +927,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se switch (rbm) { case RB_CONGESTED_REMOTE: - bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; return bdi_read_congested(bdi); case RB_LEAST_PENDING: return atomic_read(&device->local_cnt) > diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a391a3c..45b4384 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2900,8 +2900,8 @@ static void do_fd_request(struct request_queue *q) return; if (WARN(atomic_read(&usage_count) == 0, - "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", - current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, + "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n", + current_req, (long)blk_rq_pos(current_req), (unsigned long long) current_req->cmd_flags)) return; @@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param, *rcmd = NULL; loop: - ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); + ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL); if (!ptr) return -ENOMEM; *rcmd = ptr; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index a9b48ed..6043648 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -626,30 +626,29 @@ repeat: req_data_dir(req) == READ ? "read" : "writ", cyl, head, sec, nsect, bio_data(req->bio)); #endif - if (req->cmd_type == REQ_TYPE_FS) { - switch (rq_data_dir(req)) { - case READ: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, - &read_intr); - if (reset) - goto repeat; - break; - case WRITE: - hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, - &write_intr); - if (reset) - goto repeat; - if (wait_DRQ()) { - bad_rw_intr(); - goto repeat; - } - outsw(HD_DATA, bio_data(req->bio), 256); - break; - default: - printk("unknown hd-command\n"); - hd_end_request_cur(-EIO); - break; + + switch (req_op(req)) { + case REQ_OP_READ: + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, + &read_intr); + if (reset) + goto repeat; + break; + case REQ_OP_WRITE: + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, + &write_intr); + if (reset) + goto repeat; + if (wait_DRQ()) { + bad_rw_intr(); + goto repeat; } + outsw(HD_DATA, bio_data(req->bio), 256); + break; + default: + printk("unknown hd-command\n"); + hd_end_request_cur(-EIO); + break; } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285..3043771 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; + /* I/O need to be drained during transfer transition */ + blk_mq_freeze_queue(lo->lo_queue); + err = loop_release_xfer(lo); if (err) - return err; + goto exit; if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; @@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = loop_init_xfer(lo, xfer, info); if (err) - return err; + goto exit; if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) - return -EFBIG; + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { + err = -EFBIG; + goto exit; + } loop_config_discard(lo); @@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); - return 0; + exit: + blk_mq_unfreeze_queue(lo->lo_queue); + return err; } static int diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index e937fcf7..286f276 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -670,15 +670,17 @@ static void mg_request_poll(struct request_queue *q) break; } - if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) { - mg_end_request_cur(host, -EIO); - continue; - } - - if (rq_data_dir(host->req) == READ) + switch (req_op(host->req)) { + case REQ_OP_READ: mg_read(host->req); - else + break; + case REQ_OP_WRITE: mg_write(host->req); + break; + default: + mg_end_request_cur(host, -EIO); + break; + } } } @@ -687,13 +689,15 @@ static unsigned int mg_issue_req(struct request *req, unsigned int sect_num, unsigned int sect_cnt) { - if (rq_data_dir(req) == READ) { + switch (req_op(host->req)) { + case REQ_OP_READ: if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) != MG_ERR_NONE) { mg_bad_rw_intr(host); return host->error; } - } else { + break; + case REQ_OP_WRITE: /* TODO : handler */ outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) @@ -712,6 +716,10 @@ static unsigned int mg_issue_req(struct request *req, mod_timer(&host->timer, jiffies + 3 * HZ); outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); + break; + default: + mg_end_request_cur(host, -EIO); + break; } return MG_ERR_NONE; } @@ -753,11 +761,6 @@ static void mg_request(struct request_queue *q) continue; } - if (unlikely(req->cmd_type != REQ_TYPE_FS)) { - mg_end_request_cur(host, -EIO); - continue; - } - if (!mg_issue_req(req, host, sect_num, sect_cnt)) return; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9fd06ee..0be84a3 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,6 +41,9 @@ #include <linux/nbd.h> +static DEFINE_IDR(nbd_index_idr); +static DEFINE_MUTEX(nbd_index_mutex); + struct nbd_sock { struct socket *sock; struct mutex tx_lock; @@ -89,8 +92,9 @@ static struct dentry *nbd_dbg_dir; #define NBD_MAGIC 0x68797548 static unsigned int nbds_max = 16; -static struct nbd_device *nbd_dev; static int max_part; +static struct workqueue_struct *recv_workqueue; +static int part_shift; static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -193,13 +197,6 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); req->errors++; - /* - * If our disconnect packet times out then we're already holding the - * config_lock and could deadlock here, so just set an error and return, - * we'll handle shutting everything down later. - */ - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - return BLK_EH_HANDLED; mutex_lock(&nbd->config_lock); sock_shutdown(nbd); mutex_unlock(&nbd->config_lock); @@ -278,14 +275,29 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) u32 type; u32 tag = blk_mq_unique_tag(req); - if (req_op(req) == REQ_OP_DISCARD) + switch (req_op(req)) { + case REQ_OP_DISCARD: type = NBD_CMD_TRIM; - else if (req_op(req) == REQ_OP_FLUSH) + break; + case REQ_OP_FLUSH: type = NBD_CMD_FLUSH; - else if (rq_data_dir(req) == WRITE) + break; + case REQ_OP_WRITE: type = NBD_CMD_WRITE; - else + break; + case REQ_OP_READ: type = NBD_CMD_READ; + break; + default: + return -EIO; + } + + if (rq_data_dir(req) == WRITE && + (nbd->flags & NBD_FLAG_READ_ONLY)) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Write on read-only\n"); + return -EIO; + } memset(&request, 0, sizeof(request)); request.magic = htonl(NBD_REQUEST_MAGIC); @@ -510,18 +522,6 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) goto error_out; } - if (req->cmd_type != REQ_TYPE_FS && - req->cmd_type != REQ_TYPE_DRV_PRIV) - goto error_out; - - if (req->cmd_type == REQ_TYPE_FS && - rq_data_dir(req) == WRITE && - (nbd->flags & NBD_FLAG_READ_ONLY)) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Write on read-only\n"); - goto error_out; - } - req->errors = 0; nsock = nbd->socks[index]; @@ -785,7 +785,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, INIT_WORK(&args[i].work, recv_work); args[i].nbd = nbd; args[i].index = i; - queue_work(system_long_wq, &args[i].work); + queue_work(recv_workqueue, &args[i].work); } wait_event_interruptible(nbd->recv_wq, atomic_read(&nbd->recv_threads) == 0); @@ -996,6 +996,103 @@ static struct blk_mq_ops nbd_mq_ops = { .timeout = nbd_xmit_timeout, }; +static void nbd_dev_remove(struct nbd_device *nbd) +{ + struct gendisk *disk = nbd->disk; + nbd->magic = 0; + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&nbd->tag_set); + put_disk(disk); + } + kfree(nbd); +} + +static int nbd_dev_add(int index) +{ + struct nbd_device *nbd; + struct gendisk *disk; + struct request_queue *q; + int err = -ENOMEM; + + nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); + if (!nbd) + goto out; + + disk = alloc_disk(1 << part_shift); + if (!disk) + goto out_free_nbd; + + if (index >= 0) { + err = idr_alloc(&nbd_index_idr, nbd, index, index + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); + if (err >= 0) + index = err; + } + if (err < 0) + goto out_free_disk; + + nbd->disk = disk; + nbd->tag_set.ops = &nbd_mq_ops; + nbd->tag_set.nr_hw_queues = 1; + nbd->tag_set.queue_depth = 128; + nbd->tag_set.numa_node = NUMA_NO_NODE; + nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); + nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | + BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; + nbd->tag_set.driver_data = nbd; + + err = blk_mq_alloc_tag_set(&nbd->tag_set); + if (err) + goto out_free_idr; + + q = blk_mq_init_queue(&nbd->tag_set); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_free_tags; + } + disk->queue = q; + + /* + * Tell the block layer that we are not a rotational device + */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + disk->queue->limits.discard_granularity = 512; + blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + disk->queue->limits.discard_zeroes_data = 0; + blk_queue_max_hw_sectors(disk->queue, 65536); + disk->queue->limits.max_sectors = 256; + + nbd->magic = NBD_MAGIC; + mutex_init(&nbd->config_lock); + disk->major = NBD_MAJOR; + disk->first_minor = index << part_shift; + disk->fops = &nbd_fops; + disk->private_data = nbd; + sprintf(disk->disk_name, "nbd%d", index); + init_waitqueue_head(&nbd->recv_wq); + nbd_reset(nbd); + add_disk(disk); + return index; + +out_free_tags: + blk_mq_free_tag_set(&nbd->tag_set); +out_free_idr: + idr_remove(&nbd_index_idr, index); +out_free_disk: + put_disk(disk); +out_free_nbd: + kfree(nbd); +out: + return err; +} + /* * And here should be modules and kernel interface * (Just smiley confuses emacs :-) @@ -1003,9 +1100,7 @@ static struct blk_mq_ops nbd_mq_ops = { static int __init nbd_init(void) { - int err = -ENOMEM; int i; - int part_shift; BUILD_BUG_ON(sizeof(struct nbd_request) != 28); @@ -1034,111 +1129,38 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; - - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) + recv_workqueue = alloc_workqueue("knbd-recv", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); + if (!recv_workqueue) return -ENOMEM; - for (i = 0; i < nbds_max; i++) { - struct request_queue *q; - struct gendisk *disk = alloc_disk(1 << part_shift); - if (!disk) - goto out; - nbd_dev[i].disk = disk; - - nbd_dev[i].tag_set.ops = &nbd_mq_ops; - nbd_dev[i].tag_set.nr_hw_queues = 1; - nbd_dev[i].tag_set.queue_depth = 128; - nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE; - nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd); - nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING; - nbd_dev[i].tag_set.driver_data = &nbd_dev[i]; - - err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set); - if (err) { - put_disk(disk); - goto out; - } - - /* - * The new linux 2.5 block layer implementation requires - * every gendisk to have its very own request_queue struct. - * These structs are big so we dynamically allocate them. - */ - q = blk_mq_init_queue(&nbd_dev[i].tag_set); - if (IS_ERR(q)) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - goto out; - } - disk->queue = q; - - /* - * Tell the block layer that we are not a rotational device - */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); - disk->queue->limits.discard_granularity = 512; - blk_queue_max_discard_sectors(disk->queue, UINT_MAX); - disk->queue->limits.discard_zeroes_data = 0; - blk_queue_max_hw_sectors(disk->queue, 65536); - disk->queue->limits.max_sectors = 256; - } - - if (register_blkdev(NBD_MAJOR, "nbd")) { - err = -EIO; - goto out; - } - - printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR); + if (register_blkdev(NBD_MAJOR, "nbd")) + return -EIO; nbd_dbg_init(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = NBD_MAGIC; - mutex_init(&nbd_dev[i].config_lock); - disk->major = NBD_MAJOR; - disk->first_minor = i << part_shift; - disk->fops = &nbd_fops; - disk->private_data = &nbd_dev[i]; - sprintf(disk->disk_name, "nbd%d", i); - init_waitqueue_head(&nbd_dev[i].recv_wq); - nbd_reset(&nbd_dev[i]); - add_disk(disk); - } + mutex_lock(&nbd_index_mutex); + for (i = 0; i < nbds_max; i++) + nbd_dev_add(i); + mutex_unlock(&nbd_index_mutex); + return 0; +} +static int nbd_exit_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + nbd_dev_remove(nbd); return 0; -out: - while (i--) { - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - blk_cleanup_queue(nbd_dev[i].disk->queue); - put_disk(nbd_dev[i].disk); - } - kfree(nbd_dev); - return err; } static void __exit nbd_cleanup(void) { - int i; - nbd_dbg_close(); - for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].magic = 0; - if (disk) { - del_gendisk(disk); - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&nbd_dev[i].tag_set); - put_disk(disk); - } - } + idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL); + idr_destroy(&nbd_index_idr); + destroy_workqueue(recv_workqueue); unregister_blkdev(NBD_MAJOR, "nbd"); - kfree(nbd_dev); - printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR); } module_init(nbd_init); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c0e14e5..6f2e565 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -420,7 +420,8 @@ static void null_lnvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - nvm_end_io(rqd, error); + rqd->error = error; + nvm_end_io(rqd); blk_put_request(rq); } @@ -431,11 +432,11 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct request *rq; struct bio *bio = rqd->bio; - rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); + rq = blk_mq_alloc_request(q, + op_is_write(bio_op(bio)) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return -ENOMEM; - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->__sector = bio->bi_iter.bi_sector; rq->ioprio = bio_prio(bio); @@ -460,7 +461,6 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) id->ver_id = 0x1; id->vmnt = 0; - id->cgrps = 1; id->cap = 0x2; id->dom = 0x1; @@ -479,7 +479,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) sector_div(size, bs); /* convert size to pages */ size >>= 8; /* concert size to pgs pr blk */ - grp = &id->groups[0]; + grp = &id->grp; grp->mtype = 0; grp->fmtype = 0; grp->num_ch = 1; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 92900f5..8127b82 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -308,12 +308,6 @@ static void osdblk_rq_fn(struct request_queue *q) if (!rq) break; - /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS) { - blk_end_request_all(rq, 0); - continue; - } - /* deduce our operation (read, write, flush) */ /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] * into a clearly defined set of RPC commands: diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig index efefb5a..3a15247 100644 --- a/drivers/block/paride/Kconfig +++ b/drivers/block/paride/Kconfig @@ -25,6 +25,7 @@ config PARIDE_PD config PARIDE_PCD tristate "Parallel port ATAPI CD-ROMs" depends on PARIDE + select BLK_SCSI_REQUEST # only for the generic cdrom code ---help--- This option enables the high-level driver for ATAPI CD-ROM devices connected through a parallel port. If you chose to build PARIDE diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 5fd2d0e..10aed84 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -273,7 +273,7 @@ static const struct block_device_operations pcd_bdops = { .check_events = pcd_block_check_events, }; -static struct cdrom_device_ops pcd_dops = { +static const struct cdrom_device_ops pcd_dops = { .open = pcd_open, .release = pcd_release, .drive_status = pcd_drive_status, diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index c3ed2fc..644ba08 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -439,18 +439,16 @@ static int pd_retries = 0; /* i/o error retry count */ static int pd_block; /* address of next requested block */ static int pd_count; /* number of blocks still to do */ static int pd_run; /* sectors in current cluster */ -static int pd_cmd; /* current command READ/WRITE */ static char *pd_buf; /* buffer for request in progress */ static enum action do_pd_io_start(void) { - if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) { + switch (req_op(pd_req)) { + case REQ_OP_DRV_IN: phase = pd_special; return pd_special(); - } - - pd_cmd = rq_data_dir(pd_req); - if (pd_cmd == READ || pd_cmd == WRITE) { + case REQ_OP_READ: + case REQ_OP_WRITE: pd_block = blk_rq_pos(pd_req); pd_count = blk_rq_cur_sectors(pd_req); if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) @@ -458,7 +456,7 @@ static enum action do_pd_io_start(void) pd_run = blk_rq_sectors(pd_req); pd_buf = bio_data(pd_req->bio); pd_retries = 0; - if (pd_cmd == READ) + if (req_op(pd_req) == REQ_OP_READ) return do_pd_read_start(); else return do_pd_write_start(); @@ -723,11 +721,10 @@ static int pd_special_command(struct pd_unit *disk, struct request *rq; int err = 0; - rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM); + rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = func; err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 1b94c1c..66d846b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,10 +704,10 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - WRITE : READ, __GFP_RECLAIM); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); - blk_rq_set_block_pc(rq); + scsi_req_init(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, @@ -716,8 +716,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * goto out; } - rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]); - memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); + scsi_req(rq)->cmd_len = COMMAND_SIZE(cgc->cmd[0]); + memcpy(scsi_req(rq)->cmd, cgc->cmd, CDROM_PACKET_SIZE); rq->timeout = 60*HZ; if (cgc->quiet) @@ -1243,7 +1243,7 @@ try_next_bio: && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); if (wakeup) { - clear_bdi_congested(&pd->disk->queue->backing_dev_info, + clear_bdi_congested(pd->disk->queue->backing_dev_info, BLK_RW_ASYNC); } @@ -2370,7 +2370,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); + set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); congestion_wait(BLK_RW_ASYNC, HZ); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 76f33c8..a809e3e 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -196,16 +196,19 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); while ((req = blk_fetch_request(q))) { - if (req_op(req) == REQ_OP_FLUSH) { + switch (req_op(req)) { + case REQ_OP_FLUSH: if (ps3disk_submit_flush_request(dev, req)) - break; - } else if (req->cmd_type == REQ_TYPE_FS) { + return; + break; + case REQ_OP_READ: + case REQ_OP_WRITE: if (ps3disk_submit_request_sg(dev, req)) - break; - } else { + return; + break; + default: blk_dump_rq_flags(req, DEVICE_NAME " bad request"); __blk_end_request_all(req, -EIO); - continue; } } } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 436baa6..362cecc 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4099,19 +4099,21 @@ static void rbd_queue_workfn(struct work_struct *work) bool must_be_locked; int result; - if (rq->cmd_type != REQ_TYPE_FS) { - dout("%s: non-fs request type %d\n", __func__, - (int) rq->cmd_type); - result = -EIO; - goto err; - } - - if (req_op(rq) == REQ_OP_DISCARD) + switch (req_op(rq)) { + case REQ_OP_DISCARD: op_type = OBJ_OP_DISCARD; - else if (req_op(rq) == REQ_OP_WRITE) + break; + case REQ_OP_WRITE: op_type = OBJ_OP_WRITE; - else + break; + case REQ_OP_READ: op_type = OBJ_OP_READ; + break; + default: + dout("%s: non-fs request type %d\n", __func__, req_op(rq)); + result = -EIO; + goto err; + } /* Ignore/skip any zero-length requests */ @@ -4524,7 +4526,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_zeroes_data = 1; if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; disk->queue = q; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index abf805e..27833e4 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -1204,10 +1204,11 @@ static void skd_complete_special(struct skd_device *skdev, static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode, uint cmd_in, ulong arg) { - int rc = 0; + static const int sg_version_num = 30527; + int rc = 0, timeout; struct gendisk *disk = bdev->bd_disk; struct skd_device *skdev = disk->private_data; - void __user *p = (void *)arg; + int __user *p = (int __user *)arg; pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n", skdev->name, __func__, __LINE__, @@ -1218,12 +1219,18 @@ static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd_in) { case SG_SET_TIMEOUT: + rc = get_user(timeout, p); + if (!rc) + disk->queue->sg_timeout = clock_t_to_jiffies(timeout); + break; case SG_GET_TIMEOUT: + rc = jiffies_to_clock_t(disk->queue->sg_timeout); + break; case SG_GET_VERSION_NUM: - rc = scsi_cmd_ioctl(disk->queue, disk, mode, cmd_in, p); + rc = put_user(sg_version_num, p); break; case SG_IO: - rc = skd_ioctl_sg_io(skdev, mode, p); + rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg); break; default: diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 0e93ad7..c8e072c 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host) if (!crq) return NULL; - rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); + rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL); if (IS_ERR(rq)) { spin_lock_irqsave(&host->lock, flags); carm_put_request(host, crq); @@ -620,7 +620,6 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) spin_unlock_irq(&host->lock); DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); @@ -661,7 +660,6 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) crq->msg_bucket = (u32) rc; DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 264c5ea..024b473 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -52,11 +52,13 @@ struct virtio_blk { }; struct virtblk_req { - struct request *req; - struct virtio_blk_outhdr out_hdr; +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; u8 status; - u8 sense[SCSI_SENSE_BUFFERSIZE]; struct scatterlist sg[]; }; @@ -72,28 +74,88 @@ static inline int virtblk_result(struct virtblk_req *vbr) } } -static int __virtblk_add_req(struct virtqueue *vq, - struct virtblk_req *vbr, - struct scatterlist *data_sg, - bool have_data) +/* + * If this is a packet command we need a couple of additional headers. Behind + * the normal outhdr we put a segment with the scsi command block, and before + * the normal inhdr we put the sense data and the inhdr with additional status + * information. + */ +#ifdef CONFIG_VIRTIO_BLK_SCSI +static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, + struct scatterlist *data_sg, bool have_data) { struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6]; unsigned int num_out = 0, num_in = 0; - __virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT); sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); sgs[num_out++] = &hdr; + sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len); + sgs[num_out++] = &cmd; + + if (have_data) { + if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) + sgs[num_out++] = data_sg; + else + sgs[num_out + num_in++] = data_sg; + } + + sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); + sgs[num_out + num_in++] = &sense; + sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); + sgs[num_out + num_in++] = &inhdr; + sg_init_one(&status, &vbr->status, sizeof(vbr->status)); + sgs[num_out + num_in++] = &status; + + return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); +} + +static inline void virtblk_scsi_reques_done(struct request *req) +{ + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + struct virtio_blk *vblk = req->q->queuedata; + struct scsi_request *sreq = &vbr->sreq; + + sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); + sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); + req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); +} + +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long data) +{ + struct gendisk *disk = bdev->bd_disk; + struct virtio_blk *vblk = disk->private_data; /* - * If this is a packet command we need a couple of additional headers. - * Behind the normal outhdr we put a segment with the scsi command - * block, and before the normal inhdr we put the sense data and the - * inhdr with additional status information. + * Only allow the generic SCSI ioctls if the host can support it. */ - if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len); - sgs[num_out++] = &cmd; - } + if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) + return -ENOTTY; + + return scsi_cmd_blk_ioctl(bdev, mode, cmd, + (void __user *)data); +} +#else +static inline int virtblk_add_req_scsi(struct virtqueue *vq, + struct virtblk_req *vbr, struct scatterlist *data_sg, + bool have_data) +{ + return -EIO; +} +static inline void virtblk_scsi_reques_done(struct request *req) +{ +} +#define virtblk_ioctl NULL +#endif /* CONFIG_VIRTIO_BLK_SCSI */ + +static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, + struct scatterlist *data_sg, bool have_data) +{ + struct scatterlist hdr, status, *sgs[3]; + unsigned int num_out = 0, num_in = 0; + + sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); + sgs[num_out++] = &hdr; if (have_data) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) @@ -102,14 +164,6 @@ static int __virtblk_add_req(struct virtqueue *vq, sgs[num_out + num_in++] = data_sg; } - if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { - memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE); - sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE); - sgs[num_out + num_in++] = &sense; - sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr)); - sgs[num_out + num_in++] = &inhdr; - } - sg_init_one(&status, &vbr->status, sizeof(vbr->status)); sgs[num_out + num_in++] = &status; @@ -119,15 +173,16 @@ static int __virtblk_add_req(struct virtqueue *vq, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - struct virtio_blk *vblk = req->q->queuedata; int error = virtblk_result(vbr); - if (req->cmd_type == REQ_TYPE_BLOCK_PC) { - req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); - req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); - } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) { + switch (req_op(req)) { + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + virtblk_scsi_reques_done(req); + break; + case REQ_OP_DRV_IN: req->errors = (error != 0); + break; } blk_mq_end_request(req, error); @@ -146,7 +201,9 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - blk_mq_complete_request(vbr->req, vbr->req->errors); + struct request *req = blk_mq_rq_from_pdu(vbr); + + blk_mq_complete_request(req, req->errors); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -170,49 +227,50 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; + u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - vbr->req = req; - if (req_op(req) == REQ_OP_FLUSH) { - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - } else { - switch (req->cmd_type) { - case REQ_TYPE_FS: - vbr->out_hdr.type = 0; - vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req)); - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - case REQ_TYPE_BLOCK_PC: - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - case REQ_TYPE_DRV_PRIV: - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); - vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); - break; - default: - /* We don't put anything else in the queue. */ - BUG(); - } + switch (req_op(req)) { + case REQ_OP_READ: + case REQ_OP_WRITE: + type = 0; + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + type = VIRTIO_BLK_T_SCSI_CMD; + break; + case REQ_OP_DRV_IN: + type = VIRTIO_BLK_T_GET_ID; + break; + default: + WARN_ON_ONCE(1); + return BLK_MQ_RQ_QUEUE_ERROR; } + vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); + vbr->out_hdr.sector = type ? + 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); + vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); + blk_mq_start_request(req); - num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); + num = blk_rq_map_sg(hctx->queue, req, vbr->sg); if (num) { - if (rq_data_dir(vbr->req) == WRITE) + if (rq_data_dir(req) == WRITE) vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); else vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT) + err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num); + else + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); blk_mq_stop_hw_queue(hctx); @@ -242,10 +300,9 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) struct request *req; int err; - req = blk_get_request(q, READ, GFP_KERNEL); + req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL); if (IS_ERR(req)) return PTR_ERR(req); - req->cmd_type = REQ_TYPE_DRV_PRIV; err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); if (err) @@ -257,22 +314,6 @@ out: return err; } -static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long data) -{ - struct gendisk *disk = bdev->bd_disk; - struct virtio_blk *vblk = disk->private_data; - - /* - * Only allow the generic SCSI ioctls if the host can support it. - */ - if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) - return -ENOTTY; - - return scsi_cmd_blk_ioctl(bdev, mode, cmd, - (void __user *)data); -} - /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { @@ -538,6 +579,9 @@ static int virtblk_init_request(void *data, struct request *rq, struct virtio_blk *vblk = data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); +#ifdef CONFIG_VIRTIO_BLK_SCSI + vbr->sreq.sense = vbr->sense; +#endif sg_init_table(vbr->sg, vblk->sg_elems); return 0; } @@ -821,7 +865,10 @@ static const struct virtio_device_id id_table[] = { static unsigned int features_legacy[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, - VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, + VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, +#ifdef CONFIG_VIRTIO_BLK_SCSI + VIRTIO_BLK_F_SCSI, +#endif VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 265f1a7..5067a0a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -865,7 +865,7 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo) static inline bool blkif_request_flush_invalid(struct request *req, struct blkfront_info *info) { - return ((req->cmd_type != REQ_TYPE_FS) || + return (blk_rq_is_passthrough(req) || ((req_op(req) == REQ_OP_FLUSH) && !info->feature_flush) || ((req->cmd_flags & REQ_FUA) && diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index c4328d9..757dce2 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -468,7 +468,7 @@ static struct request *ace_get_next_request(struct request_queue *q) struct request *req; while ((req = blk_peek_request(q)) != NULL) { - if (req->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(req)) break; blk_start_request(req); __blk_end_request_all(req, -EIO); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e5ab7d9..3cd7856 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -117,7 +117,7 @@ static void zram_revalidate_disk(struct zram *zram) { revalidate_disk(zram->disk); /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ - zram->disk->queue->backing_dev_info.capabilities |= + zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 59cca72..8773964 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -281,8 +281,8 @@ #include <linux/fcntl.h> #include <linux/blkdev.h> #include <linux/times.h> - #include <linux/uaccess.h> +#include <scsi/scsi_request.h> /* used to tell the module to turn on full debugging messages */ static bool debug; @@ -342,8 +342,8 @@ static void cdrom_sysctl_register(void); static LIST_HEAD(cdrom_list); -static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, - struct packet_command *cgc) +int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, + struct packet_command *cgc) { if (cgc->sense) { cgc->sense->sense_key = 0x05; @@ -354,6 +354,7 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, cgc->stat = -EIO; return -EIO; } +EXPORT_SYMBOL(cdrom_dummy_generic_packet); static int cdrom_flush_cache(struct cdrom_device_info *cdi) { @@ -371,7 +372,7 @@ static int cdrom_flush_cache(struct cdrom_device_info *cdi) static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct packet_command cgc; int ret, buflen; @@ -586,7 +587,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space) int register_cdrom(struct cdrom_device_info *cdi) { static char banner_printed; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; int *change_capability = (int *)&cdo->capability; /* hack */ cd_dbg(CD_OPEN, "entering register_cdrom\n"); @@ -610,7 +611,6 @@ int register_cdrom(struct cdrom_device_info *cdi) ENSURE(reset, CDC_RESET); ENSURE(generic_packet, CDC_GENERIC_PACKET); cdi->mc_flags = 0; - cdo->n_minors = 0; cdi->options = CDO_USE_FFLAGS; if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) @@ -630,8 +630,7 @@ int register_cdrom(struct cdrom_device_info *cdi) else cdi->cdda_method = CDDA_OLD; - if (!cdo->generic_packet) - cdo->generic_packet = cdrom_dummy_generic_packet; + WARN_ON(!cdo->generic_packet); cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); mutex_lock(&cdrom_mutex); @@ -652,7 +651,6 @@ void unregister_cdrom(struct cdrom_device_info *cdi) if (cdi->exit) cdi->exit(cdi); - cdi->ops->n_minors--; cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); } @@ -1036,7 +1034,7 @@ static int open_for_data(struct cdrom_device_info *cdi) { int ret; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; tracktype tracks; cd_dbg(CD_OPEN, "entering open_for_data\n"); /* Check if the driver can report drive status. If it can, we @@ -1198,8 +1196,8 @@ err: /* This code is similar to that in open_for_data. The routine is called whenever an audio play operation is requested. */ -static int check_for_audio_disc(struct cdrom_device_info * cdi, - struct cdrom_device_ops * cdo) +static int check_for_audio_disc(struct cdrom_device_info *cdi, + const struct cdrom_device_ops *cdo) { int ret; tracktype tracks; @@ -1254,7 +1252,7 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi, void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; int opened_for_data; cd_dbg(CD_CLOSE, "entering cdrom_release\n"); @@ -1294,7 +1292,7 @@ static int cdrom_read_mech_status(struct cdrom_device_info *cdi, struct cdrom_changer_info *buf) { struct packet_command cgc; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; int length; /* @@ -1643,7 +1641,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) int ret; u_char buf[20]; struct packet_command cgc; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; rpc_state_t rpc_state; memset(buf, 0, sizeof(buf)); @@ -1791,7 +1789,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s, { unsigned char buf[21], *base; struct dvd_layer *layer; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; int ret, layer_num = s->physical.layer_num; if (layer_num >= DVD_LAYERS) @@ -1842,7 +1840,7 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s, { int ret; u_char buf[8]; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; @@ -1866,7 +1864,7 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s, { int ret, size; u_char *buf; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; size = sizeof(s->disckey.value) + 4; @@ -1894,7 +1892,7 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s, { int ret, size = 4 + 188; u_char *buf; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -1928,7 +1926,7 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, { int ret = 0, size; u_char *buf; - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; size = sizeof(s->manufact.value) + 4; @@ -1995,7 +1993,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi, struct packet_command *cgc, int page_code, int page_control) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; memset(cgc->cmd, 0, sizeof(cgc->cmd)); @@ -2010,7 +2008,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi, int cdrom_mode_select(struct cdrom_device_info *cdi, struct packet_command *cgc) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; memset(cgc->cmd, 0, sizeof(cgc->cmd)); memset(cgc->buffer, 0, 2); @@ -2025,7 +2023,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi, static int cdrom_read_subchannel(struct cdrom_device_info *cdi, struct cdrom_subchnl *subchnl, int mcn) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct packet_command cgc; char buffer[32]; int ret; @@ -2073,7 +2071,7 @@ static int cdrom_read_cd(struct cdrom_device_info *cdi, struct packet_command *cgc, int lba, int blocksize, int nblocks) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; memset(&cgc->cmd, 0, sizeof(cgc->cmd)); cgc->cmd[0] = GPCMD_READ_10; @@ -2093,7 +2091,7 @@ static int cdrom_read_block(struct cdrom_device_info *cdi, struct packet_command *cgc, int lba, int nblocks, int format, int blksize) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; memset(&cgc->cmd, 0, sizeof(cgc->cmd)); cgc->cmd[0] = GPCMD_READ_CD; @@ -2172,6 +2170,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, { struct request_queue *q = cdi->disk->queue; struct request *rq; + struct scsi_request *req; struct bio *bio; unsigned int len; int nr, ret = 0; @@ -2190,12 +2189,13 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_get_request(q, READ, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(rq)) { ret = PTR_ERR(rq); break; } - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) { @@ -2203,23 +2203,23 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, break; } - rq->cmd[0] = GPCMD_READ_CD; - rq->cmd[1] = 1 << 2; - rq->cmd[2] = (lba >> 24) & 0xff; - rq->cmd[3] = (lba >> 16) & 0xff; - rq->cmd[4] = (lba >> 8) & 0xff; - rq->cmd[5] = lba & 0xff; - rq->cmd[6] = (nr >> 16) & 0xff; - rq->cmd[7] = (nr >> 8) & 0xff; - rq->cmd[8] = nr & 0xff; - rq->cmd[9] = 0xf8; - - rq->cmd_len = 12; + req->cmd[0] = GPCMD_READ_CD; + req->cmd[1] = 1 << 2; + req->cmd[2] = (lba >> 24) & 0xff; + req->cmd[3] = (lba >> 16) & 0xff; + req->cmd[4] = (lba >> 8) & 0xff; + req->cmd[5] = lba & 0xff; + req->cmd[6] = (nr >> 16) & 0xff; + req->cmd[7] = (nr >> 8) & 0xff; + req->cmd[8] = nr & 0xff; + req->cmd[9] = 0xf8; + + req->cmd_len = 12; rq->timeout = 60 * HZ; bio = rq->bio; if (blk_execute_rq(q, cdi->disk, rq, 0)) { - struct request_sense *s = rq->sense; + struct request_sense *s = req->sense; ret = -EIO; cdi->last_sense = s->sense_key; } @@ -2764,7 +2764,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi, */ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct packet_command cgc; struct modesel_head mh; @@ -2790,7 +2790,7 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) static int cdrom_get_track_info(struct cdrom_device_info *cdi, __u16 track, __u8 type, track_information *ti) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct packet_command cgc; int ret, buflen; @@ -3049,7 +3049,7 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, void __user *arg, struct packet_command *cgc) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct cdrom_msf msf; cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) @@ -3069,7 +3069,7 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, void __user *arg, struct packet_command *cgc) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; struct cdrom_blk blk; cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) @@ -3164,7 +3164,7 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, struct packet_command *cgc, int cmd) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); cgc->cmd[0] = GPCMD_START_STOP_UNIT; cgc->cmd[1] = 1; @@ -3177,7 +3177,7 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, struct packet_command *cgc, int cmd) { - struct cdrom_device_ops *cdo = cdi->ops; + const struct cdrom_device_ops *cdo = cdi->ops; cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); cgc->cmd[0] = GPCMD_PAUSE_RESUME; cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 584bc31..1372763 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -481,7 +481,7 @@ static int gdrom_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, return -EINVAL; } -static struct cdrom_device_ops gdrom_ops = { +static const struct cdrom_device_ops gdrom_ops = { .open = gdrom_open, .release = gdrom_release, .drive_status = gdrom_drivestatus, @@ -489,9 +489,9 @@ static struct cdrom_device_ops gdrom_ops = { .get_last_session = gdrom_get_last_session, .reset = gdrom_hardreset, .audio_ioctl = gdrom_audio_ioctl, + .generic_packet = cdrom_dummy_generic_packet, .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED | CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R, - .n_minors = 1, }; static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) @@ -659,23 +659,24 @@ static void gdrom_request(struct request_queue *rq) struct request *req; while ((req = blk_fetch_request(rq)) != NULL) { - if (req->cmd_type != REQ_TYPE_FS) { - printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); - __blk_end_request_all(req, -EIO); - continue; - } - if (rq_data_dir(req) != READ) { + switch (req_op(req)) { + case REQ_OP_READ: + /* + * Add to list of deferred work and then schedule + * workqueue. + */ + list_add_tail(&req->queuelist, &gdrom_deferred); + schedule_work(&work); + break; + case REQ_OP_WRITE: pr_notice("Read only device - write request ignored\n"); __blk_end_request_all(req, -EIO); - continue; + break; + default: + printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); + __blk_end_request_all(req, -EIO); + break; } - - /* - * Add to list of deferred work and then schedule - * workqueue. - */ - list_add_tail(&req->queuelist, &gdrom_deferred); - schedule_work(&work); } } @@ -807,16 +808,20 @@ static int probe_gdrom(struct platform_device *devptr) if (err) goto probe_fail_cmdirq_register; gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); - if (!gd.gdrom_rq) + if (!gd.gdrom_rq) { + err = -ENOMEM; goto probe_fail_requestq; + } err = probe_gdrom_setupqueue(); if (err) goto probe_fail_toc; gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL); - if (!gd.toc) + if (!gd.toc) { + err = -ENOMEM; goto probe_fail_toc; + } add_disk(gd.disk); return 0; diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 39ea67f..c99a25c 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -10,6 +10,7 @@ menuconfig IDE tristate "ATA/ATAPI/MFM/RLL support (DEPRECATED)" depends on HAVE_IDE depends on BLOCK + select BLK_SCSI_REQUEST ---help--- If you say Y here, your kernel will be able to manage ATA/(E)IDE and ATAPI units. The most common cases are IDE hard drives and ATAPI diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index f90ea22..feb3006 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -92,8 +92,9 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, struct request *rq; int error; - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_DRV_PRIV; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_MISC; rq->special = (char *)pc; if (buf && bufflen) { @@ -103,9 +104,9 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, goto put_req; } - memcpy(rq->cmd, pc->c, 12); + memcpy(scsi_req(rq)->cmd, pc->c, 12); if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; + scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); put_req: blk_put_request(rq); @@ -171,7 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); void ide_prep_sense(ide_drive_t *drive, struct request *rq) { struct request_sense *sense = &drive->sense_data; - struct request *sense_rq = &drive->sense_rq; + struct request *sense_rq = drive->sense_rq; + struct scsi_request *req = scsi_req(sense_rq); unsigned int cmd_len, sense_len; int err; @@ -191,12 +193,13 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) BUG_ON(sense_len > sizeof(*sense)); - if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed) + if (ata_sense_request(rq) || drive->sense_rq_armed) return; memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); + scsi_req_init(sense_rq); err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, GFP_NOIO); @@ -208,13 +211,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) } sense_rq->rq_disk = rq->rq_disk; - sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; - sense_rq->cmd[4] = cmd_len; - sense_rq->cmd_type = REQ_TYPE_ATA_SENSE; + sense_rq->cmd_flags = REQ_OP_DRV_IN; + ide_req(sense_rq)->type = ATA_PRIV_SENSE; sense_rq->rq_flags |= RQF_PREEMPT; + req->cmd[0] = GPCMD_REQUEST_SENSE; + req->cmd[4] = cmd_len; if (drive->media == ide_tape) - sense_rq->cmd[13] = REQ_IDETAPE_PC1; + req->cmd[13] = REQ_IDETAPE_PC1; drive->sense_rq_armed = true; } @@ -229,12 +233,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) return -ENOMEM; } - drive->sense_rq.special = special; + drive->sense_rq->special = special; drive->sense_rq_armed = false; drive->hwif->rq = NULL; - elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT); + elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -247,14 +251,14 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq); void ide_retry_pc(ide_drive_t *drive) { struct request *failed_rq = drive->hwif->rq; - struct request *sense_rq = &drive->sense_rq; + struct request *sense_rq = drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); /* init pc from sense_rq */ ide_init_pc(pc); - memcpy(pc->c, sense_rq->cmd, 12); + memcpy(pc->c, scsi_req(sense_rq)->cmd, 12); if (drive->media == ide_tape) drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC; @@ -286,7 +290,7 @@ int ide_cd_expiry(ide_drive_t *drive) * commands/drives support that. Let ide_timer_expiry keep polling us * for these. */ - switch (rq->cmd[0]) { + switch (scsi_req(rq)->cmd[0]) { case GPCMD_BLANK: case GPCMD_FORMAT_UNIT: case GPCMD_RESERVE_RZONE_TRACK: @@ -297,7 +301,7 @@ int ide_cd_expiry(ide_drive_t *drive) default: if (!(rq->rq_flags & RQF_QUIET)) printk(KERN_INFO PFX "cmd 0x%x timed out\n", - rq->cmd[0]); + scsi_req(rq)->cmd[0]); wait = 0; break; } @@ -307,15 +311,21 @@ EXPORT_SYMBOL_GPL(ide_cd_expiry); int ide_cd_get_xferlen(struct request *rq) { - switch (rq->cmd_type) { - case REQ_TYPE_FS: + switch (req_op(rq)) { + default: return 32768; - case REQ_TYPE_ATA_SENSE: - case REQ_TYPE_BLOCK_PC: - case REQ_TYPE_ATA_PC: + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: return blk_rq_bytes(rq); - default: - return 0; + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + switch (ide_req(rq)->type) { + case ATA_PRIV_PC: + case ATA_PRIV_SENSE: + return blk_rq_bytes(rq); + default: + return 0; + } } } EXPORT_SYMBOL_GPL(ide_cd_get_xferlen); @@ -374,7 +384,7 @@ int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len, drive->name, __func__, ireason); } - if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC) + if (dev_is_idecd(drive) && ata_pc_request(rq)) rq->rq_flags |= RQF_FAILED; return 1; @@ -420,7 +430,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; } else - rq->resid_len = 0; + scsi_req(rq)->resid_len = 0; debug_log("%s: DMA finished\n", drive->name); } @@ -436,7 +446,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); if (drive->media == ide_tape && - (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE) + (stat & ATA_ERR) && scsi_req(rq)->cmd[0] == REQUEST_SENSE) stat &= ~ATA_ERR; if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) { @@ -446,7 +456,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (drive->media != ide_tape) pc->rq->errors++; - if (rq->cmd[0] == REQUEST_SENSE) { + if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) { printk(KERN_ERR PFX "%s: I/O error in request " "sense command\n", drive->name); return ide_do_reset(drive); @@ -477,12 +487,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (uptodate == 0) drive->failed_pc = NULL; - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { + if (ata_misc_request(rq)) { rq->errors = 0; error = 0; } else { - if (rq->cmd_type != REQ_TYPE_FS && uptodate <= 0) { + if (blk_rq_is_passthrough(rq) && uptodate <= 0) { if (rq->errors == 0) rq->errors = -EIO; } @@ -512,7 +522,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) ide_pio_bytes(drive, cmd, write, done); /* Update transferred byte count */ - rq->resid_len -= done; + scsi_req(rq)->resid_len -= done; bcount -= done; @@ -520,7 +530,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) ide_pad_transfer(drive, write, bcount); debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n", - rq->cmd[0], done, bcount, rq->resid_len); + rq->cmd[0], done, bcount, scsi_req(rq)->resid_len); /* And set the interrupt handler again */ ide_set_handler(drive, ide_pc_intr, timeout); @@ -603,7 +613,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) if (dev_is_idecd(drive)) { /* ATAPI commands get padded out to 12 bytes minimum */ - cmd_len = COMMAND_SIZE(rq->cmd[0]); + cmd_len = COMMAND_SIZE(scsi_req(rq)->cmd[0]); if (cmd_len < ATAPI_MIN_CDB_BYTES) cmd_len = ATAPI_MIN_CDB_BYTES; @@ -650,7 +660,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) /* Send the actual packet */ if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0) - hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); + hwif->tp_ops->output_data(drive, NULL, scsi_req(rq)->cmd, cmd_len); /* Begin DMA, if necessary */ if (dev_is_idecd(drive)) { @@ -695,7 +705,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) bytes, 63 * 1024)); /* We haven't transferred any data yet */ - rq->resid_len = bcount; + scsi_req(rq)->resid_len = bcount; if (pc->flags & PC_FLAG_DMA_ERROR) { pc->flags &= ~PC_FLAG_DMA_ERROR; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 9cbd217..aef0051 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -121,7 +121,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq) * don't log START_STOP unit with LoEj set, since we cannot * reliably check if drive can auto-close */ - if (rq->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24) + if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24) break; log = 1; break; @@ -163,7 +163,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, * toc has not been recorded yet, it will fail with 05/24/00 (which is a * confusing error) */ - if (failed_command && failed_command->cmd[0] == GPCMD_READ_TOC_PMA_ATIP) + if (failed_command && scsi_req(failed_command)->cmd[0] == GPCMD_READ_TOC_PMA_ATIP) if (sense->sense_key == 0x05 && sense->asc == 0x24) return; @@ -176,7 +176,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, if (!sense->valid) break; if (failed_command == NULL || - failed_command->cmd_type != REQ_TYPE_FS) + blk_rq_is_passthrough(failed_command)) break; sector = (sense->information[0] << 24) | (sense->information[1] << 16) | @@ -210,7 +210,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original + * For ATA_PRIV_SENSE, "rq->special" points to the original * failed request. Also, the sense data should be read * directly from rq which might be different from the original * sense buffer if it got copied during mapping. @@ -219,15 +219,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) void *sense = bio_data(rq->bio); if (failed) { - if (failed->sense) { - /* - * Sense is always read into drive->sense_data. - * Copy back if the failed request has its - * sense pointer set. - */ - memcpy(failed->sense, sense, 18); - failed->sense_len = rq->sense_len; - } + /* + * Sense is always read into drive->sense_data, copy back to the + * original request. + */ + memcpy(scsi_req(failed)->sense, sense, 18); + scsi_req(failed)->sense_len = scsi_req(rq)->sense_len; cdrom_analyze_sense_data(drive, failed); if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) @@ -285,7 +282,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) "stat 0x%x", rq->cmd[0], rq->cmd_type, err, stat); - if (rq->cmd_type == REQ_TYPE_ATA_SENSE) { + if (ata_sense_request(rq)) { /* * We got an error trying to get sense info from the drive * (probably while trying to recover from a former error). @@ -296,7 +293,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */ - if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !rq->errors) + if (blk_rq_is_scsi(rq) && !rq->errors) rq->errors = SAM_STAT_CHECK_CONDITION; if (blk_noretry_request(rq)) @@ -304,13 +301,13 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) switch (sense_key) { case NOT_READY: - if (rq->cmd_type == REQ_TYPE_FS && rq_data_dir(rq) == WRITE) { + if (req_op(rq) == REQ_OP_WRITE) { if (ide_cd_breathe(drive, rq)) return 1; } else { cdrom_saw_media_change(drive); - if (rq->cmd_type == REQ_TYPE_FS && + if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_QUIET)) printk(KERN_ERR PFX "%s: tray open\n", drive->name); @@ -320,7 +317,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) case UNIT_ATTENTION: cdrom_saw_media_change(drive); - if (rq->cmd_type != REQ_TYPE_FS) + if (blk_rq_is_passthrough(rq)) return 0; /* @@ -338,7 +335,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * * cdrom_log_sense() knows this! */ - if (rq->cmd[0] == GPCMD_START_STOP_UNIT) + if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT) break; /* fall-through */ case DATA_PROTECT: @@ -368,7 +365,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) do_end_request = 1; break; default: - if (rq->cmd_type != REQ_TYPE_FS) + if (blk_rq_is_passthrough(rq)) break; if (err & ~ATA_ABORTED) { /* go to the default handler for other errors */ @@ -379,7 +376,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) do_end_request = 1; } - if (rq->cmd_type != REQ_TYPE_FS) { + if (blk_rq_is_passthrough(rq)) { rq->rq_flags |= RQF_FAILED; do_end_request = 1; } @@ -414,7 +411,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) * Some of the trailing request sense fields are optional, * and some drives don't send them. Sigh. */ - if (rq->cmd[0] == GPCMD_REQUEST_SENSE && + if (scsi_req(rq)->cmd[0] == GPCMD_REQUEST_SENSE && cmd->nleft > 0 && cmd->nleft <= 5) cmd->nleft = 0; } @@ -425,12 +422,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, req_flags_t rq_flags) { struct cdrom_info *info = drive->driver_data; - struct request_sense local_sense; int retries = 10; - req_flags_t flags = 0; - - if (!sense) - sense = &local_sense; + bool failed; ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, " "rq_flags: 0x%x", @@ -440,12 +433,13 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, do { struct request *rq; int error; + bool delay = false; - rq = blk_get_request(drive->queue, write, __GFP_RECLAIM); - - memcpy(rq->cmd, cmd, BLK_MAX_CDB); - rq->cmd_type = REQ_TYPE_ATA_PC; - rq->sense = sense; + rq = blk_get_request(drive->queue, + write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB); + ide_req(rq)->type = ATA_PRIV_PC; rq->rq_flags |= rq_flags; rq->timeout = timeout; if (buffer) { @@ -460,21 +454,21 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, error = blk_execute_rq(drive->queue, info->disk, rq, 0); if (buffer) - *bufflen = rq->resid_len; - - flags = rq->rq_flags; - blk_put_request(rq); + *bufflen = scsi_req(rq)->resid_len; + if (sense) + memcpy(sense, scsi_req(rq)->sense, sizeof(*sense)); /* * FIXME: we should probably abort/retry or something in case of * failure. */ - if (flags & RQF_FAILED) { + failed = (rq->rq_flags & RQF_FAILED) != 0; + if (failed) { /* * The request failed. Retry if it was due to a unit * attention status (usually means media was changed). */ - struct request_sense *reqbuf = sense; + struct request_sense *reqbuf = scsi_req(rq)->sense; if (reqbuf->sense_key == UNIT_ATTENTION) cdrom_saw_media_change(drive); @@ -485,19 +479,20 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, * a disk. Retry, but wait a little to give * the drive time to complete the load. */ - ssleep(2); + delay = true; } else { /* otherwise, don't retry */ retries = 0; } --retries; } - - /* end of retry loop */ - } while ((flags & RQF_FAILED) && retries >= 0); + blk_put_request(rq); + if (delay) + ssleep(2); + } while (failed && retries >= 0); /* return an error if the command failed */ - return (flags & RQF_FAILED) ? -EIO : 0; + return failed ? -EIO : 0; } /* @@ -526,7 +521,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) ide_expiry_t *expiry = NULL; int dma_error = 0, dma, thislen, uptodate = 0; int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0; - int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE); + int sense = ata_sense_request(rq); unsigned int timeout; u16 len; u8 ireason, stat; @@ -569,7 +564,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) ide_read_bcount_and_ireason(drive, &len, &ireason); - thislen = (rq->cmd_type == REQ_TYPE_FS) ? len : cmd->nleft; + thislen = !blk_rq_is_passthrough(rq) ? len : cmd->nleft; if (thislen > len) thislen = len; @@ -578,7 +573,8 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) /* If DRQ is clear, the command has completed. */ if ((stat & ATA_DRQ) == 0) { - if (rq->cmd_type == REQ_TYPE_FS) { + switch (req_op(rq)) { + default: /* * If we're not done reading/writing, complain. * Otherwise, complete the command normally. @@ -592,7 +588,9 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) rq->rq_flags |= RQF_FAILED; uptodate = 0; } - } else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { + goto out_end; + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: ide_cd_request_sense_fixup(drive, cmd); uptodate = cmd->nleft ? 0 : 1; @@ -608,8 +606,11 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) if (!uptodate) rq->rq_flags |= RQF_FAILED; + goto out_end; + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + goto out_end; } - goto out_end; } rc = ide_check_ireason(drive, rq, len, ireason, write); @@ -636,12 +637,12 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) len -= blen; if (sense && write == 0) - rq->sense_len += blen; + scsi_req(rq)->sense_len += blen; } /* pad, if necessary */ if (len > 0) { - if (rq->cmd_type != REQ_TYPE_FS || write == 0) + if (blk_rq_is_passthrough(rq) || write == 0) ide_pad_transfer(drive, write, len); else { printk(KERN_ERR PFX "%s: confused, missing data\n", @@ -650,12 +651,18 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) } } - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { + switch (req_op(rq)) { + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: timeout = rq->timeout; - } else { + break; + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + expiry = ide_cd_expiry; + /*FALLTHRU*/ + default: timeout = ATAPI_WAIT_PC; - if (rq->cmd_type != REQ_TYPE_FS) - expiry = ide_cd_expiry; + break; } hwif->expiry = expiry; @@ -663,15 +670,15 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) return ide_started; out_end: - if (rq->cmd_type == REQ_TYPE_BLOCK_PC && rc == 0) { - rq->resid_len = 0; + if (blk_rq_is_scsi(rq) && rc == 0) { + scsi_req(rq)->resid_len = 0; blk_end_request_all(rq, 0); hwif->rq = NULL; } else { if (sense && uptodate) ide_cd_complete_failed_rq(drive, rq); - if (rq->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(rq)) { if (cmd->nleft == 0) uptodate = 1; } else { @@ -684,10 +691,10 @@ out_end: return ide_stopped; /* make sure it's fully ended */ - if (rq->cmd_type != REQ_TYPE_FS) { - rq->resid_len -= cmd->nbytes - cmd->nleft; + if (blk_rq_is_passthrough(rq)) { + scsi_req(rq)->resid_len -= cmd->nbytes - cmd->nleft; if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE)) - rq->resid_len += cmd->last_xfer_len; + scsi_req(rq)->resid_len += cmd->last_xfer_len; } ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq)); @@ -744,7 +751,7 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x", rq->cmd[0], rq->cmd_type); - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) + if (blk_rq_is_scsi(rq)) rq->rq_flags |= RQF_QUIET; else rq->rq_flags &= ~RQF_FAILED; @@ -786,25 +793,31 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, if (drive->debug_mask & IDE_DBG_RQ) blk_dump_rq_flags(rq, "ide_cd_do_request"); - switch (rq->cmd_type) { - case REQ_TYPE_FS: + switch (req_op(rq)) { + default: if (cdrom_start_rw(drive, rq) == ide_stopped) goto out_end; break; - case REQ_TYPE_ATA_SENSE: - case REQ_TYPE_BLOCK_PC: - case REQ_TYPE_ATA_PC: + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: + handle_pc: if (!rq->timeout) rq->timeout = ATAPI_WAIT_PC; - cdrom_do_block_pc(drive, rq); break; - case REQ_TYPE_DRV_PRIV: - /* right now this can only be a reset... */ - uptodate = 1; - goto out_end; - default: - BUG(); + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + switch (ide_req(rq)->type) { + case ATA_PRIV_MISC: + /* right now this can only be a reset... */ + uptodate = 1; + goto out_end; + case ATA_PRIV_SENSE: + case ATA_PRIV_PC: + goto handle_pc; + default: + BUG(); + } } /* prepare sense request for this command */ @@ -817,7 +830,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cmd.rq = rq; - if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) { + if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) { ide_init_sg_cmd(&cmd, blk_rq_bytes(rq)); ide_map_sg(drive, &cmd); } @@ -1166,7 +1179,7 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf) CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \ CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM) -static struct cdrom_device_ops ide_cdrom_dops = { +static const struct cdrom_device_ops ide_cdrom_dops = { .open = ide_cdrom_open_real, .release = ide_cdrom_release_real, .drive_status = ide_cdrom_drive_status, @@ -1312,28 +1325,29 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) int hard_sect = queue_logical_block_size(q); long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); + struct scsi_request *req = scsi_req(rq); - memset(rq->cmd, 0, BLK_MAX_CDB); + memset(req->cmd, 0, BLK_MAX_CDB); if (rq_data_dir(rq) == READ) - rq->cmd[0] = GPCMD_READ_10; + req->cmd[0] = GPCMD_READ_10; else - rq->cmd[0] = GPCMD_WRITE_10; + req->cmd[0] = GPCMD_WRITE_10; /* * fill in lba */ - rq->cmd[2] = (block >> 24) & 0xff; - rq->cmd[3] = (block >> 16) & 0xff; - rq->cmd[4] = (block >> 8) & 0xff; - rq->cmd[5] = block & 0xff; + req->cmd[2] = (block >> 24) & 0xff; + req->cmd[3] = (block >> 16) & 0xff; + req->cmd[4] = (block >> 8) & 0xff; + req->cmd[5] = block & 0xff; /* * and transfer length */ - rq->cmd[7] = (blocks >> 8) & 0xff; - rq->cmd[8] = blocks & 0xff; - rq->cmd_len = 10; + req->cmd[7] = (blocks >> 8) & 0xff; + req->cmd[8] = blocks & 0xff; + req->cmd_len = 10; return BLKPREP_OK; } @@ -1343,7 +1357,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) */ static int ide_cdrom_prep_pc(struct request *rq) { - u8 *c = rq->cmd; + u8 *c = scsi_req(rq)->cmd; /* transform 6-byte read/write commands to the 10-byte version */ if (c[0] == READ_6 || c[0] == WRITE_6) { @@ -1354,7 +1368,7 @@ static int ide_cdrom_prep_pc(struct request *rq) c[2] = 0; c[1] &= 0xe0; c[0] += (READ_10 - READ_6); - rq->cmd_len = 10; + scsi_req(rq)->cmd_len = 10; return BLKPREP_OK; } @@ -1373,9 +1387,9 @@ static int ide_cdrom_prep_pc(struct request *rq) static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) { - if (rq->cmd_type == REQ_TYPE_FS) + if (!blk_rq_is_passthrough(rq)) return ide_cdrom_prep_fs(q, rq); - else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) + else if (blk_rq_is_scsi(rq)) return ide_cdrom_prep_pc(rq); return 0; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index f085e3a..9fcefbc 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -303,8 +303,9 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) struct request *rq; int ret; - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_DRV_PRIV; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_MISC; rq->rq_flags = RQF_QUIET; ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); blk_put_request(rq); diff --git a/drivers/ide/ide-cd_verbose.c b/drivers/ide/ide-cd_verbose.c index f079ca2..58a6feb 100644 --- a/drivers/ide/ide-cd_verbose.c +++ b/drivers/ide/ide-cd_verbose.c @@ -315,12 +315,12 @@ void ide_cd_log_error(const char *name, struct request *failed_command, while (hi > lo) { mid = (lo + hi) / 2; if (packet_command_texts[mid].packet_command == - failed_command->cmd[0]) { + scsi_req(failed_command)->cmd[0]) { s = packet_command_texts[mid].text; break; } if (packet_command_texts[mid].packet_command > - failed_command->cmd[0]) + scsi_req(failed_command)->cmd[0]) hi = mid; else lo = mid + 1; @@ -329,7 +329,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command, printk(KERN_ERR " The failed \"%s\" packet command " "was: \n \"", s); for (i = 0; i < BLK_MAX_CDB; i++) - printk(KERN_CONT "%02x ", failed_command->cmd[i]); + printk(KERN_CONT "%02x ", scsi_req(failed_command)->cmd[i]); printk(KERN_CONT "\"\n"); } diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 0dd43b4..a45dda5 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -165,11 +165,12 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, if (!(setting->flags & DS_SYNC)) return setting->set(drive, arg); - rq = blk_get_request(q, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_DRV_PRIV; - rq->cmd_len = 5; - rq->cmd[0] = REQ_DEVSET_EXEC; - *(int *)&rq->cmd[1] = arg; + rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_MISC; + scsi_req(rq)->cmd_len = 5; + scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; + *(int *)&scsi_req(rq)->cmd[1] = arg; rq->special = setting->set; if (blk_execute_rq(q, NULL, rq, 0)) @@ -183,7 +184,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) { int err, (*setfunc)(ide_drive_t *, int) = rq->special; - err = setfunc(drive, *(int *)&rq->cmd[1]); + err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]); if (err) rq->errors = err; ide_complete_rq(drive, err, blk_rq_bytes(rq)); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 5ceace5..1861597 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -184,7 +184,7 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, ide_hwif_t *hwif = drive->hwif; BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); - BUG_ON(rq->cmd_type != REQ_TYPE_FS); + BUG_ON(blk_rq_is_passthrough(rq)); ledtrig_disk_activity(); @@ -452,8 +452,9 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq) cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; cmd->tf_flags = IDE_TFLAG_DYN; cmd->protocol = ATA_PROT_NODATA; - - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + rq->cmd_flags &= ~REQ_OP_MASK; + rq->cmd_flags |= REQ_OP_DRV_OUT; + ide_req(rq)->type = ATA_PRIV_TASKFILE; rq->special = cmd; cmd->rq = rq; @@ -477,8 +478,9 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) return -EBUSY; - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_TASKFILE; drive->mult_req = arg; drive->special_flags |= IDE_SFLAG_SET_MULTMODE; diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index d6da011..cf3af68 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -123,8 +123,8 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) return ide_stopped; /* retry only "normal" I/O: */ - if (rq->cmd_type != REQ_TYPE_FS) { - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { + if (blk_rq_is_passthrough(rq)) { + if (ata_taskfile_request(rq)) { struct ide_cmd *cmd = rq->special; if (cmd) @@ -147,8 +147,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) { struct request *rq = drive->hwif->rq; - if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV && - rq->cmd[0] == REQ_DRIVE_RESET) { + if (rq && ata_misc_request(rq) && + scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { if (err <= 0 && rq->errors == 0) rq->errors = -EIO; ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index f079d8d..a69e801 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -72,7 +72,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) drive->failed_pc = NULL; if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 || - rq->cmd_type == REQ_TYPE_BLOCK_PC) + (req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT)) uptodate = 1; /* FIXME */ else if (pc->c[0] == GPCMD_REQUEST_SENSE) { @@ -97,7 +97,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) "Aborting request!\n"); } - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + if (ata_misc_request(rq)) rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; return uptodate; @@ -203,7 +203,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); - memcpy(rq->cmd, pc->c, 12); + memcpy(scsi_req(rq)->cmd, pc->c, 12); pc->rq = rq; if (cmd == WRITE) @@ -216,7 +216,7 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, struct ide_atapi_pc *pc, struct request *rq) { ide_init_pc(pc); - memcpy(pc->c, rq->cmd, sizeof(pc->c)); + memcpy(pc->c, scsi_req(rq)->cmd, sizeof(pc->c)); pc->rq = rq; if (blk_rq_bytes(rq)) { pc->flags |= PC_FLAG_DMA_OK; @@ -246,7 +246,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } else printk(KERN_ERR PFX "%s: I/O error\n", drive->name); - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { + if (ata_misc_request(rq)) { rq->errors = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; @@ -254,8 +254,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } - switch (rq->cmd_type) { - case REQ_TYPE_FS: + switch (req_op(rq)) { + default: if (((long)blk_rq_pos(rq) % floppy->bs_factor) || (blk_rq_sectors(rq) % floppy->bs_factor)) { printk(KERN_ERR PFX "%s: unsupported r/w rq size\n", @@ -265,16 +265,21 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); break; - case REQ_TYPE_DRV_PRIV: - case REQ_TYPE_ATA_SENSE: - pc = (struct ide_atapi_pc *)rq->special; - break; - case REQ_TYPE_BLOCK_PC: + case REQ_OP_SCSI_IN: + case REQ_OP_SCSI_OUT: pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); break; - default: - BUG(); + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + switch (ide_req(rq)->type) { + case ATA_PRIV_MISC: + case ATA_PRIV_SENSE: + pc = (struct ide_atapi_pc *)rq->special; + break; + default: + BUG(); + } } ide_prep_sense(drive, rq); @@ -286,7 +291,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, cmd.rq = rq; - if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) { + if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) { ide_init_sg_cmd(&cmd, blk_rq_bytes(rq)); ide_map_sg(drive, &cmd); } @@ -296,7 +301,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, return ide_floppy_issue_pc(drive, &cmd, pc); out_end: drive->failed_pc = NULL; - if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0) + if (blk_rq_is_passthrough(rq) && rq->errors == 0) rq->errors = -EIO; ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); return ide_stopped; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 201e43f..043b1fb9 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -102,7 +102,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) drive->dev_flags |= IDE_DFLAG_PARKED; } - if (rq && rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { + if (rq && ata_taskfile_request(rq)) { struct ide_cmd *orig_cmd = rq->special; if (cmd->tf_flags & IDE_TFLAG_DYN) @@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq); void ide_kill_rq(ide_drive_t *drive, struct request *rq) { - u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk; + u8 drv_req = ata_misc_request(rq) && rq->rq_disk; u8 media = drive->media; drive->failed_pc = NULL; @@ -145,7 +145,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) } else { if (media == ide_tape) rq->errors = IDE_DRV_ERROR_GENERAL; - else if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0) + else if (blk_rq_is_passthrough(rq) && rq->errors == 0) rq->errors = -EIO; } @@ -279,7 +279,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) { - u8 cmd = rq->cmd[0]; + u8 cmd = scsi_req(rq)->cmd[0]; switch (cmd) { case REQ_PARK_HEADS: @@ -340,7 +340,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (drive->current_speed == 0xff) ide_config_drive_speed(drive, drive->desired_speed); - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) + if (ata_taskfile_request(rq)) return execute_drive_cmd(drive, rq); else if (ata_pm_request(rq)) { struct ide_pm_state *pm = rq->special; @@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) pm->pm_step == IDE_PM_COMPLETED) ide_complete_pm_rq(drive, rq); return startstop; - } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV) + } else if (!rq->rq_disk && ata_misc_request(rq)) /* * TODO: Once all ULDs have been modified to * check for specific op codes rather than @@ -545,6 +545,7 @@ repeat: goto plug_device; } + scsi_req(rq)->resid_len = blk_rq_bytes(rq); hwif->rq = rq; spin_unlock_irq(&hwif->lock); diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index d05db24..248a3e0 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -125,8 +125,9 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) if (NULL == (void *) arg) { struct request *rq; - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_TASKFILE; err = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); @@ -221,10 +222,11 @@ static int generic_drive_reset(ide_drive_t *drive) struct request *rq; int ret = 0; - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_DRV_PRIV; - rq->cmd_len = 1; - rq->cmd[0] = REQ_DRIVE_RESET; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_MISC; + scsi_req(rq)->cmd_len = 1; + scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; if (blk_execute_rq(drive->queue, NULL, rq, 1)) ret = rq->errors; blk_put_request(rq); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 2d7dca5..101aed9 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -31,10 +31,11 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) } spin_unlock_irq(&hwif->lock); - rq = blk_get_request(q, READ, __GFP_RECLAIM); - rq->cmd[0] = REQ_PARK_HEADS; - rq->cmd_len = 1; - rq->cmd_type = REQ_TYPE_DRV_PRIV; + rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; + scsi_req(rq)->cmd_len = 1; + ide_req(rq)->type = ATA_PRIV_MISC; rq->special = &timeout; rc = blk_execute_rq(q, NULL, rq, 1); blk_put_request(rq); @@ -45,13 +46,14 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) * Make sure that *some* command is sent to the drive after the * timeout has expired, so power management will be reenabled. */ - rq = blk_get_request(q, READ, GFP_NOWAIT); + rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT); + scsi_req_init(rq); if (IS_ERR(rq)) goto out; - rq->cmd[0] = REQ_UNPARK_HEADS; - rq->cmd_len = 1; - rq->cmd_type = REQ_TYPE_DRV_PRIV; + scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS; + scsi_req(rq)->cmd_len = 1; + ide_req(rq)->type = ATA_PRIV_MISC; elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); out: @@ -64,7 +66,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq) struct ide_taskfile *tf = &cmd.tf; memset(&cmd, 0, sizeof(cmd)); - if (rq->cmd[0] == REQ_PARK_HEADS) { + if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) { drive->sleep = *(unsigned long *)rq->special; drive->dev_flags |= IDE_DFLAG_SLEEPING; tf->command = ATA_CMD_IDLEIMMEDIATE; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index a015acd..ec951be 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -18,8 +18,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) @@ -88,8 +89,9 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_PM_RESUME; rq->rq_flags |= RQF_PREEMPT; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; @@ -221,10 +223,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) #ifdef DEBUG_PM printk("%s: completing PM request, %s\n", drive->name, - (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume"); + (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume"); #endif spin_lock_irqsave(q->queue_lock, flags); - if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) + if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) blk_stop_queue(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; @@ -240,11 +242,13 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { struct ide_pm_state *pm = rq->special; - if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND && + if (blk_rq_is_private(rq) && + ide_req(rq)->type == ATA_PRIV_PM_SUSPEND && pm->pm_step == IDE_PM_START_SUSPEND) /* Mark drive blocked when starting the suspend sequence. */ drive->dev_flags |= IDE_DFLAG_BLOCKED; - else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME && + else if (blk_rq_is_private(rq) && + ide_req(rq)->type == ATA_PRIV_PM_RESUME && pm->pm_step == IDE_PM_START_RESUME) { /* * The first thing we do on wakeup is to wait for BSY bit to diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 330e319..a74ae8df 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -741,6 +741,14 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) } } +static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) +{ + struct ide_request *req = blk_mq_rq_to_pdu(rq); + + req->sreq.sense = req->sense; + return 0; +} + /* * init request queue */ @@ -758,11 +766,18 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - - q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif)); + q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif)); if (!q) return 1; + q->request_fn = do_ide_request; + q->init_rq_fn = ide_init_rq; + q->cmd_size = sizeof(struct ide_request); + if (blk_init_allocated_queue(q) < 0) { + blk_cleanup_queue(q); + return 1; + } + q->queuedata = drive; blk_queue_segment_boundary(q, 0xffff); @@ -1131,10 +1146,12 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) ide_port_for_each_dev(i, drive, hwif) { u8 j = (hwif->index * MAX_DRIVES) + i; u16 *saved_id = drive->id; + struct request *saved_sense_rq = drive->sense_rq; memset(drive, 0, sizeof(*drive)); memset(saved_id, 0, SECTOR_SIZE); drive->id = saved_id; + drive->sense_rq = saved_sense_rq; drive->media = ide_disk; drive->select = (i << 4) | ATA_DEVICE_OBS; @@ -1241,6 +1258,7 @@ static void ide_port_free_devices(ide_hwif_t *hwif) int i; ide_port_for_each_dev(i, drive, hwif) { + kfree(drive->sense_rq); kfree(drive->id); kfree(drive); } @@ -1248,11 +1266,10 @@ static void ide_port_free_devices(ide_hwif_t *hwif) static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) { + ide_drive_t *drive; int i; for (i = 0; i < MAX_DRIVES; i++) { - ide_drive_t *drive; - drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node); if (drive == NULL) goto out_nomem; @@ -1267,12 +1284,21 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) */ drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node); if (drive->id == NULL) - goto out_nomem; + goto out_free_drive; + + drive->sense_rq = kmalloc(sizeof(struct request) + + sizeof(struct ide_request), GFP_KERNEL); + if (!drive->sense_rq) + goto out_free_id; hwif->devices[i] = drive; } return 0; +out_free_id: + kfree(drive->id); +out_free_drive: + kfree(drive); out_nomem: ide_port_free_devices(hwif); return -ENOMEM; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 9ecf4e3..3c1b7974 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -282,7 +282,7 @@ static void idetape_analyze_error(ide_drive_t *drive) /* correct remaining bytes to transfer */ if (pc->flags & PC_FLAG_DMA_ERROR) - rq->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]); + scsi_req(rq)->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]); /* * If error was the result of a zero-length read or write command, @@ -316,7 +316,7 @@ static void idetape_analyze_error(ide_drive_t *drive) pc->flags |= PC_FLAG_ABORT; } if (!(pc->flags & PC_FLAG_ABORT) && - (blk_rq_bytes(rq) - rq->resid_len)) + (blk_rq_bytes(rq) - scsi_req(rq)->resid_len)) pc->retries = IDETAPE_MAX_PC_RETRIES + 1; } } @@ -348,7 +348,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) "itself - Aborting request!\n"); } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) { unsigned int blocks = - (blk_rq_bytes(rq) - rq->resid_len) / tape->blk_size; + (blk_rq_bytes(rq) - scsi_req(rq)->resid_len) / tape->blk_size; tape->avg_size += blocks * tape->blk_size; @@ -560,7 +560,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, pc->flags |= PC_FLAG_WRITING; } - memcpy(rq->cmd, pc->c, 12); + memcpy(scsi_req(rq)->cmd, pc->c, 12); } static ide_startstop_t idetape_do_request(ide_drive_t *drive, @@ -570,14 +570,16 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, idetape_tape_t *tape = drive->driver_data; struct ide_atapi_pc *pc = NULL; struct ide_cmd cmd; + struct scsi_request *req = scsi_req(rq); u8 stat; ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, sector: %llu, nr_sectors: %u", - rq->cmd[0], (unsigned long long)blk_rq_pos(rq), + req->cmd[0], (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq)); - BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV || - rq->cmd_type == REQ_TYPE_ATA_SENSE)); + BUG_ON(!blk_rq_is_private(rq)); + BUG_ON(ide_req(rq)->type != ATA_PRIV_MISC && + ide_req(rq)->type != ATA_PRIV_SENSE); /* Retry a failed packet command */ if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) { @@ -592,7 +594,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, stat = hwif->tp_ops->read_status(hwif); if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 && - (rq->cmd[13] & REQ_IDETAPE_PC2) == 0) + (req->cmd[13] & REQ_IDETAPE_PC2) == 0) drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC; if (drive->dev_flags & IDE_DFLAG_POST_RESET) { @@ -609,7 +611,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, } else if (time_after(jiffies, tape->dsc_timeout)) { printk(KERN_ERR "ide-tape: %s: DSC timeout\n", tape->name); - if (rq->cmd[13] & REQ_IDETAPE_PC2) { + if (req->cmd[13] & REQ_IDETAPE_PC2) { idetape_media_access_finished(drive); return ide_stopped; } else { @@ -626,23 +628,23 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, tape->postponed_rq = false; } - if (rq->cmd[13] & REQ_IDETAPE_READ) { + if (req->cmd[13] & REQ_IDETAPE_READ) { pc = &tape->queued_pc; ide_tape_create_rw_cmd(tape, pc, rq, READ_6); goto out; } - if (rq->cmd[13] & REQ_IDETAPE_WRITE) { + if (req->cmd[13] & REQ_IDETAPE_WRITE) { pc = &tape->queued_pc; ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6); goto out; } - if (rq->cmd[13] & REQ_IDETAPE_PC1) { + if (req->cmd[13] & REQ_IDETAPE_PC1) { pc = (struct ide_atapi_pc *)rq->special; - rq->cmd[13] &= ~(REQ_IDETAPE_PC1); - rq->cmd[13] |= REQ_IDETAPE_PC2; + req->cmd[13] &= ~(REQ_IDETAPE_PC1); + req->cmd[13] |= REQ_IDETAPE_PC2; goto out; } - if (rq->cmd[13] & REQ_IDETAPE_PC2) { + if (req->cmd[13] & REQ_IDETAPE_PC2) { idetape_media_access_finished(drive); return ide_stopped; } @@ -852,9 +854,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); BUG_ON(size < 0 || size % tape->blk_size); - rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_DRV_PRIV; - rq->cmd[13] = cmd; + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_MISC; + scsi_req(rq)->cmd[13] = cmd; rq->rq_disk = tape->disk; rq->__sector = tape->first_frame; @@ -868,7 +871,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) blk_execute_rq(drive->queue, tape->disk, rq, 0); /* calculate the number of transferred bytes and update buffer state */ - size -= rq->resid_len; + size -= scsi_req(rq)->resid_len; tape->cur = tape->buf; if (cmd == REQ_IDETAPE_READ) tape->valid = size; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index a716693..247b9fa 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -428,10 +428,12 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, { struct request *rq; int error; - int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE; - rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM); - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + rq = blk_get_request(drive->queue, + (cmd->tf_flags & IDE_TFLAG_WRITE) ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); + scsi_req_init(rq); + ide_req(rq)->type = ATA_PRIV_TASKFILE; /* * (ks) We transfer currently only whole sectors. diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index 247853e..c3062b5 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -54,7 +54,7 @@ #define DRV_NAME "sis5513" /* registers layout and init values are chipset family dependent */ - +#undef ATA_16 #define ATA_16 0x01 #define ATA_33 0x02 #define ATA_66 0x03 diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 2f5d5f4..0527141 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -26,15 +26,6 @@ config NVM_DEBUG It is required to create/remove targets without IOCTLs. -config NVM_GENNVM - tristate "General Non-Volatile Memory Manager for Open-Channel SSDs" - ---help--- - Non-volatile memory media manager for Open-Channel SSDs that implements - physical media metadata management and block provisioning API. - - This is the standard media manager for using Open-Channel SSDs, and - required for targets to be instantiated. - config NVM_RRPC tristate "Round-robin Hybrid Open-Channel SSD target" ---help--- diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index a7a0a22..b2a39e2 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -2,6 +2,5 @@ # Makefile for Open-Channel SSDs. # -obj-$(CONFIG_NVM) := core.o sysblk.o -obj-$(CONFIG_NVM_GENNVM) += gennvm.o +obj-$(CONFIG_NVM) := core.o obj-$(CONFIG_NVM_RRPC) += rrpc.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 02240a0..5262ba6 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -29,10 +29,483 @@ static LIST_HEAD(nvm_tgt_types); static DECLARE_RWSEM(nvm_tgtt_lock); -static LIST_HEAD(nvm_mgrs); static LIST_HEAD(nvm_devices); static DECLARE_RWSEM(nvm_lock); +/* Map between virtual and physical channel and lun */ +struct nvm_ch_map { + int ch_off; + int nr_luns; + int *lun_offs; +}; + +struct nvm_dev_map { + struct nvm_ch_map *chnls; + int nr_chnls; +}; + +struct nvm_area { + struct list_head list; + sector_t begin; + sector_t end; /* end is excluded */ +}; + +static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) +{ + struct nvm_target *tgt; + + list_for_each_entry(tgt, &dev->targets, list) + if (!strcmp(name, tgt->disk->disk_name)) + return tgt; + + return NULL; +} + +static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) +{ + int i; + + for (i = lun_begin; i <= lun_end; i++) { + if (test_and_set_bit(i, dev->lun_map)) { + pr_err("nvm: lun %d already allocated\n", i); + goto err; + } + } + + return 0; +err: + while (--i > lun_begin) + clear_bit(i, dev->lun_map); + + return -EBUSY; +} + +static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, + int lun_end) +{ + int i; + + for (i = lun_begin; i <= lun_end; i++) + WARN_ON(!test_and_clear_bit(i, dev->lun_map)); +} + +static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_dev_map *dev_map = tgt_dev->map; + int i, j; + + for (i = 0; i < dev_map->nr_chnls; i++) { + struct nvm_ch_map *ch_map = &dev_map->chnls[i]; + int *lun_offs = ch_map->lun_offs; + int ch = i + ch_map->ch_off; + + for (j = 0; j < ch_map->nr_luns; j++) { + int lun = j + lun_offs[j]; + int lunid = (ch * dev->geo.luns_per_chnl) + lun; + + WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); + } + + kfree(ch_map->lun_offs); + } + + kfree(dev_map->chnls); + kfree(dev_map); + + kfree(tgt_dev->luns); + kfree(tgt_dev); +} + +static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, + int lun_begin, int lun_end) +{ + struct nvm_tgt_dev *tgt_dev = NULL; + struct nvm_dev_map *dev_rmap = dev->rmap; + struct nvm_dev_map *dev_map; + struct ppa_addr *luns; + int nr_luns = lun_end - lun_begin + 1; + int luns_left = nr_luns; + int nr_chnls = nr_luns / dev->geo.luns_per_chnl; + int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl; + int bch = lun_begin / dev->geo.luns_per_chnl; + int blun = lun_begin % dev->geo.luns_per_chnl; + int lunid = 0; + int lun_balanced = 1; + int prev_nr_luns; + int i, j; + + nr_chnls = nr_luns / dev->geo.luns_per_chnl; + nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; + + dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); + if (!dev_map) + goto err_dev; + + dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map), + GFP_KERNEL); + if (!dev_map->chnls) + goto err_chnls; + + luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); + if (!luns) + goto err_luns; + + prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ? + dev->geo.luns_per_chnl : luns_left; + for (i = 0; i < nr_chnls; i++) { + struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; + int *lun_roffs = ch_rmap->lun_offs; + struct nvm_ch_map *ch_map = &dev_map->chnls[i]; + int *lun_offs; + int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ? + dev->geo.luns_per_chnl : luns_left; + + if (lun_balanced && prev_nr_luns != luns_in_chnl) + lun_balanced = 0; + + ch_map->ch_off = ch_rmap->ch_off = bch; + ch_map->nr_luns = luns_in_chnl; + + lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); + if (!lun_offs) + goto err_ch; + + for (j = 0; j < luns_in_chnl; j++) { + luns[lunid].ppa = 0; + luns[lunid].g.ch = i; + luns[lunid++].g.lun = j; + + lun_offs[j] = blun; + lun_roffs[j + blun] = blun; + } + + ch_map->lun_offs = lun_offs; + + /* when starting a new channel, lun offset is reset */ + blun = 0; + luns_left -= luns_in_chnl; + } + + dev_map->nr_chnls = nr_chnls; + + tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); + if (!tgt_dev) + goto err_ch; + + memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); + /* Target device only owns a portion of the physical device */ + tgt_dev->geo.nr_chnls = nr_chnls; + tgt_dev->geo.nr_luns = nr_luns; + tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1; + tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; + tgt_dev->q = dev->q; + tgt_dev->map = dev_map; + tgt_dev->luns = luns; + memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id)); + + tgt_dev->parent = dev; + + return tgt_dev; +err_ch: + while (--i > 0) + kfree(dev_map->chnls[i].lun_offs); + kfree(luns); +err_luns: + kfree(dev_map->chnls); +err_chnls: + kfree(dev_map); +err_dev: + return tgt_dev; +} + +static const struct block_device_operations nvm_fops = { + .owner = THIS_MODULE, +}; + +static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) +{ + struct nvm_ioctl_create_simple *s = &create->conf.s; + struct request_queue *tqueue; + struct gendisk *tdisk; + struct nvm_tgt_type *tt; + struct nvm_target *t; + struct nvm_tgt_dev *tgt_dev; + void *targetdata; + + tt = nvm_find_target_type(create->tgttype, 1); + if (!tt) { + pr_err("nvm: target type %s not found\n", create->tgttype); + return -EINVAL; + } + + mutex_lock(&dev->mlock); + t = nvm_find_target(dev, create->tgtname); + if (t) { + pr_err("nvm: target name already exists.\n"); + mutex_unlock(&dev->mlock); + return -EINVAL; + } + mutex_unlock(&dev->mlock); + + if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end)) + return -ENOMEM; + + t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); + if (!t) + goto err_reserve; + + tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); + if (!tgt_dev) { + pr_err("nvm: could not create target device\n"); + goto err_t; + } + + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + if (!tqueue) + goto err_dev; + blk_queue_make_request(tqueue, tt->make_rq); + + tdisk = alloc_disk(0); + if (!tdisk) + goto err_queue; + + sprintf(tdisk->disk_name, "%s", create->tgtname); + tdisk->flags = GENHD_FL_EXT_DEVT; + tdisk->major = 0; + tdisk->first_minor = 0; + tdisk->fops = &nvm_fops; + tdisk->queue = tqueue; + + targetdata = tt->init(tgt_dev, tdisk); + if (IS_ERR(targetdata)) + goto err_init; + + tdisk->private_data = targetdata; + tqueue->queuedata = targetdata; + + blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + + set_capacity(tdisk, tt->capacity(targetdata)); + add_disk(tdisk); + + if (tt->sysfs_init && tt->sysfs_init(tdisk)) + goto err_sysfs; + + t->type = tt; + t->disk = tdisk; + t->dev = tgt_dev; + + mutex_lock(&dev->mlock); + list_add_tail(&t->list, &dev->targets); + mutex_unlock(&dev->mlock); + + return 0; +err_sysfs: + if (tt->exit) + tt->exit(targetdata); +err_init: + put_disk(tdisk); +err_queue: + blk_cleanup_queue(tqueue); +err_dev: + nvm_remove_tgt_dev(tgt_dev); +err_t: + kfree(t); +err_reserve: + nvm_release_luns_err(dev, s->lun_begin, s->lun_end); + return -ENOMEM; +} + +static void __nvm_remove_target(struct nvm_target *t) +{ + struct nvm_tgt_type *tt = t->type; + struct gendisk *tdisk = t->disk; + struct request_queue *q = tdisk->queue; + + del_gendisk(tdisk); + blk_cleanup_queue(q); + + if (tt->sysfs_exit) + tt->sysfs_exit(tdisk); + + if (tt->exit) + tt->exit(tdisk->private_data); + + nvm_remove_tgt_dev(t->dev); + put_disk(tdisk); + + list_del(&t->list); + kfree(t); +} + +/** + * nvm_remove_tgt - Removes a target from the media manager + * @dev: device + * @remove: ioctl structure with target name to remove. + * + * Returns: + * 0: on success + * 1: on not found + * <0: on error + */ +static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) +{ + struct nvm_target *t; + + mutex_lock(&dev->mlock); + t = nvm_find_target(dev, remove->tgtname); + if (!t) { + mutex_unlock(&dev->mlock); + return 1; + } + __nvm_remove_target(t); + mutex_unlock(&dev->mlock); + + return 0; +} + +static int nvm_register_map(struct nvm_dev *dev) +{ + struct nvm_dev_map *rmap; + int i, j; + + rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); + if (!rmap) + goto err_rmap; + + rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map), + GFP_KERNEL); + if (!rmap->chnls) + goto err_chnls; + + for (i = 0; i < dev->geo.nr_chnls; i++) { + struct nvm_ch_map *ch_rmap; + int *lun_roffs; + int luns_in_chnl = dev->geo.luns_per_chnl; + + ch_rmap = &rmap->chnls[i]; + + ch_rmap->ch_off = -1; + ch_rmap->nr_luns = luns_in_chnl; + + lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); + if (!lun_roffs) + goto err_ch; + + for (j = 0; j < luns_in_chnl; j++) + lun_roffs[j] = -1; + + ch_rmap->lun_offs = lun_roffs; + } + + dev->rmap = rmap; + + return 0; +err_ch: + while (--i >= 0) + kfree(rmap->chnls[i].lun_offs); +err_chnls: + kfree(rmap); +err_rmap: + return -ENOMEM; +} + +static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) +{ + struct nvm_dev_map *dev_map = tgt_dev->map; + struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch]; + int lun_off = ch_map->lun_offs[p->g.lun]; + + p->g.ch += ch_map->ch_off; + p->g.lun += lun_off; +} + +static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_dev_map *dev_rmap = dev->rmap; + struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; + int lun_roff = ch_rmap->lun_offs[p->g.lun]; + + p->g.ch -= ch_rmap->ch_off; + p->g.lun -= lun_roff; +} + +static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr *ppa_list, int nr_ppas) +{ + int i; + + for (i = 0; i < nr_ppas; i++) { + nvm_map_to_dev(tgt_dev, &ppa_list[i]); + ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]); + } +} + +static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr *ppa_list, int nr_ppas) +{ + int i; + + for (i = 0; i < nr_ppas; i++) { + ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]); + nvm_map_to_tgt(tgt_dev, &ppa_list[i]); + } +} + +static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) +{ + if (rqd->nr_ppas == 1) { + nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1); + return; + } + + nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas); +} + +static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) +{ + if (rqd->nr_ppas == 1) { + nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1); + return; + } + + nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); +} + +void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, + int len) +{ + struct nvm_geo *geo = &dev->geo; + struct nvm_dev_map *dev_rmap = dev->rmap; + u64 i; + + for (i = 0; i < len; i++) { + struct nvm_ch_map *ch_rmap; + int *lun_roffs; + struct ppa_addr gaddr; + u64 pba = le64_to_cpu(entries[i]); + int off; + u64 diff; + + if (!pba) + continue; + + gaddr = linear_to_generic_addr(geo, pba); + ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; + lun_roffs = ch_rmap->lun_offs; + + off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; + + diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + + (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; + + entries[i] -= cpu_to_le64(diff); + } +} +EXPORT_SYMBOL(nvm_part_to_tgt); + struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) { struct nvm_tgt_type *tmp, *tt = NULL; @@ -92,78 +565,6 @@ void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) } EXPORT_SYMBOL(nvm_dev_dma_free); -static struct nvmm_type *nvm_find_mgr_type(const char *name) -{ - struct nvmm_type *mt; - - list_for_each_entry(mt, &nvm_mgrs, list) - if (!strcmp(name, mt->name)) - return mt; - - return NULL; -} - -static struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev) -{ - struct nvmm_type *mt; - int ret; - - lockdep_assert_held(&nvm_lock); - - list_for_each_entry(mt, &nvm_mgrs, list) { - if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN)) - continue; - - ret = mt->register_mgr(dev); - if (ret < 0) { - pr_err("nvm: media mgr failed to init (%d) on dev %s\n", - ret, dev->name); - return NULL; /* initialization failed */ - } else if (ret > 0) - return mt; - } - - return NULL; -} - -int nvm_register_mgr(struct nvmm_type *mt) -{ - struct nvm_dev *dev; - int ret = 0; - - down_write(&nvm_lock); - if (nvm_find_mgr_type(mt->name)) { - ret = -EEXIST; - goto finish; - } else { - list_add(&mt->list, &nvm_mgrs); - } - - /* try to register media mgr if any device have none configured */ - list_for_each_entry(dev, &nvm_devices, devices) { - if (dev->mt) - continue; - - dev->mt = nvm_init_mgr(dev); - } -finish: - up_write(&nvm_lock); - - return ret; -} -EXPORT_SYMBOL(nvm_register_mgr); - -void nvm_unregister_mgr(struct nvmm_type *mt) -{ - if (!mt) - return; - - down_write(&nvm_lock); - list_del(&mt->list); - up_write(&nvm_lock); -} -EXPORT_SYMBOL(nvm_unregister_mgr); - static struct nvm_dev *nvm_find_nvm_dev(const char *name) { struct nvm_dev *dev; @@ -175,53 +576,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) return NULL; } -static void nvm_tgt_generic_to_addr_mode(struct nvm_tgt_dev *tgt_dev, - struct nvm_rq *rqd) -{ - struct nvm_dev *dev = tgt_dev->parent; - int i; - - if (rqd->nr_ppas > 1) { - for (i = 0; i < rqd->nr_ppas; i++) { - rqd->ppa_list[i] = dev->mt->trans_ppa(tgt_dev, - rqd->ppa_list[i], TRANS_TGT_TO_DEV); - rqd->ppa_list[i] = generic_to_dev_addr(dev, - rqd->ppa_list[i]); - } - } else { - rqd->ppa_addr = dev->mt->trans_ppa(tgt_dev, rqd->ppa_addr, - TRANS_TGT_TO_DEV); - rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); - } -} - -int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, - int type) -{ - struct nvm_rq rqd; - int ret; - - if (nr_ppas > dev->ops->max_phys_sect) { - pr_err("nvm: unable to update all sysblocks atomically\n"); - return -EINVAL; - } - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); - nvm_generic_to_addr_mode(dev, &rqd); - - ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); - nvm_free_rqd_ppalist(dev, &rqd); - if (ret) { - pr_err("nvm: sysblk failed bb mark\n"); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL(nvm_set_bb_tbl); - int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas, int type) { @@ -237,12 +591,12 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, memset(&rqd, 0, sizeof(struct nvm_rq)); nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); - nvm_tgt_generic_to_addr_mode(tgt_dev, &rqd); + nvm_rq_tgt_to_dev(tgt_dev, &rqd); ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); nvm_free_rqd_ppalist(dev, &rqd); if (ret) { - pr_err("nvm: sysblk failed bb mark\n"); + pr_err("nvm: failed bb mark\n"); return -EINVAL; } @@ -262,15 +616,42 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { struct nvm_dev *dev = tgt_dev->parent; - return dev->mt->submit_io(tgt_dev, rqd); + if (!dev->ops->submit_io) + return -ENODEV; + + nvm_rq_tgt_to_dev(tgt_dev, rqd); + + rqd->dev = tgt_dev; + return dev->ops->submit_io(dev, rqd); } EXPORT_SYMBOL(nvm_submit_io); -int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags) +int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags) { struct nvm_dev *dev = tgt_dev->parent; + struct nvm_rq rqd; + int ret; + + if (!dev->ops->erase_block) + return 0; + + nvm_map_to_dev(tgt_dev, ppas); + + memset(&rqd, 0, sizeof(struct nvm_rq)); + + ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1); + if (ret) + return ret; + + nvm_rq_tgt_to_dev(tgt_dev, &rqd); + + rqd.flags = flags; + + ret = dev->ops->erase_block(dev, &rqd); - return dev->mt->erase_blk(tgt_dev, p, flags); + nvm_free_rqd_ppalist(dev, &rqd); + + return ret; } EXPORT_SYMBOL(nvm_erase_blk); @@ -289,46 +670,67 @@ EXPORT_SYMBOL(nvm_get_l2p_tbl); int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) { struct nvm_dev *dev = tgt_dev->parent; + struct nvm_geo *geo = &dev->geo; + struct nvm_area *area, *prev, *next; + sector_t begin = 0; + sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9; - return dev->mt->get_area(dev, lba, len); -} -EXPORT_SYMBOL(nvm_get_area); + if (len > max_sectors) + return -EINVAL; -void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t lba) -{ - struct nvm_dev *dev = tgt_dev->parent; + area = kmalloc(sizeof(struct nvm_area), GFP_KERNEL); + if (!area) + return -ENOMEM; - dev->mt->put_area(dev, lba); -} -EXPORT_SYMBOL(nvm_put_area); + prev = NULL; -void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - int i; + spin_lock(&dev->lock); + list_for_each_entry(next, &dev->area_list, list) { + if (begin + len > next->begin) { + begin = next->end; + prev = next; + continue; + } + break; + } - if (rqd->nr_ppas > 1) { - for (i = 0; i < rqd->nr_ppas; i++) - rqd->ppa_list[i] = dev_to_generic_addr(dev, - rqd->ppa_list[i]); - } else { - rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr); + if ((begin + len) > max_sectors) { + spin_unlock(&dev->lock); + kfree(area); + return -EINVAL; } + + area->begin = *lba = begin; + area->end = begin + len; + + if (prev) /* insert into sorted order */ + list_add(&area->list, &prev->list); + else + list_add(&area->list, &dev->area_list); + spin_unlock(&dev->lock); + + return 0; } -EXPORT_SYMBOL(nvm_addr_to_generic_mode); +EXPORT_SYMBOL(nvm_get_area); -void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) +void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) { - int i; + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_area *area; - if (rqd->nr_ppas > 1) { - for (i = 0; i < rqd->nr_ppas; i++) - rqd->ppa_list[i] = generic_to_dev_addr(dev, - rqd->ppa_list[i]); - } else { - rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); + spin_lock(&dev->lock); + list_for_each_entry(area, &dev->area_list, list) { + if (area->begin != begin) + continue; + + list_del(&area->list); + spin_unlock(&dev->lock); + kfree(area); + return; } + spin_unlock(&dev->lock); } -EXPORT_SYMBOL(nvm_generic_to_addr_mode); +EXPORT_SYMBOL(nvm_put_area); int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, const struct ppa_addr *ppas, int nr_ppas, int vblk) @@ -380,149 +782,19 @@ void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_free_rqd_ppalist); -int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, - int flags) +void nvm_end_io(struct nvm_rq *rqd) { - struct nvm_rq rqd; - int ret; + struct nvm_tgt_dev *tgt_dev = rqd->dev; - if (!dev->ops->erase_block) - return 0; + /* Convert address space */ + if (tgt_dev) + nvm_rq_dev_to_tgt(tgt_dev, rqd); - memset(&rqd, 0, sizeof(struct nvm_rq)); - - ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); - if (ret) - return ret; - - nvm_generic_to_addr_mode(dev, &rqd); - - rqd.flags = flags; - - ret = dev->ops->erase_block(dev, &rqd); - - nvm_free_rqd_ppalist(dev, &rqd); - - return ret; -} -EXPORT_SYMBOL(nvm_erase_ppa); - -void nvm_end_io(struct nvm_rq *rqd, int error) -{ - rqd->error = error; - rqd->end_io(rqd); + if (rqd->end_io) + rqd->end_io(rqd); } EXPORT_SYMBOL(nvm_end_io); -static void nvm_end_io_sync(struct nvm_rq *rqd) -{ - struct completion *waiting = rqd->wait; - - rqd->wait = NULL; - - complete(waiting); -} - -static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, - int flags, void *buf, int len) -{ - DECLARE_COMPLETION_ONSTACK(wait); - struct bio *bio; - int ret; - unsigned long hang_check; - - bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL); - if (IS_ERR_OR_NULL(bio)) - return -ENOMEM; - - nvm_generic_to_addr_mode(dev, rqd); - - rqd->dev = NULL; - rqd->opcode = opcode; - rqd->flags = flags; - rqd->bio = bio; - rqd->wait = &wait; - rqd->end_io = nvm_end_io_sync; - - ret = dev->ops->submit_io(dev, rqd); - if (ret) { - bio_put(bio); - return ret; - } - - /* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; - if (hang_check) - while (!wait_for_completion_io_timeout(&wait, - hang_check * (HZ/2))) - ; - else - wait_for_completion_io(&wait); - - return rqd->error; -} - -/** - * nvm_submit_ppa_list - submit user-defined ppa list to device. The user must - * take to free ppa list if necessary. - * @dev: device - * @ppa_list: user created ppa_list - * @nr_ppas: length of ppa_list - * @opcode: device opcode - * @flags: device flags - * @buf: data buffer - * @len: data buffer length - */ -int nvm_submit_ppa_list(struct nvm_dev *dev, struct ppa_addr *ppa_list, - int nr_ppas, int opcode, int flags, void *buf, int len) -{ - struct nvm_rq rqd; - - if (dev->ops->max_phys_sect < nr_ppas) - return -EINVAL; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - rqd.nr_ppas = nr_ppas; - if (nr_ppas > 1) - rqd.ppa_list = ppa_list; - else - rqd.ppa_addr = ppa_list[0]; - - return __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); -} -EXPORT_SYMBOL(nvm_submit_ppa_list); - -/** - * nvm_submit_ppa - submit PPAs to device. PPAs will automatically be unfolded - * as single, dual, quad plane PPAs depending on device type. - * @dev: device - * @ppa: user created ppa_list - * @nr_ppas: length of ppa_list - * @opcode: device opcode - * @flags: device flags - * @buf: data buffer - * @len: data buffer length - */ -int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, - int opcode, int flags, void *buf, int len) -{ - struct nvm_rq rqd; - int ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas, 1); - if (ret) - return ret; - - ret = __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); - - nvm_free_rqd_ppalist(dev, &rqd); - - return ret; -} -EXPORT_SYMBOL(nvm_submit_ppa); - /* * folds a bad block list from its plane representation to its virtual * block representation. The fold is done in place and reduced size is @@ -559,21 +831,14 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) } EXPORT_SYMBOL(nvm_bb_tbl_fold); -int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks) -{ - ppa = generic_to_dev_addr(dev, ppa); - - return dev->ops->get_bb_tbl(dev, ppa, blks); -} -EXPORT_SYMBOL(nvm_get_bb_tbl); - int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, u8 *blks) { struct nvm_dev *dev = tgt_dev->parent; - ppa = dev->mt->trans_ppa(tgt_dev, ppa, TRANS_TGT_TO_DEV); - return nvm_get_bb_tbl(dev, ppa, blks); + nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); + + return dev->ops->get_bb_tbl(dev, ppa, blks); } EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); @@ -627,7 +892,7 @@ static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) static int nvm_core_init(struct nvm_dev *dev) { struct nvm_id *id = &dev->identity; - struct nvm_id_group *grp = &id->groups[0]; + struct nvm_id_group *grp = &id->grp; struct nvm_geo *geo = &dev->geo; int ret; @@ -691,36 +956,31 @@ static int nvm_core_init(struct nvm_dev *dev) goto err_fmtype; } + INIT_LIST_HEAD(&dev->area_list); + INIT_LIST_HEAD(&dev->targets); mutex_init(&dev->mlock); spin_lock_init(&dev->lock); - blk_queue_logical_block_size(dev->q, geo->sec_size); + ret = nvm_register_map(dev); + if (ret) + goto err_fmtype; + blk_queue_logical_block_size(dev->q, geo->sec_size); return 0; err_fmtype: kfree(dev->lun_map); return ret; } -static void nvm_free_mgr(struct nvm_dev *dev) -{ - if (!dev->mt) - return; - - dev->mt->unregister_mgr(dev); - dev->mt = NULL; -} - void nvm_free(struct nvm_dev *dev) { if (!dev) return; - nvm_free_mgr(dev); - if (dev->dma_pool) dev->ops->destroy_dma_pool(dev->dma_pool); + kfree(dev->rmap); kfree(dev->lptbl); kfree(dev->lun_map); kfree(dev); @@ -731,28 +991,19 @@ static int nvm_init(struct nvm_dev *dev) struct nvm_geo *geo = &dev->geo; int ret = -EINVAL; - if (!dev->q || !dev->ops) - return ret; - if (dev->ops->identity(dev, &dev->identity)) { pr_err("nvm: device could not be identified\n"); goto err; } - pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", - dev->identity.ver_id, dev->identity.vmnt, - dev->identity.cgrps); + pr_debug("nvm: ver:%x nvm_vendor:%x\n", + dev->identity.ver_id, dev->identity.vmnt); if (dev->identity.ver_id != 1) { pr_err("nvm: device not supported by kernel."); goto err; } - if (dev->identity.cgrps != 1) { - pr_err("nvm: only one group configuration supported."); - goto err; - } - ret = nvm_core_init(dev); if (ret) { pr_err("nvm: could not initialize core structures.\n"); @@ -779,49 +1030,50 @@ int nvm_register(struct nvm_dev *dev) { int ret; - ret = nvm_init(dev); - if (ret) - goto err_init; + if (!dev->q || !dev->ops) + return -EINVAL; if (dev->ops->max_phys_sect > 256) { pr_info("nvm: max sectors supported is 256.\n"); - ret = -EINVAL; - goto err_init; + return -EINVAL; } if (dev->ops->max_phys_sect > 1) { dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); if (!dev->dma_pool) { pr_err("nvm: could not create dma pool\n"); - ret = -ENOMEM; - goto err_init; + return -ENOMEM; } } - if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { - ret = nvm_get_sysblock(dev, &dev->sb); - if (!ret) - pr_err("nvm: device not initialized.\n"); - else if (ret < 0) - pr_err("nvm: err (%d) on device initialization\n", ret); - } + ret = nvm_init(dev); + if (ret) + goto err_init; /* register device with a supported media manager */ down_write(&nvm_lock); - if (ret > 0) - dev->mt = nvm_init_mgr(dev); list_add(&dev->devices, &nvm_devices); up_write(&nvm_lock); return 0; err_init: - kfree(dev->lun_map); + dev->ops->destroy_dma_pool(dev->dma_pool); return ret; } EXPORT_SYMBOL(nvm_register); void nvm_unregister(struct nvm_dev *dev) { + struct nvm_target *t, *tmp; + + mutex_lock(&dev->mlock); + list_for_each_entry_safe(t, tmp, &dev->targets, list) { + if (t->dev->parent != dev) + continue; + __nvm_remove_target(t); + } + mutex_unlock(&dev->mlock); + down_write(&nvm_lock); list_del(&dev->devices); up_write(&nvm_lock); @@ -844,24 +1096,24 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - if (!dev->mt) { - pr_info("nvm: device has no media manager registered.\n"); - return -ENODEV; - } - if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { pr_err("nvm: config type not valid\n"); return -EINVAL; } s = &create->conf.s; - if (s->lun_begin > s->lun_end || s->lun_end > dev->geo.nr_luns) { + if (s->lun_begin == -1 && s->lun_end == -1) { + s->lun_begin = 0; + s->lun_end = dev->geo.nr_luns - 1; + } + + if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.nr_luns) { pr_err("nvm: lun out of bound (%u:%u > %u)\n", - s->lun_begin, s->lun_end, dev->geo.nr_luns); + s->lun_begin, s->lun_end, dev->geo.nr_luns - 1); return -EINVAL; } - return dev->mt->create_tgt(dev, create); + return nvm_create_tgt(dev, create); } static long nvm_ioctl_info(struct file *file, void __user *arg) @@ -923,16 +1175,14 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) struct nvm_ioctl_device_info *info = &devices->info[i]; sprintf(info->devname, "%s", dev->name); - if (dev->mt) { - info->bmversion[0] = dev->mt->version[0]; - info->bmversion[1] = dev->mt->version[1]; - info->bmversion[2] = dev->mt->version[2]; - sprintf(info->bmname, "%s", dev->mt->name); - } else { - sprintf(info->bmname, "none"); - } + /* kept for compatibility */ + info->bmversion[0] = 1; + info->bmversion[1] = 0; + info->bmversion[2] = 0; + sprintf(info->bmname, "%s", "gennvm"); i++; + if (i > 31) { pr_err("nvm: max 31 devices can be reported.\n"); break; @@ -994,7 +1244,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) } list_for_each_entry(dev, &nvm_devices, devices) { - ret = dev->mt->remove_tgt(dev, &remove); + ret = nvm_remove_tgt(dev, &remove); if (!ret) break; } @@ -1002,47 +1252,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) return ret; } -static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) -{ - info->seqnr = 1; - info->erase_cnt = 0; - info->version = 1; -} - -static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init) -{ - struct nvm_dev *dev; - struct nvm_sb_info info; - int ret; - - down_write(&nvm_lock); - dev = nvm_find_nvm_dev(init->dev); - up_write(&nvm_lock); - if (!dev) { - pr_err("nvm: device not found\n"); - return -EINVAL; - } - - nvm_setup_nvm_sb_info(&info); - - strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN); - info.fs_ppa.ppa = -1; - - if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { - ret = nvm_init_sysblock(dev, &info); - if (ret) - return ret; - } - - memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info)); - - down_write(&nvm_lock); - dev->mt = nvm_init_mgr(dev); - up_write(&nvm_lock); - - return 0; -} - +/* kept for compatibility reasons */ static long nvm_ioctl_dev_init(struct file *file, void __user *arg) { struct nvm_ioctl_dev_init init; @@ -1058,15 +1268,13 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg) return -EINVAL; } - init.dev[DISK_NAME_LEN - 1] = '\0'; - - return __nvm_ioctl_dev_init(&init); + return 0; } +/* Kept for compatibility reasons */ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) { struct nvm_ioctl_dev_factory fact; - struct nvm_dev *dev; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1079,19 +1287,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) return -EINVAL; - down_write(&nvm_lock); - dev = nvm_find_nvm_dev(fact.dev); - up_write(&nvm_lock); - if (!dev) { - pr_err("nvm: device not found\n"); - return -EINVAL; - } - - nvm_free_mgr(dev); - - if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) - return nvm_dev_factory(dev, fact.flags); - return 0; } diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c deleted file mode 100644 index ca78800..0000000 --- a/drivers/lightnvm/gennvm.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * Copyright (C) 2015 Matias Bjorling <m@bjorling.me> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, - * USA. - * - * Implementation of a general nvm manager for Open-Channel SSDs. - */ - -#include "gennvm.h" - -static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name) -{ - struct nvm_target *tgt; - - list_for_each_entry(tgt, &gn->targets, list) - if (!strcmp(name, tgt->disk->disk_name)) - return tgt; - - return NULL; -} - -static const struct block_device_operations gen_fops = { - .owner = THIS_MODULE, -}; - -static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t, - int lun_begin, int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) { - if (test_and_set_bit(i, dev->lun_map)) { - pr_err("nvm: lun %d already allocated\n", i); - goto err; - } - } - - return 0; - -err: - while (--i > lun_begin) - clear_bit(i, dev->lun_map); - - return -EBUSY; -} - -static void gen_release_luns_err(struct nvm_dev *dev, int lun_begin, - int lun_end) -{ - int i; - - for (i = lun_begin; i <= lun_end; i++) - WARN_ON(!test_and_clear_bit(i, dev->lun_map)); -} - -static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct gen_dev_map *dev_map = tgt_dev->map; - int i, j; - - for (i = 0; i < dev_map->nr_chnls; i++) { - struct gen_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs = ch_map->lun_offs; - int ch = i + ch_map->ch_off; - - for (j = 0; j < ch_map->nr_luns; j++) { - int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.luns_per_chnl) + lun; - - WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); - } - - kfree(ch_map->lun_offs); - } - - kfree(dev_map->chnls); - kfree(dev_map); - kfree(tgt_dev->luns); - kfree(tgt_dev); -} - -static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev, - int lun_begin, int lun_end) -{ - struct nvm_tgt_dev *tgt_dev = NULL; - struct gen_dev_map *dev_rmap = dev->rmap; - struct gen_dev_map *dev_map; - struct ppa_addr *luns; - int nr_luns = lun_end - lun_begin + 1; - int luns_left = nr_luns; - int nr_chnls = nr_luns / dev->geo.luns_per_chnl; - int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl; - int bch = lun_begin / dev->geo.luns_per_chnl; - int blun = lun_begin % dev->geo.luns_per_chnl; - int lunid = 0; - int lun_balanced = 1; - int prev_nr_luns; - int i, j; - - nr_chnls = nr_luns / dev->geo.luns_per_chnl; - nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; - - dev_map = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL); - if (!dev_map) - goto err_dev; - - dev_map->chnls = kcalloc(nr_chnls, sizeof(struct gen_ch_map), - GFP_KERNEL); - if (!dev_map->chnls) - goto err_chnls; - - luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); - if (!luns) - goto err_luns; - - prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ? - dev->geo.luns_per_chnl : luns_left; - for (i = 0; i < nr_chnls; i++) { - struct gen_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; - int *lun_roffs = ch_rmap->lun_offs; - struct gen_ch_map *ch_map = &dev_map->chnls[i]; - int *lun_offs; - int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ? - dev->geo.luns_per_chnl : luns_left; - - if (lun_balanced && prev_nr_luns != luns_in_chnl) - lun_balanced = 0; - - ch_map->ch_off = ch_rmap->ch_off = bch; - ch_map->nr_luns = luns_in_chnl; - - lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_offs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) { - luns[lunid].ppa = 0; - luns[lunid].g.ch = i; - luns[lunid++].g.lun = j; - - lun_offs[j] = blun; - lun_roffs[j + blun] = blun; - } - - ch_map->lun_offs = lun_offs; - - /* when starting a new channel, lun offset is reset */ - blun = 0; - luns_left -= luns_in_chnl; - } - - dev_map->nr_chnls = nr_chnls; - - tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); - if (!tgt_dev) - goto err_ch; - - memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); - /* Target device only owns a portion of the physical device */ - tgt_dev->geo.nr_chnls = nr_chnls; - tgt_dev->geo.nr_luns = nr_luns; - tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1; - tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; - tgt_dev->q = dev->q; - tgt_dev->map = dev_map; - tgt_dev->luns = luns; - memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id)); - - tgt_dev->parent = dev; - - return tgt_dev; -err_ch: - while (--i > 0) - kfree(dev_map->chnls[i].lun_offs); - kfree(luns); -err_luns: - kfree(dev_map->chnls); -err_chnls: - kfree(dev_map); -err_dev: - return tgt_dev; -} - -static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) -{ - struct gen_dev *gn = dev->mp; - struct nvm_ioctl_create_simple *s = &create->conf.s; - struct request_queue *tqueue; - struct gendisk *tdisk; - struct nvm_tgt_type *tt; - struct nvm_target *t; - struct nvm_tgt_dev *tgt_dev; - void *targetdata; - - tt = nvm_find_target_type(create->tgttype, 1); - if (!tt) { - pr_err("nvm: target type %s not found\n", create->tgttype); - return -EINVAL; - } - - mutex_lock(&gn->lock); - t = gen_find_target(gn, create->tgtname); - if (t) { - pr_err("nvm: target name already exists.\n"); - mutex_unlock(&gn->lock); - return -EINVAL; - } - mutex_unlock(&gn->lock); - - t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); - if (!t) - return -ENOMEM; - - if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end)) - goto err_t; - - tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end); - if (!tgt_dev) { - pr_err("nvm: could not create target device\n"); - goto err_reserve; - } - - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); - if (!tqueue) - goto err_dev; - blk_queue_make_request(tqueue, tt->make_rq); - - tdisk = alloc_disk(0); - if (!tdisk) - goto err_queue; - - sprintf(tdisk->disk_name, "%s", create->tgtname); - tdisk->flags = GENHD_FL_EXT_DEVT; - tdisk->major = 0; - tdisk->first_minor = 0; - tdisk->fops = &gen_fops; - tdisk->queue = tqueue; - - targetdata = tt->init(tgt_dev, tdisk); - if (IS_ERR(targetdata)) - goto err_init; - - tdisk->private_data = targetdata; - tqueue->queuedata = targetdata; - - blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); - - set_capacity(tdisk, tt->capacity(targetdata)); - add_disk(tdisk); - - t->type = tt; - t->disk = tdisk; - t->dev = tgt_dev; - - mutex_lock(&gn->lock); - list_add_tail(&t->list, &gn->targets); - mutex_unlock(&gn->lock); - - return 0; -err_init: - put_disk(tdisk); -err_queue: - blk_cleanup_queue(tqueue); -err_dev: - kfree(tgt_dev); -err_reserve: - gen_release_luns_err(dev, s->lun_begin, s->lun_end); -err_t: - kfree(t); - return -ENOMEM; -} - -static void __gen_remove_target(struct nvm_target *t) -{ - struct nvm_tgt_type *tt = t->type; - struct gendisk *tdisk = t->disk; - struct request_queue *q = tdisk->queue; - - del_gendisk(tdisk); - blk_cleanup_queue(q); - - if (tt->exit) - tt->exit(tdisk->private_data); - - gen_remove_tgt_dev(t->dev); - put_disk(tdisk); - - list_del(&t->list); - kfree(t); -} - -/** - * gen_remove_tgt - Removes a target from the media manager - * @dev: device - * @remove: ioctl structure with target name to remove. - * - * Returns: - * 0: on success - * 1: on not found - * <0: on error - */ -static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) -{ - struct gen_dev *gn = dev->mp; - struct nvm_target *t; - - if (!gn) - return 1; - - mutex_lock(&gn->lock); - t = gen_find_target(gn, remove->tgtname); - if (!t) { - mutex_unlock(&gn->lock); - return 1; - } - __gen_remove_target(t); - mutex_unlock(&gn->lock); - - return 0; -} - -static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) -{ - struct nvm_geo *geo = &dev->geo; - struct gen_dev *gn = dev->mp; - struct gen_area *area, *prev, *next; - sector_t begin = 0; - sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9; - - if (len > max_sectors) - return -EINVAL; - - area = kmalloc(sizeof(struct gen_area), GFP_KERNEL); - if (!area) - return -ENOMEM; - - prev = NULL; - - spin_lock(&dev->lock); - list_for_each_entry(next, &gn->area_list, list) { - if (begin + len > next->begin) { - begin = next->end; - prev = next; - continue; - } - break; - } - - if ((begin + len) > max_sectors) { - spin_unlock(&dev->lock); - kfree(area); - return -EINVAL; - } - - area->begin = *lba = begin; - area->end = begin + len; - - if (prev) /* insert into sorted order */ - list_add(&area->list, &prev->list); - else - list_add(&area->list, &gn->area_list); - spin_unlock(&dev->lock); - - return 0; -} - -static void gen_put_area(struct nvm_dev *dev, sector_t begin) -{ - struct gen_dev *gn = dev->mp; - struct gen_area *area; - - spin_lock(&dev->lock); - list_for_each_entry(area, &gn->area_list, list) { - if (area->begin != begin) - continue; - - list_del(&area->list); - spin_unlock(&dev->lock); - kfree(area); - return; - } - spin_unlock(&dev->lock); -} - -static void gen_free(struct nvm_dev *dev) -{ - kfree(dev->mp); - kfree(dev->rmap); - dev->mp = NULL; -} - -static int gen_register(struct nvm_dev *dev) -{ - struct gen_dev *gn; - struct gen_dev_map *dev_rmap; - int i, j; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL); - if (!gn) - goto err_gn; - - dev_rmap = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL); - if (!dev_rmap) - goto err_rmap; - - dev_rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct gen_ch_map), - GFP_KERNEL); - if (!dev_rmap->chnls) - goto err_chnls; - - for (i = 0; i < dev->geo.nr_chnls; i++) { - struct gen_ch_map *ch_rmap; - int *lun_roffs; - int luns_in_chnl = dev->geo.luns_per_chnl; - - ch_rmap = &dev_rmap->chnls[i]; - - ch_rmap->ch_off = -1; - ch_rmap->nr_luns = luns_in_chnl; - - lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); - if (!lun_roffs) - goto err_ch; - - for (j = 0; j < luns_in_chnl; j++) - lun_roffs[j] = -1; - - ch_rmap->lun_offs = lun_roffs; - } - - gn->dev = dev; - gn->nr_luns = dev->geo.nr_luns; - INIT_LIST_HEAD(&gn->area_list); - mutex_init(&gn->lock); - INIT_LIST_HEAD(&gn->targets); - dev->mp = gn; - dev->rmap = dev_rmap; - - return 1; -err_ch: - while (--i >= 0) - kfree(dev_rmap->chnls[i].lun_offs); -err_chnls: - kfree(dev_rmap); -err_rmap: - gen_free(dev); -err_gn: - module_put(THIS_MODULE); - return -ENOMEM; -} - -static void gen_unregister(struct nvm_dev *dev) -{ - struct gen_dev *gn = dev->mp; - struct nvm_target *t, *tmp; - - mutex_lock(&gn->lock); - list_for_each_entry_safe(t, tmp, &gn->targets, list) { - if (t->dev->parent != dev) - continue; - __gen_remove_target(t); - } - mutex_unlock(&gn->lock); - - gen_free(dev); - module_put(THIS_MODULE); -} - -static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct gen_dev_map *dev_map = tgt_dev->map; - struct gen_ch_map *ch_map = &dev_map->chnls[p->g.ch]; - int lun_off = ch_map->lun_offs[p->g.lun]; - struct nvm_dev *dev = tgt_dev->parent; - struct gen_dev_map *dev_rmap = dev->rmap; - struct gen_ch_map *ch_rmap; - int lun_roff; - - p->g.ch += ch_map->ch_off; - p->g.lun += lun_off; - - ch_rmap = &dev_rmap->chnls[p->g.ch]; - lun_roff = ch_rmap->lun_offs[p->g.lun]; - - if (unlikely(ch_rmap->ch_off < 0 || lun_roff < 0)) { - pr_err("nvm: corrupted device partition table\n"); - return -EINVAL; - } - - return 0; -} - -static int gen_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct gen_dev_map *dev_rmap = dev->rmap; - struct gen_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; - int lun_roff = ch_rmap->lun_offs[p->g.lun]; - - p->g.ch -= ch_rmap->ch_off; - p->g.lun -= lun_roff; - - return 0; -} - -static int gen_trans_rq(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - int flag) -{ - gen_trans_fn *f; - int i; - int ret = 0; - - f = (flag == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt; - - if (rqd->nr_ppas == 1) - return f(tgt_dev, &rqd->ppa_addr); - - for (i = 0; i < rqd->nr_ppas; i++) { - ret = f(tgt_dev, &rqd->ppa_list[i]); - if (ret) - goto out; - } - -out: - return ret; -} - -static void gen_end_io(struct nvm_rq *rqd) -{ - struct nvm_tgt_dev *tgt_dev = rqd->dev; - struct nvm_tgt_instance *ins = rqd->ins; - - /* Convert address space */ - if (tgt_dev) - gen_trans_rq(tgt_dev, rqd, TRANS_DEV_TO_TGT); - - ins->tt->end_io(rqd); -} - -static int gen_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - struct nvm_dev *dev = tgt_dev->parent; - - if (!dev->ops->submit_io) - return -ENODEV; - - /* Convert address space */ - gen_trans_rq(tgt_dev, rqd, TRANS_TGT_TO_DEV); - nvm_generic_to_addr_mode(dev, rqd); - - rqd->dev = tgt_dev; - rqd->end_io = gen_end_io; - return dev->ops->submit_io(dev, rqd); -} - -static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, - int flags) -{ - /* Convert address space */ - gen_map_to_dev(tgt_dev, p); - - return nvm_erase_ppa(tgt_dev->parent, p, 1, flags); -} - -static struct ppa_addr gen_trans_ppa(struct nvm_tgt_dev *tgt_dev, - struct ppa_addr p, int direction) -{ - gen_trans_fn *f; - struct ppa_addr ppa = p; - - f = (direction == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt; - f(tgt_dev, &ppa); - - return ppa; -} - -static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries, - int len) -{ - struct nvm_geo *geo = &dev->geo; - struct gen_dev_map *dev_rmap = dev->rmap; - u64 i; - - for (i = 0; i < len; i++) { - struct gen_ch_map *ch_rmap; - int *lun_roffs; - struct ppa_addr gaddr; - u64 pba = le64_to_cpu(entries[i]); - int off; - u64 diff; - - if (!pba) - continue; - - gaddr = linear_to_generic_addr(geo, pba); - ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; - lun_roffs = ch_rmap->lun_offs; - - off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; - - diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + - (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; - - entries[i] -= cpu_to_le64(diff); - } -} - -static struct nvmm_type gen = { - .name = "gennvm", - .version = {0, 1, 0}, - - .register_mgr = gen_register, - .unregister_mgr = gen_unregister, - - .create_tgt = gen_create_tgt, - .remove_tgt = gen_remove_tgt, - - .submit_io = gen_submit_io, - .erase_blk = gen_erase_blk, - - .get_area = gen_get_area, - .put_area = gen_put_area, - - .trans_ppa = gen_trans_ppa, - .part_to_tgt = gen_part_to_tgt, -}; - -static int __init gen_module_init(void) -{ - return nvm_register_mgr(&gen); -} - -static void gen_module_exit(void) -{ - nvm_unregister_mgr(&gen); -} - -module_init(gen_module_init); -module_exit(gen_module_exit); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("General media manager for Open-Channel SSDs"); diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h deleted file mode 100644 index 6a4b3f3..0000000 --- a/drivers/lightnvm/gennvm.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright: Matias Bjorling <mb@bjorling.me> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - */ - -#ifndef GENNVM_H_ -#define GENNVM_H_ - -#include <linux/module.h> -#include <linux/vmalloc.h> - -#include <linux/lightnvm.h> - -struct gen_dev { - struct nvm_dev *dev; - - int nr_luns; - struct list_head area_list; - - struct mutex lock; - struct list_head targets; -}; - -/* Map between virtual and physical channel and lun */ -struct gen_ch_map { - int ch_off; - int nr_luns; - int *lun_offs; -}; - -struct gen_dev_map { - struct gen_ch_map *chnls; - int nr_chnls; -}; - -struct gen_area { - struct list_head list; - sector_t begin; - sector_t end; /* end is excluded */ -}; - -static inline void *ch_map_to_lun_offs(struct gen_ch_map *ch_map) -{ - return ch_map + 1; -} - -typedef int (gen_trans_fn)(struct nvm_tgt_dev *, struct ppa_addr *); - -#define gen_for_each_lun(bm, lun, i) \ - for ((i) = 0, lun = &(bm)->luns[0]; \ - (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) - -#endif /* GENNVM_H_ */ diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 9fb7de3..e00b1d7 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c @@ -779,7 +779,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, static void rrpc_end_io(struct nvm_rq *rqd) { - struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); + struct rrpc *rrpc = rqd->private; struct nvm_tgt_dev *dev = rrpc->dev; struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); uint8_t npages = rqd->nr_ppas; @@ -972,8 +972,9 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, bio_get(bio); rqd->bio = bio; - rqd->ins = &rrpc->instance; + rqd->private = rrpc; rqd->nr_ppas = nr_pages; + rqd->end_io = rrpc_end_io; rrq->flags = flags; err = nvm_submit_io(dev, rqd); @@ -1532,7 +1533,6 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk) if (!rrpc) return ERR_PTR(-ENOMEM); - rrpc->instance.tt = &tt_rrpc; rrpc->dev = dev; rrpc->disk = tdisk; @@ -1611,7 +1611,6 @@ static struct nvm_tgt_type tt_rrpc = { .make_rq = rrpc_make_rq, .capacity = rrpc_capacity, - .end_io = rrpc_end_io, .init = rrpc_init, .exit = rrpc_exit, diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index 94e4d73..fdb6ff9 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h @@ -102,9 +102,6 @@ struct rrpc_lun { }; struct rrpc { - /* instance must be kept in top to resolve rrpc in unprep */ - struct nvm_tgt_instance instance; - struct nvm_tgt_dev *dev; struct gendisk *disk; diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c deleted file mode 100644 index 12002bf..0000000 --- a/drivers/lightnvm/sysblk.c +++ /dev/null @@ -1,733 +0,0 @@ -/* - * Copyright (C) 2015 Matias Bjorling. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, - * USA. - * - */ - -#include <linux/lightnvm.h> - -#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */ -#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases - * enables ~1.5M updates per sysblk unit - */ - -struct sysblk_scan { - /* A row is a collection of flash blocks for a system block. */ - int nr_rows; - int row; - int act_blk[MAX_SYSBLKS]; - - int nr_ppas; - struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */ -}; - -static inline int scan_ppa_idx(int row, int blkid) -{ - return (row * MAX_BLKS_PR_SYSBLK) + blkid; -} - -static void nvm_sysblk_to_cpu(struct nvm_sb_info *info, - struct nvm_system_block *sb) -{ - info->seqnr = be32_to_cpu(sb->seqnr); - info->erase_cnt = be32_to_cpu(sb->erase_cnt); - info->version = be16_to_cpu(sb->version); - strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN); - info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa); -} - -static void nvm_cpu_to_sysblk(struct nvm_system_block *sb, - struct nvm_sb_info *info) -{ - sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC); - sb->seqnr = cpu_to_be32(info->seqnr); - sb->erase_cnt = cpu_to_be32(info->erase_cnt); - sb->version = cpu_to_be16(info->version); - strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN); - sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa); -} - -static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas) -{ - struct nvm_geo *geo = &dev->geo; - int nr_rows = min_t(int, MAX_SYSBLKS, geo->nr_chnls); - int i; - - for (i = 0; i < nr_rows; i++) - sysblk_ppas[i].ppa = 0; - - /* if possible, place sysblk at first channel, middle channel and last - * channel of the device. If not, create only one or two sys blocks - */ - switch (geo->nr_chnls) { - case 2: - sysblk_ppas[1].g.ch = 1; - /* fall-through */ - case 1: - sysblk_ppas[0].g.ch = 0; - break; - default: - sysblk_ppas[0].g.ch = 0; - sysblk_ppas[1].g.ch = geo->nr_chnls / 2; - sysblk_ppas[2].g.ch = geo->nr_chnls - 1; - break; - } - - return nr_rows; -} - -static void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s, - struct ppa_addr *sysblk_ppas) -{ - memset(s, 0, sizeof(struct sysblk_scan)); - s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas); -} - -static int sysblk_get_free_blks(struct nvm_dev *dev, struct ppa_addr ppa, - u8 *blks, int nr_blks, - struct sysblk_scan *s) -{ - struct ppa_addr *sppa; - int i, blkid = 0; - - nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); - if (nr_blks < 0) - return nr_blks; - - for (i = 0; i < nr_blks; i++) { - if (blks[i] == NVM_BLK_T_HOST) - return -EEXIST; - - if (blks[i] != NVM_BLK_T_FREE) - continue; - - sppa = &s->ppas[scan_ppa_idx(s->row, blkid)]; - sppa->g.ch = ppa.g.ch; - sppa->g.lun = ppa.g.lun; - sppa->g.blk = i; - s->nr_ppas++; - blkid++; - - pr_debug("nvm: use (%u %u %u) as sysblk\n", - sppa->g.ch, sppa->g.lun, sppa->g.blk); - if (blkid > MAX_BLKS_PR_SYSBLK - 1) - return 0; - } - - pr_err("nvm: sysblk failed get sysblk\n"); - return -EINVAL; -} - -static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa, - u8 *blks, int nr_blks, - struct sysblk_scan *s) -{ - int i, nr_sysblk = 0; - - nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); - if (nr_blks < 0) - return nr_blks; - - for (i = 0; i < nr_blks; i++) { - if (blks[i] != NVM_BLK_T_HOST) - continue; - - if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) { - pr_err("nvm: too many host blks\n"); - return -EINVAL; - } - - ppa.g.blk = i; - - s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa; - s->nr_ppas++; - nr_sysblk++; - } - - return 0; -} - -static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s, - struct ppa_addr *ppas, int get_free) -{ - struct nvm_geo *geo = &dev->geo; - int i, nr_blks, ret = 0; - u8 *blks; - - s->nr_ppas = 0; - nr_blks = geo->blks_per_lun * geo->plane_mode; - - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; - - for (i = 0; i < s->nr_rows; i++) { - s->row = i; - - ret = nvm_get_bb_tbl(dev, ppas[i], blks); - if (ret) { - pr_err("nvm: failed bb tbl for ppa (%u %u)\n", - ppas[i].g.ch, - ppas[i].g.blk); - goto err_get; - } - - if (get_free) - ret = sysblk_get_free_blks(dev, ppas[i], blks, nr_blks, - s); - else - ret = sysblk_get_host_blks(dev, ppas[i], blks, nr_blks, - s); - - if (ret) - goto err_get; - } - -err_get: - kfree(blks); - return ret; -} - -/* - * scans a block for latest sysblk. - * Returns: - * 0 - newer sysblk not found. PPA is updated to latest page. - * 1 - newer sysblk found and stored in *cur. PPA is updated to - * next valid page. - * <0- error. - */ -static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa, - struct nvm_system_block *sblk) -{ - struct nvm_geo *geo = &dev->geo; - struct nvm_system_block *cur; - int pg, ret, found = 0; - - /* the full buffer for a flash page is allocated. Only the first of it - * contains the system block information - */ - cur = kmalloc(geo->pfpg_size, GFP_KERNEL); - if (!cur) - return -ENOMEM; - - /* perform linear scan through the block */ - for (pg = 0; pg < dev->lps_per_blk; pg++) { - ppa->g.pg = ppa_to_slc(dev, pg); - - ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE, - cur, geo->pfpg_size); - if (ret) { - if (ret == NVM_RSP_ERR_EMPTYPAGE) { - pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n", - ppa->g.ch, - ppa->g.lun, - ppa->g.blk, - ppa->g.pg); - break; - } - pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)", - ret, - ppa->g.ch, - ppa->g.lun, - ppa->g.blk, - ppa->g.pg); - break; /* if we can't read a page, continue to the - * next blk - */ - } - - if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) { - pr_debug("nvm: scan break for ppa (%u %u %u %u)\n", - ppa->g.ch, - ppa->g.lun, - ppa->g.blk, - ppa->g.pg); - break; /* last valid page already found */ - } - - if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr)) - continue; - - memcpy(sblk, cur, sizeof(struct nvm_system_block)); - found = 1; - } - - kfree(cur); - - return found; -} - -static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, - int type) -{ - return nvm_set_bb_tbl(dev, s->ppas, s->nr_ppas, type); -} - -static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info, - struct sysblk_scan *s) -{ - struct nvm_geo *geo = &dev->geo; - struct nvm_system_block nvmsb; - void *buf; - int i, sect, ret = 0; - struct ppa_addr *ppas; - - nvm_cpu_to_sysblk(&nvmsb, info); - - buf = kzalloc(geo->pfpg_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - memcpy(buf, &nvmsb, sizeof(struct nvm_system_block)); - - ppas = kcalloc(geo->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL); - if (!ppas) { - ret = -ENOMEM; - goto err; - } - - /* Write and verify */ - for (i = 0; i < s->nr_rows; i++) { - ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])]; - - pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n", - ppas[0].g.ch, - ppas[0].g.lun, - ppas[0].g.blk, - ppas[0].g.pg); - - /* Expand to all sectors within a flash page */ - if (geo->sec_per_pg > 1) { - for (sect = 1; sect < geo->sec_per_pg; sect++) { - ppas[sect].ppa = ppas[0].ppa; - ppas[sect].g.sec = sect; - } - } - - ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PWRITE, - NVM_IO_SLC_MODE, buf, geo->pfpg_size); - if (ret) { - pr_err("nvm: sysblk failed program (%u %u %u)\n", - ppas[0].g.ch, - ppas[0].g.lun, - ppas[0].g.blk); - break; - } - - ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PREAD, - NVM_IO_SLC_MODE, buf, geo->pfpg_size); - if (ret) { - pr_err("nvm: sysblk failed read (%u %u %u)\n", - ppas[0].g.ch, - ppas[0].g.lun, - ppas[0].g.blk); - break; - } - - if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) { - pr_err("nvm: sysblk failed verify (%u %u %u)\n", - ppas[0].g.ch, - ppas[0].g.lun, - ppas[0].g.blk); - ret = -EINVAL; - break; - } - } - - kfree(ppas); -err: - kfree(buf); - - return ret; -} - -static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s) -{ - int i, ret; - unsigned long nxt_blk; - struct ppa_addr *ppa; - - for (i = 0; i < s->nr_rows; i++) { - nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK; - ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)]; - ppa->g.pg = ppa_to_slc(dev, 0); - - ret = nvm_erase_ppa(dev, ppa, 1, 0); - if (ret) - return ret; - - s->act_blk[i] = nxt_blk; - } - - return 0; -} - -int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) -{ - struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; - struct sysblk_scan s; - struct nvm_system_block *cur; - int i, j, found = 0; - int ret = -ENOMEM; - - /* - * 1. setup sysblk locations - * 2. get bad block list - * 3. filter on host-specific (type 3) - * 4. iterate through all and find the highest seq nr. - * 5. return superblock information - */ - - if (!dev->ops->get_bb_tbl) - return -EINVAL; - - nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); - - mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); - if (ret) - goto err_sysblk; - - /* no sysblocks initialized */ - if (!s.nr_ppas) - goto err_sysblk; - - cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL); - if (!cur) - goto err_sysblk; - - /* find the latest block across all sysblocks */ - for (i = 0; i < s.nr_rows; i++) { - for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) { - struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)]; - - ret = nvm_scan_block(dev, &ppa, cur); - if (ret > 0) - found = 1; - else if (ret < 0) - break; - } - } - - nvm_sysblk_to_cpu(info, cur); - - kfree(cur); -err_sysblk: - mutex_unlock(&dev->mlock); - - if (found) - return 1; - return ret; -} - -int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new) -{ - /* 1. for each latest superblock - * 2. if room - * a. write new flash page entry with the updated information - * 3. if no room - * a. find next available block on lun (linear search) - * if none, continue to next lun - * if none at all, report error. also report that it wasn't - * possible to write to all superblocks. - * c. write data to block. - */ - struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; - struct sysblk_scan s; - struct nvm_system_block *cur; - int i, j, ppaidx, found = 0; - int ret = -ENOMEM; - - if (!dev->ops->get_bb_tbl) - return -EINVAL; - - nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); - - mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); - if (ret) - goto err_sysblk; - - cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL); - if (!cur) - goto err_sysblk; - - /* Get the latest sysblk for each sysblk row */ - for (i = 0; i < s.nr_rows; i++) { - found = 0; - for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) { - ppaidx = scan_ppa_idx(i, j); - ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur); - if (ret > 0) { - s.act_blk[i] = j; - found = 1; - } else if (ret < 0) - break; - } - } - - if (!found) { - pr_err("nvm: no valid sysblks found to update\n"); - ret = -EINVAL; - goto err_cur; - } - - /* - * All sysblocks found. Check that they have same page id in their flash - * blocks - */ - for (i = 1; i < s.nr_rows; i++) { - struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])]; - struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])]; - - if (l.g.pg != r.g.pg) { - pr_err("nvm: sysblks not on same page. Previous update failed.\n"); - ret = -EINVAL; - goto err_cur; - } - } - - /* - * Check that there haven't been another update to the seqnr since we - * began - */ - if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) { - pr_err("nvm: seq is not sequential\n"); - ret = -EINVAL; - goto err_cur; - } - - /* - * When all pages in a block has been written, a new block is selected - * and writing is performed on the new block. - */ - if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg == - dev->lps_per_blk - 1) { - ret = nvm_prepare_new_sysblks(dev, &s); - if (ret) - goto err_cur; - } - - ret = nvm_write_and_verify(dev, new, &s); -err_cur: - kfree(cur); -err_sysblk: - mutex_unlock(&dev->mlock); - - return ret; -} - -int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; - struct sysblk_scan s; - int ret; - - /* - * 1. select master blocks and select first available blks - * 2. get bad block list - * 3. mark MAX_SYSBLKS block as host-based device allocated. - * 4. write and verify data to block - */ - - if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl) - return -EINVAL; - - if (!(geo->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) { - pr_err("nvm: memory does not support SLC access\n"); - return -EINVAL; - } - - /* Index all sysblocks and mark them as host-driven */ - nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); - - mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 1); - if (ret) - goto err_mark; - - ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_HOST); - if (ret) - goto err_mark; - - /* Write to the first block of each row */ - ret = nvm_write_and_verify(dev, info, &s); -err_mark: - mutex_unlock(&dev->mlock); - return ret; -} - -static int factory_nblks(int nblks) -{ - /* Round up to nearest BITS_PER_LONG */ - return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); -} - -static unsigned int factory_blk_offset(struct nvm_geo *geo, struct ppa_addr ppa) -{ - int nblks = factory_nblks(geo->blks_per_lun); - - return ((ppa.g.ch * geo->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) / - BITS_PER_LONG; -} - -static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa, - u8 *blks, int nr_blks, - unsigned long *blk_bitmap, int flags) -{ - int i, lunoff; - - nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); - if (nr_blks < 0) - return nr_blks; - - lunoff = factory_blk_offset(&dev->geo, ppa); - - /* non-set bits correspond to the block must be erased */ - for (i = 0; i < nr_blks; i++) { - switch (blks[i]) { - case NVM_BLK_T_FREE: - if (flags & NVM_FACTORY_ERASE_ONLY_USER) - set_bit(i, &blk_bitmap[lunoff]); - break; - case NVM_BLK_T_HOST: - if (!(flags & NVM_FACTORY_RESET_HOST_BLKS)) - set_bit(i, &blk_bitmap[lunoff]); - break; - case NVM_BLK_T_GRWN_BAD: - if (!(flags & NVM_FACTORY_RESET_GRWN_BBLKS)) - set_bit(i, &blk_bitmap[lunoff]); - break; - default: - set_bit(i, &blk_bitmap[lunoff]); - break; - } - } - - return 0; -} - -static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list, - int max_ppas, unsigned long *blk_bitmap) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - int ch, lun, blkid, idx, done = 0, ppa_cnt = 0; - unsigned long *offset; - - while (!done) { - done = 1; - nvm_for_each_lun_ppa(geo, ppa, ch, lun) { - idx = factory_blk_offset(geo, ppa); - offset = &blk_bitmap[idx]; - - blkid = find_first_zero_bit(offset, geo->blks_per_lun); - if (blkid >= geo->blks_per_lun) - continue; - set_bit(blkid, offset); - - ppa.g.blk = blkid; - pr_debug("nvm: erase ppa (%u %u %u)\n", - ppa.g.ch, - ppa.g.lun, - ppa.g.blk); - - erase_list[ppa_cnt] = ppa; - ppa_cnt++; - done = 0; - - if (ppa_cnt == max_ppas) - return ppa_cnt; - } - } - - return ppa_cnt; -} - -static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap, - int flags) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - int ch, lun, nr_blks, ret = 0; - u8 *blks; - - nr_blks = geo->blks_per_lun * geo->plane_mode; - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; - - nvm_for_each_lun_ppa(geo, ppa, ch, lun) { - ret = nvm_get_bb_tbl(dev, ppa, blks); - if (ret) - pr_err("nvm: failed bb tbl for ch%u lun%u\n", - ppa.g.ch, ppa.g.blk); - - ret = nvm_factory_blks(dev, ppa, blks, nr_blks, blk_bitmap, - flags); - if (ret) - break; - } - - kfree(blks); - return ret; -} - -int nvm_dev_factory(struct nvm_dev *dev, int flags) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr *ppas; - int ppa_cnt, ret = -ENOMEM; - int max_ppas = dev->ops->max_phys_sect / geo->nr_planes; - struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; - struct sysblk_scan s; - unsigned long *blk_bitmap; - - blk_bitmap = kzalloc(factory_nblks(geo->blks_per_lun) * geo->nr_luns, - GFP_KERNEL); - if (!blk_bitmap) - return ret; - - ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL); - if (!ppas) - goto err_blks; - - /* create list of blks to be erased */ - ret = nvm_fact_select_blks(dev, blk_bitmap, flags); - if (ret) - goto err_ppas; - - /* continue to erase until list of blks until empty */ - while ((ppa_cnt = - nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0) - nvm_erase_ppa(dev, ppas, ppa_cnt, 0); - - /* mark host reserved blocks free */ - if (flags & NVM_FACTORY_RESET_HOST_BLKS) { - nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); - mutex_lock(&dev->mlock); - ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); - if (!ret) - ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_FREE); - mutex_unlock(&dev->mlock); - } -err_ppas: - kfree(ppas); -err_blks: - kfree(blk_bitmap); - return ret; -} -EXPORT_SYMBOL(nvm_dev_factory); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 76d2087..709c9cc 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio, s->iop.write_prio = 0; s->iop.error = 0; s->iop.flags = 0; - s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; + s->iop.flush_journal = op_is_flush(bio->bi_opf); s->iop.wq = bcache_wq; return s; @@ -1009,7 +1009,7 @@ static int cached_dev_congested(void *data, int bits) struct request_queue *q = bdev_get_queue(dc->bdev); int ret = 0; - if (bdi_congested(&q->backing_dev_info, bits)) + if (bdi_congested(q->backing_dev_info, bits)) return 1; if (cached_dev_get(dc)) { @@ -1018,7 +1018,7 @@ static int cached_dev_congested(void *data, int bits) for_each_cache(ca, d->c, i) { q = bdev_get_queue(ca->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } cached_dev_put(dc); @@ -1032,7 +1032,7 @@ void bch_cached_dev_request_init(struct cached_dev *dc) struct gendisk *g = dc->disk.disk; g->queue->make_request_fn = cached_dev_make_request; - g->queue->backing_dev_info.congested_fn = cached_dev_congested; + g->queue->backing_dev_info->congested_fn = cached_dev_congested; dc->disk.cache_miss = cached_dev_cache_miss; dc->disk.ioctl = cached_dev_ioctl; } @@ -1125,7 +1125,7 @@ static int flash_dev_congested(void *data, int bits) for_each_cache(ca, d->c, i) { q = bdev_get_queue(ca->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } return ret; @@ -1136,7 +1136,7 @@ void bch_flash_dev_request_init(struct bcache_device *d) struct gendisk *g = d->disk; g->queue->make_request_fn = flash_dev_make_request; - g->queue->backing_dev_info.congested_fn = flash_dev_congested; + g->queue->backing_dev_info->congested_fn = flash_dev_congested; d->cache_miss = flash_dev_cache_miss; d->ioctl = flash_dev_ioctl; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3a19cbc..85e3f21 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -807,7 +807,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, blk_queue_make_request(q, NULL); d->disk->queue = q; q->queuedata = d; - q->backing_dev_info.congested_data = d; + q->backing_dev_info->congested_data = d; q->limits.max_hw_sectors = UINT_MAX; q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; @@ -1132,9 +1132,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) set_capacity(dc->disk.disk, dc->bdev->bd_part->nr_sects - dc->sb.data_offset); - dc->disk.disk->queue->backing_dev_info.ra_pages = - max(dc->disk.disk->queue->backing_dev_info.ra_pages, - q->backing_dev_info.ra_pages); + dc->disk.disk->queue->backing_dev_info->ra_pages = + max(dc->disk.disk->queue->backing_dev_info->ra_pages, + q->backing_dev_info->ra_pages); bch_cached_dev_request_init(dc); bch_cached_dev_writeback_init(dc); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index e04c61e..894bc14 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); spin_lock_irqsave(&cache->lock, flags); - if (cache->need_tick_bio && - !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) && + if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && bio_op(bio) != REQ_OP_DISCARD) { pb->tick = true; cache->need_tick_bio = false; @@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) return to_oblock(block_nr); } -static int bio_triggers_commit(struct cache *cache, struct bio *bio) -{ - return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); -} - /* * You must increment the deferred set whilst the prison cell is held. To * encourage this, we ask for 'cell' to be passed in. @@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio) { unsigned long flags; - if (!bio_triggers_commit(cache, bio)) { + if (!op_is_flush(bio->bi_opf)) { accounted_request(cache, bio); return; } @@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache) static bool discard_or_flush(struct bio *bio) { - return bio_op(bio) == REQ_OP_DISCARD || - bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); + return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) @@ -2291,7 +2284,7 @@ static void do_waker(struct work_struct *ws) static int is_congested(struct dm_dev *dev, int bdi_bits) { struct request_queue *q = bdev_get_queue(dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 40ceba1..136fda3 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -92,7 +92,6 @@ struct mapped_device { * io objects are allocated from here. */ mempool_t *io_pool; - mempool_t *rq_pool; struct bio_set *bs; diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index bf2b267..9fab33b 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1379,7 +1379,7 @@ static void stop_worker(struct era *era) static int dev_is_congested(struct dm_dev *dev, int bdi_bits) { struct request_queue *q = bdev_get_queue(dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3570bcb..7f223db 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -92,12 +92,6 @@ struct multipath { unsigned queue_mode; - /* - * We must use a mempool of dm_mpath_io structs so that we - * can resubmit bios on error. - */ - mempool_t *mpio_pool; - struct mutex work_mutex; struct work_struct trigger_event; @@ -115,8 +109,6 @@ struct dm_mpath_io { typedef int (*action_fn) (struct pgpath *pgpath); -static struct kmem_cache *_mpio_cache; - static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); @@ -209,7 +201,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti) init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); - m->mpio_pool = NULL; m->queue_mode = DM_TYPE_NONE; m->ti = ti; @@ -229,16 +220,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; else m->queue_mode = DM_TYPE_REQUEST_BASED; - } - - if (m->queue_mode == DM_TYPE_REQUEST_BASED) { - unsigned min_ios = dm_get_reserved_rq_based_ios(); - - m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache); - if (!m->mpio_pool) - return -ENOMEM; - } - else if (m->queue_mode == DM_TYPE_BIO_BASED) { + } else if (m->queue_mode == DM_TYPE_BIO_BASED) { INIT_WORK(&m->process_queued_bios, process_queued_bios); /* * bio-based doesn't support any direct scsi_dh management; @@ -263,7 +245,6 @@ static void free_multipath(struct multipath *m) kfree(m->hw_handler_name); kfree(m->hw_handler_params); - mempool_destroy(m->mpio_pool); kfree(m); } @@ -272,38 +253,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info) return info->ptr; } -static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info) -{ - struct dm_mpath_io *mpio; - - if (!m->mpio_pool) { - /* Use blk-mq pdu memory requested via per_io_data_size */ - mpio = get_mpio(info); - memset(mpio, 0, sizeof(*mpio)); - return mpio; - } - - mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); - if (!mpio) - return NULL; - - memset(mpio, 0, sizeof(*mpio)); - info->ptr = mpio; - - return mpio; -} - -static void clear_request_fn_mpio(struct multipath *m, union map_info *info) -{ - /* Only needed for non blk-mq (.request_fn) multipath */ - if (m->mpio_pool) { - struct dm_mpath_io *mpio = info->ptr; - - info->ptr = NULL; - mempool_free(mpio, m->mpio_pool); - } -} - static size_t multipath_per_bio_data_size(void) { return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); @@ -530,16 +479,17 @@ static bool must_push_back_bio(struct multipath *m) /* * Map cloned requests (request-based multipath) */ -static int __multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context, - struct request *rq, struct request **__clone) +static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, + union map_info *map_context, + struct request **__clone) { struct multipath *m = ti->private; int r = DM_MAPIO_REQUEUE; - size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq); + size_t nr_bytes = blk_rq_bytes(rq); struct pgpath *pgpath; struct block_device *bdev; - struct dm_mpath_io *mpio; + struct dm_mpath_io *mpio = get_mpio(map_context); + struct request *clone; /* Do we need to select a new pgpath? */ pgpath = lockless_dereference(m->current_pgpath); @@ -556,42 +506,23 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, return r; } - mpio = set_mpio(m, map_context); - if (!mpio) - /* ENOMEM, requeue */ - return r; - + memset(mpio, 0, sizeof(*mpio)); mpio->pgpath = pgpath; mpio->nr_bytes = nr_bytes; bdev = pgpath->path.dev->bdev; - if (clone) { - /* - * Old request-based interface: allocated clone is passed in. - * Used by: .request_fn stacked on .request_fn path(s). - */ - clone->q = bdev_get_queue(bdev); - clone->rq_disk = bdev->bd_disk; - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - } else { - /* - * blk-mq request-based interface; used by both: - * .request_fn stacked on blk-mq path(s) and - * blk-mq stacked on blk-mq path(s). - */ - clone = blk_mq_alloc_request(bdev_get_queue(bdev), - rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); - if (IS_ERR(clone)) { - /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ - clear_request_fn_mpio(m, map_context); - return r; - } - clone->bio = clone->biotail = NULL; - clone->rq_disk = bdev->bd_disk; - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - *__clone = clone; + clone = blk_get_request(bdev_get_queue(bdev), + rq->cmd_flags | REQ_NOMERGE, + GFP_ATOMIC); + if (IS_ERR(clone)) { + /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ + return r; } + clone->bio = clone->biotail = NULL; + clone->rq_disk = bdev->bd_disk; + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; + *__clone = clone; if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, @@ -600,22 +531,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, return DM_MAPIO_REMAPPED; } -static int multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context) -{ - return __multipath_map(ti, clone, map_context, NULL, NULL); -} - -static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, - union map_info *map_context, - struct request **clone) -{ - return __multipath_map(ti, NULL, map_context, rq, clone); -} - static void multipath_release_clone(struct request *clone) { - blk_mq_free_request(clone); + blk_put_request(clone); } /* @@ -1187,7 +1105,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_write_same_bios = 1; if (m->queue_mode == DM_TYPE_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); - else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) + else ti->per_io_data_size = sizeof(struct dm_mpath_io); return 0; @@ -1610,7 +1528,6 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (ps->type->end_io) ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); } - clear_request_fn_mpio(m, map_context); return r; } @@ -2060,7 +1977,6 @@ static struct target_type multipath_target = { .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, - .map_rq = multipath_map, .clone_and_map_rq = multipath_clone_and_map, .release_clone_rq = multipath_release_clone, .rq_end_io = multipath_end_io, @@ -2080,11 +1996,6 @@ static int __init dm_multipath_init(void) { int r; - /* allocate a slab for the dm_mpath_ios */ - _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); - if (!_mpio_cache) - return -ENOMEM; - r = dm_register_target(&multipath_target); if (r < 0) { DMERR("request-based register failed %d", r); @@ -2120,8 +2031,6 @@ bad_alloc_kmpath_handlerd: bad_alloc_kmultipathd: dm_unregister_target(&multipath_target); bad_register_target: - kmem_cache_destroy(_mpio_cache); - return r; } @@ -2131,7 +2040,6 @@ static void __exit dm_multipath_exit(void) destroy_workqueue(kmultipathd); dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); } module_init(dm_multipath_init); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 6e702fc..67d76f2 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -109,28 +109,6 @@ void dm_stop_queue(struct request_queue *q) dm_mq_stop_queue(q); } -static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, - gfp_t gfp_mask) -{ - return mempool_alloc(md->io_pool, gfp_mask); -} - -static void free_old_rq_tio(struct dm_rq_target_io *tio) -{ - mempool_free(tio, tio->md->io_pool); -} - -static struct request *alloc_old_clone_request(struct mapped_device *md, - gfp_t gfp_mask) -{ - return mempool_alloc(md->rq_pool, gfp_mask); -} - -static void free_old_clone_request(struct mapped_device *md, struct request *rq) -{ - mempool_free(rq, md->rq_pool); -} - /* * Partial completion handling for request-based dm */ @@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone) static struct dm_rq_target_io *tio_from_request(struct request *rq) { - return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); + return blk_mq_rq_to_pdu(rq); } static void rq_end_stats(struct mapped_device *md, struct request *orig) @@ -233,31 +211,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) dm_put(md); } -static void free_rq_clone(struct request *clone) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - - blk_rq_unprep_clone(clone); - - /* - * It is possible for a clone_old_rq() allocated clone to - * get passed in -- it may not yet have a request_queue. - * This is known to occur if the error target replaces - * a multipath target that has a request_fn queue stacked - * on blk-mq queue(s). - */ - if (clone->q && clone->q->mq_ops) - /* stacked on blk-mq queue(s) */ - tio->ti->type->release_clone_rq(clone); - else if (!md->queue->mq_ops) - /* request_fn queue stacked on request_fn queue(s) */ - free_old_clone_request(md, clone); - - if (!md->queue->mq_ops) - free_old_rq_tio(tio); -} - /* * Complete the clone and the original request. * Must be called without clone's queue lock held, @@ -270,20 +223,9 @@ static void dm_end_request(struct request *clone, int error) struct mapped_device *md = tio->md; struct request *rq = tio->orig; - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - rq->errors = clone->errors; - rq->resid_len = clone->resid_len; - - if (rq->sense) - /* - * We are using the sense buffer of the original - * request. - * So setting the length of the sense data is enough. - */ - rq->sense_len = clone->sense_len; - } + blk_rq_unprep_clone(clone); + tio->ti->type->release_clone_rq(clone); - free_rq_clone(clone); rq_end_stats(md, rq); if (!rq->q->mq_ops) blk_end_request_all(rq, error); @@ -292,22 +234,6 @@ static void dm_end_request(struct request *clone, int error) rq_completed(md, rw, true); } -static void dm_unprep_request(struct request *rq) -{ - struct dm_rq_target_io *tio = tio_from_request(rq); - struct request *clone = tio->clone; - - if (!rq->q->mq_ops) { - rq->special = NULL; - rq->rq_flags &= ~RQF_DONTPREP; - } - - if (clone) - free_rq_clone(clone); - else if (!tio->md->queue->mq_ops) - free_old_rq_tio(tio); -} - /* * Requeue the original request of a clone. */ @@ -346,7 +272,10 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ int rw = rq_data_dir(rq); rq_end_stats(md, rq); - dm_unprep_request(rq); + if (tio->clone) { + blk_rq_unprep_clone(tio->clone); + tio->ti->type->release_clone_rq(tio->clone); + } if (!rq->q->mq_ops) dm_old_requeue_request(rq); @@ -401,14 +330,11 @@ static void dm_softirq_done(struct request *rq) if (!clone) { rq_end_stats(tio->md, rq); rw = rq_data_dir(rq); - if (!rq->q->mq_ops) { + if (!rq->q->mq_ops) blk_end_request_all(rq, tio->error); - rq_completed(tio->md, rw, false); - free_old_rq_tio(tio); - } else { + else blk_mq_end_request(rq, tio->error); - rq_completed(tio->md, rw, false); - } + rq_completed(tio->md, rw, false); return; } @@ -452,16 +378,6 @@ static void end_clone_request(struct request *clone, int error) { struct dm_rq_target_io *tio = clone->end_io_data; - if (!clone->q->mq_ops) { - /* - * For just cleaning up the information of the queue in which - * the clone was dispatched. - * The clone is *NOT* freed actually here because it is alloced - * from dm own mempool (RQF_ALLOCED isn't set). - */ - __blk_put_request(clone->q, clone); - } - /* * Actual request completion is done in a softirq context which doesn't * hold the clone's queue lock. Otherwise, deadlock could occur because: @@ -511,9 +427,6 @@ static int setup_clone(struct request *clone, struct request *rq, if (r) return r; - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; clone->end_io = end_clone_request; clone->end_io_data = tio; @@ -522,28 +435,6 @@ static int setup_clone(struct request *clone, struct request *rq, return 0; } -static struct request *clone_old_rq(struct request *rq, struct mapped_device *md, - struct dm_rq_target_io *tio, gfp_t gfp_mask) -{ - /* - * Create clone for use with .request_fn request_queue - */ - struct request *clone; - - clone = alloc_old_clone_request(md, gfp_mask); - if (!clone) - return NULL; - - blk_rq_init(NULL, clone); - if (setup_clone(clone, rq, tio, gfp_mask)) { - /* -ENOMEM */ - free_old_clone_request(md, clone); - return NULL; - } - - return clone; -} - static void map_tio_request(struct kthread_work *work); static void init_tio(struct dm_rq_target_io *tio, struct request *rq, @@ -565,60 +456,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq, kthread_init_work(&tio->work, map_tio_request); } -static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq, - struct mapped_device *md, - gfp_t gfp_mask) -{ - struct dm_rq_target_io *tio; - int srcu_idx; - struct dm_table *table; - - tio = alloc_old_rq_tio(md, gfp_mask); - if (!tio) - return NULL; - - init_tio(tio, rq, md); - - table = dm_get_live_table(md, &srcu_idx); - /* - * Must clone a request if this .request_fn DM device - * is stacked on .request_fn device(s). - */ - if (!dm_table_all_blk_mq_devices(table)) { - if (!clone_old_rq(rq, md, tio, gfp_mask)) { - dm_put_live_table(md, srcu_idx); - free_old_rq_tio(tio); - return NULL; - } - } - dm_put_live_table(md, srcu_idx); - - return tio; -} - -/* - * Called with the queue lock held. - */ -static int dm_old_prep_fn(struct request_queue *q, struct request *rq) -{ - struct mapped_device *md = q->queuedata; - struct dm_rq_target_io *tio; - - if (unlikely(rq->special)) { - DMWARN("Already has something in rq->special."); - return BLKPREP_KILL; - } - - tio = dm_old_prep_tio(rq, md, GFP_ATOMIC); - if (!tio) - return BLKPREP_DEFER; - - rq->special = tio; - rq->rq_flags |= RQF_DONTPREP; - - return BLKPREP_OK; -} - /* * Returns: * DM_MAPIO_* : the request has been processed as indicated @@ -633,31 +470,18 @@ static int map_request(struct dm_rq_target_io *tio) struct request *rq = tio->orig; struct request *clone = NULL; - if (tio->clone) { - clone = tio->clone; - r = ti->type->map_rq(ti, clone, &tio->info); - if (r == DM_MAPIO_DELAY_REQUEUE) - return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */ - } else { - r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); - if (r < 0) { - /* The target wants to complete the I/O */ - dm_kill_unmapped_request(rq, r); - return r; - } - if (r == DM_MAPIO_REMAPPED && - setup_clone(clone, rq, tio, GFP_ATOMIC)) { - /* -ENOMEM */ - ti->type->release_clone_rq(clone); - return DM_MAPIO_REQUEUE; - } - } - + r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); switch (r) { case DM_MAPIO_SUBMITTED: /* The target has taken the I/O to submit by itself later */ break; case DM_MAPIO_REMAPPED: + if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { + /* -ENOMEM */ + ti->type->release_clone_rq(clone); + return DM_MAPIO_REQUEUE; + } + /* The target has remapped the I/O so dispatch it */ trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); @@ -716,6 +540,29 @@ static void dm_start_request(struct mapped_device *md, struct request *orig) dm_get(md); } +static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq) +{ + struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); + + /* + * Must initialize md member of tio, otherwise it won't + * be available in dm_mq_queue_rq. + */ + tio->md = md; + + if (md->init_tio_pdu) { + /* target-specific per-io data is immediately after the tio */ + tio->info.ptr = tio + 1; + } + + return 0; +} + +static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) +{ + return __dm_rq_init_rq(q->rq_alloc_data, rq); +} + static void map_tio_request(struct kthread_work *work) { struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); @@ -814,6 +661,7 @@ static void dm_old_request_fn(struct request_queue *q) dm_start_request(md, rq); tio = tio_from_request(rq); + init_tio(tio, rq, md); /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; kthread_queue_work(&md->kworker, &tio->work); @@ -824,10 +672,23 @@ static void dm_old_request_fn(struct request_queue *q) /* * Fully initialize a .request_fn request-based queue. */ -int dm_old_init_request_queue(struct mapped_device *md) +int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t) { + struct dm_target *immutable_tgt; + /* Fully initialize the queue */ - if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL)) + md->queue->cmd_size = sizeof(struct dm_rq_target_io); + md->queue->rq_alloc_data = md; + md->queue->request_fn = dm_old_request_fn; + md->queue->init_rq_fn = dm_rq_init_rq; + + immutable_tgt = dm_table_get_immutable_target(t); + if (immutable_tgt && immutable_tgt->per_io_data_size) { + /* any target-specific per-io data is immediately after the tio */ + md->queue->cmd_size += immutable_tgt->per_io_data_size; + md->init_tio_pdu = true; + } + if (blk_init_allocated_queue(md->queue) < 0) return -EINVAL; /* disable dm_old_request_fn's merge heuristic by default */ @@ -835,7 +696,6 @@ int dm_old_init_request_queue(struct mapped_device *md) dm_init_normal_md_queue(md); blk_queue_softirq_done(md->queue, dm_softirq_done); - blk_queue_prep_rq(md->queue, dm_old_prep_fn); /* Initialize the request-based DM worker thread */ kthread_init_worker(&md->kworker); @@ -856,21 +716,7 @@ static int dm_mq_init_request(void *data, struct request *rq, unsigned int hctx_idx, unsigned int request_idx, unsigned int numa_node) { - struct mapped_device *md = data; - struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); - - /* - * Must initialize md member of tio, otherwise it won't - * be available in dm_mq_queue_rq. - */ - tio->md = md; - - if (md->init_tio_pdu) { - /* target-specific per-io data is immediately after the tio */ - tio->info.ptr = tio + 1; - } - - return 0; + return __dm_rq_init_rq(data, rq); } static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index 4da06ca..f0020d2 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -48,7 +48,7 @@ struct dm_rq_clone_bio_info { bool dm_use_blk_mq_default(void); bool dm_use_blk_mq(struct mapped_device *md); -int dm_old_init_request_queue(struct mapped_device *md); +int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t); int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t); void dm_mq_cleanup_mapped_device(struct mapped_device *md); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 0a427de..3ad16d9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1750,7 +1750,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) char b[BDEVNAME_SIZE]; if (likely(q)) - r |= bdi_congested(&q->backing_dev_info, bdi_bits); + r |= bdi_congested(q->backing_dev_info, bdi_bits); else DMWARN_LIMIT("%s: any_congested: nonexistent device %s", dm_device_name(t->md), diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 710ae28..43d3445 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -131,12 +131,6 @@ static int io_err_map(struct dm_target *tt, struct bio *bio) return -EIO; } -static int io_err_map_rq(struct dm_target *ti, struct request *clone, - union map_info *map_context) -{ - return -EIO; -} - static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq, union map_info *map_context, struct request **clone) @@ -161,7 +155,6 @@ static struct target_type error_target = { .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, - .map_rq = io_err_map_rq, .clone_and_map_rq = io_err_clone_and_map_rq, .release_clone_rq = io_err_release_clone_rq, .direct_access = io_err_direct_access, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d1c05c1..2b266a2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) { - return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && + return op_is_flush(bio->bi_opf) && dm_thin_changed_this_transaction(tc->td); } @@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context, struct bio *bio; while ((bio = bio_list_pop(&cell->bios))) { - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || - bio_op(bio) == REQ_OP_DISCARD) + if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) bio_list_add(&info->defer_bios, bio); else { inc_all_io_entry(info->tc->pool, bio); @@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context, struct bio *bio; while ((bio = bio_list_pop(&cell->bios))) { - if ((bio_data_dir(bio) == WRITE) || - (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || - bio_op(bio) == REQ_OP_DISCARD)) + if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) || + bio_op(bio) == REQ_OP_DISCARD) bio_list_add(&info->defer_bios, bio); else { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; @@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } - if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || - bio_op(bio) == REQ_OP_DISCARD) { + if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) { thin_defer_bio_with_throttle(tc, bio); return DM_MAPIO_SUBMITTED; } @@ -2714,7 +2711,7 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) return 1; q = bdev_get_queue(pt->data_dev->bdev); - return bdi_congested(&q->backing_dev_info, bdi_bits); + return bdi_congested(q->backing_dev_info, bdi_bits); } static void requeue_bios(struct pool *pool) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3086da5..5bd9ab0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -91,7 +91,6 @@ static int dm_numa_node = DM_NUMA_NODE; */ struct dm_md_mempools { mempool_t *io_pool; - mempool_t *rq_pool; struct bio_set *bs; }; @@ -466,13 +465,16 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, if (r > 0) { /* - * Target determined this ioctl is being issued against - * a logical partition of the parent bdev; so extra - * validation is needed. + * Target determined this ioctl is being issued against a + * subset of the parent bdev; require extra privileges. */ - r = scsi_verify_blk_ioctl(NULL, cmd); - if (r) + if (!capable(CAP_SYS_RAWIO)) { + DMWARN_LIMIT( + "%s: sending ioctl %x to DM device without required privilege.", + current->comm, cmd); + r = -ENOIOCTLCMD; goto out; + } } r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); @@ -1314,7 +1316,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) * With request-based DM we only need to check the * top-level queue for congestion. */ - r = md->queue->backing_dev_info.wb.state & bdi_bits; + r = md->queue->backing_dev_info->wb.state & bdi_bits; } else { map = dm_get_live_table_fast(md); if (map) @@ -1397,7 +1399,7 @@ void dm_init_md_queue(struct mapped_device *md) * - must do so here (in alloc_dev callchain) before queue is used */ md->queue->queuedata = md; - md->queue->backing_dev_info.congested_data = md; + md->queue->backing_dev_info->congested_data = md; } void dm_init_normal_md_queue(struct mapped_device *md) @@ -1408,7 +1410,7 @@ void dm_init_normal_md_queue(struct mapped_device *md) /* * Initialize aspects of queue that aren't relevant for blk-mq */ - md->queue->backing_dev_info.congested_fn = dm_any_congested; + md->queue->backing_dev_info->congested_fn = dm_any_congested; blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); } @@ -1419,7 +1421,6 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->kworker_task) kthread_stop(md->kworker_task); mempool_destroy(md->io_pool); - mempool_destroy(md->rq_pool); if (md->bs) bioset_free(md->bs); @@ -1595,12 +1596,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) goto out; } - BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); + BUG_ON(!p || md->io_pool || md->bs); md->io_pool = p->io_pool; p->io_pool = NULL; - md->rq_pool = p->rq_pool; - p->rq_pool = NULL; md->bs = p->bs; p->bs = NULL; @@ -1777,7 +1776,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) switch (type) { case DM_TYPE_REQUEST_BASED: - r = dm_old_init_request_queue(md); + r = dm_old_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based mapped device"); return r; @@ -2493,7 +2492,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t unsigned integrity, unsigned per_io_data_size) { struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); - struct kmem_cache *cachep = NULL; unsigned int pool_size = 0; unsigned int front_pad; @@ -2503,20 +2501,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t switch (type) { case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - cachep = _io_cache; pool_size = dm_get_reserved_bio_based_ios(); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + + pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); + if (!pools->io_pool) + goto out; break; case DM_TYPE_REQUEST_BASED: - cachep = _rq_tio_cache; - pool_size = dm_get_reserved_rq_based_ios(); - pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); - if (!pools->rq_pool) - goto out; - /* fall through to setup remaining rq-based pools */ case DM_TYPE_MQ_REQUEST_BASED: - if (!pool_size) - pool_size = dm_get_reserved_rq_based_ios(); + pool_size = dm_get_reserved_rq_based_ios(); front_pad = offsetof(struct dm_rq_clone_bio_info, clone); /* per_io_data_size is used for blk-mq pdu at queue allocation */ break; @@ -2524,12 +2518,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t BUG(); } - if (cachep) { - pools->io_pool = mempool_create_slab_pool(pool_size, cachep); - if (!pools->io_pool) - goto out; - } - pools->bs = bioset_create_nobvec(pool_size, front_pad); if (!pools->bs) goto out; @@ -2551,7 +2539,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) return; mempool_destroy(pools->io_pool); - mempool_destroy(pools->rq_pool); if (pools->bs) bioset_free(pools->bs); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index f0aad08..f298b01 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -95,8 +95,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); /* * To check whether the target type is request-based or not (bio-based). */ -#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \ - ((t)->type->clone_and_map_rq != NULL)) +#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL) /* * To check whether the target type is a hybrid (capable of being diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5975c99..f1c7bba 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -62,7 +62,7 @@ static int linear_congested(struct mddev *mddev, int bits) for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } return ret; diff --git a/drivers/md/md.c b/drivers/md/md.c index 01175da..ba485dc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5346,8 +5346,8 @@ int md_run(struct mddev *mddev) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); - mddev->queue->backing_dev_info.congested_data = mddev; - mddev->queue->backing_dev_info.congested_fn = md_congested; + mddev->queue->backing_dev_info->congested_data = mddev; + mddev->queue->backing_dev_info->congested_fn = md_congested; } if (pers->sync_request) { if (mddev->kobj.sd && @@ -5704,7 +5704,7 @@ static int do_md_stop(struct mddev *mddev, int mode, __md_stop_writes(mddev); __md_stop(mddev); - mddev->queue->backing_dev_info.congested_fn = NULL; + mddev->queue->backing_dev_info->congested_fn = NULL; /* tell userspace to handle 'inactive' */ sysfs_notify_dirent_safe(mddev->sysfs_state); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index aa8c4e5c..d457afa 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -169,7 +169,7 @@ static int multipath_congested(struct mddev *mddev, int bits) if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); /* Just like multipath_map, we just check the * first available device */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 848365d..d658523 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -41,7 +41,7 @@ static int raid0_congested(struct mddev *mddev, int bits) for (i = 0; i < raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(devlist[i]->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } return ret; } @@ -420,8 +420,8 @@ static int raid0_run(struct mddev *mddev) */ int stripe = mddev->raid_disks * (mddev->chunk_sectors << 9) / PAGE_SIZE; - if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) - mddev->queue->backing_dev_info.ra_pages = 2* stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2* stripe) + mddev->queue->backing_dev_info->ra_pages = 2* stripe; } dump_zones(mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7b0f647..830ff2b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -744,9 +744,9 @@ static int raid1_congested(struct mddev *mddev, int bits) * non-congested targets, it can be removed */ if ((bits & (1 << WB_async_congested)) || 1) - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); else - ret &= bdi_congested(&q->backing_dev_info, bits); + ret &= bdi_congested(q->backing_dev_info, bits); } } rcu_read_unlock(); @@ -1170,10 +1170,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, int i, disks; struct bitmap *bitmap = mddev->bitmap; unsigned long flags; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); - const unsigned long do_flush_fua = (bio->bi_opf & - (REQ_PREFLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; struct blk_plug_cb *cb; struct raid1_plug_cb *plug = NULL; @@ -1389,7 +1385,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, conf->mirrors[i].rdev->data_offset); mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - bio_set_op_attrs(mbio, op, do_flush_fua | do_sync); + mbio->bi_opf = bio_op(bio) | + (bio->bi_opf & (REQ_SYNC | REQ_PREFLUSH | REQ_FUA)); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) && conf->raid_disks - mddev->degraded > 1) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1920756..6bc5c2a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -860,7 +860,7 @@ static int raid10_congested(struct mddev *mddev, int bits) if (rdev && !test_bit(Faulty, &rdev->flags)) { struct request_queue *q = bdev_get_queue(rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); + ret |= bdi_congested(q->backing_dev_info, bits); } } rcu_read_unlock(); @@ -3841,8 +3841,8 @@ static int raid10_run(struct mddev *mddev) * maybe... */ stripe /= conf->geo.near_copies; - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } if (md_integrity_register(mddev)) @@ -4643,8 +4643,8 @@ static void end_reshape(struct r10conf *conf) int stripe = conf->geo.raid_disks * ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->geo.near_copies; - if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } conf->fullsync = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3c7e106..6214e69 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6331,10 +6331,10 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info.capabilities |= + mddev->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else - mddev->queue->backing_dev_info.capabilities &= + mddev->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; mddev_resume(mddev); } @@ -7153,8 +7153,8 @@ static int raid5_run(struct mddev *mddev) int data_disks = conf->previous_raid_disks - conf->max_degraded; int stripe = data_disks * ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + mddev->queue->backing_dev_info->ra_pages = 2 * stripe; chunk_size = mddev->chunk_sectors << 9; blk_queue_io_min(mddev->queue, chunk_size); @@ -7763,8 +7763,8 @@ static void end_reshape(struct r5conf *conf) int data_disks = conf->raid_disks - conf->max_degraded; int stripe = data_disks * ((conf->chunk_sectors << 9) / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; + if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe) + conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe; } } } diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index f3512404..99e651c 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2000,16 +2000,6 @@ static int msb_bd_getgeo(struct block_device *bdev, return 0; } -static int msb_prepare_req(struct request_queue *q, struct request *req) -{ - if (req->cmd_type != REQ_TYPE_FS) { - blk_dump_rq_flags(req, "MS unsupported request"); - return BLKPREP_KILL; - } - req->rq_flags |= RQF_DONTPREP; - return BLKPREP_OK; -} - static void msb_submit_req(struct request_queue *q) { struct memstick_dev *card = q->queuedata; @@ -2132,7 +2122,6 @@ static int msb_init_disk(struct memstick_dev *card) } msb->queue->queuedata = card; - blk_queue_prep_rq(msb->queue, msb_prepare_req); blk_queue_bounce_limit(msb->queue, limit); blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index fa0746d..c00d8a2 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -827,18 +827,6 @@ static void mspro_block_start(struct memstick_dev *card) spin_unlock_irqrestore(&msb->q_lock, flags); } -static int mspro_block_prepare_req(struct request_queue *q, struct request *req) -{ - if (req->cmd_type != REQ_TYPE_FS) { - blk_dump_rq_flags(req, "MSPro unsupported request"); - return BLKPREP_KILL; - } - - req->rq_flags |= RQF_DONTPREP; - - return BLKPREP_OK; -} - static void mspro_block_submit_req(struct request_queue *q) { struct memstick_dev *card = q->queuedata; @@ -1228,7 +1216,6 @@ static int mspro_block_init_disk(struct memstick_dev *card) } msb->queue->queuedata = card; - blk_queue_prep_rq(msb->queue, mspro_block_prepare_req); blk_queue_bounce_limit(msb->queue, limit); blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES); diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 7ee1667..b8c4b2b 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -2320,10 +2320,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, SmpPassthroughReply_t *smprep; smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply; - memcpy(req->sense, smprep, sizeof(*smprep)); - req->sense_len = sizeof(*smprep); - req->resid_len = 0; - rsp->resid_len -= smprep->ResponseDataLength; + memcpy(scsi_req(req)->sense, smprep, sizeof(*smprep)); + scsi_req(req)->sense_len = sizeof(*smprep); + scsi_req(req)->resid_len = 0; + scsi_req(rsp)->resid_len -= smprep->ResponseDataLength; } else { printk(MYIOC_s_ERR_FMT "%s: smp passthru reply failed to be returned\n", diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index a6496d8..033f641 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -30,15 +30,6 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) { struct mmc_queue *mq = q->queuedata; - /* - * We only like normal block requests and discards. - */ - if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD && - req_op(req) != REQ_OP_SECURE_ERASE) { - blk_dump_rq_flags(req, "MMC bad request"); - return BLKPREP_KILL; - } - if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq))) return BLKPREP_KILL; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index df8a5ef..6b8d5cd 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -84,9 +84,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, nsect = blk_rq_cur_bytes(req) >> tr->blkshift; buf = bio_data(req->bio); - if (req->cmd_type != REQ_TYPE_FS) - return -EIO; - if (req_op(req) == REQ_OP_FLUSH) return tr->flush(dev); @@ -94,16 +91,16 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, get_capacity(req->rq_disk)) return -EIO; - if (req_op(req) == REQ_OP_DISCARD) + switch (req_op(req)) { + case REQ_OP_DISCARD: return tr->discard(dev, block, nsect); - - if (rq_data_dir(req) == READ) { + case REQ_OP_READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) return -EIO; rq_flush_dcache_pages(req); return 0; - } else { + case REQ_OP_WRITE: if (!tr->writesect) return -EIO; @@ -112,6 +109,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, if (tr->writesect(dev, block, buf)) return -EIO; return 0; + default: + return -EIO; } } diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index d1e6931..c80869e 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -323,16 +323,15 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, struct ubiblock *dev = hctx->queue->queuedata; struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req); - if (req->cmd_type != REQ_TYPE_FS) + switch (req_op(req)) { + case REQ_OP_READ: + ubi_sgl_init(&pdu->usgl); + queue_work(dev->wq, &pdu->work); + return BLK_MQ_RQ_QUEUE_OK; + default: return BLK_MQ_RQ_QUEUE_ERROR; + } - if (rq_data_dir(req) != READ) - return BLK_MQ_RQ_QUEUE_ERROR; /* Write not implemented */ - - ubi_sgl_init(&pdu->usgl); - queue_work(dev->wq, &pdu->work); - - return BLK_MQ_RQ_QUEUE_OK; } static int ubiblock_init_request(void *data, struct request *req, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8a3c3e3..44a1a25 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -208,18 +208,18 @@ EXPORT_SYMBOL_GPL(nvme_requeue_req); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, unsigned int flags, int qid) { + unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; struct request *req; if (qid == NVME_QID_ANY) { - req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags); + req = blk_mq_alloc_request(q, op, flags); } else { - req = blk_mq_alloc_request_hctx(q, nvme_is_write(cmd), flags, + req = blk_mq_alloc_request_hctx(q, op, flags, qid ? qid - 1 : 0); } if (IS_ERR(req)) return req; - req->cmd_type = REQ_TYPE_DRV_PRIV; req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_req(req)->cmd = cmd; @@ -238,26 +238,38 @@ static inline void nvme_setup_flush(struct nvme_ns *ns, static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; struct nvme_dsm_range *range; - unsigned int nr_bytes = blk_rq_bytes(req); + struct bio *bio; - range = kmalloc(sizeof(*range), GFP_ATOMIC); + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); if (!range) return BLK_MQ_RQ_QUEUE_BUSY; - range->cattr = cpu_to_le32(0); - range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift); - range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + __rq_for_each_bio(bio, req) { + u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; + + range[n].cattr = cpu_to_le32(0); + range[n].nlb = cpu_to_le32(nlb); + range[n].slba = cpu_to_le64(slba); + n++; + } + + if (WARN_ON_ONCE(n != segments)) { + kfree(range); + return BLK_MQ_RQ_QUEUE_ERROR; + } memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); - cmnd->dsm.nr = 0; + cmnd->dsm.nr = segments - 1; cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); req->special_vec.bv_page = virt_to_page(range); req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range); + req->special_vec.bv_len = sizeof(*range) * segments; req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_MQ_RQ_QUEUE_OK; @@ -309,17 +321,27 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { int ret = BLK_MQ_RQ_QUEUE_OK; - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + switch (req_op(req)) { + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); - else if (req_op(req) == REQ_OP_FLUSH) + break; + case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); - else if (req_op(req) == REQ_OP_DISCARD) + break; + case REQ_OP_DISCARD: ret = nvme_setup_discard(ns, req, cmd); - else + break; + case REQ_OP_READ: + case REQ_OP_WRITE: nvme_setup_rw(ns, req, cmd); + break; + default: + WARN_ON_ONCE(1); + return BLK_MQ_RQ_QUEUE_ERROR; + } cmd->common.command_id = req->tag; - return ret; } EXPORT_SYMBOL_GPL(nvme_setup_cmd); @@ -784,6 +806,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, return nvme_sg_io(ns, (void __user *)arg); #endif default: +#ifdef CONFIG_NVM + if (ns->ndev) + return nvme_nvm_ioctl(ns, cmd, arg); +#endif + if (is_sed_ioctl(cmd)) + return sed_ioctl(ns->ctrl->opal_dev, cmd, + (void __user *) arg); return -ENOTTY; } } @@ -861,6 +890,9 @@ static void nvme_config_discard(struct nvme_ns *ns) struct nvme_ctrl *ctrl = ns->ctrl; u32 logical_block_size = queue_logical_block_size(ns->queue); + BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < + NVME_DSM_MAX_RANGES); + if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) ns->queue->limits.discard_zeroes_data = 1; else @@ -869,6 +901,7 @@ static void nvme_config_discard(struct nvme_ns *ns) ns->queue->limits.discard_alignment = logical_block_size; ns->queue->limits.discard_granularity = logical_block_size; blk_queue_max_discard_sectors(ns->queue, UINT_MAX); + blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); } @@ -1051,6 +1084,28 @@ static const struct pr_ops nvme_pr_ops = { .pr_clear = nvme_pr_clear, }; +#ifdef CONFIG_BLK_SED_OPAL +int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, + bool send) +{ + struct nvme_ctrl *ctrl = data; + struct nvme_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + if (send) + cmd.common.opcode = nvme_admin_security_send; + else + cmd.common.opcode = nvme_admin_security_recv; + cmd.common.nsid = 0; + cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); + cmd.common.cdw10[1] = cpu_to_le32(len); + + return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, + ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0); +} +EXPORT_SYMBOL_GPL(nvme_sec_submit); +#endif /* CONFIG_BLK_SED_OPAL */ + static const struct block_device_operations nvme_fops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, @@ -1230,6 +1285,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) return -EIO; } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); atomic_set(&ctrl->abort_limit, id->acl + 1); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e65041c..fb51a8d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1937,7 +1937,7 @@ nvme_fc_complete_rq(struct request *rq) return; } - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(rq)) error = rq->errors; else error = nvme_error_status(rq->errors); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 588d4a3..21cac85 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -26,6 +26,8 @@ #include <linux/bitops.h> #include <linux/lightnvm.h> #include <linux/vmalloc.h> +#include <linux/sched/sysctl.h> +#include <uapi/linux/lightnvm.h> enum nvme_nvm_admin_opcode { nvme_nvm_admin_identity = 0xe2, @@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) { struct nvme_nvm_id_group *src; struct nvm_id_group *dst; - int i, end; - - end = min_t(u32, 4, nvm_id->cgrps); - - for (i = 0; i < end; i++) { - src = &nvme_nvm_id->groups[i]; - dst = &nvm_id->groups[i]; - - dst->mtype = src->mtype; - dst->fmtype = src->fmtype; - dst->num_ch = src->num_ch; - dst->num_lun = src->num_lun; - dst->num_pln = src->num_pln; - - dst->num_pg = le16_to_cpu(src->num_pg); - dst->num_blk = le16_to_cpu(src->num_blk); - dst->fpg_sz = le16_to_cpu(src->fpg_sz); - dst->csecs = le16_to_cpu(src->csecs); - dst->sos = le16_to_cpu(src->sos); - - dst->trdt = le32_to_cpu(src->trdt); - dst->trdm = le32_to_cpu(src->trdm); - dst->tprt = le32_to_cpu(src->tprt); - dst->tprm = le32_to_cpu(src->tprm); - dst->tbet = le32_to_cpu(src->tbet); - dst->tbem = le32_to_cpu(src->tbem); - dst->mpos = le32_to_cpu(src->mpos); - dst->mccap = le32_to_cpu(src->mccap); - - dst->cpar = le16_to_cpu(src->cpar); - - if (dst->fmtype == NVM_ID_FMTYPE_MLC) { - memcpy(dst->lptbl.id, src->lptbl.id, 8); - dst->lptbl.mlc.num_pairs = - le16_to_cpu(src->lptbl.mlc.num_pairs); - - if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { - pr_err("nvm: number of MLC pairs not supported\n"); - return -EINVAL; - } - memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, - dst->lptbl.mlc.num_pairs); + if (nvme_nvm_id->cgrps != 1) + return -EINVAL; + + src = &nvme_nvm_id->groups[0]; + dst = &nvm_id->grp; + + dst->mtype = src->mtype; + dst->fmtype = src->fmtype; + dst->num_ch = src->num_ch; + dst->num_lun = src->num_lun; + dst->num_pln = src->num_pln; + + dst->num_pg = le16_to_cpu(src->num_pg); + dst->num_blk = le16_to_cpu(src->num_blk); + dst->fpg_sz = le16_to_cpu(src->fpg_sz); + dst->csecs = le16_to_cpu(src->csecs); + dst->sos = le16_to_cpu(src->sos); + + dst->trdt = le32_to_cpu(src->trdt); + dst->trdm = le32_to_cpu(src->trdm); + dst->tprt = le32_to_cpu(src->tprt); + dst->tprm = le32_to_cpu(src->tprm); + dst->tbet = le32_to_cpu(src->tbet); + dst->tbem = le32_to_cpu(src->tbem); + dst->mpos = le32_to_cpu(src->mpos); + dst->mccap = le32_to_cpu(src->mccap); + + dst->cpar = le16_to_cpu(src->cpar); + + if (dst->fmtype == NVM_ID_FMTYPE_MLC) { + memcpy(dst->lptbl.id, src->lptbl.id, 8); + dst->lptbl.mlc.num_pairs = + le16_to_cpu(src->lptbl.mlc.num_pairs); + + if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { + pr_err("nvm: number of MLC pairs not supported\n"); + return -EINVAL; } + + memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, + dst->lptbl.mlc.num_pairs); } return 0; @@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) nvm_id->ver_id = nvme_nvm_id->ver_id; nvm_id->vmnt = nvme_nvm_id->vmnt; - nvm_id->cgrps = nvme_nvm_id->cgrps; nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, @@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, } /* Transform physical address to target address space */ - nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); + nvm_part_to_tgt(nvmdev, entries, cmd_nlb); if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { ret = -EINTR; @@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) struct nvm_rq *rqd = rq->end_io_data; rqd->ppa_status = nvme_req(rq)->result.u64; - nvm_end_io(rqd, error); + rqd->error = error; + nvm_end_io(rqd); kfree(nvme_req(rq)->cmd); blk_mq_free_request(rq); @@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; +static void nvme_nvm_end_user_vio(struct request *rq, int error) +{ + struct completion *waiting = rq->end_io_data; + + complete(waiting); +} + +static int nvme_nvm_submit_user_cmd(struct request_queue *q, + struct nvme_ns *ns, + struct nvme_nvm_command *vcmd, + void __user *ubuf, unsigned int bufflen, + void __user *meta_buf, unsigned int meta_len, + void __user *ppa_buf, unsigned int ppa_len, + u32 *result, u64 *status, unsigned int timeout) +{ + bool write = nvme_is_write((struct nvme_command *)vcmd); + struct nvm_dev *dev = ns->ndev; + struct gendisk *disk = ns->disk; + struct request *rq; + struct bio *bio = NULL; + __le64 *ppa_list = NULL; + dma_addr_t ppa_dma; + __le64 *metadata = NULL; + dma_addr_t metadata_dma; + DECLARE_COMPLETION_ONSTACK(wait); + int ret; + + rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, + NVME_QID_ANY); + if (IS_ERR(rq)) { + ret = -ENOMEM; + goto err_cmd; + } + + rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; + + rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; + rq->end_io_data = &wait; + + if (ppa_buf && ppa_len) { + ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); + if (!ppa_list) { + ret = -ENOMEM; + goto err_rq; + } + if (copy_from_user(ppa_list, (void __user *)ppa_buf, + sizeof(u64) * (ppa_len + 1))) { + ret = -EFAULT; + goto err_ppa; + } + vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); + } else { + vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); + } + + if (ubuf && bufflen) { + ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); + if (ret) + goto err_ppa; + bio = rq->bio; + + if (meta_buf && meta_len) { + metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, + &metadata_dma); + if (!metadata) { + ret = -ENOMEM; + goto err_map; + } + + if (write) { + if (copy_from_user(metadata, + (void __user *)meta_buf, + meta_len)) { + ret = -EFAULT; + goto err_meta; + } + } + vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); + } + + if (!disk) + goto submit; + + bio->bi_bdev = bdget_disk(disk, 0); + if (!bio->bi_bdev) { + ret = -ENODEV; + goto err_meta; + } + } + +submit: + blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio); + + wait_for_completion_io(&wait); + + ret = nvme_error_status(rq->errors); + if (result) + *result = rq->errors & 0x7ff; + if (status) + *status = le64_to_cpu(nvme_req(rq)->result.u64); + + if (metadata && !ret && !write) { + if (copy_to_user(meta_buf, (void *)metadata, meta_len)) + ret = -EFAULT; + } +err_meta: + if (meta_buf && meta_len) + dma_pool_free(dev->dma_pool, metadata, metadata_dma); +err_map: + if (bio) { + if (disk && bio->bi_bdev) + bdput(bio->bi_bdev); + blk_rq_unmap_user(bio); + } +err_ppa: + if (ppa_buf && ppa_len) + dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); +err_rq: + blk_mq_free_request(rq); +err_cmd: + return ret; +} + +static int nvme_nvm_submit_vio(struct nvme_ns *ns, + struct nvm_user_vio __user *uvio) +{ + struct nvm_user_vio vio; + struct nvme_nvm_command c; + unsigned int length; + int ret; + + if (copy_from_user(&vio, uvio, sizeof(vio))) + return -EFAULT; + if (vio.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.ph_rw.opcode = vio.opcode; + c.ph_rw.nsid = cpu_to_le32(ns->ns_id); + c.ph_rw.control = cpu_to_le16(vio.control); + c.ph_rw.length = cpu_to_le16(vio.nppas); + + length = (vio.nppas + 1) << ns->lba_shift; + + ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c, + (void __user *)(uintptr_t)vio.addr, length, + (void __user *)(uintptr_t)vio.metadata, + vio.metadata_len, + (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, + &vio.result, &vio.status, 0); + + if (ret && copy_to_user(uvio, &vio, sizeof(vio))) + return -EFAULT; + + return ret; +} + +static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, + struct nvm_passthru_vio __user *uvcmd) +{ + struct nvm_passthru_vio vcmd; + struct nvme_nvm_command c; + struct request_queue *q; + unsigned int timeout = 0; + int ret; + + if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) + return -EFAULT; + if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) + return -EACCES; + if (vcmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = vcmd.opcode; + c.common.nsid = cpu_to_le32(ns->ns_id); + c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); + /* cdw11-12 */ + c.ph_rw.length = cpu_to_le16(vcmd.nppas); + c.ph_rw.control = cpu_to_le32(vcmd.control); + c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); + c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); + c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); + + if (vcmd.timeout_ms) + timeout = msecs_to_jiffies(vcmd.timeout_ms); + + q = admin ? ns->ctrl->admin_q : ns->queue; + + ret = nvme_nvm_submit_user_cmd(q, ns, + (struct nvme_nvm_command *)&c, + (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, + (void __user *)(uintptr_t)vcmd.metadata, + vcmd.metadata_len, + (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, + &vcmd.result, &vcmd.status, timeout); + + if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) + return -EFAULT; + + return ret; +} + +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case NVME_NVM_IOCTL_ADMIN_VIO: + return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg); + case NVME_NVM_IOCTL_IO_VIO: + return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg); + case NVME_NVM_IOCTL_SUBMIT_VIO: + return nvme_nvm_submit_vio(ns, (void __user *)arg); + default: + return -ENOTTY; + } +} + int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; @@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev, return 0; id = &ndev->identity; - grp = &id->groups[0]; + grp = &id->grp; attr = &dattr->attr; if (strcmp(attr->name, "version") == 0) { @@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev, return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); } else if (strcmp(attr->name, "device_mode") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + /* kept for compatibility */ } else if (strcmp(attr->name, "media_manager") == 0) { - if (!ndev->mt) - return scnprintf(page, PAGE_SIZE, "%s\n", "none"); - return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name); + return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); } else if (strcmp(attr->name, "ppa_format") == 0) { return scnprintf(page, PAGE_SIZE, "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index aead6d0..14cfc6f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -19,6 +19,7 @@ #include <linux/kref.h> #include <linux/blk-mq.h> #include <linux/lightnvm.h> +#include <linux/sed-opal.h> enum { /* @@ -125,6 +126,8 @@ struct nvme_ctrl { struct list_head node; struct ida ns_ida; + struct opal_dev *opal_dev; + char name[12]; char serial[20]; char model[40]; @@ -137,6 +140,7 @@ struct nvme_ctrl { u32 max_hw_sectors; u16 oncs; u16 vid; + u16 oacs; atomic_t abort_limit; u8 event_limit; u8 vwc; @@ -267,6 +271,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); +int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, + bool send); + #define NVME_NR_AERS 1 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res); @@ -318,6 +325,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) @@ -335,6 +343,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i { return 0; } +static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} #endif /* CONFIG_NVM */ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3faefab..ddc51ad 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -43,6 +43,7 @@ #include <linux/types.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <asm/unaligned.h> +#include <linux/sed-opal.h> #include "nvme.h" @@ -588,7 +589,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, */ if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && - req->cmd_type != REQ_TYPE_DRV_PRIV) { + !blk_rq_is_passthrough(req)) { blk_mq_end_request(req, -EFAULT); return BLK_MQ_RQ_QUEUE_OK; } @@ -645,7 +646,7 @@ static void nvme_complete_rq(struct request *req) return; } - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(req)) error = req->errors; else error = nvme_error_status(req->errors); @@ -895,12 +896,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) return BLK_EH_HANDLED; } - iod->aborted = 1; - if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } + iod->aborted = 1; memset(&cmd, 0, sizeof(cmd)); cmd.abort.opcode = nvme_admin_abort_cmd; @@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.cmd_size = nvme_cmd_size(dev); + dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.driver_data = dev; if (blk_mq_alloc_tag_set(&dev->admin_tagset)) @@ -1738,6 +1739,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); + kfree(dev->ctrl.opal_dev); kfree(dev); } @@ -1754,6 +1756,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); + bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result = -ENODEV; if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) @@ -1786,6 +1789,14 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; + if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) { + dev->ctrl.opal_dev = + init_opal_dev(&dev->ctrl, &nvme_sec_submit); + } + + if (was_suspend) + opal_unlock_from_suspend(dev->ctrl.opal_dev); + result = nvme_setup_io_queues(dev); if (result) goto out; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 557f29b..a75e95d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1423,7 +1423,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { struct nvme_command *cmd = nvme_req(rq)->cmd; - if (rq->cmd_type != REQ_TYPE_DRV_PRIV || + if (!blk_rq_is_passthrough(rq) || cmd->common.opcode != nvme_fabrics_command || cmd->fabrics.fctype != nvme_fabrics_type_connect) return false; @@ -1471,7 +1471,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) + if (req_op(rq) == REQ_OP_FLUSH) flush = true; ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); @@ -1522,7 +1522,7 @@ static void nvme_rdma_complete_rq(struct request *rq) return; } - if (rq->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(rq)) error = rq->errors; else error = nvme_error_status(rq->errors); diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index a5c09e7..f49ae27 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -43,6 +43,7 @@ #include <asm/unaligned.h> #include <scsi/sg.h> #include <scsi/scsi.h> +#include <scsi/scsi_request.h> #include "nvme.h" @@ -2347,12 +2348,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) { - u8 cmd[BLK_MAX_CDB]; + u8 cmd[16]; int retcode; unsigned int opcode; if (hdr->cmdp == NULL) return -EMSGSIZE; + if (hdr->cmd_len > sizeof(cmd)) + return -EINVAL; if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; @@ -2451,8 +2454,6 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr) return -EFAULT; if (hdr.interface_id != 'S') return -EINVAL; - if (hdr.cmd_len > BLK_MAX_CDB) - return -EINVAL; /* * A positive return code means a NVMe status, which has been diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9aaa700..f3862e3 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -104,7 +104,7 @@ static void nvme_loop_complete_rq(struct request *req) return; } - if (req->cmd_type == REQ_TYPE_DRV_PRIV) + if (blk_rq_is_passthrough(req)) error = req->errors; else error = nvme_error_status(req->errors); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 9f16ea6..152de68 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -300,13 +300,6 @@ static void scm_blk_request(struct request_queue *rq) struct request *req; while ((req = blk_peek_request(rq))) { - if (req->cmd_type != REQ_TYPE_FS) { - blk_start_request(req); - blk_dump_rq_flags(req, KMSG_COMPONENT " bad request"); - __blk_end_request_all(req, -EIO); - continue; - } - if (!scm_permit_request(bdev, req)) goto out; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index a4f6b0d..d4023bf 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -18,6 +18,7 @@ config SCSI depends on BLOCK select SCSI_DMA if HAS_DMA select SG_POOL + select BLK_SCSI_REQUEST ---help--- If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or any other SCSI device under Linux, say Y and make sure that you know diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 5b80746..4a7679f 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -88,12 +88,6 @@ struct clariion_dh_data { */ unsigned char buffer[CLARIION_BUFFER_SIZE]; /* - * SCSI sense buffer for commands -- assumes serial issuance - * and completion sequence of all commands for same multipath. - */ - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; - unsigned int senselen; - /* * LUN state */ int lun_state; @@ -116,44 +110,38 @@ struct clariion_dh_data { /* * Parse MODE_SELECT cmd reply. */ -static int trespass_endio(struct scsi_device *sdev, char *sense) +static int trespass_endio(struct scsi_device *sdev, + struct scsi_sense_hdr *sshdr) { int err = SCSI_DH_IO; - struct scsi_sense_hdr sshdr; - - if (!scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr)) { - sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, " - "0x%2x, 0x%2x while sending CLARiiON trespass " - "command.\n", CLARIION_NAME, sshdr.sense_key, - sshdr.asc, sshdr.ascq); - if ((sshdr.sense_key == 0x05) && (sshdr.asc == 0x04) && - (sshdr.ascq == 0x00)) { - /* - * Array based copy in progress -- do not send - * mode_select or copy will be aborted mid-stream. - */ - sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in " - "progress while sending CLARiiON trespass " - "command.\n", CLARIION_NAME); - err = SCSI_DH_DEV_TEMP_BUSY; - } else if ((sshdr.sense_key == 0x02) && (sshdr.asc == 0x04) && - (sshdr.ascq == 0x03)) { - /* - * LUN Not Ready - Manual Intervention Required - * indicates in-progress ucode upgrade (NDU). - */ - sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress " - "ucode upgrade NDU operation while sending " - "CLARiiON trespass command.\n", CLARIION_NAME); - err = SCSI_DH_DEV_TEMP_BUSY; - } else - err = SCSI_DH_DEV_FAILED; - } else { - sdev_printk(KERN_INFO, sdev, - "%s: failed to send MODE SELECT, no sense available\n", - CLARIION_NAME); - } + sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, " + "0x%2x, 0x%2x while sending CLARiiON trespass " + "command.\n", CLARIION_NAME, sshdr->sense_key, + sshdr->asc, sshdr->ascq); + + if (sshdr->sense_key == 0x05 && sshdr->asc == 0x04 && + sshdr->ascq == 0x00) { + /* + * Array based copy in progress -- do not send + * mode_select or copy will be aborted mid-stream. + */ + sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in " + "progress while sending CLARiiON trespass " + "command.\n", CLARIION_NAME); + err = SCSI_DH_DEV_TEMP_BUSY; + } else if (sshdr->sense_key == 0x02 && sshdr->asc == 0x04 && + sshdr->ascq == 0x03) { + /* + * LUN Not Ready - Manual Intervention Required + * indicates in-progress ucode upgrade (NDU). + */ + sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress " + "ucode upgrade NDU operation while sending " + "CLARiiON trespass command.\n", CLARIION_NAME); + err = SCSI_DH_DEV_TEMP_BUSY; + } else + err = SCSI_DH_DEV_FAILED; return err; } @@ -257,103 +245,15 @@ out: return sp_model; } -/* - * Get block request for REQ_BLOCK_PC command issued to path. Currently - * limited to MODE_SELECT (trespass) and INQUIRY (VPD page 0xC0) commands. - * - * Uses data and sense buffers in hardware handler context structure and - * assumes serial servicing of commands, both issuance and completion. - */ -static struct request *get_req(struct scsi_device *sdev, int cmd, - unsigned char *buffer) -{ - struct request *rq; - int len = 0; - - rq = blk_get_request(sdev->request_queue, - (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO); - if (IS_ERR(rq)) { - sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed"); - return NULL; - } - - blk_rq_set_block_pc(rq); - rq->cmd_len = COMMAND_SIZE(cmd); - rq->cmd[0] = cmd; - - switch (cmd) { - case MODE_SELECT: - len = sizeof(short_trespass); - rq->cmd[1] = 0x10; - rq->cmd[4] = len; - break; - case MODE_SELECT_10: - len = sizeof(long_trespass); - rq->cmd[1] = 0x10; - rq->cmd[8] = len; - break; - case INQUIRY: - len = CLARIION_BUFFER_SIZE; - rq->cmd[4] = len; - memset(buffer, 0, len); - break; - default: - BUG_ON(1); - break; - } - - rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER; - rq->timeout = CLARIION_TIMEOUT; - rq->retries = CLARIION_RETRIES; - - if (blk_rq_map_kern(rq->q, rq, buffer, len, GFP_NOIO)) { - blk_put_request(rq); - return NULL; - } - - return rq; -} - -static int send_inquiry_cmd(struct scsi_device *sdev, int page, - struct clariion_dh_data *csdev) -{ - struct request *rq = get_req(sdev, INQUIRY, csdev->buffer); - int err; - - if (!rq) - return SCSI_DH_RES_TEMP_UNAVAIL; - - rq->sense = csdev->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = csdev->senselen = 0; - - rq->cmd[0] = INQUIRY; - if (page != 0) { - rq->cmd[1] = 1; - rq->cmd[2] = page; - } - err = blk_execute_rq(sdev->request_queue, NULL, rq, 1); - if (err == -EIO) { - sdev_printk(KERN_INFO, sdev, - "%s: failed to send %s INQUIRY: %x\n", - CLARIION_NAME, page?"EVPD":"standard", - rq->errors); - csdev->senselen = rq->sense_len; - err = SCSI_DH_IO; - } - - blk_put_request(rq); - - return err; -} - static int send_trespass_cmd(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - struct request *rq; unsigned char *page22; - int err, len, cmd; + unsigned char cdb[COMMAND_SIZE(MODE_SELECT)]; + int err, res = SCSI_DH_OK, len; + struct scsi_sense_hdr sshdr; + u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; if (csdev->flags & CLARIION_SHORT_TRESPASS) { page22 = short_trespass; @@ -361,40 +261,37 @@ static int send_trespass_cmd(struct scsi_device *sdev, /* Set Honor Reservations bit */ page22[6] |= 0x80; len = sizeof(short_trespass); - cmd = MODE_SELECT; + cdb[0] = MODE_SELECT; + cdb[1] = 0x10; + cdb[4] = len; } else { page22 = long_trespass; if (!(csdev->flags & CLARIION_HONOR_RESERVATIONS)) /* Set Honor Reservations bit */ page22[10] |= 0x80; len = sizeof(long_trespass); - cmd = MODE_SELECT_10; + cdb[0] = MODE_SELECT_10; + cdb[8] = len; } BUG_ON((len > CLARIION_BUFFER_SIZE)); memcpy(csdev->buffer, page22, len); - rq = get_req(sdev, cmd, csdev->buffer); - if (!rq) - return SCSI_DH_RES_TEMP_UNAVAIL; - - rq->sense = csdev->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = csdev->senselen = 0; - - err = blk_execute_rq(sdev->request_queue, NULL, rq, 1); - if (err == -EIO) { - if (rq->sense_len) { - err = trespass_endio(sdev, csdev->sense); - } else { + err = scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, + csdev->buffer, len, &sshdr, + CLARIION_TIMEOUT * HZ, CLARIION_RETRIES, + NULL, req_flags, 0); + if (err) { + if (scsi_sense_valid(&sshdr)) + res = trespass_endio(sdev, &sshdr); + else { sdev_printk(KERN_INFO, sdev, "%s: failed to send MODE SELECT: %x\n", - CLARIION_NAME, rq->errors); + CLARIION_NAME, err); + res = SCSI_DH_IO; } } - blk_put_request(rq); - - return err; + return res; } static int clariion_check_sense(struct scsi_device *sdev, @@ -464,21 +361,7 @@ static int clariion_std_inquiry(struct scsi_device *sdev, int err; char *sp_model; - err = send_inquiry_cmd(sdev, 0, csdev); - if (err != SCSI_DH_OK && csdev->senselen) { - struct scsi_sense_hdr sshdr; - - if (scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE, - &sshdr)) { - sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code " - "%02x/%02x/%02x\n", CLARIION_NAME, - sshdr.sense_key, sshdr.asc, sshdr.ascq); - } - err = SCSI_DH_IO; - goto out; - } - - sp_model = parse_sp_model(sdev, csdev->buffer); + sp_model = parse_sp_model(sdev, sdev->inquiry); if (!sp_model) { err = SCSI_DH_DEV_UNSUPP; goto out; @@ -500,30 +383,12 @@ out: static int clariion_send_inquiry(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - int err, retry = CLARIION_RETRIES; - -retry: - err = send_inquiry_cmd(sdev, 0xC0, csdev); - if (err != SCSI_DH_OK && csdev->senselen) { - struct scsi_sense_hdr sshdr; - - err = scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE, - &sshdr); - if (!err) - return SCSI_DH_IO; - - err = clariion_check_sense(sdev, &sshdr); - if (retry > 0 && err == ADD_TO_MLQUEUE) { - retry--; - goto retry; - } - sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code " - "%02x/%02x/%02x\n", CLARIION_NAME, - sshdr.sense_key, sshdr.asc, sshdr.ascq); - err = SCSI_DH_IO; - } else { + int err = SCSI_DH_IO; + + if (!scsi_get_vpd_page(sdev, 0xC0, csdev->buffer, + CLARIION_BUFFER_SIZE)) err = parse_sp_info_reply(sdev, csdev); - } + return err; } diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 308e871..be43c94 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -38,13 +38,10 @@ #define HP_SW_PATH_PASSIVE 1 struct hp_sw_dh_data { - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; int path_state; int retries; int retry_cnt; struct scsi_device *sdev; - activate_complete callback_fn; - void *callback_data; }; static int hp_sw_start_stop(struct hp_sw_dh_data *); @@ -56,43 +53,34 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *); * * Returns SCSI_DH_DEV_OFFLINED if the sdev is on the passive path */ -static int tur_done(struct scsi_device *sdev, unsigned char *sense) +static int tur_done(struct scsi_device *sdev, struct hp_sw_dh_data *h, + struct scsi_sense_hdr *sshdr) { - struct scsi_sense_hdr sshdr; - int ret; + int ret = SCSI_DH_IO; - ret = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (!ret) { - sdev_printk(KERN_WARNING, sdev, - "%s: sending tur failed, no sense available\n", - HP_SW_NAME); - ret = SCSI_DH_IO; - goto done; - } - switch (sshdr.sense_key) { + switch (sshdr->sense_key) { case UNIT_ATTENTION: ret = SCSI_DH_IMM_RETRY; break; case NOT_READY: - if ((sshdr.asc == 0x04) && (sshdr.ascq == 2)) { + if (sshdr->asc == 0x04 && sshdr->ascq == 2) { /* * LUN not ready - Initialization command required * * This is the passive path */ - ret = SCSI_DH_DEV_OFFLINED; + h->path_state = HP_SW_PATH_PASSIVE; + ret = SCSI_DH_OK; break; } /* Fallthrough */ default: sdev_printk(KERN_WARNING, sdev, "%s: sending tur failed, sense %x/%x/%x\n", - HP_SW_NAME, sshdr.sense_key, sshdr.asc, - sshdr.ascq); + HP_SW_NAME, sshdr->sense_key, sshdr->asc, + sshdr->ascq); break; } - -done: return ret; } @@ -105,131 +93,36 @@ done: */ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) { - struct request *req; - int ret; + unsigned char cmd[6] = { TEST_UNIT_READY }; + struct scsi_sense_hdr sshdr; + int ret = SCSI_DH_OK, res; + u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; retry: - req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); - if (IS_ERR(req)) - return SCSI_DH_RES_TEMP_UNAVAIL; - - blk_rq_set_block_pc(req); - req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER; - req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY); - req->cmd[0] = TEST_UNIT_READY; - req->timeout = HP_SW_TIMEOUT; - req->sense = h->sense; - memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); - req->sense_len = 0; - - ret = blk_execute_rq(req->q, NULL, req, 1); - if (ret == -EIO) { - if (req->sense_len > 0) { - ret = tur_done(sdev, h->sense); - } else { + res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, + NULL, req_flags, 0); + if (res) { + if (scsi_sense_valid(&sshdr)) + ret = tur_done(sdev, h, &sshdr); + else { sdev_printk(KERN_WARNING, sdev, "%s: sending tur failed with %x\n", - HP_SW_NAME, req->errors); + HP_SW_NAME, res); ret = SCSI_DH_IO; } } else { h->path_state = HP_SW_PATH_ACTIVE; ret = SCSI_DH_OK; } - if (ret == SCSI_DH_IMM_RETRY) { - blk_put_request(req); + if (ret == SCSI_DH_IMM_RETRY) goto retry; - } - if (ret == SCSI_DH_DEV_OFFLINED) { - h->path_state = HP_SW_PATH_PASSIVE; - ret = SCSI_DH_OK; - } - - blk_put_request(req); return ret; } /* - * start_done - Handle START STOP UNIT return status - * @sdev: sdev the command has been sent to - * @errors: blk error code - */ -static int start_done(struct scsi_device *sdev, unsigned char *sense) -{ - struct scsi_sense_hdr sshdr; - int rc; - - rc = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr); - if (!rc) { - sdev_printk(KERN_WARNING, sdev, - "%s: sending start_stop_unit failed, " - "no sense available\n", - HP_SW_NAME); - return SCSI_DH_IO; - } - switch (sshdr.sense_key) { - case NOT_READY: - if ((sshdr.asc == 0x04) && (sshdr.ascq == 3)) { - /* - * LUN not ready - manual intervention required - * - * Switch-over in progress, retry. - */ - rc = SCSI_DH_RETRY; - break; - } - /* fall through */ - default: - sdev_printk(KERN_WARNING, sdev, - "%s: sending start_stop_unit failed, sense %x/%x/%x\n", - HP_SW_NAME, sshdr.sense_key, sshdr.asc, - sshdr.ascq); - rc = SCSI_DH_IO; - } - - return rc; -} - -static void start_stop_endio(struct request *req, int error) -{ - struct hp_sw_dh_data *h = req->end_io_data; - unsigned err = SCSI_DH_OK; - - if (error || host_byte(req->errors) != DID_OK || - msg_byte(req->errors) != COMMAND_COMPLETE) { - sdev_printk(KERN_WARNING, h->sdev, - "%s: sending start_stop_unit failed with %x\n", - HP_SW_NAME, req->errors); - err = SCSI_DH_IO; - goto done; - } - - if (req->sense_len > 0) { - err = start_done(h->sdev, h->sense); - if (err == SCSI_DH_RETRY) { - err = SCSI_DH_IO; - if (--h->retry_cnt) { - blk_put_request(req); - err = hp_sw_start_stop(h); - if (err == SCSI_DH_OK) - return; - } - } - } -done: - req->end_io_data = NULL; - __blk_put_request(req->q, req); - if (h->callback_fn) { - h->callback_fn(h->callback_data, err); - h->callback_fn = h->callback_data = NULL; - } - return; - -} - -/* * hp_sw_start_stop - Send START STOP UNIT command * @sdev: sdev command should be sent to * @@ -237,26 +130,48 @@ done: */ static int hp_sw_start_stop(struct hp_sw_dh_data *h) { - struct request *req; - - req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC); - if (IS_ERR(req)) - return SCSI_DH_RES_TEMP_UNAVAIL; - - blk_rq_set_block_pc(req); - req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER; - req->cmd_len = COMMAND_SIZE(START_STOP); - req->cmd[0] = START_STOP; - req->cmd[4] = 1; /* Start spin cycle */ - req->timeout = HP_SW_TIMEOUT; - req->sense = h->sense; - memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); - req->sense_len = 0; - req->end_io_data = h; + unsigned char cmd[6] = { START_STOP, 0, 0, 0, 1, 0 }; + struct scsi_sense_hdr sshdr; + struct scsi_device *sdev = h->sdev; + int res, rc = SCSI_DH_OK; + int retry_cnt = HP_SW_RETRIES; + u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; - blk_execute_rq_nowait(req->q, NULL, req, 1, start_stop_endio); - return SCSI_DH_OK; +retry: + res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, + NULL, req_flags, 0); + if (res) { + if (!scsi_sense_valid(&sshdr)) { + sdev_printk(KERN_WARNING, sdev, + "%s: sending start_stop_unit failed, " + "no sense available\n", HP_SW_NAME); + return SCSI_DH_IO; + } + switch (sshdr.sense_key) { + case NOT_READY: + if (sshdr.asc == 0x04 && sshdr.ascq == 3) { + /* + * LUN not ready - manual intervention required + * + * Switch-over in progress, retry. + */ + if (--retry_cnt) + goto retry; + rc = SCSI_DH_RETRY; + break; + } + /* fall through */ + default: + sdev_printk(KERN_WARNING, sdev, + "%s: sending start_stop_unit failed, " + "sense %x/%x/%x\n", HP_SW_NAME, + sshdr.sense_key, sshdr.asc, sshdr.ascq); + rc = SCSI_DH_IO; + } + } + return rc; } static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) @@ -290,15 +205,8 @@ static int hp_sw_activate(struct scsi_device *sdev, ret = hp_sw_tur(sdev, h); - if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE) { - h->retry_cnt = h->retries; - h->callback_fn = fn; - h->callback_data = data; + if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE) ret = hp_sw_start_stop(h); - if (ret == SCSI_DH_OK) - return 0; - h->callback_fn = h->callback_data = NULL; - } if (fn) fn(data, ret); diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 00d9c32..b64eaae 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -205,7 +205,6 @@ struct rdac_dh_data { #define RDAC_NON_PREFERRED 1 char preferred; - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; union { struct c2_inquiry c2; struct c4_inquiry c4; @@ -262,40 +261,12 @@ do { \ sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \ } while (0); -static struct request *get_rdac_req(struct scsi_device *sdev, - void *buffer, unsigned buflen, int rw) +static unsigned int rdac_failover_get(struct rdac_controller *ctlr, + struct list_head *list, + unsigned char *cdb) { - struct request *rq; - struct request_queue *q = sdev->request_queue; - - rq = blk_get_request(q, rw, GFP_NOIO); - - if (IS_ERR(rq)) { - sdev_printk(KERN_INFO, sdev, - "get_rdac_req: blk_get_request failed.\n"); - return NULL; - } - blk_rq_set_block_pc(rq); - - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) { - blk_put_request(rq); - sdev_printk(KERN_INFO, sdev, - "get_rdac_req: blk_rq_map_kern failed.\n"); - return NULL; - } - - rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER; - rq->retries = RDAC_RETRIES; - rq->timeout = RDAC_TIMEOUT; - - return rq; -} - -static struct request *rdac_failover_get(struct scsi_device *sdev, - struct rdac_dh_data *h, struct list_head *list) -{ - struct request *rq; + struct scsi_device *sdev = ctlr->ms_sdev; + struct rdac_dh_data *h = sdev->handler_data; struct rdac_mode_common *common; unsigned data_size; struct rdac_queue_data *qdata; @@ -332,27 +303,17 @@ static struct request *rdac_failover_get(struct scsi_device *sdev, lun_table[qdata->h->lun] = 0x81; } - /* get request for block layer packet command */ - rq = get_rdac_req(sdev, &h->ctlr->mode_select, data_size, WRITE); - if (!rq) - return NULL; - /* Prepare the command. */ if (h->ctlr->use_ms10) { - rq->cmd[0] = MODE_SELECT_10; - rq->cmd[7] = data_size >> 8; - rq->cmd[8] = data_size & 0xff; + cdb[0] = MODE_SELECT_10; + cdb[7] = data_size >> 8; + cdb[8] = data_size & 0xff; } else { - rq->cmd[0] = MODE_SELECT; - rq->cmd[4] = data_size; + cdb[0] = MODE_SELECT; + cdb[4] = data_size; } - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - return rq; + return data_size; } static void release_controller(struct kref *kref) @@ -400,46 +361,14 @@ static struct rdac_controller *get_controller(int index, char *array_name, return ctlr; } -static int submit_inquiry(struct scsi_device *sdev, int page_code, - unsigned int len, struct rdac_dh_data *h) -{ - struct request *rq; - struct request_queue *q = sdev->request_queue; - int err = SCSI_DH_RES_TEMP_UNAVAIL; - - rq = get_rdac_req(sdev, &h->inq, len, READ); - if (!rq) - goto done; - - /* Prepare the command. */ - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = page_code; - rq->cmd[4] = len; - rq->cmd_len = COMMAND_SIZE(INQUIRY); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - - err = blk_execute_rq(q, NULL, rq, 1); - if (err == -EIO) - err = SCSI_DH_IO; - - blk_put_request(rq); -done: - return err; -} - static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h, char *array_name, u8 *array_id) { - int err, i; - struct c8_inquiry *inqp; + int err = SCSI_DH_IO, i; + struct c8_inquiry *inqp = &h->inq.c8; - err = submit_inquiry(sdev, 0xC8, sizeof(struct c8_inquiry), h); - if (err == SCSI_DH_OK) { - inqp = &h->inq.c8; + if (!scsi_get_vpd_page(sdev, 0xC8, (unsigned char *)inqp, + sizeof(struct c8_inquiry))) { if (inqp->page_code != 0xc8) return SCSI_DH_NOSYS; if (inqp->page_id[0] != 'e' || inqp->page_id[1] != 'd' || @@ -453,20 +382,20 @@ static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h, *(array_name+ARRAY_LABEL_LEN-1) = '\0'; memset(array_id, 0, UNIQUE_ID_LEN); memcpy(array_id, inqp->array_unique_id, inqp->array_uniq_id_len); + err = SCSI_DH_OK; } return err; } static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h) { - int err, access_state; + int err = SCSI_DH_IO, access_state; struct rdac_dh_data *tmp; - struct c9_inquiry *inqp; + struct c9_inquiry *inqp = &h->inq.c9; h->state = RDAC_STATE_ACTIVE; - err = submit_inquiry(sdev, 0xC9, sizeof(struct c9_inquiry), h); - if (err == SCSI_DH_OK) { - inqp = &h->inq.c9; + if (!scsi_get_vpd_page(sdev, 0xC9, (unsigned char *)inqp, + sizeof(struct c9_inquiry))) { /* detect the operating mode */ if ((inqp->avte_cvp >> 5) & 0x1) h->mode = RDAC_MODE_IOSHIP; /* LUN in IOSHIP mode */ @@ -501,6 +430,7 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h) tmp->sdev->access_state = access_state; } rcu_read_unlock(); + err = SCSI_DH_OK; } return err; @@ -509,12 +439,11 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h) static int initialize_controller(struct scsi_device *sdev, struct rdac_dh_data *h, char *array_name, u8 *array_id) { - int err, index; - struct c4_inquiry *inqp; + int err = SCSI_DH_IO, index; + struct c4_inquiry *inqp = &h->inq.c4; - err = submit_inquiry(sdev, 0xC4, sizeof(struct c4_inquiry), h); - if (err == SCSI_DH_OK) { - inqp = &h->inq.c4; + if (!scsi_get_vpd_page(sdev, 0xC4, (unsigned char *)inqp, + sizeof(struct c4_inquiry))) { /* get the controller index */ if (inqp->slot_id[1] == 0x31) index = 0; @@ -530,18 +459,18 @@ static int initialize_controller(struct scsi_device *sdev, h->sdev = sdev; } spin_unlock(&list_lock); + err = SCSI_DH_OK; } return err; } static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h) { - int err; - struct c2_inquiry *inqp; + int err = SCSI_DH_IO; + struct c2_inquiry *inqp = &h->inq.c2; - err = submit_inquiry(sdev, 0xC2, sizeof(struct c2_inquiry), h); - if (err == SCSI_DH_OK) { - inqp = &h->inq.c2; + if (!scsi_get_vpd_page(sdev, 0xC2, (unsigned char *)inqp, + sizeof(struct c2_inquiry))) { /* * If more than MODE6_MAX_LUN luns are supported, use * mode select 10 @@ -550,36 +479,35 @@ static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h) h->ctlr->use_ms10 = 1; else h->ctlr->use_ms10 = 0; + err = SCSI_DH_OK; } return err; } static int mode_select_handle_sense(struct scsi_device *sdev, - unsigned char *sensebuf) + struct scsi_sense_hdr *sense_hdr) { - struct scsi_sense_hdr sense_hdr; - int err = SCSI_DH_IO, ret; + int err = SCSI_DH_IO; struct rdac_dh_data *h = sdev->handler_data; - ret = scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sense_hdr); - if (!ret) + if (!scsi_sense_valid(sense_hdr)) goto done; - switch (sense_hdr.sense_key) { + switch (sense_hdr->sense_key) { case NO_SENSE: case ABORTED_COMMAND: case UNIT_ATTENTION: err = SCSI_DH_RETRY; break; case NOT_READY: - if (sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x01) + if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01) /* LUN Not Ready and is in the Process of Becoming * Ready */ err = SCSI_DH_RETRY; break; case ILLEGAL_REQUEST: - if (sense_hdr.asc == 0x91 && sense_hdr.ascq == 0x36) + if (sense_hdr->asc == 0x91 && sense_hdr->ascq == 0x36) /* * Command Lock contention */ @@ -592,7 +520,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev, RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " "MODE_SELECT returned with sense %02x/%02x/%02x", (char *) h->ctlr->array_name, h->ctlr->index, - sense_hdr.sense_key, sense_hdr.asc, sense_hdr.ascq); + sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq); done: return err; @@ -602,13 +530,16 @@ static void send_mode_select(struct work_struct *work) { struct rdac_controller *ctlr = container_of(work, struct rdac_controller, ms_work); - struct request *rq; struct scsi_device *sdev = ctlr->ms_sdev; struct rdac_dh_data *h = sdev->handler_data; - struct request_queue *q = sdev->request_queue; - int err, retry_cnt = RDAC_RETRY_COUNT; + int err = SCSI_DH_OK, retry_cnt = RDAC_RETRY_COUNT; struct rdac_queue_data *tmp, *qdata; LIST_HEAD(list); + unsigned char cdb[COMMAND_SIZE(MODE_SELECT_10)]; + struct scsi_sense_hdr sshdr; + unsigned int data_size; + u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + REQ_FAILFAST_DRIVER; spin_lock(&ctlr->ms_lock); list_splice_init(&ctlr->ms_head, &list); @@ -616,21 +547,19 @@ static void send_mode_select(struct work_struct *work) ctlr->ms_sdev = NULL; spin_unlock(&ctlr->ms_lock); -retry: - err = SCSI_DH_RES_TEMP_UNAVAIL; - rq = rdac_failover_get(sdev, h, &list); - if (!rq) - goto done; + retry: + data_size = rdac_failover_get(ctlr, &list, cdb); RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " "%s MODE_SELECT command", (char *) h->ctlr->array_name, h->ctlr->index, (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying"); - err = blk_execute_rq(q, NULL, rq, 1); - blk_put_request(rq); - if (err != SCSI_DH_OK) { - err = mode_select_handle_sense(sdev, h->sense); + if (scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, + &h->ctlr->mode_select, data_size, &sshdr, + RDAC_TIMEOUT * HZ, + RDAC_RETRIES, NULL, req_flags, 0)) { + err = mode_select_handle_sense(sdev, &sshdr); if (err == SCSI_DH_RETRY && retry_cnt--) goto retry; if (err == SCSI_DH_IMM_RETRY) @@ -643,7 +572,6 @@ retry: (char *) h->ctlr->array_name, h->ctlr->index); } -done: list_for_each_entry_safe(qdata, tmp, &list, entry) { list_del(&qdata->entry); if (err == SCSI_DH_OK) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 258a3f9..831a1c8 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -213,6 +213,10 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, goto fail; } + error = scsi_init_sense_cache(shost); + if (error) + goto fail; + if (shost_use_blk_mq(shost)) { error = scsi_mq_setup_tags(shost); if (error) @@ -226,19 +230,6 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, } } - /* - * Note that we allocate the freelist even for the MQ case for now, - * as we need a command set aside for scsi_reset_provider. Having - * the full host freelist and one command available for that is a - * little heavy-handed, but avoids introducing a special allocator - * just for this. Eventually the structure of scsi_reset_provider - * will need a major overhaul. - */ - error = scsi_setup_command_freelist(shost); - if (error) - goto out_destroy_tags; - - if (!shost->shost_gendev.parent) shost->shost_gendev.parent = dev ? dev : &platform_bus; if (!dma_dev) @@ -258,7 +249,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, error = device_add(&shost->shost_gendev); if (error) - goto out_destroy_freelist; + goto out_disable_runtime_pm; scsi_host_set_state(shost, SHOST_RUNNING); get_device(shost->shost_gendev.parent); @@ -308,13 +299,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, device_del(&shost->shost_dev); out_del_gendev: device_del(&shost->shost_gendev); - out_destroy_freelist: + out_disable_runtime_pm: device_disable_async_suspend(&shost->shost_gendev); pm_runtime_disable(&shost->shost_gendev); pm_runtime_set_suspended(&shost->shost_gendev); pm_runtime_put_noidle(&shost->shost_gendev); - scsi_destroy_command_freelist(shost); - out_destroy_tags: if (shost_use_blk_mq(shost)) scsi_mq_destroy_tags(shost); fail: @@ -355,7 +344,6 @@ static void scsi_host_dev_release(struct device *dev) kfree(dev_name(&shost->shost_dev)); } - scsi_destroy_command_freelist(shost); if (shost_use_blk_mq(shost)) { if (shost->tag_set.tags) scsi_mq_destroy_tags(shost); diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index cbc0c5f..c611412 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5539,8 +5539,8 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) * Retries always go down the normal I/O path. */ if (likely(cmd->retries == 0 && - cmd->request->cmd_type == REQ_TYPE_FS && - h->acciopath_status)) { + !blk_rq_is_passthrough(cmd->request) && + h->acciopath_status)) { rc = hpsa_ioaccel_submit(h, c, cmd, scsi3addr); if (rc == 0) return 0; diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 919736a..aa76f36 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -2095,7 +2095,7 @@ int fc_lport_bsg_request(struct bsg_job *job) bsg_reply->reply_payload_rcv_len = 0; if (rsp) - rsp->resid_len = job->reply_payload.payload_len; + scsi_req(rsp)->resid_len = job->reply_payload.payload_len; mutex_lock(&lport->lp_mutex); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 022bb6e..570b2cb 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2174,12 +2174,12 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, bio_data(rsp->bio), blk_rq_bytes(rsp)); if (ret > 0) { /* positive number is the untransferred residual */ - rsp->resid_len = ret; - req->resid_len = 0; + scsi_req(rsp)->resid_len = ret; + scsi_req(req)->resid_len = 0; ret = 0; } else if (ret == 0) { - rsp->resid_len = 0; - req->resid_len = 0; + scsi_req(rsp)->resid_len = 0; + scsi_req(req)->resid_len = 0; } return ret; diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c index d247925..45cbbc4 100644 --- a/drivers/scsi/libsas/sas_host_smp.c +++ b/drivers/scsi/libsas/sas_host_smp.c @@ -274,15 +274,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, switch (req_data[1]) { case SMP_REPORT_GENERAL: - req->resid_len -= 8; - rsp->resid_len -= 32; + scsi_req(req)->resid_len -= 8; + scsi_req(rsp)->resid_len -= 32; resp_data[2] = SMP_RESP_FUNC_ACC; resp_data[9] = sas_ha->num_phys; break; case SMP_REPORT_MANUF_INFO: - req->resid_len -= 8; - rsp->resid_len -= 64; + scsi_req(req)->resid_len -= 8; + scsi_req(rsp)->resid_len -= 64; resp_data[2] = SMP_RESP_FUNC_ACC; memcpy(resp_data + 12, shost->hostt->name, SAS_EXPANDER_VENDOR_ID_LEN); @@ -295,13 +295,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_DISCOVER: - req->resid_len -= 16; - if ((int)req->resid_len < 0) { - req->resid_len = 0; + scsi_req(req)->resid_len -= 16; + if ((int)scsi_req(req)->resid_len < 0) { + scsi_req(req)->resid_len = 0; error = -EINVAL; goto out; } - rsp->resid_len -= 56; + scsi_req(rsp)->resid_len -= 56; sas_host_smp_discover(sas_ha, resp_data, req_data[9]); break; @@ -311,13 +311,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_REPORT_PHY_SATA: - req->resid_len -= 16; - if ((int)req->resid_len < 0) { - req->resid_len = 0; + scsi_req(req)->resid_len -= 16; + if ((int)scsi_req(req)->resid_len < 0) { + scsi_req(req)->resid_len = 0; error = -EINVAL; goto out; } - rsp->resid_len -= 60; + scsi_req(rsp)->resid_len -= 60; sas_report_phy_sata(sas_ha, resp_data, req_data[9]); break; @@ -331,15 +331,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, int to_write = req_data[4]; if (blk_rq_bytes(req) < base_frame_size + to_write * 4 || - req->resid_len < base_frame_size + to_write * 4) { + scsi_req(req)->resid_len < base_frame_size + to_write * 4) { resp_data[2] = SMP_RESP_INV_FRM_LEN; break; } to_write = sas_host_smp_write_gpio(sas_ha, resp_data, req_data[2], req_data[3], to_write, &req_data[8]); - req->resid_len -= base_frame_size + to_write * 4; - rsp->resid_len -= 8; + scsi_req(req)->resid_len -= base_frame_size + to_write * 4; + scsi_req(rsp)->resid_len -= 8; break; } @@ -348,13 +348,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req, break; case SMP_PHY_CONTROL: - req->resid_len -= 44; - if ((int)req->resid_len < 0) { - req->resid_len = 0; + scsi_req(req)->resid_len -= 44; + if ((int)scsi_req(req)->resid_len < 0) { + scsi_req(req)->resid_len = 0; error = -EINVAL; goto out; } - rsp->resid_len -= 8; + scsi_req(rsp)->resid_len -= 8; sas_phy_control(sas_ha, req_data[9], req_data[10], req_data[32] >> 4, req_data[33] >> 4, resp_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 0b5b423..c6d5505 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4723,7 +4723,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) * then scsi-ml does not need to handle this misbehavior. */ sector_sz = scmd->device->sector_size; - if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz && + if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz && xfer_cnt % sector_sz)) { sdev_printk(KERN_INFO, scmd->device, "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 7f1d578..e7a7a70 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -2057,10 +2057,10 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, ioc->name, __func__, le16_to_cpu(mpi_reply->ResponseDataLength))); - memcpy(req->sense, mpi_reply, sizeof(*mpi_reply)); - req->sense_len = sizeof(*mpi_reply); - req->resid_len = 0; - rsp->resid_len -= + memcpy(scsi_req(req)->sense, mpi_reply, sizeof(*mpi_reply)); + scsi_req(req)->sense_len = sizeof(*mpi_reply); + scsi_req(req)->resid_len = 0; + scsi_req(rsp)->resid_len -= le16_to_cpu(mpi_reply->ResponseDataLength); /* check if the resp needs to be copied from the allocated diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index ef99f62..30b9050 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -48,6 +48,7 @@ #include <scsi/osd_sense.h> #include <scsi/scsi_device.h> +#include <scsi/scsi_request.h> #include "osd_debug.h" @@ -477,11 +478,13 @@ static void _set_error_resid(struct osd_request *or, struct request *req, { or->async_error = error; or->req_errors = req->errors ? : error; - or->sense_len = req->sense_len; + or->sense_len = scsi_req(req)->sense_len; + if (or->sense_len) + memcpy(or->sense, scsi_req(req)->sense, or->sense_len); if (or->out.req) - or->out.residual = or->out.req->resid_len; + or->out.residual = scsi_req(or->out.req)->resid_len; if (or->in.req) - or->in.residual = or->in.req->resid_len; + or->in.residual = scsi_req(or->in.req)->resid_len; } int osd_execute_request(struct osd_request *or) @@ -1562,10 +1565,11 @@ static struct request *_make_request(struct request_queue *q, bool has_write, struct bio *bio = oii->bio; int ret; - req = blk_get_request(q, has_write ? WRITE : READ, flags); + req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + flags); if (IS_ERR(req)) return req; - blk_rq_set_block_pc(req); + scsi_req_init(req); for_each_bio(bio) { struct bio *bounce_bio = bio; @@ -1599,8 +1603,6 @@ static int _init_blk_request(struct osd_request *or, req->timeout = or->timeout; req->retries = or->retries; - req->sense = or->sense; - req->sense_len = 0; if (has_out) { or->out.req = req; @@ -1612,7 +1614,7 @@ static int _init_blk_request(struct osd_request *or, ret = PTR_ERR(req); goto out; } - blk_rq_set_block_pc(req); + scsi_req_init(req); or->in.req = or->request->next_rq = req; } } else if (has_in) @@ -1699,8 +1701,8 @@ int osd_finalize_request(struct osd_request *or, osd_sec_sign_cdb(&or->cdb, cap_key); - or->request->cmd = or->cdb.buff; - or->request->cmd_len = _osd_req_cdb_len(or); + scsi_req(or->request)->cmd = or->cdb.buff; + scsi_req(or->request)->cmd_len = _osd_req_cdb_len(or); return 0; } diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index e8196c5..451de6c 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -322,6 +322,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt) /* Wakeup from interrupt */ static void osst_end_async(struct request *req, int update) { + struct scsi_request *rq = scsi_req(req); struct osst_request *SRpnt = req->end_io_data; struct osst_tape *STp = SRpnt->stp; struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; @@ -330,6 +331,8 @@ static void osst_end_async(struct request *req, int update) #if DEBUG STp->write_pending = 0; #endif + if (rq->sense_len) + memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE); if (SRpnt->waiting) complete(SRpnt->waiting); @@ -357,17 +360,20 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, int use_sg, int timeout, int retries) { struct request *req; + struct scsi_request *rq; struct page **pages = NULL; struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; int err = 0; int write = (data_direction == DMA_TO_DEVICE); - req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); + req = blk_get_request(SRpnt->stp->device->request_queue, + write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(req)) return DRIVER_ERROR << 24; - blk_rq_set_block_pc(req); + rq = scsi_req(req); + scsi_req_init(req); req->rq_flags |= RQF_QUIET; SRpnt->bio = NULL; @@ -404,11 +410,9 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, goto free_req; } - req->cmd_len = cmd_len; - memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ - memcpy(req->cmd, cmd, req->cmd_len); - req->sense = SRpnt->sense; - req->sense_len = 0; + rq->cmd_len = cmd_len; + memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ + memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; req->retries = retries; req->end_io_data = SRpnt; diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 1bf8061..40ca75b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -921,7 +921,7 @@ qla2x00_process_loopback(struct bsg_job *bsg_job) bsg_job->reply_len = sizeof(struct fc_bsg_reply) + sizeof(response) + sizeof(uint8_t); - fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) + + fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) + sizeof(struct fc_bsg_reply); memcpy(fw_sts_ptr, response, sizeof(response)); fw_sts_ptr += sizeof(response); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index a94b0b6..9281bf4 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1468,7 +1468,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, type, sp->handle, comp_status, fw_status[1], fw_status[2], le16_to_cpu(((struct els_sts_entry_24xx *) pkt)->total_byte_count)); - fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply); + fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) + + sizeof(struct fc_bsg_reply); memcpy( fw_sts_ptr, fw_status, sizeof(fw_status)); } else { @@ -1482,7 +1483,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, pkt)->error_subcode_2)); res = DID_ERROR << 16; bsg_reply->reply_payload_rcv_len = 0; - fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply); + fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) + + sizeof(struct fc_bsg_reply); memcpy( fw_sts_ptr, fw_status, sizeof(fw_status)); } ql_dump_buffer(ql_dbg_user + ql_dbg_buffer, vha, 0x5056, diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 02f1de1..96c33e29 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -2244,7 +2244,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req, memcpy(fstatus.reserved_3, pkt->reserved_2, 20 * sizeof(uint8_t)); - fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) + + fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) + sizeof(struct fc_bsg_reply); memcpy(fw_sts_ptr, (uint8_t *)&fstatus, diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 75455d4..7bfbcfa 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -98,176 +98,6 @@ EXPORT_SYMBOL(scsi_sd_probe_domain); ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain); EXPORT_SYMBOL(scsi_sd_pm_domain); -struct scsi_host_cmd_pool { - struct kmem_cache *cmd_slab; - struct kmem_cache *sense_slab; - unsigned int users; - char *cmd_name; - char *sense_name; - unsigned int slab_flags; - gfp_t gfp_mask; -}; - -static struct scsi_host_cmd_pool scsi_cmd_pool = { - .cmd_name = "scsi_cmd_cache", - .sense_name = "scsi_sense_cache", - .slab_flags = SLAB_HWCACHE_ALIGN, -}; - -static struct scsi_host_cmd_pool scsi_cmd_dma_pool = { - .cmd_name = "scsi_cmd_cache(DMA)", - .sense_name = "scsi_sense_cache(DMA)", - .slab_flags = SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA, - .gfp_mask = __GFP_DMA, -}; - -static DEFINE_MUTEX(host_cmd_pool_mutex); - -/** - * scsi_host_free_command - internal function to release a command - * @shost: host to free the command for - * @cmd: command to release - * - * the command must previously have been allocated by - * scsi_host_alloc_command. - */ -static void -scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd) -{ - struct scsi_host_cmd_pool *pool = shost->cmd_pool; - - if (cmd->prot_sdb) - kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb); - kmem_cache_free(pool->sense_slab, cmd->sense_buffer); - kmem_cache_free(pool->cmd_slab, cmd); -} - -/** - * scsi_host_alloc_command - internal function to allocate command - * @shost: SCSI host whose pool to allocate from - * @gfp_mask: mask for the allocation - * - * Returns a fully allocated command with sense buffer and protection - * data buffer (where applicable) or NULL on failure - */ -static struct scsi_cmnd * -scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask) -{ - struct scsi_host_cmd_pool *pool = shost->cmd_pool; - struct scsi_cmnd *cmd; - - cmd = kmem_cache_zalloc(pool->cmd_slab, gfp_mask | pool->gfp_mask); - if (!cmd) - goto fail; - - cmd->sense_buffer = kmem_cache_alloc(pool->sense_slab, - gfp_mask | pool->gfp_mask); - if (!cmd->sense_buffer) - goto fail_free_cmd; - - if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) { - cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp_mask); - if (!cmd->prot_sdb) - goto fail_free_sense; - } - - return cmd; - -fail_free_sense: - kmem_cache_free(pool->sense_slab, cmd->sense_buffer); -fail_free_cmd: - kmem_cache_free(pool->cmd_slab, cmd); -fail: - return NULL; -} - -/** - * __scsi_get_command - Allocate a struct scsi_cmnd - * @shost: host to transmit command - * @gfp_mask: allocation mask - * - * Description: allocate a struct scsi_cmd from host's slab, recycling from the - * host's free_list if necessary. - */ -static struct scsi_cmnd * -__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask) -{ - struct scsi_cmnd *cmd = scsi_host_alloc_command(shost, gfp_mask); - - if (unlikely(!cmd)) { - unsigned long flags; - - spin_lock_irqsave(&shost->free_list_lock, flags); - if (likely(!list_empty(&shost->free_list))) { - cmd = list_entry(shost->free_list.next, - struct scsi_cmnd, list); - list_del_init(&cmd->list); - } - spin_unlock_irqrestore(&shost->free_list_lock, flags); - - if (cmd) { - void *buf, *prot; - - buf = cmd->sense_buffer; - prot = cmd->prot_sdb; - - memset(cmd, 0, sizeof(*cmd)); - - cmd->sense_buffer = buf; - cmd->prot_sdb = prot; - } - } - - return cmd; -} - -/** - * scsi_get_command - Allocate and setup a scsi command block - * @dev: parent scsi device - * @gfp_mask: allocator flags - * - * Returns: The allocated scsi command structure. - */ -struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) -{ - struct scsi_cmnd *cmd = __scsi_get_command(dev->host, gfp_mask); - unsigned long flags; - - if (unlikely(cmd == NULL)) - return NULL; - - cmd->device = dev; - INIT_LIST_HEAD(&cmd->list); - INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); - spin_lock_irqsave(&dev->list_lock, flags); - list_add_tail(&cmd->list, &dev->cmd_list); - spin_unlock_irqrestore(&dev->list_lock, flags); - cmd->jiffies_at_alloc = jiffies; - return cmd; -} - -/** - * __scsi_put_command - Free a struct scsi_cmnd - * @shost: dev->host - * @cmd: Command to free - */ -static void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd) -{ - unsigned long flags; - - if (unlikely(list_empty(&shost->free_list))) { - spin_lock_irqsave(&shost->free_list_lock, flags); - if (list_empty(&shost->free_list)) { - list_add(&cmd->list, &shost->free_list); - cmd = NULL; - } - spin_unlock_irqrestore(&shost->free_list_lock, flags); - } - - if (likely(cmd != NULL)) - scsi_host_free_command(shost, cmd); -} - /** * scsi_put_command - Free a scsi command block * @cmd: command block to free @@ -287,188 +117,6 @@ void scsi_put_command(struct scsi_cmnd *cmd) spin_unlock_irqrestore(&cmd->device->list_lock, flags); BUG_ON(delayed_work_pending(&cmd->abort_work)); - - __scsi_put_command(cmd->device->host, cmd); -} - -static struct scsi_host_cmd_pool * -scsi_find_host_cmd_pool(struct Scsi_Host *shost) -{ - if (shost->hostt->cmd_size) - return shost->hostt->cmd_pool; - if (shost->unchecked_isa_dma) - return &scsi_cmd_dma_pool; - return &scsi_cmd_pool; -} - -static void -scsi_free_host_cmd_pool(struct scsi_host_cmd_pool *pool) -{ - kfree(pool->sense_name); - kfree(pool->cmd_name); - kfree(pool); -} - -static struct scsi_host_cmd_pool * -scsi_alloc_host_cmd_pool(struct Scsi_Host *shost) -{ - struct scsi_host_template *hostt = shost->hostt; - struct scsi_host_cmd_pool *pool; - - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) - return NULL; - - pool->cmd_name = kasprintf(GFP_KERNEL, "%s_cmd", hostt->proc_name); - pool->sense_name = kasprintf(GFP_KERNEL, "%s_sense", hostt->proc_name); - if (!pool->cmd_name || !pool->sense_name) { - scsi_free_host_cmd_pool(pool); - return NULL; - } - - pool->slab_flags = SLAB_HWCACHE_ALIGN; - if (shost->unchecked_isa_dma) { - pool->slab_flags |= SLAB_CACHE_DMA; - pool->gfp_mask = __GFP_DMA; - } - - if (hostt->cmd_size) - hostt->cmd_pool = pool; - - return pool; -} - -static struct scsi_host_cmd_pool * -scsi_get_host_cmd_pool(struct Scsi_Host *shost) -{ - struct scsi_host_template *hostt = shost->hostt; - struct scsi_host_cmd_pool *retval = NULL, *pool; - size_t cmd_size = sizeof(struct scsi_cmnd) + hostt->cmd_size; - - /* - * Select a command slab for this host and create it if not - * yet existent. - */ - mutex_lock(&host_cmd_pool_mutex); - pool = scsi_find_host_cmd_pool(shost); - if (!pool) { - pool = scsi_alloc_host_cmd_pool(shost); - if (!pool) - goto out; - } - - if (!pool->users) { - pool->cmd_slab = kmem_cache_create(pool->cmd_name, cmd_size, 0, - pool->slab_flags, NULL); - if (!pool->cmd_slab) - goto out_free_pool; - - pool->sense_slab = kmem_cache_create(pool->sense_name, - SCSI_SENSE_BUFFERSIZE, 0, - pool->slab_flags, NULL); - if (!pool->sense_slab) - goto out_free_slab; - } - - pool->users++; - retval = pool; -out: - mutex_unlock(&host_cmd_pool_mutex); - return retval; - -out_free_slab: - kmem_cache_destroy(pool->cmd_slab); -out_free_pool: - if (hostt->cmd_size) { - scsi_free_host_cmd_pool(pool); - hostt->cmd_pool = NULL; - } - goto out; -} - -static void scsi_put_host_cmd_pool(struct Scsi_Host *shost) -{ - struct scsi_host_template *hostt = shost->hostt; - struct scsi_host_cmd_pool *pool; - - mutex_lock(&host_cmd_pool_mutex); - pool = scsi_find_host_cmd_pool(shost); - - /* - * This may happen if a driver has a mismatched get and put - * of the command pool; the driver should be implicated in - * the stack trace - */ - BUG_ON(pool->users == 0); - - if (!--pool->users) { - kmem_cache_destroy(pool->cmd_slab); - kmem_cache_destroy(pool->sense_slab); - if (hostt->cmd_size) { - scsi_free_host_cmd_pool(pool); - hostt->cmd_pool = NULL; - } - } - mutex_unlock(&host_cmd_pool_mutex); -} - -/** - * scsi_setup_command_freelist - Setup the command freelist for a scsi host. - * @shost: host to allocate the freelist for. - * - * Description: The command freelist protects against system-wide out of memory - * deadlock by preallocating one SCSI command structure for each host, so the - * system can always write to a swap file on a device associated with that host. - * - * Returns: Nothing. - */ -int scsi_setup_command_freelist(struct Scsi_Host *shost) -{ - const gfp_t gfp_mask = shost->unchecked_isa_dma ? GFP_DMA : GFP_KERNEL; - struct scsi_cmnd *cmd; - - spin_lock_init(&shost->free_list_lock); - INIT_LIST_HEAD(&shost->free_list); - - shost->cmd_pool = scsi_get_host_cmd_pool(shost); - if (!shost->cmd_pool) - return -ENOMEM; - - /* - * Get one backup command for this host. - */ - cmd = scsi_host_alloc_command(shost, gfp_mask); - if (!cmd) { - scsi_put_host_cmd_pool(shost); - shost->cmd_pool = NULL; - return -ENOMEM; - } - list_add(&cmd->list, &shost->free_list); - return 0; -} - -/** - * scsi_destroy_command_freelist - Release the command freelist for a scsi host. - * @shost: host whose freelist is going to be destroyed - */ -void scsi_destroy_command_freelist(struct Scsi_Host *shost) -{ - /* - * If cmd_pool is NULL the free list was not initialized, so - * do not attempt to release resources. - */ - if (!shost->cmd_pool) - return; - - while (!list_empty(&shost->free_list)) { - struct scsi_cmnd *cmd; - - cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list); - list_del_init(&cmd->list); - scsi_host_free_command(shost, cmd); - } - shost->cmd_pool = NULL; - scsi_put_host_cmd_pool(shost); } #ifdef CONFIG_SCSI_LOGGING @@ -590,7 +238,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd) "(result %x)\n", cmd->result)); good_bytes = scsi_bufflen(cmd); - if (cmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { + if (!blk_rq_is_passthrough(cmd->request)) { int old_good_bytes = good_bytes; drv = scsi_cmd_to_driver(cmd); if (drv->done) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 996e134..9e82fa5 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1106,7 +1106,7 @@ static int scsi_request_sense(struct scsi_cmnd *scmd) static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) { - if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { + if (!blk_rq_is_passthrough(scmd->request)) { struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); if (sdrv->eh_action) rtn = sdrv->eh_action(scmd, rtn); @@ -1746,7 +1746,7 @@ check_type: * the check condition was retryable. */ if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || - scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) + blk_rq_is_passthrough(scmd->request)) return 1; else return 0; @@ -1968,25 +1968,25 @@ static void eh_lock_door_done(struct request *req, int uptodate) static void scsi_eh_lock_door(struct scsi_device *sdev) { struct request *req; + struct scsi_request *rq; /* * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a * request becomes available */ - req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); + req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(req)) return; + rq = scsi_req(req); + scsi_req_init(req); - blk_rq_set_block_pc(req); - - req->cmd[0] = ALLOW_MEDIUM_REMOVAL; - req->cmd[1] = 0; - req->cmd[2] = 0; - req->cmd[3] = 0; - req->cmd[4] = SCSI_REMOVAL_PREVENT; - req->cmd[5] = 0; - - req->cmd_len = COMMAND_SIZE(req->cmd[0]); + rq->cmd[0] = ALLOW_MEDIUM_REMOVAL; + rq->cmd[1] = 0; + rq->cmd[2] = 0; + rq->cmd[3] = 0; + rq->cmd[4] = SCSI_REMOVAL_PREVENT; + rq->cmd[5] = 0; + rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; @@ -2331,7 +2331,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) { struct scsi_cmnd *scmd; struct Scsi_Host *shost = dev->host; - struct request req; + struct request *rq; unsigned long flags; int error = 0, rtn, val; @@ -2346,14 +2346,16 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) return -EIO; error = -EIO; - scmd = scsi_get_command(dev, GFP_KERNEL); - if (!scmd) + rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + + shost->hostt->cmd_size, GFP_KERNEL); + if (!rq) goto out_put_autopm_host; + blk_rq_init(NULL, rq); - blk_rq_init(NULL, &req); - scmd->request = &req; - - scmd->cmnd = req.cmd; + scmd = (struct scsi_cmnd *)(rq + 1); + scsi_init_command(dev, scmd); + scmd->request = rq; + scmd->cmnd = scsi_req(rq)->cmd; scmd->scsi_done = scsi_reset_provider_done_command; memset(&scmd->sdb, 0, sizeof(scmd->sdb)); @@ -2413,6 +2415,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) scsi_run_host_queues(shost); scsi_put_command(scmd); + kfree(rq); out_put_autopm_host: scsi_autopm_put_host(shost); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 78db07f..912fbc3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -37,8 +37,59 @@ #include "scsi_priv.h" #include "scsi_logging.h" +static struct kmem_cache *scsi_sdb_cache; +static struct kmem_cache *scsi_sense_cache; +static struct kmem_cache *scsi_sense_isadma_cache; +static DEFINE_MUTEX(scsi_sense_cache_mutex); -struct kmem_cache *scsi_sdb_cache; +static inline struct kmem_cache * +scsi_select_sense_cache(struct Scsi_Host *shost) +{ + return shost->unchecked_isa_dma ? + scsi_sense_isadma_cache : scsi_sense_cache; +} + +static void scsi_free_sense_buffer(struct Scsi_Host *shost, + unsigned char *sense_buffer) +{ + kmem_cache_free(scsi_select_sense_cache(shost), sense_buffer); +} + +static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, + gfp_t gfp_mask, int numa_node) +{ + return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask, + numa_node); +} + +int scsi_init_sense_cache(struct Scsi_Host *shost) +{ + struct kmem_cache *cache; + int ret = 0; + + cache = scsi_select_sense_cache(shost); + if (cache) + return 0; + + mutex_lock(&scsi_sense_cache_mutex); + if (shost->unchecked_isa_dma) { + scsi_sense_isadma_cache = + kmem_cache_create("scsi_sense_cache(DMA)", + SCSI_SENSE_BUFFERSIZE, 0, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL); + if (!scsi_sense_isadma_cache) + ret = -ENOMEM; + } else { + scsi_sense_cache = + kmem_cache_create("scsi_sense_cache", + SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL); + if (!scsi_sense_cache) + ret = -ENOMEM; + } + + mutex_unlock(&scsi_sense_cache_mutex); + return ret; +} /* * When to reinvoke queueing after a resource shortage. It's 3 msecs to @@ -168,22 +219,23 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req_flags_t rq_flags, int *resid) { struct request *req; - int write = (data_direction == DMA_TO_DEVICE); + struct scsi_request *rq; int ret = DRIVER_ERROR << 24; - req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM); + req = blk_get_request(sdev->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); if (IS_ERR(req)) return ret; - blk_rq_set_block_pc(req); + rq = scsi_req(req); + scsi_req_init(req); if (bufflen && blk_rq_map_kern(sdev->request_queue, req, buffer, bufflen, __GFP_RECLAIM)) goto out; - req->cmd_len = COMMAND_SIZE(cmd[0]); - memcpy(req->cmd, cmd, req->cmd_len); - req->sense = sense; - req->sense_len = 0; + rq->cmd_len = COMMAND_SIZE(cmd[0]); + memcpy(rq->cmd, cmd, rq->cmd_len); req->retries = retries; req->timeout = timeout; req->cmd_flags |= flags; @@ -200,11 +252,13 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, * is invalid. Prevent the garbage from being misinterpreted * and prevent security leaks by zeroing out the excess data. */ - if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen)) - memset(buffer + (bufflen - req->resid_len), 0, req->resid_len); + if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen)) + memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len); if (resid) - *resid = req->resid_len; + *resid = rq->resid_len; + if (sense && rq->sense_len) + memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE); ret = req->errors; out: blk_put_request(req); @@ -529,7 +583,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost) static void scsi_uninit_cmd(struct scsi_cmnd *cmd) { - if (cmd->request->cmd_type == REQ_TYPE_FS) { + if (!blk_rq_is_passthrough(cmd->request)) { struct scsi_driver *drv = scsi_cmd_to_driver(cmd); if (drv->uninit_command) @@ -645,14 +699,13 @@ static bool scsi_end_request(struct request *req, int error, if (bidi_bytes) scsi_release_bidi_buffers(cmd); + scsi_release_buffers(cmd); + scsi_put_command(cmd); spin_lock_irqsave(q->queue_lock, flags); blk_finish_request(req, error); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_release_buffers(cmd); - - scsi_put_command(cmd); scsi_run_queue(q); } @@ -754,18 +807,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) sense_deferred = scsi_sense_is_deferred(&sshdr); } - if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */ + if (blk_rq_is_passthrough(req)) { if (result) { - if (sense_valid && req->sense) { + if (sense_valid) { /* * SG_IO wants current and deferred errors */ - int len = 8 + cmd->sense_buffer[7]; - - if (len > SCSI_SENSE_BUFFERSIZE) - len = SCSI_SENSE_BUFFERSIZE; - memcpy(req->sense, cmd->sense_buffer, len); - req->sense_len = len; + scsi_req(req)->sense_len = + min(8 + cmd->sense_buffer[7], + SCSI_SENSE_BUFFERSIZE); } if (!sense_deferred) error = __scsi_error_from_host_byte(cmd, result); @@ -775,14 +825,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) */ req->errors = cmd->result; - req->resid_len = scsi_get_resid(cmd); + scsi_req(req)->resid_len = scsi_get_resid(cmd); if (scsi_bidi_cmnd(cmd)) { /* * Bidi commands Must be complete as a whole, * both sides at once. */ - req->next_rq->resid_len = scsi_in(cmd)->resid; + scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid; if (scsi_end_request(req, 0, blk_rq_bytes(req), blk_rq_bytes(req->next_rq))) BUG(); @@ -790,15 +840,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) } } else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) { /* - * Certain non BLOCK_PC requests are commands that don't - * actually transfer anything (FLUSH), so cannot use + * Flush commands do not transfers any data, and thus cannot use * good_bytes != blk_rq_bytes(req) as the signal for an error. * This sets the error explicitly for the problem case. */ error = __scsi_error_from_host_byte(cmd, result); } - /* no bidi support for !REQ_TYPE_BLOCK_PC yet */ + /* no bidi support for !blk_rq_is_passthrough yet */ BUG_ON(blk_bidi_rq(req)); /* @@ -810,8 +859,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) blk_rq_sectors(req), good_bytes)); /* - * Recovered errors need reporting, but they're always treated - * as success, so fiddle the result code here. For BLOCK_PC + * Recovered errors need reporting, but they're always treated as + * success, so fiddle the result code here. For passthrough requests * we already took a copy of the original into rq->errors which * is what gets returned to the user */ @@ -825,7 +874,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) else if (!(req->rq_flags & RQF_QUIET)) scsi_print_sense(cmd); result = 0; - /* BLOCK_PC may have set error */ + /* for passthrough error may be set */ error = 0; } @@ -1110,42 +1159,33 @@ err_exit: } EXPORT_SYMBOL(scsi_init_io); -static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, - struct request *req) +void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) { - struct scsi_cmnd *cmd; - - if (!req->special) { - /* Bail if we can't get a reference to the device */ - if (!get_device(&sdev->sdev_gendev)) - return NULL; - - cmd = scsi_get_command(sdev, GFP_ATOMIC); - if (unlikely(!cmd)) { - put_device(&sdev->sdev_gendev); - return NULL; - } - req->special = cmd; - } else { - cmd = req->special; - } + void *buf = cmd->sense_buffer; + void *prot = cmd->prot_sdb; + unsigned long flags; - /* pull a tag out of the request if we have one */ - cmd->tag = req->tag; - cmd->request = req; + /* zero out the cmd, except for the embedded scsi_request */ + memset((char *)cmd + sizeof(cmd->req), 0, + sizeof(*cmd) - sizeof(cmd->req)); - cmd->cmnd = req->cmd; - cmd->prot_op = SCSI_PROT_NORMAL; + cmd->device = dev; + cmd->sense_buffer = buf; + cmd->prot_sdb = prot; + INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); + cmd->jiffies_at_alloc = jiffies; - return cmd; + spin_lock_irqsave(&dev->list_lock, flags); + list_add_tail(&cmd->list, &dev->cmd_list); + spin_unlock_irqrestore(&dev->list_lock, flags); } -static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) +static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = req->special; /* - * BLOCK_PC requests may transfer data, in which case they must + * Passthrough requests may transfer data, in which case they must * a bio attached to them. Or they might contain a SCSI command * that does not transfer data, in which case they may optionally * submit a request without an attached bio. @@ -1160,14 +1200,15 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) memset(&cmd->sdb, 0, sizeof(cmd->sdb)); } - cmd->cmd_len = req->cmd_len; + cmd->cmd_len = scsi_req(req)->cmd_len; + cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); cmd->allowed = req->retries; return BLKPREP_OK; } /* - * Setup a REQ_TYPE_FS command. These are simple request from filesystems + * Setup a normal block command. These are simple request from filesystems * that still need to be translated to SCSI CDBs from the ULD. */ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) @@ -1180,6 +1221,7 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) return ret; } + cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd; memset(cmd->cmnd, 0, BLK_MAX_CDB); return scsi_cmd_to_driver(cmd)->init_command(cmd); } @@ -1195,14 +1237,10 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req) else cmd->sc_data_direction = DMA_FROM_DEVICE; - switch (req->cmd_type) { - case REQ_TYPE_FS: + if (blk_rq_is_scsi(req)) + return scsi_setup_scsi_cmnd(sdev, req); + else return scsi_setup_fs_cmnd(sdev, req); - case REQ_TYPE_BLOCK_PC: - return scsi_setup_blk_pc_cmnd(sdev, req); - default: - return BLKPREP_KILL; - } } static int @@ -1298,19 +1336,28 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret) static int scsi_prep_fn(struct request_queue *q, struct request *req) { struct scsi_device *sdev = q->queuedata; - struct scsi_cmnd *cmd; + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); int ret; ret = scsi_prep_state_check(sdev, req); if (ret != BLKPREP_OK) goto out; - cmd = scsi_get_cmd_from_req(sdev, req); - if (unlikely(!cmd)) { - ret = BLKPREP_DEFER; - goto out; + if (!req->special) { + /* Bail if we can't get a reference to the device */ + if (unlikely(!get_device(&sdev->sdev_gendev))) { + ret = BLKPREP_DEFER; + goto out; + } + + scsi_init_command(sdev, cmd); + req->special = cmd; } + cmd->tag = req->tag; + cmd->request = req; + cmd->prot_op = SCSI_PROT_NORMAL; + ret = scsi_setup_cmnd(sdev, req); out: return scsi_prep_return(q, req, ret); @@ -1827,7 +1874,9 @@ static int scsi_mq_prep_fn(struct request *req) unsigned char *sense_buf = cmd->sense_buffer; struct scatterlist *sg; - memset(cmd, 0, sizeof(struct scsi_cmnd)); + /* zero out the cmd, except for the embedded scsi_request */ + memset((char *)cmd + sizeof(cmd->req), 0, + sizeof(*cmd) - sizeof(cmd->req)); req->special = cmd; @@ -1837,7 +1886,6 @@ static int scsi_mq_prep_fn(struct request *req) cmd->tag = req->tag; - cmd->cmnd = req->cmd; cmd->prot_op = SCSI_PROT_NORMAL; INIT_LIST_HEAD(&cmd->list); @@ -1912,7 +1960,6 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev)) goto out_dec_target_busy; - if (!(req->rq_flags & RQF_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); if (ret != BLK_MQ_RQ_QUEUE_OK) @@ -1982,21 +2029,24 @@ static int scsi_init_request(void *data, struct request *rq, unsigned int hctx_idx, unsigned int request_idx, unsigned int numa_node) { + struct Scsi_Host *shost = data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL, - numa_node); + cmd->sense_buffer = + scsi_alloc_sense_buffer(shost, GFP_KERNEL, numa_node); if (!cmd->sense_buffer) return -ENOMEM; + cmd->req.sense = cmd->sense_buffer; return 0; } static void scsi_exit_request(void *data, struct request *rq, unsigned int hctx_idx, unsigned int request_idx) { + struct Scsi_Host *shost = data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - kfree(cmd->sense_buffer); + scsi_free_sense_buffer(shost, cmd->sense_buffer); } static int scsi_map_queues(struct blk_mq_tag_set *set) @@ -2029,7 +2079,7 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost) return bounce_limit; } -static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) +void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; @@ -2064,28 +2114,64 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) */ blk_queue_dma_alignment(q, 0x03); } +EXPORT_SYMBOL_GPL(__scsi_init_queue); -struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, - request_fn_proc *request_fn) +static int scsi_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) { - struct request_queue *q; + struct Scsi_Host *shost = q->rq_alloc_data; + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); - q = blk_init_queue(request_fn, NULL); - if (!q) - return NULL; - __scsi_init_queue(shost, q); - return q; + memset(cmd, 0, sizeof(*cmd)); + + cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp, NUMA_NO_NODE); + if (!cmd->sense_buffer) + goto fail; + cmd->req.sense = cmd->sense_buffer; + + if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) { + cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp); + if (!cmd->prot_sdb) + goto fail_free_sense; + } + + return 0; + +fail_free_sense: + scsi_free_sense_buffer(shost, cmd->sense_buffer); +fail: + return -ENOMEM; +} + +static void scsi_exit_rq(struct request_queue *q, struct request *rq) +{ + struct Scsi_Host *shost = q->rq_alloc_data; + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + if (cmd->prot_sdb) + kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb); + scsi_free_sense_buffer(shost, cmd->sense_buffer); } -EXPORT_SYMBOL(__scsi_alloc_queue); struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) { + struct Scsi_Host *shost = sdev->host; struct request_queue *q; - q = __scsi_alloc_queue(sdev->host, scsi_request_fn); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); if (!q) return NULL; + q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; + q->rq_alloc_data = shost; + q->request_fn = scsi_request_fn; + q->init_rq_fn = scsi_init_rq; + q->exit_rq_fn = scsi_exit_rq; + + if (blk_init_allocated_queue(q) < 0) { + blk_cleanup_queue(q); + return NULL; + } + __scsi_init_queue(shost, q); blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_unprep_rq(q, scsi_unprep_fn); blk_queue_softirq_done(q, scsi_softirq_done); @@ -2209,6 +2295,8 @@ int __init scsi_init_queue(void) void scsi_exit_queue(void) { + kmem_cache_destroy(scsi_sense_cache); + kmem_cache_destroy(scsi_sense_isadma_cache); kmem_cache_destroy(scsi_sdb_cache); } diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 193636a..99bfc98 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -30,8 +30,8 @@ extern void scsi_exit_hosts(void); /* scsi.c */ extern bool scsi_use_blk_mq; -extern int scsi_setup_command_freelist(struct Scsi_Host *shost); -extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); +int scsi_init_sense_cache(struct Scsi_Host *shost); +void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); @@ -96,7 +96,6 @@ extern void scsi_exit_queue(void); extern void scsi_evt_thread(struct work_struct *work); struct request_queue; struct request; -extern struct kmem_cache *scsi_sdb_cache; /* scsi_proc.c */ #ifdef CONFIG_SCSI_PROC_FS diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 03577bd..13dcb9b 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3765,7 +3765,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) struct device *dev = &shost->shost_gendev; struct fc_internal *i = to_fc_internal(shost->transportt); struct request_queue *q; - int err; char bsg_name[20]; fc_host->rqst_q = NULL; @@ -3776,23 +3775,14 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host) snprintf(bsg_name, sizeof(bsg_name), "fc_host%d", shost->host_no); - q = __scsi_alloc_queue(shost, bsg_request_fn); - if (!q) { - dev_err(dev, - "fc_host%d: bsg interface failed to initialize - no request queue\n", - shost->host_no); - return -ENOMEM; - } - - err = bsg_setup_queue(dev, q, bsg_name, fc_bsg_dispatch, - i->f->dd_bsg_size); - if (err) { + q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size); + if (IS_ERR(q)) { dev_err(dev, "fc_host%d: bsg interface failed to initialize - setup queue\n", shost->host_no); - blk_cleanup_queue(q); - return err; + return PTR_ERR(q); } + __scsi_init_queue(shost, q); blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); fc_host->rqst_q = q; @@ -3824,26 +3814,18 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport) struct device *dev = &rport->dev; struct fc_internal *i = to_fc_internal(shost->transportt); struct request_queue *q; - int err; rport->rqst_q = NULL; if (!i->f->bsg_request) return -ENOTSUPP; - q = __scsi_alloc_queue(shost, bsg_request_fn); - if (!q) { - dev_err(dev, "bsg interface failed to initialize - no request queue\n"); - return -ENOMEM; - } - - err = bsg_setup_queue(dev, q, NULL, fc_bsg_dispatch, i->f->dd_bsg_size); - if (err) { + q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size); + if (IS_ERR(q)) { dev_err(dev, "failed to setup bsg queue\n"); - blk_cleanup_queue(q); - return err; + return PTR_ERR(q); } - + __scsi_init_queue(shost, q); blk_queue_prep_rq(q, fc_bsg_rport_prep); blk_queue_rq_timed_out(q, fc_bsg_job_timeout); blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 42bca61..568c9f2 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1537,24 +1537,18 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) struct iscsi_internal *i = to_iscsi_internal(shost->transportt); struct request_queue *q; char bsg_name[20]; - int ret; if (!i->iscsi_transport->bsg_request) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); - - q = __scsi_alloc_queue(shost, bsg_request_fn); - if (!q) - return -ENOMEM; - - ret = bsg_setup_queue(dev, q, bsg_name, iscsi_bsg_host_dispatch, 0); - if (ret) { + q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0); + if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " "initialize - no request queue\n"); - blk_cleanup_queue(q); - return ret; + return PTR_ERR(q); } + __scsi_init_queue(shost, q); ihost->bsg_q = q; return 0; diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 60b651b..126a5ee 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -33,6 +33,7 @@ #include <linux/bsg.h> #include <scsi/scsi.h> +#include <scsi/scsi_request.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> @@ -177,6 +178,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, while ((req = blk_fetch_request(q)) != NULL) { spin_unlock_irq(q->queue_lock); + scsi_req(req)->resid_len = blk_rq_bytes(req); + if (req->next_rq) + scsi_req(req->next_rq)->resid_len = + blk_rq_bytes(req->next_rq); handler = to_sas_internal(shost->transportt)->f->smp_handler; ret = handler(shost, rphy, req); req->errors = ret; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1f5d92a..40b4038 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -781,7 +781,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) rq->special_vec.bv_len = len; rq->rq_flags |= RQF_SPECIAL_PAYLOAD; - rq->resid_len = len; + scsi_req(rq)->resid_len = len; ret = scsi_init_io(cmd); out: @@ -1179,7 +1179,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) __free_page(rq->special_vec.bv_page); - if (SCpnt->cmnd != rq->cmd) { + if (SCpnt->cmnd != scsi_req(rq)->cmd) { mempool_free(SCpnt->cmnd, sd_cdb_pool); SCpnt->cmnd = NULL; SCpnt->cmd_len = 0; @@ -1750,9 +1750,6 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd); unsigned int good_bytes; - if (scmd->request->cmd_type != REQ_TYPE_FS) - return 0; - info_valid = scsi_get_sense_info_fld(scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &bad_lba); @@ -3082,6 +3079,23 @@ static void sd_probe_async(void *data, async_cookie_t cookie) put_device(&sdkp->dev); } +struct sd_devt { + int idx; + struct disk_devt disk_devt; +}; + +void sd_devt_release(struct disk_devt *disk_devt) +{ + struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, + disk_devt); + + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, sd_devt->idx); + spin_unlock(&sd_index_lock); + + kfree(sd_devt); +} + /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -3103,6 +3117,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); + struct sd_devt *sd_devt; struct scsi_disk *sdkp; struct gendisk *gd; int index; @@ -3128,9 +3143,13 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; + sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL); + if (!sd_devt) + goto out_free; + gd = alloc_disk(SD_MINORS); if (!gd) - goto out_free; + goto out_free_devt; do { if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) @@ -3146,6 +3165,11 @@ static int sd_probe(struct device *dev) goto out_put; } + atomic_set(&sd_devt->disk_devt.count, 1); + sd_devt->disk_devt.release = sd_devt_release; + sd_devt->idx = index; + gd->disk_devt = &sd_devt->disk_devt; + error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); @@ -3185,13 +3209,14 @@ static int sd_probe(struct device *dev) return 0; out_free_index: - spin_lock(&sd_index_lock); - ida_remove(&sd_index_ida, index); - spin_unlock(&sd_index_lock); + put_disk_devt(&sd_devt->disk_devt); + sd_devt = NULL; out_put: put_disk(gd); out_free: kfree(sdkp); + out_free_devt: + kfree(sd_devt); out: scsi_autopm_put_device(sdp); return error; @@ -3250,10 +3275,7 @@ static void scsi_disk_release(struct device *dev) struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - spin_lock(&sd_index_lock); - ida_remove(&sd_index_ida, sdkp->index); - spin_unlock(&sd_index_lock); - + put_disk_devt(disk->disk_devt); disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 121de0a..e831e01 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -781,9 +781,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, } if (atomic_read(&sdp->detaching)) { if (srp->bio) { - if (srp->rq->cmd != srp->rq->__cmd) - kfree(srp->rq->cmd); - + scsi_req_free_cmd(scsi_req(srp->rq)); blk_end_request_all(srp->rq, -EIO); srp->rq = NULL; } @@ -1279,6 +1277,7 @@ static void sg_rq_end_io(struct request *rq, int uptodate) { struct sg_request *srp = rq->end_io_data; + struct scsi_request *req = scsi_req(rq); Sg_device *sdp; Sg_fd *sfp; unsigned long iflags; @@ -1297,9 +1296,9 @@ sg_rq_end_io(struct request *rq, int uptodate) if (unlikely(atomic_read(&sdp->detaching))) pr_info("%s: device detaching\n", __func__); - sense = rq->sense; + sense = req->sense; result = rq->errors; - resid = rq->resid_len; + resid = req->resid_len; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_cmd_done: pack_id=%d, res=0x%x\n", @@ -1333,6 +1332,10 @@ sg_rq_end_io(struct request *rq, int uptodate) sdp->device->changed = 1; } } + + if (req->sense_len) + memcpy(srp->sense_b, req->sense, SCSI_SENSE_BUFFERSIZE); + /* Rely on write phase to clean out srp status values, so no "else" */ /* @@ -1342,8 +1345,7 @@ sg_rq_end_io(struct request *rq, int uptodate) * blk_rq_unmap_user() can be called from user context. */ srp->rq = NULL; - if (rq->cmd != rq->__cmd) - kfree(rq->cmd); + scsi_req_free_cmd(scsi_req(rq)); __blk_put_request(rq->q, rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); @@ -1658,6 +1660,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; struct request *rq; + struct scsi_request *req; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; @@ -1695,22 +1698,23 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually * does not sleep except under memory pressure. */ - rq = blk_get_request(q, rw, GFP_KERNEL); + rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(rq)) { kfree(long_cmdp); return PTR_ERR(rq); } + req = scsi_req(rq); - blk_rq_set_block_pc(rq); + scsi_req_init(rq); if (hp->cmd_len > BLK_MAX_CDB) - rq->cmd = long_cmdp; - memcpy(rq->cmd, cmd, hp->cmd_len); - rq->cmd_len = hp->cmd_len; + req->cmd = long_cmdp; + memcpy(req->cmd, cmd, hp->cmd_len); + req->cmd_len = hp->cmd_len; srp->rq = rq; rq->end_io_data = srp; - rq->sense = srp->sense_b; rq->retries = SG_DEFAULT_RETRIES; if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) @@ -1790,8 +1794,7 @@ sg_finish_rem_req(Sg_request *srp) ret = blk_rq_unmap_user(srp->bio); if (srp->rq) { - if (srp->rq->cmd != srp->rq->__cmd) - kfree(srp->rq->cmd); + scsi_req_free_cmd(scsi_req(srp->rq)); blk_put_request(srp->rq); } diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 8702d9c..11c0dfb 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -4499,7 +4499,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, if (pqi_is_logical_device(device)) { raid_bypassed = false; if (device->offload_enabled && - scmd->request->cmd_type == REQ_TYPE_FS) { + !blk_rq_is_passthrough(scmd->request)) { rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device, scmd, queue_group); if (rc == 0 || diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94352e4..0b29b93 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -117,7 +117,7 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi, unsigned int clearing, int slot); static int sr_packet(struct cdrom_device_info *, struct packet_command *); -static struct cdrom_device_ops sr_dops = { +static const struct cdrom_device_ops sr_dops = { .open = sr_open, .release = sr_release, .drive_status = sr_drive_status, @@ -437,14 +437,17 @@ static int sr_init_command(struct scsi_cmnd *SCpnt) goto out; } - if (rq_data_dir(rq) == WRITE) { + switch (req_op(rq)) { + case REQ_OP_WRITE: if (!cd->writeable) goto out; SCpnt->cmnd[0] = WRITE_10; cd->cdi.media_written = 1; - } else if (rq_data_dir(rq) == READ) { + break; + case REQ_OP_READ: SCpnt->cmnd[0] = READ_10; - } else { + break; + default: blk_dump_rq_flags(rq, "Unknown sr command"); goto out; } diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 5f35b86..81212d4 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -475,7 +475,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) ktime_t now; now = ktime_get(); - if (req->cmd[0] == WRITE_6) { + if (scsi_req(req)->cmd[0] == WRITE_6) { now = ktime_sub(now, STp->stats->write_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); @@ -489,7 +489,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) } else atomic64_add(atomic_read(&STp->stats->last_write_size), &STp->stats->write_byte_cnt); - } else if (req->cmd[0] == READ_6) { + } else if (scsi_req(req)->cmd[0] == READ_6) { now = ktime_sub(now, STp->stats->read_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time); atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); @@ -514,15 +514,18 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) static void st_scsi_execute_end(struct request *req, int uptodate) { struct st_request *SRpnt = req->end_io_data; + struct scsi_request *rq = scsi_req(req); struct scsi_tape *STp = SRpnt->stp; struct bio *tmp; STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; - STp->buffer->cmdstat.residual = req->resid_len; + STp->buffer->cmdstat.residual = rq->resid_len; st_do_stats(STp, req); tmp = SRpnt->bio; + if (rq->sense_len) + memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE); if (SRpnt->waiting) complete(SRpnt->waiting); @@ -535,17 +538,18 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, int timeout, int retries) { struct request *req; + struct scsi_request *rq; struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; int err = 0; - int write = (data_direction == DMA_TO_DEVICE); struct scsi_tape *STp = SRpnt->stp; - req = blk_get_request(SRpnt->stp->device->request_queue, write, - GFP_KERNEL); + req = blk_get_request(SRpnt->stp->device->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(req)) return DRIVER_ERROR << 24; - - blk_rq_set_block_pc(req); + rq = scsi_req(req); + scsi_req_init(req); req->rq_flags |= RQF_QUIET; mdata->null_mapped = 1; @@ -571,11 +575,9 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, } SRpnt->bio = req->bio; - req->cmd_len = COMMAND_SIZE(cmd[0]); - memset(req->cmd, 0, BLK_MAX_CDB); - memcpy(req->cmd, cmd, req->cmd_len); - req->sense = SRpnt->sense; - req->sense_len = 0; + rq->cmd_len = COMMAND_SIZE(cmd[0]); + memset(rq->cmd, 0, BLK_MAX_CDB); + memcpy(rq->cmd, cmd, rq->cmd_len); req->timeout = timeout; req->retries = retries; req->end_io_data = SRpnt; diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 88db699..bcf7d05 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -260,7 +260,7 @@ static int sun3scsi_dma_xfer_len(struct NCR5380_hostdata *hostdata, { int wanted_len = cmd->SCp.this_residual; - if (wanted_len < DMA_MIN_SIZE || cmd->request->cmd_type != REQ_TYPE_FS) + if (wanted_len < DMA_MIN_SIZE || blk_rq_is_passthrough(cmd->request)) return 0; return wanted_len; diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig index 2573612..e2bc999 100644 --- a/drivers/target/Kconfig +++ b/drivers/target/Kconfig @@ -4,6 +4,7 @@ menuconfig TARGET_CORE depends on SCSI && BLOCK select CONFIGFS_FS select CRC_T10DIF + select BLK_SCSI_REQUEST # only for scsi_command_size_tbl.. default n help Say Y or M here to enable the TCM Storage Engine and ConfigFS enabled diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 04d7aa7..a8f8e53 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1005,7 +1005,8 @@ pscsi_execute_cmd(struct se_cmd *cmd) scsi_command_size(cmd->t_task_cdb)); req = blk_get_request(pdv->pdv_sd->request_queue, - (cmd->data_direction == DMA_TO_DEVICE), + cmd->data_direction == DMA_TO_DEVICE ? + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(req)) { pr_err("PSCSI: blk_get_request() failed\n"); @@ -1013,7 +1014,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) goto fail; } - blk_rq_set_block_pc(req); + scsi_req_init(req); if (sgl) { ret = pscsi_map_sg(cmd, sgl, sgl_nents, req); @@ -1023,10 +1024,8 @@ pscsi_execute_cmd(struct se_cmd *cmd) req->end_io = pscsi_req_done; req->end_io_data = cmd; - req->cmd_len = scsi_command_size(pt->pscsi_cdb); - req->cmd = &pt->pscsi_cdb[0]; - req->sense = &pt->pscsi_sense[0]; - req->sense_len = 0; + scsi_req(req)->cmd_len = scsi_command_size(pt->pscsi_cdb); + scsi_req(req)->cmd = &pt->pscsi_cdb[0]; if (pdv->pdv_sd->type == TYPE_DISK) req->timeout = PS_TIMEOUT_DISK; else @@ -1075,7 +1074,7 @@ static void pscsi_req_done(struct request *req, int uptodate) struct pscsi_plugin_task *pt = cmd->priv; pt->pscsi_result = req->errors; - pt->pscsi_resid = req->resid_len; + pt->pscsi_resid = scsi_req(req)->resid_len; cmd->scsi_status = status_byte(pt->pscsi_result) << 1; if (cmd->scsi_status) { @@ -1096,6 +1095,7 @@ static void pscsi_req_done(struct request *req, int uptodate) break; } + memcpy(pt->pscsi_sense, scsi_req(req)->sense, TRANSPORT_SENSE_BUFFER); __blk_put_request(req->q, req); kfree(pt); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 3c47614..73031ec 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -884,6 +884,8 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + if (bdev->bd_bdi != &noop_backing_dev_info) + bdi_put(bdev->bd_bdi); } static const struct super_operations bdev_sops = { @@ -954,6 +956,21 @@ static int bdev_set(struct inode *inode, void *data) static LIST_HEAD(all_bdevs); +/* + * If there is a bdev inode for this device, unhash it so that it gets evicted + * as soon as last inode reference is dropped. + */ +void bdev_unhash_inode(dev_t dev) +{ + struct inode *inode; + + inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); + if (inode) { + remove_inode_hash(inode); + iput(inode); + } +} + struct block_device *bdget(dev_t dev) { struct block_device *bdev; @@ -971,6 +988,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; + bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; @@ -1527,6 +1545,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); if (!partno) { ret = -ENXIO; @@ -1622,6 +1642,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; + bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1800416..37a31b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1800,7 +1800,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; - bdi = blk_get_backing_dev_info(device->bdev); + bdi = device->bdev->bd_bdi; if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3c3c69c..b2e7007 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -366,7 +366,7 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) */ blk_start_plug(&plug); - bdi = blk_get_backing_dev_info(device->bdev); + bdi = device->bdev->bd_bdi; limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f17fcf8..7fb1732 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -248,6 +248,42 @@ static struct file_system_type debug_fs_type = { }; MODULE_ALIAS_FS("debugfs"); +/** + * debugfs_lookup() - look up an existing debugfs file + * @name: a pointer to a string containing the name of the file to look up. + * @parent: a pointer to the parent dentry of the file. + * + * This function will return a pointer to a dentry if it succeeds. If the file + * doesn't exist or an error occurs, %NULL will be returned. The returned + * dentry must be passed to dput() when it is no longer needed. + * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *debugfs_lookup(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + + if (IS_ERR(parent)) + return NULL; + + if (!parent) + parent = debugfs_mount->mnt_root; + + inode_lock(d_inode(parent)); + dentry = lookup_one_len(name, parent, strlen(name)); + inode_unlock(d_inode(parent)); + + if (IS_ERR(dentry)) + return NULL; + if (!d_really_is_positive(dentry)) { + dput(dentry); + return NULL; + } + return dentry; +} +EXPORT_SYMBOL_GPL(debugfs_lookup); + static struct dentry *start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 5870479..b108e7b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1227,7 +1227,7 @@ static int set_gfs2_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 47febcf..20b1c17 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -104,6 +104,7 @@ config NFSD_SCSILAYOUT depends on NFSD_V4 && BLOCK select NFSD_PNFS select EXPORTFS_BLOCK_OPS + select BLK_SCSI_REQUEST help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 0780ff8..a06115e 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -10,6 +10,7 @@ #include <linux/nfsd/debug.h> #include <scsi/scsi_proto.h> #include <scsi/scsi_common.h> +#include <scsi/scsi_request.h> #include "blocklayoutxdr.h" #include "pnfs.h" @@ -213,6 +214,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, { struct request_queue *q = bdev->bd_disk->queue; struct request *rq; + struct scsi_request *req; size_t bufflen = 252, len, id_len; u8 *buf, *d, type, assoc; int error; @@ -221,23 +223,24 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, if (!buf) return -ENOMEM; - rq = blk_get_request(q, READ, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(rq)) { error = -ENOMEM; goto out_free_buf; } - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); if (error) goto out_put_request; - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = 0x83; - rq->cmd[3] = bufflen >> 8; - rq->cmd[4] = bufflen & 0xff; - rq->cmd_len = COMMAND_SIZE(INQUIRY); + req->cmd[0] = INQUIRY; + req->cmd[1] = 1; + req->cmd[2] = 0x83; + req->cmd[3] = bufflen >> 8; + req->cmd[4] = bufflen & 0xff; + req->cmd_len = COMMAND_SIZE(INQUIRY); error = blk_execute_rq(rq->q, NULL, rq, 1); if (error) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 12eeae6..e1872f3 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; err = load_nilfs(nilfs, sb); if (err) @@ -1047,7 +1047,7 @@ static int set_bdev_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index ac3b4db..8c7d01b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -758,7 +758,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { - if (bdi_read_congested(target->bt_bdi)) + if (bdi_read_congested(target->bt_bdev->bd_bdi)) return; xfs_buf_read_map(target, map, nmaps, @@ -1791,7 +1791,6 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; - btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8a9d3a9..3c867e5 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -109,7 +109,6 @@ typedef unsigned int xfs_buf_flags_t; typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; - struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e850e76..ad95581 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -10,6 +10,7 @@ #include <linux/flex_proportions.h> #include <linux/timer.h> #include <linux/workqueue.h> +#include <linux/kref.h> struct page; struct device; @@ -144,6 +145,7 @@ struct backing_dev_info { char *name; + struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 43b93a9..c52a48c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,7 +18,14 @@ #include <linux/slab.h> int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_exit(struct backing_dev_info *bdi); + +static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) +{ + kref_get(&bdi->refcnt); + return bdi; +} + +void bdi_put(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -29,6 +36,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_destroy(struct backing_dev_info *bdi); +struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); @@ -183,7 +191,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) sb = inode->i_sb; #ifdef CONFIG_BLOCK if (sb_is_blkdev_sb(sb)) - return blk_get_backing_dev_info(I_BDEV(inode)); + return I_BDEV(inode)->bd_bdi; #endif return sb->s_bdi; } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4a2ab5d9..8e4df3d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -22,6 +22,7 @@ struct blk_mq_hw_ctx { unsigned long flags; /* BLK_MQ_F_* flags */ + void *sched_data; struct request_queue *queue; struct blk_flush_queue *fq; @@ -35,6 +36,7 @@ struct blk_mq_hw_ctx { atomic_t wait_index; struct blk_mq_tags *tags; + struct blk_mq_tags *sched_tags; struct srcu_struct queue_rq_srcu; @@ -60,7 +62,7 @@ struct blk_mq_hw_ctx { struct blk_mq_tag_set { unsigned int *mq_map; - struct blk_mq_ops *ops; + const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; @@ -151,11 +153,13 @@ enum { BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, + BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, + BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_MAX_DEPTH = 10240, @@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_free_request(struct request *rq); -void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); enum { BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ + BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ }; struct request *blk_mq_alloc_request(struct request_queue *q, int rw, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 519ea2c..d703acb 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -162,6 +162,13 @@ enum req_opf { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 8, + /* SCSI passthrough using struct scsi_request */ + REQ_OP_SCSI_IN = 32, + REQ_OP_SCSI_OUT = 33, + /* Driver private requests */ + REQ_OP_DRV_IN = 34, + REQ_OP_DRV_OUT = 35, + REQ_OP_LAST, }; @@ -221,6 +228,15 @@ static inline bool op_is_write(unsigned int op) } /* + * Check if the bio or request is one that needs special treatment in the + * flush state machine. + */ +static inline bool op_is_flush(unsigned int op) +{ + return op & (REQ_FUA | REQ_PREFLUSH); +} + +/* * Reads are always treated as synchronous, as are requests with the FUA or * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. @@ -232,22 +248,29 @@ static inline bool op_is_sync(unsigned int op) } typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, + bool internal) { - return tag | (queue_num << BLK_QC_T_SHIFT); + blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); + + if (internal) + ret |= BLK_QC_T_INTERNAL; + + return ret; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { - return cookie >> BLK_QC_T_SHIFT; + return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; } static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) @@ -255,6 +278,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } +static inline bool blk_qc_t_is_internal(blk_qc_t cookie) +{ + return (cookie & BLK_QC_T_INTERNAL) != 0; +} + struct blk_issue_stat { u64 time; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1ca8e8f..aecca0e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -71,15 +71,6 @@ struct request_list { }; /* - * request command types - */ -enum rq_cmd_type_bits { - REQ_TYPE_FS = 1, /* fs request */ - REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_DRV_PRIV, /* driver defined types from here */ -}; - -/* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -128,8 +119,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) -#define BLK_MAX_CDB 16 - /* * Try to put the fields that are referenced together in the same cacheline. * @@ -147,13 +136,16 @@ struct request { struct blk_mq_ctx *mq_ctx; int cpu; - unsigned cmd_type; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + + int internal_tag; + unsigned long atomic_flags; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ + int tag; sector_t __sector; /* sector cursor */ struct bio *bio; @@ -222,20 +214,9 @@ struct request { void *special; /* opaque pointer available for LLD use */ - int tag; int errors; - /* - * when request is used as a packet command carrier - */ - unsigned char __cmd[BLK_MAX_CDB]; - unsigned char *cmd; - unsigned short cmd_len; - unsigned int extra_len; /* length of alignment and padding */ - unsigned int sense_len; - unsigned int resid_len; /* residual count */ - void *sense; unsigned long deadline; struct list_head timeout_list; @@ -252,6 +233,21 @@ struct request { struct request *next_rq; }; +static inline bool blk_rq_is_scsi(struct request *rq) +{ + return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; +} + +static inline bool blk_rq_is_private(struct request *rq) +{ + return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; +} + +static inline bool blk_rq_is_passthrough(struct request *rq) +{ + return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -271,6 +267,8 @@ typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); typedef int (bsg_job_fn) (struct bsg_job *); +typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); +typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -333,6 +331,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; + unsigned short max_discard_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -406,8 +405,10 @@ struct request_queue { rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + init_rq_fn *init_rq_fn; + exit_rq_fn *exit_rq_fn; - struct blk_mq_ops *mq_ops; + const struct blk_mq_ops *mq_ops; unsigned int *mq_map; @@ -432,7 +433,8 @@ struct request_queue { */ struct delayed_work delay_work; - struct backing_dev_info backing_dev_info; + struct backing_dev_info *backing_dev_info; + struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. @@ -569,7 +571,15 @@ struct request_queue { struct list_head tag_set_list; struct bio_set *bio_split; +#ifdef CONFIG_BLK_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *mq_debugfs_dir; +#endif + bool mq_sysfs_init_done; + + size_t cmd_size; + void *rq_alloc_data; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -600,6 +610,7 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ +#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -695,9 +706,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) -#define blk_account_rq(rq) \ - (((rq)->rq_flags & RQF_STARTED) && \ - ((rq)->cmd_type == REQ_TYPE_FS)) +static inline bool blk_account_rq(struct request *rq) +{ + return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); +} #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) @@ -772,7 +784,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) static inline bool rq_mergeable(struct request *rq) { - if (rq->cmd_type != REQ_TYPE_FS) + if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) @@ -910,7 +922,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); -extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, @@ -1047,7 +1058,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, { struct request_queue *q = rq->q; - if (unlikely(rq->cmd_type != REQ_TYPE_FS)) + if (blk_rq_is_passthrough(rq)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || @@ -1129,14 +1140,15 @@ extern void blk_unprep_request(struct request *); extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern struct request_queue *blk_init_allocated_queue(struct request_queue *, - request_fn_proc *, spinlock_t *); +extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); +extern void blk_queue_max_discard_segments(struct request_queue *, + unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1179,8 +1191,16 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); +/* + * Number of physical segments as sent to the device. + * + * Normally this is the number of discontiguous data segments sent by the + * submitter. But for data-less command like discard we might have no + * actual data segments submitted, but the driver might have to add it's + * own special payload. In that case we still return 1 here so that this + * special payload will be mapped. + */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) @@ -1188,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) return rq->nr_phys_segments; } +/* + * Number of discard segments (or ranges) the driver needs to fill in. + * Each discard bio merged into a request is counted as one segment. + */ +static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) +{ + return max_t(unsigned short, rq->nr_phys_segments, 1); +} + extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); @@ -1376,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q) return q->limits.max_segments; } +static inline unsigned short queue_max_discard_segments(struct request_queue *q) +{ + return q->limits.max_discard_segments; +} + static inline unsigned int queue_max_segment_size(struct request_queue *q) { return q->limits.max_segment_size; @@ -1620,6 +1654,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } +/* + * Check if the two bvecs from two bios can be merged to one segment. + * If yes, no need to check gap between the two bios since the 1st bio + * and the 1st bvec in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + return false; + if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { @@ -1629,7 +1682,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, bio_get_last_bvec(prev, &pb); bio_get_first_bvec(next, &nb); - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + if (!bios_segs_mergeable(q, prev, &pb, &nb)) + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); } return false; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index e417f08..d2e9085 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -30,9 +30,6 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, char *name, - dev_t dev, struct block_device *bdev, - struct blk_user_trace_setup *buts); extern __printf(2, 3) void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -80,7 +77,6 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) # define blk_add_driver_data(q, rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) @@ -110,16 +106,16 @@ struct compat_blk_user_trace_setup { #endif -#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) +extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); -static inline int blk_cmd_buf_len(struct request *rq) +static inline sector_t blk_rq_trace_sector(struct request *rq) { - return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1; + return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); } -extern void blk_dump_cmd(char *buf, struct request *rq); -extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); - -#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ +static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) +{ + return blk_rq_is_passthrough(rq) ? 0 : blk_rq_sectors(rq); +} #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 657a718..e34dde2 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -66,9 +66,8 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); -int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, - bsg_job_fn *job_fn, int dd_job_size); -void bsg_request_fn(struct request_queue *q); +struct request_queue *bsg_setup_queue(struct device *dev, char *name, + bsg_job_fn *job_fn, int dd_job_size); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 8609d57..6e8f209 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -36,7 +36,7 @@ struct packet_command /* Uniform cdrom data structures for cdrom.c */ struct cdrom_device_info { - struct cdrom_device_ops *ops; /* link to device_ops */ + const struct cdrom_device_ops *ops; /* link to device_ops */ struct list_head list; /* linked list of all device_info */ struct gendisk *disk; /* matching block layer disk */ void *handle; /* driver-dependent data */ @@ -87,7 +87,6 @@ struct cdrom_device_ops { /* driver specifications */ const int capability; /* capability flags */ - int n_minors; /* number of active minor devices */ /* handle uniform packets for scsi type devices (scsi,atapi) */ int (*generic_packet) (struct cdrom_device_info *, struct packet_command *); @@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi, int page_code, int page_control); extern void init_cdrom_command(struct packet_command *cgc, void *buffer, int len, int type); +extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, + struct packet_command *cgc); /* The SCSI spec says there could be 256 slots. */ #define CDROM_MAX_SLOTS 256 diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 014cc56..c0befcf 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -80,6 +80,8 @@ static const struct file_operations __fops = { \ #if defined(CONFIG_DEBUG_FS) +struct dentry *debugfs_lookup(const char *name, struct dentry *parent); + struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -181,6 +183,12 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, * want to duplicate the design decision mistakes of procfs and devfs again. */ +static inline struct dentry *debugfs_lookup(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ef7962e..a7e6903 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); * = 2: The target wants to push back the io */ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); -typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, - union map_info *map_context); typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti, struct request *rq, union map_info *map_context, @@ -163,7 +161,6 @@ struct target_type { dm_ctr_fn ctr; dm_dtr_fn dtr; dm_map_fn map; - dm_map_request_fn map_rq; dm_clone_and_map_request_fn clone_and_map_rq; dm_release_clone_request_fn release_clone_rq; dm_endio_fn end_io; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b276e9e..aebecc4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -9,12 +9,22 @@ struct io_cq; struct elevator_type; -typedef int (elevator_merge_fn) (struct request_queue *, struct request **, +/* + * Return values from elevator merger + */ +enum elv_merge { + ELEVATOR_NO_MERGE = 0, + ELEVATOR_FRONT_MERGE = 1, + ELEVATOR_BACK_MERGE = 2, + ELEVATOR_DISCARD_MERGE = 3, +}; + +typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int); +typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, struct request *, struct bio *); @@ -77,6 +87,34 @@ struct elevator_ops elevator_registered_fn *elevator_registered_fn; }; +struct blk_mq_alloc_data; +struct blk_mq_hw_ctx; + +struct elevator_mq_ops { + int (*init_sched)(struct request_queue *, struct elevator_type *); + void (*exit_sched)(struct elevator_queue *); + + bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); + bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); + int (*request_merge)(struct request_queue *q, struct request **, struct bio *); + void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); + void (*requests_merged)(struct request_queue *, struct request *, struct request *); + struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); + void (*put_request)(struct request *); + void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); + struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); + bool (*has_work)(struct blk_mq_hw_ctx *); + void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); + void (*started_request)(struct request *); + void (*requeue_request)(struct request *); + struct request *(*former_request)(struct request_queue *, struct request *); + struct request *(*next_request)(struct request_queue *, struct request *); + int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *); + void (*put_rq_priv)(struct request_queue *, struct request *); + void (*init_icq)(struct io_cq *); + void (*exit_icq)(struct io_cq *); +}; + #define ELV_NAME_MAX (16) struct elv_fs_entry { @@ -94,12 +132,16 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - struct elevator_ops ops; + union { + struct elevator_ops sq; + struct elevator_mq_ops mq; + } ops; size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; + bool uses_mq; /* managed by elevator core */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ @@ -123,6 +165,7 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; + unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; @@ -133,12 +176,15 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *); extern void elv_dispatch_add_tail(struct request_queue *, struct request *); extern void elv_add_request(struct request_queue *, struct request *, int); extern void __elv_add_request(struct request_queue *, struct request *, int); -extern int elv_merge(struct request_queue *, struct request **, struct bio *); +extern enum elv_merge elv_merge(struct request_queue *, struct request **, + struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); -extern void elv_merged_request(struct request_queue *, struct request *, int); +extern void elv_merged_request(struct request_queue *, struct request *, + enum elv_merge); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); +extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); @@ -185,13 +231,6 @@ extern void elv_rb_del(struct rb_root *, struct request *); extern struct request *elv_rb_find(struct rb_root *, sector_t); /* - * Return values from elevator merger - */ -#define ELEVATOR_NO_MERGE 0 -#define ELEVATOR_FRONT_MERGE 1 -#define ELEVATOR_BACK_MERGE 2 - -/* * Insertion selection */ #define ELEVATOR_INSERT_FRONT 1 diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba0743..c930cbc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -423,6 +423,7 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; + struct backing_dev_info *bd_bdi; struct list_head bd_list; /* * Private data. You must have bd_claim'ed the block_device @@ -2342,6 +2343,7 @@ extern struct kmem_cache *names_cachep; #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); +extern void bdev_unhash_inode(dev_t dev); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 76f3975..a999d28 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,6 +167,13 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ +struct disk_devt { + atomic_t count; + void (*release)(struct disk_devt *disk_devt); +}; + +void put_disk_devt(struct disk_devt *disk_devt); +void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -176,6 +183,7 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); diff --git a/include/linux/ide.h b/include/linux/ide.h index a633898..2f51c17 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -20,6 +20,7 @@ #include <linux/mutex.h> /* for request_sense */ #include <linux/cdrom.h> +#include <scsi/scsi_cmnd.h> #include <asm/byteorder.h> #include <asm/io.h> @@ -39,18 +40,53 @@ struct device; -/* IDE-specific values for req->cmd_type */ -enum ata_cmd_type_bits { - REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, - REQ_TYPE_ATA_PC, - REQ_TYPE_ATA_SENSE, /* sense request */ - REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ - REQ_TYPE_ATA_PM_RESUME, /* resume request */ +/* values for ide_request.type */ +enum ata_priv_type { + ATA_PRIV_MISC, + ATA_PRIV_TASKFILE, + ATA_PRIV_PC, + ATA_PRIV_SENSE, /* sense request */ + ATA_PRIV_PM_SUSPEND, /* suspend request */ + ATA_PRIV_PM_RESUME, /* resume request */ }; -#define ata_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) +struct ide_request { + struct scsi_request sreq; + u8 sense[SCSI_SENSE_BUFFERSIZE]; + u8 type; +}; + +static inline struct ide_request *ide_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + +static inline bool ata_misc_request(struct request *rq) +{ + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_MISC; +} + +static inline bool ata_taskfile_request(struct request *rq) +{ + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_TASKFILE; +} + +static inline bool ata_pc_request(struct request *rq) +{ + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PC; +} + +static inline bool ata_sense_request(struct request *rq) +{ + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_SENSE; +} + +static inline bool ata_pm_request(struct request *rq) +{ + return blk_rq_is_private(rq) && + (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND || + ide_req(rq)->type == ATA_PRIV_PM_RESUME); +} /* Error codes returned in rq->errors to the higher part of the driver. */ enum { @@ -579,7 +615,7 @@ struct ide_drive_s { /* current sense rq and buffer */ bool sense_rq_armed; - struct request sense_rq; + struct request *sense_rq; struct request_sense sense_data; }; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7c273bb..ca45e4a 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -80,8 +80,6 @@ struct nvm_dev_ops { unsigned int max_phys_sect; }; - - #ifdef CONFIG_NVM #include <linux/blkdev.h> @@ -109,6 +107,7 @@ enum { NVM_RSP_ERR_FAILWRITE = 0x40ff, NVM_RSP_ERR_EMPTYPAGE = 0x42ff, NVM_RSP_ERR_FAILECC = 0x4281, + NVM_RSP_ERR_FAILCRC = 0x4004, NVM_RSP_WARN_HIGHECC = 0x4700, /* Device opcodes */ @@ -202,11 +201,10 @@ struct nvm_addr_format { struct nvm_id { u8 ver_id; u8 vmnt; - u8 cgrps; u32 cap; u32 dom; struct nvm_addr_format ppaf; - struct nvm_id_group groups[4]; + struct nvm_id_group grp; } __packed; struct nvm_target { @@ -216,10 +214,6 @@ struct nvm_target { struct gendisk *disk; }; -struct nvm_tgt_instance { - struct nvm_tgt_type *tt; -}; - #define ADDR_EMPTY (~0ULL) #define NVM_VERSION_MAJOR 1 @@ -230,7 +224,6 @@ struct nvm_rq; typedef void (nvm_end_io_fn)(struct nvm_rq *); struct nvm_rq { - struct nvm_tgt_instance *ins; struct nvm_tgt_dev *dev; struct bio *bio; @@ -254,6 +247,8 @@ struct nvm_rq { u64 ppa_status; /* ppa media status */ int error; + + void *private; }; static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) @@ -272,15 +267,6 @@ enum { NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; -/* system block cpu representation */ -struct nvm_sb_info { - unsigned long seqnr; - unsigned long erase_cnt; - unsigned int version; - char mmtype[NVM_MMTYPE_LEN]; - struct ppa_addr fs_ppa; -}; - /* Device generic information */ struct nvm_geo { int nr_chnls; @@ -308,6 +294,7 @@ struct nvm_geo { int sec_per_lun; }; +/* sub-device structure */ struct nvm_tgt_dev { /* Device information */ struct nvm_geo geo; @@ -329,17 +316,10 @@ struct nvm_dev { struct list_head devices; - /* Media manager */ - struct nvmm_type *mt; - void *mp; - - /* System blocks */ - struct nvm_sb_info sb; - /* Device information */ struct nvm_geo geo; - /* lower page table */ + /* lower page table */ int lps_per_blk; int *lptbl; @@ -359,6 +339,10 @@ struct nvm_dev { struct mutex mlock; spinlock_t lock; + + /* target management */ + struct list_head area_list; + struct list_head targets; }; static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, @@ -391,10 +375,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, return l; } -static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, - struct ppa_addr r) +static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr r) { - struct nvm_geo *geo = &dev->geo; + struct nvm_geo *geo = &tgt_dev->geo; struct ppa_addr l; l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; @@ -407,10 +391,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, return l; } -static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) +static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr r) { - struct nvm_geo *geo = &dev->geo; + struct nvm_geo *geo = &tgt_dev->geo; struct ppa_addr l; l.ppa = 0; @@ -452,15 +436,12 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) (ppa1.g.blk == ppa2.g.blk)); } -static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) -{ - return dev->lptbl[slc_pg]; -} - typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); typedef void (nvm_tgt_exit_fn)(void *); +typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); +typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); struct nvm_tgt_type { const char *name; @@ -469,12 +450,15 @@ struct nvm_tgt_type { /* target entry points */ nvm_tgt_make_rq_fn *make_rq; nvm_tgt_capacity_fn *capacity; - nvm_end_io_fn *end_io; /* module-specific init/teardown */ nvm_tgt_init_fn *init; nvm_tgt_exit_fn *exit; + /* sysfs */ + nvm_tgt_sysfs_init_fn *sysfs_init; + nvm_tgt_sysfs_exit_fn *sysfs_exit; + /* For internal use */ struct list_head list; }; @@ -487,103 +471,29 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); -typedef int (nvmm_register_fn)(struct nvm_dev *); -typedef void (nvmm_unregister_fn)(struct nvm_dev *); - -typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); -typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); -typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *); -typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int); -typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); -typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); -typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *, - struct ppa_addr, int); -typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int); - -enum { - TRANS_TGT_TO_DEV = 0x0, - TRANS_DEV_TO_TGT = 0x1, -}; - -struct nvmm_type { - const char *name; - unsigned int version[3]; - - nvmm_register_fn *register_mgr; - nvmm_unregister_fn *unregister_mgr; - - nvmm_create_tgt_fn *create_tgt; - nvmm_remove_tgt_fn *remove_tgt; - - nvmm_submit_io_fn *submit_io; - nvmm_erase_blk_fn *erase_blk; - - nvmm_get_area_fn *get_area; - nvmm_put_area_fn *put_area; - - nvmm_trans_ppa_fn *trans_ppa; - nvmm_part_to_tgt_fn *part_to_tgt; - - struct list_head list; -}; - -extern int nvm_register_mgr(struct nvmm_type *); -extern void nvm_unregister_mgr(struct nvmm_type *); - extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); -extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int); extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); -extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); -extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); -extern void nvm_end_io(struct nvm_rq *, int); -extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, - void *, int); -extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, - int, void *, int); +extern void nvm_end_io(struct nvm_rq *); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); -extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -/* sysblk.c */ -#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ - -/* system block on disk representation */ -struct nvm_system_block { - __be32 magic; /* magic signature */ - __be32 seqnr; /* sequence number */ - __be32 erase_cnt; /* erase count */ - __be16 version; /* version number */ - u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */ - __be64 fs_ppa; /* PPA for media manager - * superblock */ -}; - -extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *); -extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *); -extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); - extern int nvm_dev_factory(struct nvm_dev *, int flags); -#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ - for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \ - (chid)++, (ppa).g.ch = (chid)) \ - for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \ - (lunid)++, (ppa).g.lun = (lunid)) +extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); #else /* CONFIG_NVM */ struct nvm_dev_ops; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d1c6f1..0b676a0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -244,6 +244,7 @@ enum { NVME_CTRL_ONCS_DSM = 1 << 2, NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, + NVME_CTRL_OACS_SEC_SUPP = 1 << 0, }; struct nvme_lbaf { @@ -553,6 +554,8 @@ enum { NVME_DSMGMT_AD = 1 << 2, }; +#define NVME_DSM_MAX_RANGES 256 + struct nvme_dsm_range { __le32 cattr; __le32 nlb; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index f017fd6..d4e0a20 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -259,6 +259,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) unsigned int sbitmap_weight(const struct sbitmap *sb); /** + * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. + * @sb: Bitmap to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The format may change at any time. + */ +void sbitmap_show(struct sbitmap *sb, struct seq_file *m); + +/** + * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct + * seq_file. + * @sb: Bitmap to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The output isn't guaranteed to be internally + * consistent. + */ +void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); + +/** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. * @sbq: Bitmap queue to initialize. @@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); +/** + * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct + * seq_file. + * @sbq: Bitmap queue to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The format may change at any time. + */ +void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); + #endif /* __LINUX_SCALE_BITMAP_H */ diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h new file mode 100644 index 0000000..deee23d --- /dev/null +++ b/include/linux/sed-opal.h @@ -0,0 +1,70 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Authors: + * Rafael Antognolli <rafael.antognolli@intel.com> + * Scott Bauer <scott.bauer@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef LINUX_OPAL_H +#define LINUX_OPAL_H + +#include <uapi/linux/sed-opal.h> +#include <linux/kernel.h> + +struct opal_dev; + +typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer, + size_t len, bool send); + +#ifdef CONFIG_BLK_SED_OPAL +bool opal_unlock_from_suspend(struct opal_dev *dev); +struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); +int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); + +static inline bool is_sed_ioctl(unsigned int cmd) +{ + switch (cmd) { + case IOC_OPAL_SAVE: + case IOC_OPAL_LOCK_UNLOCK: + case IOC_OPAL_TAKE_OWNERSHIP: + case IOC_OPAL_ACTIVATE_LSP: + case IOC_OPAL_SET_PW: + case IOC_OPAL_ACTIVATE_USR: + case IOC_OPAL_REVERT_TPR: + case IOC_OPAL_LR_SETUP: + case IOC_OPAL_ADD_USR_TO_LR: + case IOC_OPAL_ENABLE_DISABLE_MBR: + case IOC_OPAL_ERASE_LR: + case IOC_OPAL_SECURE_ERASE_LR: + return true; + } + return false; +} +#else +static inline bool is_sed_ioctl(unsigned int cmd) +{ + return false; +} + +static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd, + void __user *ioctl_ptr) +{ + return 0; +} +static inline bool opal_unlock_from_suspend(struct opal_dev *dev) +{ + return false; +} +#define init_opal_dev(data, send_recv) NULL +#endif /* CONFIG_BLK_SED_OPAL */ +#endif /* LINUX_OPAL_H */ diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 9fc1aec..b379f93 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -8,6 +8,7 @@ #include <linux/timer.h> #include <linux/scatterlist.h> #include <scsi/scsi_device.h> +#include <scsi/scsi_request.h> struct Scsi_Host; struct scsi_driver; @@ -57,6 +58,7 @@ struct scsi_pointer { #define SCMD_TAGGED (1 << 0) struct scsi_cmnd { + struct scsi_request req; struct scsi_device *device; struct list_head list; /* scsi_cmnd participates in queue lists */ struct list_head eh_entry; /* entry for the host eh_cmd_q */ @@ -149,7 +151,7 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd) return cmd + 1; } -/* make sure not to use it with REQ_TYPE_BLOCK_PC commands */ +/* make sure not to use it with passthrough commands */ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) { return *(struct scsi_driver **)cmd->request->rq_disk->private_data; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 36680f1..3cd8c3b 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -551,9 +551,6 @@ struct Scsi_Host { struct list_head __devices; struct list_head __targets; - struct scsi_host_cmd_pool *cmd_pool; - spinlock_t free_list_lock; - struct list_head free_list; /* backup store of cmd structs */ struct list_head starved_list; spinlock_t default_lock; @@ -826,8 +823,6 @@ extern void scsi_block_requests(struct Scsi_Host *); struct class_container; -extern struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, - void (*) (struct request_queue *)); /* * These two functions are used to allocate and free a pseudo device * which will connect to the host adapter itself rather than any diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h new file mode 100644 index 0000000..ba0aeb9 --- /dev/null +++ b/include/scsi/scsi_request.h @@ -0,0 +1,30 @@ +#ifndef _SCSI_SCSI_REQUEST_H +#define _SCSI_SCSI_REQUEST_H + +#include <linux/blk-mq.h> + +#define BLK_MAX_CDB 16 + +struct scsi_request { + unsigned char __cmd[BLK_MAX_CDB]; + unsigned char *cmd; + unsigned short cmd_len; + unsigned int sense_len; + unsigned int resid_len; /* residual count */ + void *sense; +}; + +static inline struct scsi_request *scsi_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + +static inline void scsi_req_free_cmd(struct scsi_request *req) +{ + if (req->cmd != req->__cmd) + kfree(req->cmd); +} + +void scsi_req_init(struct request *); + +#endif /* _SCSI_SCSI_REQUEST_H */ diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h index 8129239..b6e07b5 100644 --- a/include/scsi/scsi_transport.h +++ b/include/scsi/scsi_transport.h @@ -119,4 +119,6 @@ scsi_transport_device_data(struct scsi_device *sdev) + shost->transportt->device_private_offset; } +void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q); + #endif /* SCSI_TRANSPORT_H */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 3e02e3a..a88ed13 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -73,19 +73,17 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __field( unsigned int, nr_sector ) __field( int, errors ) __array( char, rwbs, RWBS_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? - 0 : blk_rq_pos(rq); - __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? - 0 : blk_rq_sectors(rq); + __entry->sector = blk_rq_trace_sector(rq); + __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->errors = rq->errors; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); - blk_dump_cmd(__get_str(cmd), rq); + __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", @@ -153,7 +151,7 @@ TRACE_EVENT(block_rq_complete, __field( unsigned int, nr_sector ) __field( int, errors ) __array( char, rwbs, RWBS_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( @@ -163,7 +161,7 @@ TRACE_EVENT(block_rq_complete, __entry->errors = rq->errors; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); - blk_dump_cmd(__get_str(cmd), rq); + __get_str(cmd)[0] = '\0'; ), TP_printk("%d,%d %s (%s) %llu + %u [%d]", @@ -186,20 +184,17 @@ DECLARE_EVENT_CLASS(block_rq, __field( unsigned int, bytes ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + __dynamic_array( char, cmd, 1 ) ), TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? - 0 : blk_rq_pos(rq); - __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? - 0 : blk_rq_sectors(rq); - __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? - blk_rq_bytes(rq) : 0; + __entry->sector = blk_rq_trace_sector(rq); + __entry->nr_sector = blk_rq_trace_nr_sectors(rq); + __entry->bytes = blk_rq_bytes(rq); blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); - blk_dump_cmd(__get_str(cmd), rq); + __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index 774a431..fd19f36 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -122,6 +122,44 @@ struct nvm_ioctl_dev_factory { __u32 flags; }; +struct nvm_user_vio { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nppas; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 ppa_list; + __u32 metadata_len; + __u32 data_len; + __u64 status; + __u32 result; + __u32 rsvd3[3]; +}; + +struct nvm_passthru_vio { + __u8 opcode; + __u8 flags; + __u8 rsvd[2]; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u64 ppa_list; + __u16 nppas; + __u16 control; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u64 status; + __u32 result; + __u32 timeout_ms; +}; + /* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ enum { /* top level cmds */ @@ -137,6 +175,11 @@ enum { /* Factory reset device */ NVM_DEV_FACTORY_CMD, + + /* Vector user I/O */ + NVM_DEV_VIO_ADMIN_CMD = 0x41, + NVM_DEV_VIO_CMD = 0x42, + NVM_DEV_VIO_USER_CMD = 0x43, }; #define NVM_IOCTL 'L' /* 0x4c */ @@ -154,6 +197,13 @@ enum { #define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ struct nvm_ioctl_dev_factory) +#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \ + struct nvm_passthru_vio) +#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\ + struct nvm_passthru_vio) +#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\ + struct nvm_user_vio) + #define NVM_VERSION_MAJOR 1 #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCHLEVEL 0 diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h new file mode 100644 index 0000000..c72e073 --- /dev/null +++ b/include/uapi/linux/sed-opal.h @@ -0,0 +1,119 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Authors: + * Rafael Antognolli <rafael.antognolli@intel.com> + * Scott Bauer <scott.bauer@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _UAPI_SED_OPAL_H +#define _UAPI_SED_OPAL_H + +#include <linux/types.h> + +#define OPAL_KEY_MAX 256 +#define OPAL_MAX_LRS 9 + +enum opal_mbr { + OPAL_MBR_ENABLE = 0x0, + OPAL_MBR_DISABLE = 0x01, +}; + +enum opal_user { + OPAL_ADMIN1 = 0x0, + OPAL_USER1 = 0x01, + OPAL_USER2 = 0x02, + OPAL_USER3 = 0x03, + OPAL_USER4 = 0x04, + OPAL_USER5 = 0x05, + OPAL_USER6 = 0x06, + OPAL_USER7 = 0x07, + OPAL_USER8 = 0x08, + OPAL_USER9 = 0x09, +}; + +enum opal_lock_state { + OPAL_RO = 0x01, /* 0001 */ + OPAL_RW = 0x02, /* 0010 */ + OPAL_LK = 0x04, /* 0100 */ +}; + +struct opal_key { + __u8 lr; + __u8 key_len; + __u8 __align[6]; + __u8 key[OPAL_KEY_MAX]; +}; + +struct opal_lr_act { + struct opal_key key; + __u32 sum; + __u8 num_lrs; + __u8 lr[OPAL_MAX_LRS]; + __u8 align[2]; /* Align to 8 byte boundary */ +}; + +struct opal_session_info { + __u32 sum; + __u32 who; + struct opal_key opal_key; +}; + +struct opal_user_lr_setup { + __u64 range_start; + __u64 range_length; + __u32 RLE; /* Read Lock enabled */ + __u32 WLE; /* Write Lock Enabled */ + struct opal_session_info session; +}; + +struct opal_lock_unlock { + struct opal_session_info session; + __u32 l_state; + __u8 __align[4]; +}; + +struct opal_new_pw { + struct opal_session_info session; + + /* When we're not operating in sum, and we first set + * passwords we need to set them via ADMIN authority. + * After passwords are changed, we can set them via, + * User authorities. + * Because of this restriction we need to know about + * Two different users. One in 'session' which we will use + * to start the session and new_userr_pw as the user we're + * chaning the pw for. + */ + struct opal_session_info new_user_pw; +}; + +struct opal_mbr_data { + struct opal_key key; + __u8 enable_disable; + __u8 __align[7]; +}; + +#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) +#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) +#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) +#define IOC_OPAL_ACTIVATE_LSP _IOW('p', 223, struct opal_lr_act) +#define IOC_OPAL_SET_PW _IOW('p', 224, struct opal_new_pw) +#define IOC_OPAL_ACTIVATE_USR _IOW('p', 225, struct opal_session_info) +#define IOC_OPAL_REVERT_TPR _IOW('p', 226, struct opal_key) +#define IOC_OPAL_LR_SETUP _IOW('p', 227, struct opal_user_lr_setup) +#define IOC_OPAL_ADD_USR_TO_LR _IOW('p', 228, struct opal_lock_unlock) +#define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data) +#define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) +#define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) + +#endif /* _UAPI_SED_OPAL_H */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 95cecbf..b2058a7 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -28,6 +28,8 @@ #include <linux/uaccess.h> #include <linux/list.h> +#include "../../block/blk.h" + #include <trace/events/block.h> #include "trace_output.h" @@ -292,9 +294,6 @@ record_it: local_irq_restore(flags); } -static struct dentry *blk_tree_root; -static DEFINE_MUTEX(blk_tree_mutex); - static void blk_trace_free(struct blk_trace *bt) { debugfs_remove(bt->msg_file); @@ -433,9 +432,9 @@ static void blk_trace_setup_lba(struct blk_trace *bt, /* * Setup everything required to start tracing */ -int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct block_device *bdev, - struct blk_user_trace_setup *buts) +static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, + struct blk_user_trace_setup *buts) { struct blk_trace *bt = NULL; struct dentry *dir = NULL; @@ -468,22 +467,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = -ENOENT; - mutex_lock(&blk_tree_mutex); - if (!blk_tree_root) { - blk_tree_root = debugfs_create_dir("block", NULL); - if (!blk_tree_root) { - mutex_unlock(&blk_tree_mutex); - goto err; - } - } - mutex_unlock(&blk_tree_mutex); - - dir = debugfs_create_dir(buts->name, blk_tree_root); + if (!blk_debugfs_root) + goto err; + dir = debugfs_lookup(buts->name, blk_debugfs_root); + if (!dir) + bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); if (!dir) goto err; - bt->dir = dir; bt->dev = dev; atomic_set(&bt->dropped, 0); INIT_LIST_HEAD(&bt->running_list); @@ -525,9 +517,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (atomic_inc_return(&blk_probes_ref) == 1) blk_register_tracepoints(); - return 0; + ret = 0; err: - blk_trace_free(bt); + if (dir && !bt->dir) + dput(dir); + if (ret) + blk_trace_free(bt); return ret; } @@ -712,15 +707,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (likely(!bt)) return; - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { + if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, nr_bytes, req_op(rq), rq->cmd_flags, - what, rq->errors, rq->cmd_len, rq->cmd); - } else { + else what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, rq->errors, 0, NULL); - } + + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), + rq->cmd_flags, what, rq->errors, 0, NULL); } static void blk_add_trace_rq_abort(void *ignore, @@ -972,11 +965,7 @@ void blk_add_driver_data(struct request_queue *q, if (likely(!bt)) return; - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) - __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 0, - BLK_TA_DRV_DATA, rq->errors, len, data); - else - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, 0, + __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, rq->errors, len, data); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1752,31 +1741,6 @@ void blk_trace_remove_sysfs(struct device *dev) #ifdef CONFIG_EVENT_TRACING -void blk_dump_cmd(char *buf, struct request *rq) -{ - int i, end; - int len = rq->cmd_len; - unsigned char *cmd = rq->cmd; - - if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { - buf[0] = '\0'; - return; - } - - for (end = len - 1; end >= 0; end--) - if (cmd[end]) - break; - end++; - - for (i = 0; i < len; i++) { - buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]); - if (i == end && end != len - 1) { - sprintf(buf, " .."); - break; - } - } -} - void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes) { int i = 0; diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2cecf05..55e11c4 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -17,6 +17,7 @@ #include <linux/random.h> #include <linux/sbitmap.h> +#include <linux/seq_file.h> int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node) @@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb) } EXPORT_SYMBOL_GPL(sbitmap_weight); +void sbitmap_show(struct sbitmap *sb, struct seq_file *m) +{ + seq_printf(m, "depth=%u\n", sb->depth); + seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); + seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); + seq_printf(m, "map_nr=%u\n", sb->map_nr); +} +EXPORT_SYMBOL_GPL(sbitmap_show); + +static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) +{ + if ((offset & 0xf) == 0) { + if (offset != 0) + seq_putc(m, '\n'); + seq_printf(m, "%08x:", offset); + } + if ((offset & 0x1) == 0) + seq_putc(m, ' '); + seq_printf(m, "%02x", byte); +} + +void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) +{ + u8 byte = 0; + unsigned int byte_bits = 0; + unsigned int offset = 0; + int i; + + for (i = 0; i < sb->map_nr; i++) { + unsigned long word = READ_ONCE(sb->map[i].word); + unsigned int word_bits = READ_ONCE(sb->map[i].depth); + + while (word_bits > 0) { + unsigned int bits = min(8 - byte_bits, word_bits); + + byte |= (word & (BIT(bits) - 1)) << byte_bits; + byte_bits += bits; + if (byte_bits == 8) { + emit_byte(m, offset, byte); + byte = 0; + byte_bits = 0; + offset++; + } + word >>= bits; + word_bits -= bits; + } + } + if (byte_bits) { + emit_byte(m, offset, byte); + offset++; + } + if (offset) + seq_putc(m, '\n'); +} +EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); + static unsigned int sbq_calc_wake_batch(unsigned int depth) { unsigned int wake_batch; @@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { - sbq->wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(depth); + int i; + + if (sbq->wake_batch != wake_batch) { + WRITE_ONCE(sbq->wake_batch, wake_batch); + /* + * Pairs with the memory barrier in sbq_wake_up() to ensure that + * the batch size is updated before the wait counts. + */ + smp_mb__before_atomic(); + for (i = 0; i < SBQ_WAIT_QUEUES; i++) + atomic_set(&sbq->ws[i].wait_cnt, 1); + } sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) static void sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; + unsigned int wake_batch; int wait_cnt; - /* Ensure that the wait list checks occur after clear_bit(). */ - smp_mb(); + /* + * Pairs with the memory barrier in set_current_state() to ensure the + * proper ordering of clear_bit()/waitqueue_active() in the waker and + * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See + * the comment on waitqueue_active(). This is __after_atomic because we + * just did clear_bit() in the caller. + */ + smp_mb__after_atomic(); ws = sbq_wake_ptr(sbq); if (!ws) return; wait_cnt = atomic_dec_return(&ws->wait_cnt); - if (unlikely(wait_cnt < 0)) - wait_cnt = atomic_inc_return(&ws->wait_cnt); - if (wait_cnt == 0) { - atomic_add(sbq->wake_batch, &ws->wait_cnt); + if (wait_cnt <= 0) { + wake_batch = READ_ONCE(sbq->wake_batch); + /* + * Pairs with the memory barrier in sbitmap_queue_resize() to + * ensure that we see the batch size update before the wait + * count is reset. + */ + smp_mb__before_atomic(); + /* + * If there are concurrent callers to sbq_wake_up(), the last + * one to decrement the wait count below zero will bump it back + * up. If there is a concurrent resize, the count reset will + * either cause the cmpxchg to fail or overwrite after the + * cmpxchg. + */ + atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); wake_up(&ws->wait); } @@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) int i, wake_index; /* - * Make sure all changes prior to this are visible from other CPUs. + * Pairs with the memory barrier in set_current_state() like in + * sbq_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); @@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) } } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); + +void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) +{ + bool first; + int i; + + sbitmap_show(&sbq->sb, m); + + seq_puts(m, "alloc_hint={"); + first = true; + for_each_possible_cpu(i) { + if (!first) + seq_puts(m, ", "); + first = false; + seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); + } + seq_puts(m, "}\n"); + + seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); + seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); + + seq_puts(m, "ws={\n"); + for (i = 0; i < SBQ_WAIT_QUEUES; i++) { + struct sbq_wait_state *ws = &sbq->ws[i]; + + seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", + atomic_read(&ws->wait_cnt), + waitqueue_active(&ws->wait) ? "active" : "inactive"); + } + seq_puts(m, "}\n"); + + seq_printf(m, "round_robin=%d\n", sbq->round_robin); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_show); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 3bfed5ab..39ce616 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -237,6 +237,7 @@ static __init int bdi_class_init(void) bdi_class->dev_groups = bdi_dev_groups; bdi_debug_init(); + return 0; } postcore_initcall(bdi_class_init); @@ -758,15 +759,20 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) if (!bdi->wb_congested) return -ENOMEM; + atomic_set(&bdi->wb_congested->refcnt, 1); + err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (err) { - kfree(bdi->wb_congested); + wb_congested_put(bdi->wb_congested); return err; } return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } +static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +{ + wb_congested_put(bdi->wb_congested); +} #endif /* CONFIG_CGROUP_WRITEBACK */ @@ -776,6 +782,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->dev = NULL; + kref_init(&bdi->refcnt); bdi->min_ratio = 0; bdi->max_ratio = 100; bdi->max_prop_frac = FPROP_FRAC_BASE; @@ -791,6 +798,22 @@ int bdi_init(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_init); +struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) +{ + struct backing_dev_info *bdi; + + bdi = kmalloc_node(sizeof(struct backing_dev_info), + gfp_mask | __GFP_ZERO, node_id); + if (!bdi) + return NULL; + + if (bdi_init(bdi)) { + kfree(bdi); + return NULL; + } + return bdi; +} + int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) { @@ -871,12 +894,26 @@ void bdi_unregister(struct backing_dev_info *bdi) } } -void bdi_exit(struct backing_dev_info *bdi) +static void bdi_exit(struct backing_dev_info *bdi) { WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); } +static void release_bdi(struct kref *ref) +{ + struct backing_dev_info *bdi = + container_of(ref, struct backing_dev_info, refcnt); + + bdi_exit(bdi); + kfree(bdi); +} + +void bdi_put(struct backing_dev_info *bdi) +{ + kref_put(&bdi->refcnt, release_bdi); +} + void bdi_destroy(struct backing_dev_info *bdi) { bdi_unregister(bdi); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 290e8b7..2164498 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1988,11 +1988,11 @@ void laptop_mode_timer_fn(unsigned long data) * We want to write everything out, not just down to the dirty * threshold */ - if (!bdi_has_dirty_io(&q->backing_dev_info)) + if (!bdi_has_dirty_io(q->backing_dev_info)) return; rcu_read_lock(); - list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node) + list_for_each_entry_rcu(wb, &q->backing_dev_info->wb_list, bdi_node) if (wb_has_dirty_io(wb)) wb_start_writeback(wb, nr_pages, true, WB_REASON_LAPTOP_TIMER); |