From 5953316dbf90067ebdeca626c34488bc166b73a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 23 May 2013 12:25:08 +0200 Subject: block: make rq->cmd_flags be 64-bit We have officially run out of flags in a 32-bit space. Extend it to 64-bit even on 32-bit archs. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 04ceb7e..000abe2 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q) return; if (WARN(atomic_read(&usage_count) == 0, - "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n", + "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, - current_req->cmd_flags)) + (unsigned long long) current_req->cmd_flags)) return; if (test_and_set_bit(0, &fdc_busy)) { -- cgit v1.1 From f2298c0403b0dfcaef637eba0c02c4a06d7a25ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Oct 2013 11:52:25 +0100 Subject: null_blk: multi queue aware block test driver A driver that simply completes IO it receives, it does no transfers. Written to facilitate testing of the blk-mq code. It supports various module options to use either bio queueing, rq queueing, or mq mode. 
Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 3 + drivers/block/Makefile | 1 + drivers/block/null_blk.c | 635 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 639 insertions(+) create mode 100644 drivers/block/null_blk.c (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index e07a5fd..4682546 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -15,6 +15,9 @@ menuconfig BLK_DEV if BLK_DEV +config BLK_DEV_NULL_BLK + tristate "Null test block driver" + config BLK_DEV_FD tristate "Normal floppy disk support" depends on ARCH_MAY_HAVE_PC_FDC diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399..03b3b4a 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c new file mode 100644 index 0000000..b5d8423 --- /dev/null +++ b/drivers/block/null_blk.c @@ -0,0 +1,635 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nullb_cmd { + struct list_head list; + struct llist_node ll_list; + struct call_single_data csd; + struct request *rq; + struct bio *bio; + unsigned int tag; + struct nullb_queue *nq; +}; + +struct nullb_queue { + unsigned long *tag_map; + wait_queue_head_t wait; + unsigned int queue_depth; + + struct nullb_cmd *cmds; +}; + +struct nullb { + struct list_head list; + unsigned int index; + struct request_queue *q; + struct gendisk *disk; + struct hrtimer timer; + unsigned int queue_depth; + spinlock_t lock; + + struct nullb_queue *queues; + unsigned int nr_queues; +}; + +static LIST_HEAD(nullb_list); +static struct mutex lock; +static int null_major; +static int nullb_indexes; + +struct 
completion_queue { + struct llist_head list; + struct hrtimer timer; +}; + +/* + * These are per-cpu for now, they will need to be configured by the + * complete_queues parameter and appropriately mapped. + */ +static DEFINE_PER_CPU(struct completion_queue, completion_queues); + +enum { + NULL_IRQ_NONE = 0, + NULL_IRQ_SOFTIRQ = 1, + NULL_IRQ_TIMER = 2, + + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + +static int submit_queues = 1; +module_param(submit_queues, int, S_IRUGO); +MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + +static int home_node = NUMA_NO_NODE; +module_param(home_node, int, S_IRUGO); +MODULE_PARM_DESC(home_node, "Home node for the device"); + +static int queue_mode = NULL_Q_MQ; +module_param(queue_mode, int, S_IRUGO); +MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); + +static int gb = 250; +module_param(gb, int, S_IRUGO); +MODULE_PARM_DESC(gb, "Size in GB"); + +static int bs = 512; +module_param(bs, int, S_IRUGO); +MODULE_PARM_DESC(bs, "Block size (in bytes)"); + +static int nr_devices = 2; +module_param(nr_devices, int, S_IRUGO); +MODULE_PARM_DESC(nr_devices, "Number of devices to register"); + +static int irqmode = NULL_IRQ_SOFTIRQ; +module_param(irqmode, int, S_IRUGO); +MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); + +static int completion_nsec = 10000; +module_param(completion_nsec, int, S_IRUGO); +MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); + +static int hw_queue_depth = 64; +module_param(hw_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); + +static bool use_per_node_hctx = true; +module_param(use_per_node_hctx, bool, S_IRUGO); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. 
Default: true"); + +static void put_tag(struct nullb_queue *nq, unsigned int tag) +{ + clear_bit_unlock(tag, nq->tag_map); + + if (waitqueue_active(&nq->wait)) + wake_up(&nq->wait); +} + +static unsigned int get_tag(struct nullb_queue *nq) +{ + unsigned int tag; + + do { + tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); + if (tag >= nq->queue_depth) + return -1U; + } while (test_and_set_bit_lock(tag, nq->tag_map)); + + return tag; +} + +static void free_cmd(struct nullb_cmd *cmd) +{ + put_tag(cmd->nq, cmd->tag); +} + +static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + unsigned int tag; + + tag = get_tag(nq); + if (tag != -1U) { + cmd = &nq->cmds[tag]; + cmd->tag = tag; + cmd->nq = nq; + return cmd; + } + + return NULL; +} + +static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +{ + struct nullb_cmd *cmd; + DEFINE_WAIT(wait); + + cmd = __alloc_cmd(nq); + if (cmd || !can_wait) + return cmd; + + do { + prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); + cmd = __alloc_cmd(nq); + if (cmd) + break; + + io_schedule(); + } while (1); + + finish_wait(&nq->wait, &wait); + return cmd; +} + +static void end_cmd(struct nullb_cmd *cmd) +{ + if (cmd->rq) { + if (queue_mode == NULL_Q_MQ) + blk_mq_end_io(cmd->rq, 0); + else { + INIT_LIST_HEAD(&cmd->rq->queuelist); + blk_end_request_all(cmd->rq, 0); + } + } else if (cmd->bio) + bio_endio(cmd->bio, 0); + + if (queue_mode != NULL_Q_MQ) + free_cmd(cmd); +} + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) +{ + struct completion_queue *cq; + struct llist_node *entry; + struct nullb_cmd *cmd; + + cq = &per_cpu(completion_queues, smp_processor_id()); + + while ((entry = llist_del_all(&cq->list)) != NULL) { + do { + cmd = container_of(entry, struct nullb_cmd, ll_list); + end_cmd(cmd); + entry = entry->next; + } while (entry); + } + + return HRTIMER_NORESTART; +} + +static void null_cmd_end_timer(struct nullb_cmd *cmd) +{ + struct 
completion_queue *cq = &per_cpu(completion_queues, get_cpu()); + + cmd->ll_list.next = NULL; + if (llist_add(&cmd->ll_list, &cq->list)) { + ktime_t kt = ktime_set(0, completion_nsec); + + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); + } + + put_cpu(); +} + +static void null_softirq_done_fn(struct request *rq) +{ + blk_end_request_all(rq, 0); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) + +static void null_ipi_cmd_end_io(void *data) +{ + struct completion_queue *cq; + struct llist_node *entry, *next; + struct nullb_cmd *cmd; + + cq = &per_cpu(completion_queues, smp_processor_id()); + + entry = llist_del_all(&cq->list); + + while (entry) { + next = entry->next; + cmd = llist_entry(entry, struct nullb_cmd, ll_list); + end_cmd(cmd); + entry = next; + } +} + +static void null_cmd_end_ipi(struct nullb_cmd *cmd) +{ + struct call_single_data *data = &cmd->csd; + int cpu = get_cpu(); + struct completion_queue *cq = &per_cpu(completion_queues, cpu); + + cmd->ll_list.next = NULL; + + if (llist_add(&cmd->ll_list, &cq->list)) { + data->func = null_ipi_cmd_end_io; + data->flags = 0; + __smp_call_function_single(cpu, data, 0); + } + + put_cpu(); +} + +#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ + +static inline void null_handle_cmd(struct nullb_cmd *cmd) +{ + /* Complete IO by inline, softirq or timer */ + switch (irqmode) { + case NULL_IRQ_NONE: + end_cmd(cmd); + break; + case NULL_IRQ_SOFTIRQ: +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) + null_cmd_end_ipi(cmd); +#else + end_cmd(cmd); +#endif + break; + case NULL_IRQ_TIMER: + null_cmd_end_timer(cmd); + break; + } +} + +static struct nullb_queue *nullb_to_queue(struct nullb *nullb) +{ + int index = 0; + + if (nullb->nr_queues != 1) + index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); + + return &nullb->queues[index]; +} + +static void null_queue_bio(struct request_queue *q, struct bio *bio) +{ + struct nullb *nullb = 
q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 1); + cmd->bio = bio; + + null_handle_cmd(cmd); +} + +static int null_rq_prep_fn(struct request_queue *q, struct request *req) +{ + struct nullb *nullb = q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 0); + if (cmd) { + cmd->rq = req; + req->special = cmd; + return BLKPREP_OK; + } + + return BLKPREP_DEFER; +} + +static void null_request_fn(struct request_queue *q) +{ + struct request *rq; + + while ((rq = blk_fetch_request(q)) != NULL) { + struct nullb_cmd *cmd = rq->special; + + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); + } +} + +static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + struct nullb_cmd *cmd = rq->special; + + cmd->rq = rq; + cmd->nq = hctx->driver_data; + + null_handle_cmd(cmd); + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) +{ + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, + hctx_index); +} + +static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) +{ + kfree(hctx); +} + +static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int index) +{ + struct nullb *nullb = data; + struct nullb_queue *nq = &nullb->queues[index]; + + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; + nullb->nr_queues++; + hctx->driver_data = nq; + + return 0; +} + +static struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .map_queue = blk_mq_map_queue, + .init_hctx = null_init_hctx, +}; + +static struct blk_mq_reg null_mq_reg = { + .ops = &null_mq_ops, + .queue_depth = 64, + .cmd_size = sizeof(struct nullb_cmd), + .flags = BLK_MQ_F_SHOULD_MERGE, +}; + +static void null_del_dev(struct nullb *nullb) +{ + list_del_init(&nullb->list); + + 
del_gendisk(nullb->disk); + if (queue_mode == NULL_Q_MQ) + blk_mq_free_queue(nullb->q); + else + blk_cleanup_queue(nullb->q); + put_disk(nullb->disk); + kfree(nullb); +} + +static int null_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void null_release(struct gendisk *disk, fmode_t mode) +{ +} + +static const struct block_device_operations null_fops = { + .owner = THIS_MODULE, + .open = null_open, + .release = null_release, +}; + +static int setup_commands(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + int i, tag_size; + + nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); + if (!nq->cmds) + return 1; + + tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; + nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); + if (!nq->tag_map) { + kfree(nq->cmds); + return 1; + } + + for (i = 0; i < nq->queue_depth; i++) { + cmd = &nq->cmds[i]; + INIT_LIST_HEAD(&cmd->list); + cmd->ll_list.next = NULL; + cmd->tag = -1U; + } + + return 0; +} + +static void cleanup_queue(struct nullb_queue *nq) +{ + kfree(nq->tag_map); + kfree(nq->cmds); +} + +static void cleanup_queues(struct nullb *nullb) +{ + int i; + + for (i = 0; i < nullb->nr_queues; i++) + cleanup_queue(&nullb->queues[i]); + + kfree(nullb->queues); +} + +static int setup_queues(struct nullb *nullb) +{ + struct nullb_queue *nq; + int i; + + nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL); + if (!nullb->queues) + return 1; + + nullb->nr_queues = 0; + nullb->queue_depth = hw_queue_depth; + + if (queue_mode == NULL_Q_MQ) + return 0; + + for (i = 0; i < submit_queues; i++) { + nq = &nullb->queues[i]; + init_waitqueue_head(&nq->wait); + nq->queue_depth = hw_queue_depth; + if (setup_commands(nq)) + break; + nullb->nr_queues++; + } + + if (i == submit_queues) + return 0; + + cleanup_queues(nullb); + return 1; +} + +static int null_add_dev(void) +{ + struct gendisk *disk; + struct nullb *nullb; + sector_t size; + + nullb = 
kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); + if (!nullb) + return -ENOMEM; + + spin_lock_init(&nullb->lock); + + if (setup_queues(nullb)) + goto err; + + if (queue_mode == NULL_Q_MQ) { + null_mq_reg.numa_node = home_node; + null_mq_reg.queue_depth = hw_queue_depth; + + if (use_per_node_hctx) { + null_mq_reg.ops->alloc_hctx = null_alloc_hctx; + null_mq_reg.ops->free_hctx = null_free_hctx; + + null_mq_reg.nr_hw_queues = nr_online_nodes; + } else { + null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; + null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; + + null_mq_reg.nr_hw_queues = submit_queues; + } + + nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); + } else if (queue_mode == NULL_Q_BIO) { + nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); + blk_queue_make_request(nullb->q, null_queue_bio); + } else { + nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); + blk_queue_prep_rq(nullb->q, null_rq_prep_fn); + if (nullb->q) + blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + } + + if (!nullb->q) + goto queue_fail; + + nullb->q->queuedata = nullb; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); + + disk = nullb->disk = alloc_disk_node(1, home_node); + if (!disk) { +queue_fail: + if (queue_mode == NULL_Q_MQ) + blk_mq_free_queue(nullb->q); + else + blk_cleanup_queue(nullb->q); + cleanup_queues(nullb); +err: + kfree(nullb); + return -ENOMEM; + } + + mutex_lock(&lock); + list_add_tail(&nullb->list, &nullb_list); + nullb->index = nullb_indexes++; + mutex_unlock(&lock); + + blk_queue_logical_block_size(nullb->q, bs); + blk_queue_physical_block_size(nullb->q, bs); + + size = gb * 1024 * 1024 * 1024ULL; + sector_div(size, bs); + set_capacity(disk, size); + + disk->flags |= GENHD_FL_EXT_DEVT; + disk->major = null_major; + disk->first_minor = nullb->index; + disk->fops = &null_fops; + disk->private_data = nullb; + disk->queue = nullb->q; + sprintf(disk->disk_name, "nullb%d", nullb->index); + add_disk(disk); + 
return 0; +} + +static int __init null_init(void) +{ + unsigned int i; + +#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) + if (irqmode == NULL_IRQ_SOFTIRQ) { + pr_warn("null_blk: softirq completions not available.\n"); + pr_warn("null_blk: using direct completions.\n"); + irqmode = NULL_IRQ_NONE; + } +#endif + + if (submit_queues > nr_cpu_ids) + submit_queues = nr_cpu_ids; + else if (!submit_queues) + submit_queues = 1; + + mutex_init(&lock); + + /* Initialize a separate list for each CPU for issuing softirqs */ + for_each_possible_cpu(i) { + struct completion_queue *cq = &per_cpu(completion_queues, i); + + init_llist_head(&cq->list); + + if (irqmode != NULL_IRQ_TIMER) + continue; + + hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->timer.function = null_cmd_timer_expired; + } + + null_major = register_blkdev(0, "nullb"); + if (null_major < 0) + return null_major; + + for (i = 0; i < nr_devices; i++) { + if (null_add_dev()) { + unregister_blkdev(null_major, "nullb"); + return -EINVAL; + } + } + + pr_info("null: module loaded\n"); + return 0; +} + +static void __exit null_exit(void) +{ + struct nullb *nullb; + + unregister_blkdev(null_major, "nullb"); + + mutex_lock(&lock); + while (!list_empty(&nullb_list)) { + nullb = list_entry(nullb_list.next, struct nullb, list); + null_del_dev(nullb); + } + mutex_unlock(&lock); +} + +module_init(null_init); +module_exit(null_exit); + +MODULE_AUTHOR("Jens Axboe "); +MODULE_LICENSE("GPL"); -- cgit v1.1 From 3ec981e30fae1f3c8728a05c730acaa1f627bcfb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Oct 2013 12:12:24 -0400 Subject: loop: fix crash if blk_alloc_queue fails loop: fix crash if blk_alloc_queue fails If blk_alloc_queue fails, loop_add cleans up, but it doesn't clean up the identifier allocated with idr_alloc. That causes a crash on module unload in idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); where we attempt to remove a non-existent device with that id. 
BUG: unable to handle kernel NULL pointer dereference at 0000000000000380 IP: [] del_gendisk+0x19/0x2d0 PGD 43d399067 PUD 43d0ad067 PMD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: loop(-) dm_snapshot dm_zero dm_mirror dm_region_hash dm_log dm_loop dm_mod ip6table_filter ip6_tables uvesafb cfbcopyarea cfbimgblt cfbfillrect fbcon font bitblit fbcon_rotate fbcon_cw fbcon_ud fbcon_ccw softcursor fb fbdev msr ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc tun ipv6 cpufreq_userspace cpufreq_stats cpufreq_ondemand cpufreq_conservative cpufreq_powersave spadfs fuse hid_generic usbhid hid raid0 md_mod dmi_sysfs nf_nat_ftp nf_nat nf_conntrack_ftp nf_conntrack snd_usb_audio snd_pcm_oss snd_mixer_oss snd_pcm snd_timer snd_page_alloc lm85 hwmon_vid snd_hwdep snd_usbmidi_lib snd_rawmidi snd soundcore acpi_cpufreq ohci_hcd freq_table tg3 ehci_pci mperf ehci_hcd kvm_amd kvm sata_svw serverworks libphy libata ide_core k10temp usbcore hwmon microcode ptp pcspkr pps_core e100 skge mii usb_common i2c_piix4 floppy evdev rtc_cmos i2c_core processor button 
unix CPU: 7 PID: 2735 Comm: rmmod Tainted: G W 3.10.15-devel #15 Hardware name: empty empty/S3992-E, BIOS 'V1.06 ' 06/09/2009 task: ffff88043d38e780 ti: ffff88043d21e000 task.ti: ffff88043d21e000 RIP: 0010:[] [] del_gendisk+0x19/0x2d0 RSP: 0018:ffff88043d21fe10 EFLAGS: 00010282 RAX: ffffffffa05102e0 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88043ea82800 RDI: 0000000000000000 RBP: ffff88043d21fe48 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000000 R12: 00000000000000ff R13: 0000000000000080 R14: 0000000000000000 R15: ffff88043ea82800 FS: 00007ff646534700(0000) GS:ffff880447000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000380 CR3: 000000043e9bf000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffff8100aba4 0000000000000092 ffff88043d21fe48 ffff88043ea82800 00000000000000ff ffff88043d21fe98 0000000000000000 ffff88043d21fe60 ffffffffa05102b4 0000000000000000 ffff88043d21fe70 ffffffffa05102ec Call Trace: [] ? native_sched_clock+0x24/0x80 [] loop_remove+0x14/0x40 [loop] [] loop_exit_cb+0xc/0x10 [loop] [] idr_for_each+0x104/0x190 [] ? loop_remove+0x40/0x40 [loop] [] ? trace_hardirqs_on_caller+0x105/0x1d0 [] loop_exit+0x34/0xa58 [loop] [] SyS_delete_module+0x13a/0x260 [] ? 
trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x1a/0x1f Code: f0 4c 8b 6d f8 c9 c3 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 56 41 55 4c 8d af 80 00 00 00 41 54 53 48 89 fb 48 83 ec 18 <48> 83 bf 80 03 00 00 00 74 4d e8 98 fe ff ff 31 f6 48 c7 c7 20 RIP [] del_gendisk+0x19/0x2d0 RSP CR2: 0000000000000380 ---[ end trace 64ec069ec70f1309 ]--- Signed-off-by: Mikulas Patocka Acked-by: Tejun Heo Cc: stable@kernel.org # 3.1+ Signed-off-by: Jens Axboe --- drivers/block/loop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 40e7155..de7f456 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1633,7 +1633,7 @@ static int loop_add(struct loop_device **l, int i) err = -ENOMEM; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) - goto out_free_dev; + goto out_free_idr; disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) @@ -1678,6 +1678,8 @@ static int loop_add(struct loop_device **l, int i) out_free_queue: blk_cleanup_queue(lo->lo_queue); +out_free_idr: + idr_remove(&loop_index_idr, i); out_free_dev: kfree(lo); out: -- cgit v1.1 From a207f5937630dd35bd2550620bef416937a1365e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Oct 2013 12:13:24 -0400 Subject: block: fix a probe argument to blk_register_region The probe function is supposed to return NULL on failure (as we can see in kobj_lookup: kobj = probe(dev, index, data); ... if (kobj) return kobj; However, in loop and brd, it returns negative error from ERR_PTR. 
This causes a crash if we simulate disk allocation failure and run less -f /dev/loop0 because the negative number is interpreted as a pointer: BUG: unable to handle kernel NULL pointer dereference at 00000000000002b4 IP: [] __blkdev_get+0x28/0x450 PGD 23c677067 PUD 23d6d1067 PMD 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: loop hpfs nvidia(PO) ip6table_filter ip6_tables uvesafb cfbcopyarea cfbimgblt cfbfillrect fbcon font bitblit fbcon_rotate fbcon_cw fbcon_ud fbcon_ccw softcursor fb fbdev msr ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc tun ipv6 cpufreq_stats cpufreq_ondemand cpufreq_userspace cpufreq_powersave cpufreq_conservative hid_generic spadfs usbhid hid fuse raid0 snd_usb_audio snd_pcm_oss snd_mixer_oss md_mod snd_pcm snd_timer snd_page_alloc snd_hwdep snd_usbmidi_lib dmi_sysfs snd_rawmidi nf_nat_ftp nf_nat nf_conntrack_ftp nf_conntrack snd soundcore lm85 hwmon_vid ohci_hcd ehci_pci ehci_hcd serverworks sata_svw libata acpi_cpufreq freq_table mperf ide_core usbcore kvm_amd kvm tg3 i2c_piix4 libphy microcode e100 usb_common ptp skge i2c_core pcspkr k10temp evdev floppy hwmon pps_core mii rtc_cmos button processor unix [last unloaded: nvidia] CPU: 1 PID: 6831 Comm: less Tainted: P W O 3.10.15-devel #18 Hardware name: empty empty/S3992-E, BIOS 'V1.06 ' 06/09/2009 task: ffff880203cc6bc0 ti: ffff88023e47c000 task.ti: ffff88023e47c000 RIP: 0010:[] [] __blkdev_get+0x28/0x450 RSP: 0018:ffff88023e47dbd8 EFLAGS: 00010286 RAX: ffffffffffffff74 RBX: ffffffffffffff74 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001 RBP: ffff88023e47dc18 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88023f519658 R13: ffffffff8118c300 R14: 0000000000000000 R15: ffff88023f519640 FS: 00007f2070bf7700(0000) GS:ffff880247400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 CR2: 00000000000002b4 CR3: 000000023da1d000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 0000000000000002 0000001d00000000 000000003e47dc50 ffff88023f519640 ffff88043d5bb668 ffffffff8118c300 ffff88023d683550 ffff88023e47de60 ffff88023e47dc98 ffffffff8118c10d 0000001d81605698 0000000000000292 Call Trace: [] ? blkdev_get_by_dev+0x60/0x60 [] blkdev_get+0x1dd/0x370 [] ? blkdev_get_by_dev+0x60/0x60 [] ? _raw_spin_unlock+0x2c/0x50 [] ? blkdev_get_by_dev+0x60/0x60 [] blkdev_open+0x65/0x80 [] do_dentry_open.isra.18+0x23e/0x2f0 [] finish_open+0x34/0x50 [] do_last.isra.62+0x2d2/0xc50 [] path_openat.isra.63+0xb8/0x4d0 [] ? might_fault+0x4e/0xa0 [] do_filp_open+0x40/0x90 [] ? _raw_spin_unlock+0x2c/0x50 [] ? __alloc_fd+0xa5/0x1f0 [] do_sys_open+0xef/0x1d0 [] SyS_open+0x19/0x20 [] system_call_fastpath+0x1a/0x1f Code: 44 00 00 55 48 89 e5 41 57 49 89 ff 41 56 41 89 d6 41 55 41 54 4c 8d 67 18 53 48 83 ec 18 89 75 cc e9 f2 00 00 00 0f 1f 44 00 00 <48> 8b 80 40 03 00 00 48 89 df 4c 8b 68 58 e8 d5 a4 07 00 44 89 RIP [] __blkdev_get+0x28/0x450 RSP CR2: 00000000000002b4 ---[ end trace bb7f32dbf02398dc ]--- The brd change should be backported to stable kernels starting with 2.6.25. The loop change should be backported to stable kernels starting with 2.6.22. Signed-off-by: Mikulas Patocka Acked-by: Tejun Heo Cc: stable@kernel.org # 2.6.22+ Signed-off-by: Jens Axboe --- drivers/block/brd.c | 2 +- drivers/block/loop.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 9bf4371..d91f1a5 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -545,7 +545,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) mutex_lock(&brd_devices_mutex); brd = brd_init_one(MINOR(dev) >> part_shift); - kobj = brd ? 
get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); + kobj = brd ? get_disk(brd->brd_disk) : NULL; mutex_unlock(&brd_devices_mutex); *part = 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index de7f456..dbdb88a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1743,7 +1743,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); if (err < 0) - kobj = ERR_PTR(err); + kobj = NULL; else kobj = get_disk(lo->lo_disk); mutex_unlock(&loop_index_mutex); -- cgit v1.1 From 6678d83f18386eb103f8345024e52c5abe61725c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 Aug 2013 11:14:32 -0700 Subject: block: Consolidate duplicated bio_trim() implementations Someone cut and pasted md's md_trim_bio() into xen-blkfront.c. Come on, we should know better than this. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Neil Brown Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 53 +------------------------------------------- 1 file changed, 1 insertion(+), 52 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a4660bb..8d53ed2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1336,57 +1336,6 @@ static int blkfront_probe(struct xenbus_device *dev, return 0; } -/* - * This is a clone of md_trim_bio, used to split a bio into smaller ones - */ -static void trim_bio(struct bio *bio, int offset, int size) -{ - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. 
- * This requires adjusting bi_sector, bi_size, and bi_io_vec - */ - int i; - struct bio_vec *bvec; - int sofar = 0; - - size <<= 9; - if (offset == 0 && size == bio->bi_size) - return; - - bio->bi_sector += offset; - bio->bi_size = size; - offset <<= 9; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - while (bio->bi_idx < bio->bi_vcnt && - bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { - /* remove this whole bio_vec */ - offset -= bio->bi_io_vec[bio->bi_idx].bv_len; - bio->bi_idx++; - } - if (bio->bi_idx < bio->bi_vcnt) { - bio->bi_io_vec[bio->bi_idx].bv_offset += offset; - bio->bi_io_vec[bio->bi_idx].bv_len -= offset; - } - /* avoid any complications with bi_idx being non-zero*/ - if (bio->bi_idx) { - memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, - (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); - bio->bi_vcnt -= bio->bi_idx; - bio->bi_idx = 0; - } - /* Make sure vcnt and last bv are not too big */ - bio_for_each_segment(bvec, bio, i) { - if (sofar + bvec->bv_len > size) - bvec->bv_len = size - sofar; - if (bvec->bv_len == 0) { - bio->bi_vcnt = i; - break; - } - sofar += bvec->bv_len; - } -} - static void split_bio_end(struct bio *bio, int error) { struct split_bio *split_bio = bio->bi_private; @@ -1522,7 +1471,7 @@ static int blkif_recover(struct blkfront_info *info) (unsigned int)(bio->bi_size >> 9) - offset); cloned_bio = bio_clone(bio, GFP_NOIO); BUG_ON(cloned_bio == NULL); - trim_bio(cloned_bio, offset, size); + bio_trim(cloned_bio, offset, size); cloned_bio->bi_private = split_bio; cloned_bio->bi_end_io = split_bio_end; submit_bio(cloned_bio->bi_rw, cloned_bio); -- cgit v1.1