Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig             |  21
-rw-r--r-- | drivers/block/Makefile            |   1
-rw-r--r-- | drivers/block/ataflop.c           |  12
-rw-r--r-- | drivers/block/brd.c               |  48
-rw-r--r-- | drivers/block/cciss.c             |  42
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c  |   2
-rw-r--r-- | drivers/block/drbd/drbd_nla.c     |   2
-rw-r--r-- | drivers/block/drbd/drbd_req.c     |  27
-rw-r--r-- | drivers/block/floppy.c            |   6
-rw-r--r-- | drivers/block/loop.c              |  38
-rw-r--r-- | drivers/block/loop.h              |   1
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 527
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.h |  11
-rw-r--r-- | drivers/block/nbd.c               |  73
-rw-r--r-- | drivers/block/null_blk.c          |  11
-rw-r--r-- | drivers/block/osdblk.c            | 693
-rw-r--r-- | drivers/block/paride/pd.c         |   7
-rw-r--r-- | drivers/block/pktcdvd.c           |   2
-rw-r--r-- | drivers/block/rbd.c               | 368
-rw-r--r-- | drivers/block/swim3.c             |   4
-rw-r--r-- | drivers/block/virtio_blk.c        |  29
-rw-r--r-- | drivers/block/xen-blkfront.c      |  36
-rw-r--r-- | drivers/block/zram/zram_drv.c     | 579
-rw-r--r-- | drivers/block/zram/zram_drv.h     |   6
24 files changed, 883 insertions, 1663 deletions
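
Nearly every driver hunk below follows the same conversion: per-request error state moves out of the rq->errors field (removed in this series) and into the driver's own blk-mq payload (cmd->status in mtip32xx and nbd, cmd->ret in loop), completion is signalled with the new no-argument blk_mq_complete_request(), and a ->complete callback translates the stored status when calling blk_mq_end_request(). The fragment below is a minimal illustrative sketch of that pattern against the 4.12-era blk-mq API; it is not taken from the patch, and all foo_* names are hypothetical.

/*
 * Sketch of the completion pattern this series converts drivers to:
 * status lives in the per-request pdu instead of rq->errors.
 */
#include <linux/blk-mq.h>

struct foo_cmd {
	int status;		/* replaces the old rq->errors */
};

/* hardware/ISR completion path: record status, defer the rest */
static void foo_hw_done(struct request *rq, int status)
{
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = status;
	blk_mq_complete_request(rq);	/* no error argument any more */
}

/* ->complete callback: consume the status stored in the pdu */
static void foo_complete_rq(struct request *rq)
{
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);

	blk_mq_end_request(rq, cmd->status);
}

/* ->init_request now takes the tag_set instead of a void *data cookie */
static int foo_init_request(struct blk_mq_tag_set *set, struct request *rq,
			    unsigned int hctx_idx, unsigned int numa_node)
{
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = 0;
	return 0;
}

static int foo_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	blk_mq_start_request(bd->rq);
	foo_hw_done(bd->rq, 0);	/* stand-in for the real ISR path */
	return BLK_MQ_RQ_QUEUE_OK;
}

static const struct blk_mq_ops foo_mq_ops = {
	.queue_rq	= foo_queue_rq,
	.complete	= foo_complete_rq,
	.init_request	= foo_init_request,
};

The design point is that rq->errors meant different things to different drivers (an errno, a SCSI result word, a retry counter), so the field is pushed down into each driver's pdu with driver-defined semantics; the ataflop and floppy hunks get an explicit rq->error_count for the retry-counter use, and cciss stores its SCSI result word in scsi_req(rq)->result.
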
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ebe8c1a..8ddc982 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -219,7 +219,7 @@ config BLK_DEV_LOOP To use the loop device, you need the losetup utility, found in the util-linux package, see - <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>. + <https://www.kernel.org/pub/linux/utils/util-linux/>. The loop device driver can also be used to "hide" a file system in a disk partition, floppy, or regular file, either using encryption @@ -312,22 +312,6 @@ config BLK_DEV_SKD Use device /dev/skd$N amd /dev/skd$Np$M. -config BLK_DEV_OSD - tristate "OSD object-as-blkdev support" - depends on SCSI_OSD_ULD - ---help--- - Saying Y or M here will allow the exporting of a single SCSI - OSD (object-based storage) object as a Linux block device. - - For example, if you create a 2G object on an OSD device, - you can then use this module to present that 2G object as - a Linux block device. - - To compile this driver as a module, choose M here: the - module will be called osdblk. - - If unsure, say N. - config BLK_DEV_SX8 tristate "Promise SATA SX8 support" depends on PCI @@ -339,6 +323,7 @@ config BLK_DEV_SX8 config BLK_DEV_RAM tristate "RAM block device support" + select DAX if BLK_DEV_RAM_DAX ---help--- Saying Y here will allow you to use a portion of your RAM memory as a block device, so that you can make file systems on it, read and @@ -495,7 +480,7 @@ config VIRTIO_BLK_SCSI Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on virtio-blk devices. This is only supported for the legacy virtio protocol and not enabled by default by any hypervisor. - Your probably want to virtio-scsi instead. + You probably want to use virtio-scsi instead. config BLK_DEV_RBD tristate "Rados block device (RBD)" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 5ceead8..ec8c368 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o -obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 2104b1b..fa69ecd 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -617,12 +617,12 @@ static void fd_error( void ) if (!fd_request) return; - fd_request->errors++; - if (fd_request->errors >= MAX_ERRORS) { + fd_request->error_count++; + if (fd_request->error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(-EIO); } - else if (fd_request->errors == RECALIBRATE_ERRORS) { + else if (fd_request->error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; @@ -1386,7 +1386,7 @@ static void setup_req_params( int drive ) ReqData = ReqBuffer + 512 * ReqCnt; if (UseTrackbuffer) - read_track = (ReqCmd == READ && fd_request->errors == 0); + read_track = (ReqCmd == READ && fd_request->error_count == 0); else read_track = 0; @@ -1409,8 +1409,10 @@ static struct request *set_next_request(void) fdc_queue = 0; if (q) { rq = blk_fetch_request(q); - if (rq) + if (rq) { + rq->error_count = 0; break; + } } } while (fdc_queue != old_pos); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 4ec84d5..57b574f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -21,6 +21,7 @@ #include 
<linux/slab.h> #ifdef CONFIG_BLK_DEV_RAM_DAX #include <linux/pfn_t.h> +#include <linux/dax.h> #endif #include <linux/uaccess.h> @@ -41,6 +42,9 @@ struct brd_device { struct request_queue *brd_queue; struct gendisk *brd_disk; +#ifdef CONFIG_BLK_DEV_RAM_DAX + struct dax_device *dax_dev; +#endif struct list_head brd_list; /* @@ -326,30 +330,38 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, } #ifdef CONFIG_BLK_DEV_RAM_DAX -static long brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, pfn_t *pfn, long size) +static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) { - struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; if (!brd) return -ENODEV; - page = brd_insert_page(brd, sector); + page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512); if (!page) return -ENOSPC; *kaddr = page_address(page); *pfn = page_to_pfn_t(page); - return PAGE_SIZE; + return 1; } -#else -#define brd_direct_access NULL + +static long brd_dax_direct_access(struct dax_device *dax_dev, + pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct brd_device *brd = dax_get_private(dax_dev); + + return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); +} + +static const struct dax_operations brd_dax_ops = { + .direct_access = brd_dax_direct_access, +}; #endif static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .rw_page = brd_rw_page, - .direct_access = brd_direct_access, }; /* @@ -415,9 +427,6 @@ static struct brd_device *brd_alloc(int i) * is harmless) */ blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE); -#ifdef CONFIG_BLK_DEV_RAM_DAX - queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); -#endif disk = brd->brd_disk = alloc_disk(max_part); if (!disk) goto out_free_queue; @@ -430,8 +439,21 @@ static struct brd_device *brd_alloc(int i) sprintf(disk->disk_name, "ram%d", i); set_capacity(disk, rd_size * 2); +#ifdef CONFIG_BLK_DEV_RAM_DAX + queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue); + brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops); + if (!brd->dax_dev) + goto out_free_inode; +#endif + + return brd; +#ifdef CONFIG_BLK_DEV_RAM_DAX +out_free_inode: + kill_dax(brd->dax_dev); + put_dax(brd->dax_dev); +#endif out_free_queue: blk_cleanup_queue(brd->brd_queue); out_free_dev: @@ -471,6 +493,10 @@ out: static void brd_del_one(struct brd_device *brd) { list_del(&brd->brd_list); +#ifdef CONFIG_BLK_DEV_RAM_DAX + kill_dax(brd->dax_dev); + put_dax(brd->dax_dev); +#endif del_gendisk(brd->brd_disk); brd_free(brd); } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 8e1a455..cd37550 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1864,8 +1864,7 @@ static void cciss_softirq_done(struct request *rq) /* set the residual count for pc requests */ if (blk_rq_is_passthrough(rq)) scsi_req(rq)->resid_len = c->err_info->ResidualCnt; - - blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); + blk_end_request_all(rq, scsi_req(rq)->result ? 
-EIO : 0); spin_lock_irqsave(&h->lock, flags); cmd_free(h, c); @@ -3140,18 +3139,19 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, { int retry_cmd = 0; struct request *rq = cmd->rq; + struct scsi_request *sreq = scsi_req(rq); - rq->errors = 0; + sreq->result = 0; if (timeout) - rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); + sreq->result = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT); if (cmd->err_info->CommandStatus == 0) /* no error has occurred */ goto after_error_processing; switch (cmd->err_info->CommandStatus) { case CMD_TARGET_STATUS: - rq->errors = evaluate_target_status(h, cmd, &retry_cmd); + sreq->result = evaluate_target_status(h, cmd, &retry_cmd); break; case CMD_DATA_UNDERRUN: if (!blk_rq_is_passthrough(cmd->rq)) { @@ -3169,7 +3169,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_INVALID: dev_warn(&h->pdev->dev, "cciss: cmd %p is " "reported invalid\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3177,7 +3177,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_PROTOCOL_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p has " "protocol error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3185,7 +3185,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_HARDWARE_ERR: dev_warn(&h->pdev->dev, "cciss: cmd %p had " " hardware error\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3193,7 +3193,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_CONNECTION_LOST: dev_warn(&h->pdev->dev, "cciss: cmd %p had " "connection lost\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3201,7 +3201,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORTED: dev_warn(&h->pdev->dev, "cciss: cmd %p was " "aborted\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT); @@ -3209,7 +3209,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, case CMD_ABORT_FAILED: dev_warn(&h->pdev->dev, "cciss: cmd %p reports " "abort failed\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3224,21 +3224,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, } else dev_warn(&h->pdev->dev, "%p retried too many times\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? 
DID_PASSTHROUGH : DID_ABORT); break; case CMD_TIMEOUT: dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); break; case CMD_UNABORTABLE: dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3247,7 +3247,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd, dev_warn(&h->pdev->dev, "cmd %p returned " "unknown status %x\n", cmd, cmd->err_info->CommandStatus); - rq->errors = make_status_bytes(SAM_STAT_GOOD, + sreq->result = make_status_bytes(SAM_STAT_GOOD, cmd->err_info->CommandStatus, DRIVER_OK, blk_rq_is_passthrough(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR); @@ -3380,9 +3380,9 @@ static void do_cciss_request(struct request_queue *q) if (dma_mapping_error(&h->pdev->dev, temp64.val)) { dev_warn(&h->pdev->dev, "%s: error mapping page for DMA\n", __func__); - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } @@ -3395,9 +3395,9 @@ static void do_cciss_request(struct request_queue *q) if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * sizeof(SGDescriptor_struct))) { - creq->errors = make_status_bytes(SAM_STAT_GOOD, - 0, DRIVER_OK, - DID_SOFT_ERROR); + scsi_req(creq)->result = + make_status_bytes(SAM_STAT_GOOD, 0, DRIVER_OK, + DID_SOFT_ERROR); cmd_free(h, c); return; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index dece26f..a804a41 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -409,7 +409,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want) new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN); if (!new_pages) { new_pages = __vmalloc(bytes, - GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO, + GFP_NOIO | __GFP_ZERO, PAGE_KERNEL); if (!new_pages) return NULL; diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c index b2d4791..6bf806d 100644 --- a/drivers/block/drbd/drbd_nla.c +++ b/drivers/block/drbd/drbd_nla.c @@ -34,7 +34,7 @@ int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, err = drbd_nla_check_mandatory(maxtype, nla); if (!err) - err = nla_parse_nested(tb, maxtype, nla, policy); + err = nla_parse_nested(tb, maxtype, nla, policy, NULL); return err; } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b5730e1..6566243 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -315,24 +315,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) } /* still holds resource->req_lock */ -static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) +static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_device *device = req->device; D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); + if (!put) + return; + if (!atomic_sub_and_test(put, &req->completion_ref)) - return 0; + return; drbd_req_complete(req, m); + /* local completion may still come in later, 
+ * we need to keep the req object around. */ + if (req->rq_state & RQ_LOCAL_ABORTED) + return; + if (req->rq_state & RQ_POSTPONED) { /* don't destroy the req object just yet, * but queue it for retry */ drbd_restart_request(req); - return 0; + return; } - return 1; + kref_put(&req->kref, drbd_req_destroy); } static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) @@ -519,12 +527,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) { - if (drbd_req_put_completion_ref(req, m, c_put)) - kref_put(&req->kref, drbd_req_destroy); - } else { - kref_put(&req->kref, drbd_req_destroy); - } + drbd_req_put_completion_ref(req, m, c_put); + kref_put(&req->kref, drbd_req_destroy); } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) @@ -1366,8 +1370,7 @@ nodata: } out: - if (drbd_req_put_completion_ref(req, &m, 1)) - kref_put(&req->kref, drbd_req_destroy); + drbd_req_put_completion_ref(req, &m, 1); spin_unlock_irq(&resource->req_lock); /* Even though above is a kref_put(), this is safe. diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index ce102ec..60d4c76 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2805,8 +2805,10 @@ static int set_next_request(void) fdc_queue = 0; if (q) { current_req = blk_fetch_request(q); - if (current_req) + if (current_req) { + current_req->error_count = 0; break; + } } } while (fdc_queue != old_pos); @@ -2866,7 +2868,7 @@ do_request: _floppy = floppy_type + DP->autodetect[DRS->probed_format]; } else probing = 0; - errors = &(current_req->errors); + errors = &(current_req->error_count); tmp = make_raw_rw_request(); if (tmp < 2) { request_done(tmp); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3081d83..ebbd0c3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -445,32 +445,27 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) return ret; } -static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) +static void lo_complete_rq(struct request *rq) { - if (bytes < 0 || op_is_write(req_op(cmd->rq))) - return; + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); - if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { + if (unlikely(req_op(cmd->rq) == REQ_OP_READ && cmd->use_aio && + cmd->ret >= 0 && cmd->ret < blk_rq_bytes(cmd->rq))) { struct bio *bio = cmd->rq->bio; - bio_advance(bio, bytes); + bio_advance(bio, cmd->ret); zero_fill_bio(bio); } + + blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); - struct request *rq = cmd->rq; - handle_partial_read(cmd, ret); - - if (ret > 0) - ret = 0; - else if (ret < 0) - ret = -EIO; - - blk_mq_complete_request(rq, ret); + cmd->ret = ret; + blk_mq_complete_request(cmd->rq); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, @@ -613,6 +608,9 @@ static int loop_switch(struct loop_device *lo, struct file *file) */ static int loop_flush(struct loop_device *lo) { + /* loop not yet configured, no running thread, nothing to flush */ + if (lo->lo_state != Lo_bound) + return 0; return loop_switch(lo, NULL); } @@ -1688,8 +1686,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ret = do_req_filebacked(lo, cmd->rq); failed: /* complete non-aio request */ - if (!cmd->use_aio || ret) - blk_mq_complete_request(cmd->rq, ret ? 
-EIO : 0); + if (!cmd->use_aio || ret) { + cmd->ret = ret ? -EIO : 0; + blk_mq_complete_request(cmd->rq); + } } static void loop_queue_work(struct kthread_work *work) @@ -1700,9 +1700,8 @@ static void loop_queue_work(struct kthread_work *work) loop_handle_cmd(cmd); } -static int loop_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int loop_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); @@ -1715,6 +1714,7 @@ static int loop_init_request(void *data, struct request *rq, static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, .init_request = loop_init_request, + .complete = lo_complete_rq, }; static int loop_add(struct loop_device **l, int i) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index fb2237c..fecd3f9 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -70,6 +70,7 @@ struct loop_cmd { struct request *rq; struct list_head list; bool use_aio; /* use AIO interface to handle I/O */ + long ret; struct kiocb iocb; }; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 05e3e66..3a779a4 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -169,6 +169,25 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) return false; /* device present */ } +/* we have to use runtime tag to setup command header */ +static void mtip_init_cmd_header(struct request *rq) +{ + struct driver_data *dd = rq->q->queuedata; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; + + /* Point the command headers at the command tables. */ + cmd->command_header = dd->port->command_list + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + cmd->command_header_dma = dd->port->command_list_dma + + (sizeof(struct mtip_cmd_hdr) * rq->tag); + + if (host_cap_64) + cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); + + cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); +} + static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; @@ -176,72 +195,22 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) if (mtip_check_surprise_removal(dd->pdev)) return NULL; - rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED); + rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED); if (IS_ERR(rq)) return NULL; - return blk_mq_rq_to_pdu(rq); -} - -static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd) -{ - blk_put_request(blk_mq_rq_from_pdu(cmd)); -} - -/* - * Once we add support for one hctx per mtip group, this will change a bit - */ -static struct request *mtip_rq_from_tag(struct driver_data *dd, - unsigned int tag) -{ - struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; + /* Internal cmd isn't submitted via .queue_rq */ + mtip_init_cmd_header(rq); - return blk_mq_tag_to_rq(hctx->tags, tag); + return blk_mq_rq_to_pdu(rq); } static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, unsigned int tag) { - struct request *rq = mtip_rq_from_tag(dd, tag); - - return blk_mq_rq_to_pdu(rq); -} - -/* - * IO completion function. - * - * This completion function is called by the driver ISR when a - * command that was issued by the kernel completes. 
It first calls the - * asynchronous completion function which normally calls back into the block - * layer passing the asynchronous callback data, then unmaps the - * scatter list associated with the completed command, and finally - * clears the allocated bit associated with the completed command. - * - * @port Pointer to the port data structure. - * @tag Tag of the command. - * @data Pointer to driver_data. - * @status Completion status. - * - * return value - * None - */ -static void mtip_async_complete(struct mtip_port *port, - int tag, struct mtip_cmd *cmd, int status) -{ - struct driver_data *dd = port->dd; - struct request *rq; - - if (unlikely(!dd) || unlikely(!port)) - return; - - if (unlikely(status == PORT_IRQ_TF_ERR)) { - dev_warn(&port->dd->pdev->dev, - "Command tag %d failed due to TFE\n", tag); - } - - rq = mtip_rq_from_tag(dd, tag); + struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; - blk_mq_complete_request(rq, status); + return blk_mq_rq_to_pdu(blk_mq_tag_to_rq(hctx->tags, tag)); } /* @@ -558,43 +527,19 @@ static void print_tags(struct driver_data *dd, "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); } -/* - * Internal command completion callback function. - * - * This function is normally called by the driver ISR when an internal - * command completed. This function signals the command completion by - * calling complete(). - * - * @port Pointer to the port data structure. - * @tag Tag of the command that has completed. - * @data Pointer to a completion structure. - * @status Completion status. - * - * return value - * None - */ -static void mtip_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ - struct completion *waiting = command->comp_data; - if (unlikely(status == PORT_IRQ_TF_ERR)) - dev_warn(&port->dd->pdev->dev, - "Internal command %d completed with TFE\n", tag); - - command->comp_func = NULL; - command->comp_data = NULL; - complete(waiting); -} - -static void mtip_null_completion(struct mtip_port *port, - int tag, struct mtip_cmd *command, int status) -{ -} - static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, dma_addr_t buffer_dma, unsigned int sectors); static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, struct smart_attr *attrib); + +static void mtip_complete_command(struct mtip_cmd *cmd, int status) +{ + struct request *req = blk_mq_rq_from_pdu(cmd); + + cmd->status = status; + blk_mq_complete_request(req); +} + /* * Handle an error. * @@ -623,11 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd) if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); - - if (cmd->comp_data && cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, - cmd, PORT_IRQ_TF_ERR); - } + mtip_complete_command(cmd, -EIO); return; } @@ -654,19 +595,9 @@ static void mtip_handle_tfe(struct driver_data *dd) continue; cmd = mtip_cmd_from_tag(dd, tag); - if (likely(cmd->comp_func)) { - set_bit(tag, tagaccum); - cmd_cnt++; - cmd->comp_func(port, tag, cmd, 0); - } else { - dev_err(&port->dd->pdev->dev, - "Missing completion func for tag %d", - tag); - if (mtip_check_surprise_removal(dd->pdev)) { - /* don't proceed further */ - return; - } - } + mtip_complete_command(cmd, 0); + set_bit(tag, tagaccum); + cmd_cnt++; } } @@ -736,10 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd) tag, fail_reason != NULL ? 
fail_reason : "unknown"); - if (cmd->comp_func) { - cmd->comp_func(port, tag, - cmd, -ENODATA); - } + mtip_complete_command(cmd, -ENODATA); continue; } } @@ -762,12 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd) dev_warn(&port->dd->pdev->dev, "retiring tag %d\n", tag); - if (cmd->comp_func) - cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR); - else - dev_warn(&port->dd->pdev->dev, - "Bad completion for tag %d\n", - tag); + mtip_complete_command(cmd, -EIO); } } print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); @@ -800,18 +723,7 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, continue; command = mtip_cmd_from_tag(dd, tag); - if (likely(command->comp_func)) - command->comp_func(port, tag, command, 0); - else { - dev_dbg(&dd->pdev->dev, - "Null completion for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - return; - } - } + mtip_complete_command(command, 0); } completed >>= 1; } @@ -829,16 +741,13 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) struct mtip_port *port = dd->port; struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && - (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL))) { - if (cmd->comp_func) { - cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); - return; - } - } + if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && cmd) { + int group = MTIP_TAG_INDEX(MTIP_TAG_INTERNAL); + int status = readl(port->cmd_issue[group]); - return; + if (!(status & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL)))) + mtip_complete_command(cmd, 0); + } } /* @@ -846,7 +755,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) */ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) { - if (unlikely(port_stat & PORT_IRQ_CONNECT)) { dev_warn(&dd->pdev->dev, "Clearing PxSERR.DIAG.x\n"); @@ -973,8 +881,7 @@ static irqreturn_t mtip_irq_handler(int irq, void *instance) static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) { - writel(1 << MTIP_TAG_BIT(tag), - port->cmd_issue[MTIP_TAG_INDEX(tag)]); + writel(1 << MTIP_TAG_BIT(tag), port->cmd_issue[MTIP_TAG_INDEX(tag)]); } static bool mtip_pause_ncq(struct mtip_port *port, @@ -1012,53 +919,53 @@ static bool mtip_pause_ncq(struct mtip_port *port, return false; } +static bool mtip_commands_active(struct mtip_port *port) +{ + unsigned int active; + unsigned int n; + + /* + * Ignore s_active bit 0 of array element 0. 
+ * This bit will always be set + */ + active = readl(port->s_active[0]) & 0xFFFFFFFE; + for (n = 1; n < port->dd->slot_groups; n++) + active |= readl(port->s_active[n]); + + return active != 0; +} + /* * Wait for port to quiesce * * @port Pointer to port data structure * @timeout Max duration to wait (ms) - * @atomic gfp_t flag to indicate blockable context or not * * return value * 0 Success * -EBUSY Commands still active */ -static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout, - gfp_t atomic) +static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) { unsigned long to; - unsigned int n; - unsigned int active = 1; + bool active = true; blk_mq_stop_hw_queues(port->dd->queue); to = jiffies + msecs_to_jiffies(timeout); do { if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && - test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) && - atomic == GFP_KERNEL) { + test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { msleep(20); continue; /* svc thd is actively issuing commands */ } - if (atomic == GFP_KERNEL) - msleep(100); - else { - cpu_relax(); - udelay(100); - } + msleep(100); if (mtip_check_surprise_removal(port->dd->pdev)) goto err_fault; - /* - * Ignore s_active bit 0 of array element 0. - * This bit will always be set - */ - active = readl(port->s_active[0]) & 0xFFFFFFFE; - for (n = 1; n < port->dd->slot_groups; n++) - active |= readl(port->s_active[n]); - + active = mtip_commands_active(port); if (!active) break; } while (time_before(jiffies, to)); @@ -1070,6 +977,13 @@ err_fault: return -EFAULT; } +struct mtip_int_cmd { + int fis_len; + dma_addr_t buffer; + int buf_len; + u32 opts; +}; + /* * Execute an internal command and wait for the completion. * @@ -1094,13 +1008,17 @@ static int mtip_exec_internal_command(struct mtip_port *port, dma_addr_t buffer, int buf_len, u32 opts, - gfp_t atomic, unsigned long timeout) { - struct mtip_cmd_sg *command_sg; - DECLARE_COMPLETION_ONSTACK(wait); struct mtip_cmd *int_cmd; struct driver_data *dd = port->dd; + struct request *rq; + struct mtip_int_cmd icmd = { + .fis_len = fis_len, + .buffer = buffer, + .buf_len = buf_len, + .opts = opts + }; int rv = 0; unsigned long start; @@ -1115,6 +1033,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n"); return -EFAULT; } + rq = blk_mq_rq_from_pdu(int_cmd); + rq->special = &icmd; set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); @@ -1123,135 +1043,60 @@ static int mtip_exec_internal_command(struct mtip_port *port, clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); - if (atomic == GFP_KERNEL) { - if (fis->command != ATA_CMD_STANDBYNOW1) { - /* wait for io to complete if non atomic */ - if (mtip_quiesce_io(port, - MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) { - dev_warn(&dd->pdev->dev, - "Failed to quiesce IO\n"); - mtip_put_int_command(dd, int_cmd); - clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); - wake_up_interruptible(&port->svc_wait); - return -EBUSY; - } + if (fis->command != ATA_CMD_STANDBYNOW1) { + /* wait for io to complete if non atomic */ + if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { + dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n"); + blk_mq_free_request(rq); + clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); + wake_up_interruptible(&port->svc_wait); + return -EBUSY; } - - /* Set the completion function and data for the command. 
*/ - int_cmd->comp_data = &wait; - int_cmd->comp_func = mtip_completion; - - } else { - /* Clear completion - we're going to poll */ - int_cmd->comp_data = NULL; - int_cmd->comp_func = mtip_null_completion; } /* Copy the command to the command table */ memcpy(int_cmd->command, fis, fis_len*4); - /* Populate the SG list */ - int_cmd->command_header->opts = - __force_bit2int cpu_to_le32(opts | fis_len); - if (buf_len) { - command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ; - - command_sg->info = - __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF); - command_sg->dba = - __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF); - command_sg->dba_upper = - __force_bit2int cpu_to_le32((buffer >> 16) >> 16); - - int_cmd->command_header->opts |= - __force_bit2int cpu_to_le32((1 << 16)); - } - - /* Populate the command header */ - int_cmd->command_header->byte_count = 0; - start = jiffies; + rq->timeout = timeout; - /* Issue the command to the hardware */ - mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL); - - if (atomic == GFP_KERNEL) { - /* Wait for the command to complete or timeout. */ - if ((rv = wait_for_completion_interruptible_timeout( - &wait, - msecs_to_jiffies(timeout))) <= 0) { - - if (rv == -ERESTARTSYS) { /* interrupted */ - dev_err(&dd->pdev->dev, - "Internal command [%02X] was interrupted after %u ms\n", - fis->command, - jiffies_to_msecs(jiffies - start)); - rv = -EINTR; - goto exec_ic_exit; - } else if (rv == 0) /* timeout */ - dev_err(&dd->pdev->dev, - "Internal command did not complete [%02X] within timeout of %lu ms\n", - fis->command, timeout); - else - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", - fis->command, rv, timeout); - - if (mtip_check_surprise_removal(dd->pdev) || - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] wait returned due to SR\n", - fis->command); - rv = -ENXIO; - goto exec_ic_exit; - } - mtip_device_reset(dd); /* recover from timeout issue */ - rv = -EAGAIN; + /* insert request and run queue */ + blk_execute_rq(rq->q, NULL, rq, true); + + rv = int_cmd->status; + if (rv < 0) { + if (rv == -ERESTARTSYS) { /* interrupted */ + dev_err(&dd->pdev->dev, + "Internal command [%02X] was interrupted after %u ms\n", + fis->command, + jiffies_to_msecs(jiffies - start)); + rv = -EINTR; goto exec_ic_exit; - } - } else { - u32 hba_stat, port_stat; - - /* Spin for <timeout> checking if command still outstanding */ - timeout = jiffies + msecs_to_jiffies(timeout); - while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) - && time_before(jiffies, timeout)) { - if (mtip_check_surprise_removal(dd->pdev)) { - rv = -ENXIO; - goto exec_ic_exit; - } - if ((fis->command != ATA_CMD_STANDBYNOW1) && - test_bit(MTIP_DDF_REMOVE_PENDING_BIT, - &dd->dd_flag)) { - rv = -ENXIO; - goto exec_ic_exit; - } - port_stat = readl(port->mmio + PORT_IRQ_STAT); - if (!port_stat) - continue; + } else if (rv == 0) /* timeout */ + dev_err(&dd->pdev->dev, + "Internal command did not complete [%02X] within timeout of %lu ms\n", + fis->command, timeout); + else + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n", + fis->command, rv, timeout); - if (port_stat & PORT_IRQ_ERR) { - dev_err(&dd->pdev->dev, - "Internal command [%02X] failed\n", - fis->command); - mtip_device_reset(dd); - rv = -EIO; - goto exec_ic_exit; - } else { - writel(port_stat, port->mmio + PORT_IRQ_STAT); - hba_stat = readl(dd->mmio + 
HOST_IRQ_STAT); - if (hba_stat) - writel(hba_stat, - dd->mmio + HOST_IRQ_STAT); - } - break; + if (mtip_check_surprise_removal(dd->pdev) || + test_bit(MTIP_DDF_REMOVE_PENDING_BIT, + &dd->dd_flag)) { + dev_err(&dd->pdev->dev, + "Internal command [%02X] wait returned due to SR\n", + fis->command); + rv = -ENXIO; + goto exec_ic_exit; } + mtip_device_reset(dd); /* recover from timeout issue */ + rv = -EAGAIN; + goto exec_ic_exit; } - if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) - & (1 << MTIP_TAG_INTERNAL)) { + if (readl(port->cmd_issue[MTIP_TAG_INDEX(MTIP_TAG_INTERNAL)]) + & (1 << MTIP_TAG_BIT(MTIP_TAG_INTERNAL))) { rv = -ENXIO; if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { mtip_device_reset(dd); @@ -1260,7 +1105,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, } exec_ic_exit: /* Clear the allocated and active bits for the internal command. */ - mtip_put_int_command(dd, int_cmd); + blk_mq_free_request(rq); clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); if (rv >= 0 && mtip_pause_ncq(port, fis)) { /* NCQ paused */ @@ -1368,7 +1213,6 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) port->identify_dma, sizeof(u16) * ATA_ID_WORDS, 0, - GFP_KERNEL, MTIP_INT_CMD_TIMEOUT_MS) < 0) { rv = -1; @@ -1454,7 +1298,6 @@ static int mtip_standby_immediate(struct mtip_port *port) 0, 0, 0, - GFP_ATOMIC, timeout); dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", jiffies_to_msecs(jiffies - start)); @@ -1500,7 +1343,6 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, buffer_dma, sectors * ATA_SECT_SIZE, 0, - GFP_ATOMIC, MTIP_INT_CMD_TIMEOUT_MS); } @@ -1535,7 +1377,6 @@ static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer, buffer_dma, ATA_SECT_SIZE, 0, - GFP_ATOMIC, 15000); } @@ -1663,7 +1504,6 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba, dma_addr, ATA_SECT_SIZE, 0, - GFP_KERNEL, MTIP_TRIM_TIMEOUT_MS) < 0) rv = -EIO; @@ -1827,7 +1667,6 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) 0, 0, 0, - GFP_KERNEL, to) < 0) { return -1; } @@ -1923,7 +1762,6 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, (xfer_sz ? dma_addr : 0), (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), 0, - GFP_KERNEL, to) < 0) { rv = -EFAULT; @@ -2166,7 +2004,6 @@ static int exec_drive_taskfile(struct driver_data *dd, dma_buffer, transfer_size, 0, - GFP_KERNEL, timeout) < 0) { err = -EIO; goto abort; @@ -2423,12 +2260,6 @@ static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, (nents << 16) | 5 | AHCI_CMD_PREFETCH); command->command_header->byte_count = 0; - /* - * Set the completion function and data for the command - * within this layer. 
- */ - command->comp_data = dd; - command->comp_func = mtip_async_complete; command->direction = dma_dir; /* @@ -2910,18 +2741,19 @@ static void mtip_softirq_done_fn(struct request *rq) if (unlikely(cmd->unaligned)) up(&dd->port->cmd_slot_unal); - blk_mq_end_request(rq, rq->errors); + blk_mq_end_request(rq, cmd->status); } static void mtip_abort_cmd(struct request *req, void *data, bool reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); clear_bit(req->tag, dd->port->cmds_to_issue); - req->errors = -EIO; + cmd->status = -EIO; mtip_softirq_done_fn(req); } @@ -3801,12 +3633,53 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, return false; } +static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + struct driver_data *dd = hctx->queue->queuedata; + struct mtip_int_cmd *icmd = rq->special; + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct mtip_cmd_sg *command_sg; + + if (mtip_commands_active(dd->port)) + return BLK_MQ_RQ_QUEUE_BUSY; + + /* Populate the SG list */ + cmd->command_header->opts = + __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len); + if (icmd->buf_len) { + command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ; + + command_sg->info = + __force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF); + command_sg->dba = + __force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF); + command_sg->dba_upper = + __force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16); + + cmd->command_header->opts |= + __force_bit2int cpu_to_le32((1 << 16)); + } + + /* Populate the command header */ + cmd->command_header->byte_count = 0; + + blk_mq_start_request(rq); + mtip_issue_non_ncq_command(dd->port, rq->tag); + return BLK_MQ_RQ_QUEUE_OK; +} + static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *rq = bd->rq; int ret; + mtip_init_cmd_header(rq); + + if (blk_rq_is_passthrough(rq)) + return mtip_issue_reserved_cmd(hctx, rq); + if (unlikely(mtip_check_unal_depth(hctx, rq))) return BLK_MQ_RQ_QUEUE_BUSY; @@ -3816,14 +3689,13 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, if (likely(!ret)) return BLK_MQ_RQ_QUEUE_OK; - rq->errors = ret; return BLK_MQ_RQ_QUEUE_ERROR; } -static void mtip_free_cmd(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx) +static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); if (!cmd->command) @@ -3833,20 +3705,11 @@ static void mtip_free_cmd(void *data, struct request *rq, cmd->command, cmd->command_dma); } -static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, - unsigned int request_idx, unsigned int numa_node) +static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct driver_data *dd = data; + struct driver_data *dd = set->driver_data; struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; - - /* - * For flush requests, request_idx starts at the end of the - * tag space. Since we don't support FLUSH/FUA, simply return - * 0 as there's nothing to be done. 
- */ - if (request_idx >= MTIP_MAX_COMMAND_SLOTS) - return 0; cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, &cmd->command_dma, GFP_KERNEL); @@ -3855,17 +3718,6 @@ static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, memset(cmd->command, 0, CMD_DMA_ALLOC_SZ); - /* Point the command headers at the command tables. */ - cmd->command_header = dd->port->command_list + - (sizeof(struct mtip_cmd_hdr) * request_idx); - cmd->command_header_dma = dd->port->command_list_dma + - (sizeof(struct mtip_cmd_hdr) * request_idx); - - if (host_cap_64) - cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); - - cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); - sg_init_table(cmd->sg, MTIP_MAX_SG); return 0; } @@ -3875,8 +3727,12 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, { struct driver_data *dd = req->q->queuedata; - if (reserved) - goto exit_handler; + if (reserved) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); + + cmd->status = -ETIME; + return BLK_EH_HANDLED; + } if (test_bit(req->tag, dd->port->cmds_to_issue)) goto exit_handler; @@ -4103,18 +3959,10 @@ protocol_init_error: static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) { - struct driver_data *dd = (struct driver_data *)data; - struct mtip_cmd *cmd; - - if (likely(!reserv)) - blk_mq_complete_request(rq, -ENODEV); - else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) { + struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); - if (cmd->comp_func) - cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, - cmd, -ENODEV); - } + cmd->status = -ENODEV; + blk_mq_complete_request(rq); } /* @@ -4153,8 +4001,7 @@ static int mtip_block_remove(struct driver_data *dd) * Explicitly wait here for IOs to quiesce, * as mtip_standby_drive usually won't wait for IOs. */ - if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS, - GFP_KERNEL)) + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) mtip_standby_drive(dd); } else diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 7617888..37b8e3e 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -333,16 +333,6 @@ struct mtip_cmd { dma_addr_t command_dma; /* corresponding physical address */ - void *comp_data; /* data passed to completion function comp_func() */ - /* - * Completion function called by the ISR upon completion of - * a command. - */ - void (*comp_func)(struct mtip_port *port, - int tag, - struct mtip_cmd *cmd, - int status); - int scatter_ents; /* Number of scatter list entries used */ int unaligned; /* command is unaligned on 4k boundary */ @@ -352,6 +342,7 @@ struct mtip_cmd { int retries; /* The number of retries left for this command. */ int direction; /* Data transfer direction */ + int status; }; /* Structure used to describe a port. 
*/ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b78f23c..f3f191b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/fs.h> #include <linux/bio.h> #include <linux/stat.h> @@ -115,6 +116,7 @@ struct nbd_cmd { int index; int cookie; struct completion send_complete; + int status; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -244,16 +246,14 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, nbd_size_update(nbd); } -static void nbd_end_request(struct nbd_cmd *cmd) +static void nbd_complete_rq(struct request *req) { - struct nbd_device *nbd = cmd->nbd; - struct request *req = blk_mq_rq_from_pdu(cmd); - int error = req->errors ? -EIO : 0; + struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); - dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd, - error ? "failed" : "done"); + dev_dbg(nbd_to_dev(cmd->nbd), "request %p: %s\n", cmd, + cmd->status ? "failed" : "done"); - blk_mq_complete_request(req, error); + blk_mq_end_request(req, cmd->status); } /* @@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, struct nbd_config *config; if (!refcount_inc_not_zero(&nbd->config_refs)) { - req->errors = -EIO; + cmd->status = -EIO; return BLK_EH_HANDLED; } @@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, "Connection timed out\n"); } set_bit(NBD_TIMEDOUT, &config->runtime_flags); - req->errors = -EIO; + cmd->status = -EIO; sock_shutdown(nbd); nbd_config_put(nbd); @@ -348,7 +348,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, struct socket *sock = config->socks[index]->sock; int result; struct msghdr msg; - unsigned long pflags = current->flags; + unsigned int noreclaim_flag; if (unlikely(!sock)) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -359,7 +359,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, msg.msg_iter = *iter; - current->flags |= PF_MEMALLOC; + noreclaim_flag = memalloc_noreclaim_save(); do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; msg.msg_name = NULL; @@ -382,7 +382,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, *sent += result; } while (msg_data_left(&msg)); - tsk_restore_flags(current, pflags, PF_MEMALLOC); + memalloc_noreclaim_restore(noreclaim_flag); return result; } @@ -574,7 +574,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors = -EIO; + cmd->status = -EIO; return cmd; } @@ -599,7 +599,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) */ if (nbd_disconnected(config) || config->num_connections <= 1) { - req->errors = -EIO; + cmd->status = -EIO; return cmd; } return ERR_PTR(-EIO); @@ -636,7 +636,7 @@ static void recv_work(struct work_struct *work) break; } - nbd_end_request(cmd); + blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); @@ -651,8 +651,8 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors = -EIO; - nbd_end_request(cmd); + cmd->status = -EIO; + blk_mq_complete_request(req); } static void nbd_clear_que(struct nbd_device *nbd) @@ -740,7 +740,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 
nbd_config_put(nbd); return -EINVAL; } - req->errors = 0; + cmd->status = 0; again: nsock = config->socks[index]; mutex_lock(&nsock->tx_lock); @@ -937,14 +937,6 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) return -ENOSPC; } -/* Reset all properties of an NBD device */ -static void nbd_reset(struct nbd_device *nbd) -{ - nbd->config = NULL; - nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); -} - static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) @@ -1029,7 +1021,11 @@ static void nbd_config_put(struct nbd_device *nbd) } kfree(config->socks); } - nbd_reset(nbd); + kfree(nbd->config); + nbd->config = NULL; + + nbd->tag_set.timeout = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1397,17 +1393,17 @@ static void nbd_dbg_close(void) #endif -static int nbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->nbd = data; + cmd->nbd = set->driver_data; return 0; } static const struct blk_mq_ops nbd_mq_ops = { .queue_rq = nbd_queue_rq, + .complete = nbd_complete_rq, .init_request = nbd_init_request, .timeout = nbd_xmit_timeout, }; @@ -1469,6 +1465,8 @@ static int nbd_dev_add(int index) queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); + blk_queue_max_segment_size(disk->queue, UINT_MAX); + blk_queue_max_segments(disk->queue, USHRT_MAX); blk_queue_max_hw_sectors(disk->queue, 65536); disk->queue->limits.max_sectors = 256; @@ -1481,7 +1479,6 @@ static int nbd_dev_add(int index) disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - nbd_reset(nbd); add_disk(disk); nbd_total_devices++; return index; @@ -1660,7 +1657,7 @@ again: goto out; } ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy); + nbd_sock_policy, info->extack); if (ret != 0) { printk(KERN_ERR "nbd: error processing sock list\n"); ret = -EINVAL; @@ -1816,7 +1813,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) goto out; } ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy); + nbd_sock_policy, info->extack); if (ret != 0) { printk(KERN_ERR "nbd: error processing sock list\n"); ret = -EINVAL; @@ -2088,7 +2085,6 @@ static int nbd_exit_cb(int id, void *ptr, void *data) struct list_head *list = (struct list_head *)data; struct nbd_device *nbd = ptr; - refcount_inc(&nbd->refs); list_add_tail(&nbd->list, list); return 0; } @@ -2104,11 +2100,12 @@ static void __exit nbd_cleanup(void) idr_for_each(&nbd_index_idr, &nbd_exit_cb, &del_list); mutex_unlock(&nbd_index_mutex); - list_for_each_entry(nbd, &del_list, list) { - if (refcount_read(&nbd->refs) != 2) + while (!list_empty(&del_list)) { + nbd = list_first_entry(&del_list, struct nbd_device, list); + list_del_init(&nbd->list); + if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); - nbd_put(nbd); } idr_destroy(&nbd_index_idr); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index f93906f..d946e1e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -281,7 +281,7 @@ static inline 
void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq, cmd->rq->errors); + blk_mq_complete_request(cmd->rq); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); @@ -443,14 +443,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) if (IS_ERR(rq)) return -ENOMEM; - rq->__sector = bio->bi_iter.bi_sector; - rq->ioprio = bio_prio(bio); - - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; + blk_init_request_from_bio(rq, bio); rq->end_io_data = rqd; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c deleted file mode 100644 index 8127b82..0000000 --- a/drivers/block/osdblk.c +++ /dev/null @@ -1,693 +0,0 @@ - -/* - osdblk.c -- Export a single SCSI OSD object as a Linux block device - - - Copyright 2009 Red Hat, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - - Instructions for use - -------------------- - - 1) Map a Linux block device to an existing OSD object. - - In this example, we will use partition id 1234, object id 5678, - OSD device /dev/osd1. - - $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add - - - 2) List all active blkdev<->object mappings. - - In this example, we have performed step #1 twice, creating two blkdevs, - mapped to two separate OSD objects. - - $ cat /sys/class/osdblk/list - 0 174 1234 5678 /dev/osd1 - 1 179 1994 897123 /dev/osd0 - - The columns, in order, are: - - blkdev unique id - - blkdev assigned major - - OSD object partition id - - OSD object id - - OSD device - - - 3) Remove an active blkdev<->object mapping. - - In this example, we remove the mapping with blkdev unique id 1. - - $ echo 1 > /sys/class/osdblk/remove - - - NOTE: The actual creation and deletion of OSD objects is outside the scope - of this driver. - - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <scsi/osd_initiator.h> -#include <scsi/osd_attributes.h> -#include <scsi/osd_sec.h> -#include <scsi/scsi_device.h> - -#define DRV_NAME "osdblk" -#define PFX DRV_NAME ": " - -/* #define _OSDBLK_DEBUG */ -#ifdef _OSDBLK_DEBUG -#define OSDBLK_DEBUG(fmt, a...) \ - printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a) -#else -#define OSDBLK_DEBUG(fmt, a...) 
\ - do { if (0) printk(fmt, ##a); } while (0) -#endif - -MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); -MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko"); -MODULE_LICENSE("GPL"); - -struct osdblk_device; - -enum { - OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ - OSDBLK_MAX_REQ = 32, /* max parallel requests */ - OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ -}; - -struct osdblk_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct osdblk_device *osdev; /* associated blkdev */ -}; - -struct osdblk_device { - int id; /* blkdev unique id */ - - int major; /* blkdev assigned major */ - struct gendisk *disk; /* blkdev's gendisk and rq */ - struct request_queue *q; - - struct osd_dev *osd; /* associated OSD */ - - char name[32]; /* blkdev name, e.g. osdblk34 */ - - spinlock_t lock; /* queue lock */ - - struct osd_obj_id obj; /* OSD partition, obj id */ - uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ - - struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ - - struct list_head node; - - char osd_path[0]; /* OSD device path */ -}; - -static struct class *class_osdblk; /* /sys/class/osdblk */ -static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ -static LIST_HEAD(osdblkdev_list); - -static const struct block_device_operations osdblk_bd_ops = { - .owner = THIS_MODULE, -}; - -static const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); - -static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], - const struct osd_obj_id *obj) -{ - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); -} - -/* copied from exofs; move to libosd? */ -/* - * Perform a synchronous OSD operation. copied from exofs; move to libosd? - */ -static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) -{ - int ret; - - or->timeout = timeout; - ret = osd_finalize_request(or, 0, credential, NULL); - if (ret) - return ret; - - ret = osd_execute_request(or); - - /* osd_req_decode_sense(or, ret); */ - return ret; -} - -/* - * Perform an asynchronous OSD operation. copied from exofs; move to libosd? - */ -static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, - void *caller_context, u8 *cred) -{ - int ret; - - ret = osd_finalize_request(or, 0, cred, NULL); - if (ret) - return ret; - - ret = osd_execute_request_async(or, async_done, caller_context); - - return ret; -} - -/* copied from exofs; move to libosd? 
*/ -static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) -{ - struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ - void *iter = NULL; - int nelem; - - do { - nelem = 1; - osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); - if ((cur_attr.attr_page == attr->attr_page) && - (cur_attr.attr_id == attr->attr_id)) { - attr->len = cur_attr.len; - attr->val_ptr = cur_attr.val_ptr; - return 0; - } - } while (iter); - - return -EIO; -} - -static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) -{ - struct osd_request *or; - struct osd_attr attr; - int ret; - - /* start request */ - or = osd_start_request(osdev->osd, GFP_KERNEL); - if (!or) - return -ENOMEM; - - /* create a get-attributes(length) request */ - osd_req_get_attributes(or, &osdev->obj); - - osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); - - /* execute op synchronously */ - ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); - if (ret) - goto out; - - /* extract length from returned attribute info */ - attr = g_attr_logical_length; - ret = extract_attr_from_req(or, &attr); - if (ret) - goto out; - - *size_out = get_unaligned_be64(attr.val_ptr); - -out: - osd_end_request(or); - return ret; - -} - -static void osdblk_osd_complete(struct osd_request *or, void *private) -{ - struct osdblk_request *orq = private; - struct osd_sense_info osi; - int ret = osd_req_decode_sense(or, &osi); - - if (ret) { - ret = -EIO; - OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret); - } - - /* complete OSD request */ - osd_end_request(or); - - /* complete request passed to osdblk by block layer */ - __blk_end_request_all(orq->rq, ret); -} - -static void bio_chain_put(struct bio *chain) -{ - struct bio *tmp; - - while (chain) { - tmp = chain; - chain = chain->bi_next; - - bio_put(tmp); - } -} - -static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) -{ - struct bio *tmp, *new_chain = NULL, *tail = NULL; - - while (old_chain) { - tmp = bio_clone_kmalloc(old_chain, gfpmask); - if (!tmp) - goto err_out; - - tmp->bi_bdev = NULL; - gfpmask &= ~__GFP_DIRECT_RECLAIM; - tmp->bi_next = NULL; - - if (!new_chain) - new_chain = tail = tmp; - else { - tail->bi_next = tmp; - tail = tmp; - } - - old_chain = old_chain->bi_next; - } - - return new_chain; - -err_out: - OSDBLK_DEBUG("bio_chain_clone with err\n"); - bio_chain_put(new_chain); - return NULL; -} - -static void osdblk_rq_fn(struct request_queue *q) -{ - struct osdblk_device *osdev = q->queuedata; - - while (1) { - struct request *rq; - struct osdblk_request *orq; - struct osd_request *or; - struct bio *bio; - bool do_write, do_flush; - - /* peek at request from block layer */ - rq = blk_fetch_request(q); - if (!rq) - break; - - /* deduce our operation (read, write, flush) */ - /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] - * into a clearly defined set of RPC commands: - * read, write, flush, scsi command, power mgmt req, - * driver-specific, etc. 
- */ - - do_flush = (req_op(rq) == REQ_OP_FLUSH); - do_write = (rq_data_dir(rq) == WRITE); - - if (!do_flush) { /* osd_flush does not use a bio */ - /* a bio clone to be passed down to OSD request */ - bio = bio_chain_clone(rq->bio, GFP_ATOMIC); - if (!bio) - break; - } else - bio = NULL; - - /* alloc internal OSD request, for OSD command execution */ - or = osd_start_request(osdev->osd, GFP_ATOMIC); - if (!or) { - bio_chain_put(bio); - OSDBLK_DEBUG("osd_start_request with err\n"); - break; - } - - orq = &osdev->req[rq->tag]; - orq->rq = rq; - orq->bio = bio; - orq->osdev = osdev; - - /* init OSD command: flush, write or read */ - if (do_flush) - osd_req_flush_object(or, &osdev->obj, - OSD_CDB_FLUSH_ALL, 0, 0); - else if (do_write) - osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - else - osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, - bio, blk_rq_bytes(rq)); - - OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n", - do_flush ? "flush" : do_write ? - "write" : "read", blk_rq_bytes(rq), - blk_rq_pos(rq) * 512ULL); - - /* begin OSD command execution */ - if (osd_async_op(or, osdblk_osd_complete, orq, - osdev->obj_cred)) { - osd_end_request(or); - blk_requeue_request(q, rq); - bio_chain_put(bio); - OSDBLK_DEBUG("osd_execute_request_async with err\n"); - break; - } - - /* remove the special 'flush' marker, now that the command - * is executing - */ - rq->special = NULL; - } -} - -static void osdblk_free_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk = osdev->disk; - - if (!disk) - return; - - if (disk->flags & GENHD_FL_UP) - del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - put_disk(disk); -} - -static int osdblk_init_disk(struct osdblk_device *osdev) -{ - struct gendisk *disk; - struct request_queue *q; - int rc; - u64 obj_size = 0; - - /* contact OSD, request size info about the object being mapped */ - rc = osdblk_get_obj_size(osdev, &obj_size); - if (rc) - return rc; - - /* create gendisk info */ - disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR); - if (!disk) - return -ENOMEM; - - sprintf(disk->disk_name, DRV_NAME "%d", osdev->id); - disk->major = osdev->major; - disk->first_minor = 0; - disk->fops = &osdblk_bd_ops; - disk->private_data = osdev; - - /* init rq */ - q = blk_init_queue(osdblk_rq_fn, &osdev->lock); - if (!q) { - put_disk(disk); - return -ENOMEM; - } - - /* switch queue to TCQ mode; allocate tag map */ - rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO); - if (rc) { - blk_cleanup_queue(q); - put_disk(disk); - return rc; - } - - /* Set our limits to the lower device limits, because osdblk cannot - * sleep when allocating a lower-request and therefore cannot be - * bouncing. 
- */ - blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); - - blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_write_cache(q, true, false); - - disk->queue = q; - - q->queuedata = osdev; - - osdev->disk = disk; - osdev->q = q; - - /* finally, announce the disk to the world */ - set_capacity(disk, obj_size / 512ULL); - add_disk(disk); - - printk(KERN_INFO "%s: Added of size 0x%llx\n", - disk->disk_name, (unsigned long long)obj_size); - - return 0; -} - -/******************************************************************** - * /sys/class/osdblk/ - * add map OSD object to blkdev - * remove unmap OSD object - * list show mappings - *******************************************************************/ - -static void class_osdblk_release(struct class *cls) -{ - kfree(cls); -} - -static ssize_t class_osdblk_list(struct class *c, - struct class_attribute *attr, - char *data) -{ - int n = 0; - struct list_head *tmp; - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - - n += sprintf(data+n, "%d %d %llu %llu %s\n", - osdev->id, - osdev->major, - osdev->obj.partition, - osdev->obj.id, - osdev->osd_path); - } - - mutex_unlock(&ctl_mutex); - return n; -} - -static ssize_t class_osdblk_add(struct class *c, - struct class_attribute *attr, - const char *buf, size_t count) -{ - struct osdblk_device *osdev; - ssize_t rc; - int irc, new_id = 0; - struct list_head *tmp; - - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - - /* new osdblk_device object */ - osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); - if (!osdev) { - rc = -ENOMEM; - goto err_out_mod; - } - - /* static osdblk_device initialization */ - spin_lock_init(&osdev->lock); - INIT_LIST_HEAD(&osdev->node); - - /* generate unique id: find highest unique id, add one */ - - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - struct osdblk_device *osdev; - - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id > new_id) - new_id = osdev->id + 1; - } - - osdev->id = new_id; - - /* add to global list */ - list_add_tail(&osdev->node, &osdblkdev_list); - - mutex_unlock(&ctl_mutex); - - /* parse add command */ - if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, - osdev->osd_path) != 3) { - rc = -EINVAL; - goto err_out_slot; - } - - /* initialize rest of new object */ - sprintf(osdev->name, DRV_NAME "%d", osdev->id); - - /* contact requested OSD */ - osdev->osd = osduld_path_lookup(osdev->osd_path); - if (IS_ERR(osdev->osd)) { - rc = PTR_ERR(osdev->osd); - goto err_out_slot; - } - - /* build OSD credential */ - osdblk_make_credential(osdev->obj_cred, &osdev->obj); - - /* register our block device */ - irc = register_blkdev(0, osdev->name); - if (irc < 0) { - rc = irc; - goto err_out_osd; - } - - osdev->major = irc; - - /* set up and announce blkdev mapping */ - rc = osdblk_init_disk(osdev); - if (rc) - goto err_out_blkdev; - - return count; - -err_out_blkdev: - unregister_blkdev(osdev->major, osdev->name); -err_out_osd: - osduld_put_device(osdev->osd); -err_out_slot: - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - list_del_init(&osdev->node); - mutex_unlock(&ctl_mutex); - - kfree(osdev); -err_out_mod: - OSDBLK_DEBUG("Error adding device %s\n", buf); - module_put(THIS_MODULE); - return rc; -} - -static ssize_t class_osdblk_remove(struct class *c, - struct class_attribute *attr, - const char *buf, - size_t 
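The three control files here are the entire userspace interface of the driver being deleted. Their wire format, as enforced by the sscanf() in class_osdblk_add() above and the kstrtoul() in class_osdblk_remove() below (the example values are illustrative only):

/*
 *   /sys/class/osdblk/add:    "<partition> <object-id> <osd-path>"
 *   /sys/class/osdblk/remove: "<device id>"
 *   /sys/class/osdblk/list:   "<id> <major> <partition> <obj-id> <path>"
 *
 * e.g. from userspace (hypothetical object and path):
 *   echo "0 65536 /dev/osd0" > /sys/class/osdblk/add
 *   echo 0 > /sys/class/osdblk/remove
 */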
count) -{ - struct osdblk_device *osdev = NULL; - int target_id, rc; - unsigned long ul; - struct list_head *tmp; - - rc = kstrtoul(buf, 10, &ul); - if (rc) - return rc; - - /* convert to int; abort if we lost anything in the conversion */ - target_id = (int) ul; - if (target_id != ul) - return -EINVAL; - - /* remove object from list immediately */ - mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - - list_for_each(tmp, &osdblkdev_list) { - osdev = list_entry(tmp, struct osdblk_device, node); - if (osdev->id == target_id) { - list_del_init(&osdev->node); - break; - } - osdev = NULL; - } - - mutex_unlock(&ctl_mutex); - - if (!osdev) - return -ENOENT; - - /* clean up and free blkdev and associated OSD connection */ - osdblk_free_disk(osdev); - unregister_blkdev(osdev->major, osdev->name); - osduld_put_device(osdev->osd); - kfree(osdev); - - /* release module ref */ - module_put(THIS_MODULE); - - return count; -} - -static struct class_attribute class_osdblk_attrs[] = { - __ATTR(add, 0200, NULL, class_osdblk_add), - __ATTR(remove, 0200, NULL, class_osdblk_remove), - __ATTR(list, 0444, class_osdblk_list, NULL), - __ATTR_NULL -}; - -static int osdblk_sysfs_init(void) -{ - int ret = 0; - - /* - * create control files in sysfs - * /sys/class/osdblk/... - */ - class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); - if (!class_osdblk) - return -ENOMEM; - - class_osdblk->name = DRV_NAME; - class_osdblk->owner = THIS_MODULE; - class_osdblk->class_release = class_osdblk_release; - class_osdblk->class_attrs = class_osdblk_attrs; - - ret = class_register(class_osdblk); - if (ret) { - kfree(class_osdblk); - class_osdblk = NULL; - printk(PFX "failed to create class osdblk\n"); - return ret; - } - - return 0; -} - -static void osdblk_sysfs_cleanup(void) -{ - if (class_osdblk) - class_destroy(class_osdblk); - class_osdblk = NULL; -} - -static int __init osdblk_init(void) -{ - int rc; - - rc = osdblk_sysfs_init(); - if (rc) - return rc; - - return 0; -} - -static void __exit osdblk_exit(void) -{ - osdblk_sysfs_cleanup(); -} - -module_init(osdblk_init); -module_exit(osdblk_exit); - diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index b05e151..7d2402f 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -739,18 +739,15 @@ static int pd_special_command(struct pd_unit *disk, enum action (*func)(struct pd_unit *disk)) { struct request *rq; - int err = 0; rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); rq->special = func; - - err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); - + blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); blk_put_request(rq); - return err; + return 0; } /* kernel glue structures */ diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 66d846b..205b865 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -724,7 +724,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); - if (rq->errors) + if (scsi_req(rq)->result) ret = -EIO; out: blk_put_request(rq); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 089ac41..c16f745 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -387,6 +387,7 @@ struct rbd_device { struct rw_semaphore lock_rwsem; enum rbd_lock_state lock_state; + char lock_cookie[32]; struct rbd_client_id owner_cid; struct work_struct acquired_lock_work; struct work_struct released_lock_work; @@ -477,13 +478,6 @@ 
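The pktcdvd hunk above is part of a tree-wide switch away from the removed rq->errors field. A minimal illustration of the new accessor, assuming the request was allocated from a queue whose requests carry a struct scsi_request PDU:

#include <linux/blkdev.h>
#include <scsi/scsi_request.h>

/* after blk_execute_rq(), the SCSI result lives in the scsi_request
 * PDU in front of the request, not in the (removed) rq->errors */
static int passthrough_status_sketch(struct request *rq)
{
	return scsi_req(rq)->result ? -EIO : 0;
}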
static int minor_to_rbd_dev_id(int minor) return minor >> RBD_SINGLE_MAJOR_PART_SHIFT; } -static bool rbd_is_lock_supported(struct rbd_device *rbd_dev) -{ - return (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) && - rbd_dev->spec->snap_id == CEPH_NOSNAP && - !rbd_dev->mapping.read_only; -} - static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) { return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || @@ -731,7 +725,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) kref_init(&rbdc->kref); INIT_LIST_HEAD(&rbdc->node); - rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); + rbdc->client = ceph_create_client(ceph_opts, rbdc); if (IS_ERR(rbdc->client)) goto out_rbdc; ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ @@ -804,6 +798,7 @@ enum { Opt_read_only, Opt_read_write, Opt_lock_on_read, + Opt_exclusive, Opt_err }; @@ -816,6 +811,7 @@ static match_table_t rbd_opts_tokens = { {Opt_read_write, "read_write"}, {Opt_read_write, "rw"}, /* Alternate spelling */ {Opt_lock_on_read, "lock_on_read"}, + {Opt_exclusive, "exclusive"}, {Opt_err, NULL} }; @@ -823,11 +819,13 @@ struct rbd_options { int queue_depth; bool read_only; bool lock_on_read; + bool exclusive; }; #define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ #define RBD_READ_ONLY_DEFAULT false #define RBD_LOCK_ON_READ_DEFAULT false +#define RBD_EXCLUSIVE_DEFAULT false static int parse_rbd_opts_token(char *c, void *private) { @@ -866,6 +864,9 @@ static int parse_rbd_opts_token(char *c, void *private) case Opt_lock_on_read: rbd_opts->lock_on_read = true; break; + case Opt_exclusive: + rbd_opts->exclusive = true; + break; default: /* libceph prints "bad option" msg */ return -EINVAL; @@ -1922,7 +1923,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - osd_req->r_mtime = CURRENT_TIME; + ktime_get_real_ts(&osd_req->r_mtime); osd_req->r_data_offset = obj_request->offset; } @@ -3079,7 +3080,8 @@ static int rbd_lock(struct rbd_device *rbd_dev) char cookie[32]; int ret; - WARN_ON(__rbd_is_lock_owner(rbd_dev)); + WARN_ON(__rbd_is_lock_owner(rbd_dev) || + rbd_dev->lock_cookie[0] != '\0'); format_lock_cookie(rbd_dev, cookie); ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, @@ -3089,6 +3091,7 @@ static int rbd_lock(struct rbd_device *rbd_dev) return ret; rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; + strcpy(rbd_dev->lock_cookie, cookie); rbd_set_owner_cid(rbd_dev, &cid); queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); return 0; @@ -3097,27 +3100,24 @@ static int rbd_lock(struct rbd_device *rbd_dev) /* * lock_rwsem must be held for write */ -static int rbd_unlock(struct rbd_device *rbd_dev) +static void rbd_unlock(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; - char cookie[32]; int ret; - WARN_ON(!__rbd_is_lock_owner(rbd_dev)); - - rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; + WARN_ON(!__rbd_is_lock_owner(rbd_dev) || + rbd_dev->lock_cookie[0] == '\0'); - format_lock_cookie(rbd_dev, cookie); ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, - RBD_LOCK_NAME, cookie); - if (ret && ret != -ENOENT) { - rbd_warn(rbd_dev, "cls_unlock failed: %d", ret); - return ret; - } + RBD_LOCK_NAME, rbd_dev->lock_cookie); + if (ret && ret != -ENOENT) + rbd_warn(rbd_dev, "failed to unlock: %d", ret); + /* treat errors as the image is unlocked */ + rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; + rbd_dev->lock_cookie[0] 
= '\0'; rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work); - return 0; } static int __rbd_notify_op_lock(struct rbd_device *rbd_dev, @@ -3447,6 +3447,18 @@ again: ret = rbd_request_lock(rbd_dev); if (ret == -ETIMEDOUT) { goto again; /* treat this as a dead client */ + } else if (ret == -EROFS) { + rbd_warn(rbd_dev, "peer will not release lock"); + /* + * If this is rbd_add_acquire_lock(), we want to fail + * immediately -- reuse BLACKLISTED flag. Otherwise we + * want to block. + */ + if (!(rbd_dev->disk->flags & GENHD_FL_UP)) { + set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags); + /* wake "rbd map --exclusive" process */ + wake_requests(rbd_dev, false); + } } else if (ret < 0) { rbd_warn(rbd_dev, "error requesting lock: %d", ret); mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, @@ -3490,16 +3502,15 @@ static bool rbd_release_lock(struct rbd_device *rbd_dev) if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) return false; - if (!rbd_unlock(rbd_dev)) - /* - * Give others a chance to grab the lock - we would re-acquire - * almost immediately if we got new IO during ceph_osdc_sync() - * otherwise. We need to ack our own notifications, so this - * lock_dwork will be requeued from rbd_wait_state_locked() - * after wake_requests() in rbd_handle_released_lock(). - */ - cancel_delayed_work(&rbd_dev->lock_dwork); - + rbd_unlock(rbd_dev); + /* + * Give others a chance to grab the lock - we would re-acquire + * almost immediately if we got new IO during ceph_osdc_sync() + * otherwise. We need to ack our own notifications, so this + * lock_dwork will be requeued from rbd_wait_state_locked() + * after wake_requests() in rbd_handle_released_lock(). + */ + cancel_delayed_work(&rbd_dev->lock_dwork); return true; } @@ -3580,12 +3591,16 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, up_read(&rbd_dev->lock_rwsem); } -static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v, - void **p) +/* + * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no + * ResponseMessage is needed. 
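The handler's new integer return replaces the old boolean and distinguishes three cases. Restated from the code below, nothing new:

/*
 * rbd_handle_request_lock() results, as consumed in rbd_watch_cb()
 * further down:
 *   1       no ResponseMessage needed (not the lock owner, or the
 *           notification is our own)
 *   0       we own the lock and are (or will be) releasing it;
 *           ResponseMessage(0) lets a peer detect a missing owner
 *   -EROFS  we own the lock and were mapped with the new "exclusive"
 *           option - the lock will not be released
 */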
+ */ +static int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v, + void **p) { struct rbd_client_id my_cid = rbd_get_cid(rbd_dev); struct rbd_client_id cid = { 0 }; - bool need_to_send; + int result = 1; if (struct_v >= 2) { cid.gid = ceph_decode_64(p); @@ -3595,19 +3610,36 @@ static bool rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v, dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle); if (rbd_cid_equal(&cid, &my_cid)) - return false; + return result; down_read(&rbd_dev->lock_rwsem); - need_to_send = __rbd_is_lock_owner(rbd_dev); - if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) { - if (!rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) { - dout("%s rbd_dev %p queueing unlock_work\n", __func__, - rbd_dev); - queue_work(rbd_dev->task_wq, &rbd_dev->unlock_work); + if (__rbd_is_lock_owner(rbd_dev)) { + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED && + rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) + goto out_unlock; + + /* + * encode ResponseMessage(0) so the peer can detect + * a missing owner + */ + result = 0; + + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) { + if (!rbd_dev->opts->exclusive) { + dout("%s rbd_dev %p queueing unlock_work\n", + __func__, rbd_dev); + queue_work(rbd_dev->task_wq, + &rbd_dev->unlock_work); + } else { + /* refuse to release the lock */ + result = -EROFS; + } } } + +out_unlock: up_read(&rbd_dev->lock_rwsem); - return need_to_send; + return result; } static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev, @@ -3690,13 +3722,10 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie, rbd_acknowledge_notify(rbd_dev, notify_id, cookie); break; case RBD_NOTIFY_OP_REQUEST_LOCK: - if (rbd_handle_request_lock(rbd_dev, struct_v, &p)) - /* - * send ResponseMessage(0) back so the client - * can detect a missing owner - */ + ret = rbd_handle_request_lock(rbd_dev, struct_v, &p); + if (ret <= 0) rbd_acknowledge_notify_result(rbd_dev, notify_id, - cookie, 0); + cookie, ret); else rbd_acknowledge_notify(rbd_dev, notify_id, cookie); break; @@ -3821,24 +3850,51 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev) ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); } +/* + * lock_rwsem must be held for write + */ +static void rbd_reacquire_lock(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + char cookie[32]; + int ret; + + WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); + + format_lock_cookie(rbd_dev, cookie); + ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid, + &rbd_dev->header_oloc, RBD_LOCK_NAME, + CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie, + RBD_LOCK_TAG, cookie); + if (ret) { + if (ret != -EOPNOTSUPP) + rbd_warn(rbd_dev, "failed to update lock cookie: %d", + ret); + + /* + * Lock cookie cannot be updated on older OSDs, so do + * a manual release and queue an acquire. 
+ */ + if (rbd_release_lock(rbd_dev)) + queue_delayed_work(rbd_dev->task_wq, + &rbd_dev->lock_dwork, 0); + } else { + strcpy(rbd_dev->lock_cookie, cookie); + } +} + static void rbd_reregister_watch(struct work_struct *work) { struct rbd_device *rbd_dev = container_of(to_delayed_work(work), struct rbd_device, watch_dwork); - bool was_lock_owner = false; - bool need_to_wake = false; int ret; dout("%s rbd_dev %p\n", __func__, rbd_dev); - down_write(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) - was_lock_owner = rbd_release_lock(rbd_dev); - mutex_lock(&rbd_dev->watch_mutex); if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) { mutex_unlock(&rbd_dev->watch_mutex); - goto out; + return; } ret = __rbd_register_watch(rbd_dev); @@ -3846,36 +3902,28 @@ static void rbd_reregister_watch(struct work_struct *work) rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); if (ret == -EBLACKLISTED || ret == -ENOENT) { set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags); - need_to_wake = true; + wake_requests(rbd_dev, true); } else { queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, RBD_RETRY_DELAY); } mutex_unlock(&rbd_dev->watch_mutex); - goto out; + return; } - need_to_wake = true; rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; mutex_unlock(&rbd_dev->watch_mutex); + down_write(&rbd_dev->lock_rwsem); + if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) + rbd_reacquire_lock(rbd_dev); + up_write(&rbd_dev->lock_rwsem); + ret = rbd_dev_refresh(rbd_dev); if (ret) rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret); - - if (was_lock_owner) { - ret = rbd_try_lock(rbd_dev); - if (ret) - rbd_warn(rbd_dev, "reregisteration lock failed: %d", - ret); - } - -out: - up_write(&rbd_dev->lock_rwsem); - if (need_to_wake) - wake_requests(rbd_dev, true); } /* @@ -3975,6 +4023,7 @@ static void rbd_queue_workfn(struct work_struct *work) switch (req_op(rq)) { case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: op_type = OBJ_OP_DISCARD; break; case REQ_OP_WRITE: @@ -4034,10 +4083,6 @@ static void rbd_queue_workfn(struct work_struct *work) if (op_type != OBJ_OP_READ) { snapc = rbd_dev->header.snapc; ceph_get_snap_context(snapc); - must_be_locked = rbd_is_lock_supported(rbd_dev); - } else { - must_be_locked = rbd_dev->opts->lock_on_read && - rbd_is_lock_supported(rbd_dev); } up_read(&rbd_dev->header_rwsem); @@ -4048,14 +4093,20 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } + must_be_locked = + (rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK) && + (op_type != OBJ_OP_READ || rbd_dev->opts->lock_on_read); if (must_be_locked) { down_read(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) + !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + if (rbd_dev->opts->exclusive) { + rbd_warn(rbd_dev, "exclusive lock required"); + result = -EROFS; + goto err_unlock; + } rbd_wait_state_locked(rbd_dev); - - WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^ - !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); + } if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { result = -EBLACKLISTED; goto err_unlock; @@ -4114,19 +4165,10 @@ static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, static void rbd_free_disk(struct rbd_device *rbd_dev) { - struct gendisk *disk = rbd_dev->disk; - - if (!disk) - return; - + blk_cleanup_queue(rbd_dev->disk->queue); + blk_mq_free_tag_set(&rbd_dev->tag_set); + 
put_disk(rbd_dev->disk); rbd_dev->disk = NULL; - if (disk->flags & GENHD_FL_UP) { - del_gendisk(disk); - if (disk->queue) - blk_cleanup_queue(disk->queue); - blk_mq_free_tag_set(&rbd_dev->tag_set); - } - put_disk(disk); } static int rbd_obj_read_sync(struct rbd_device *rbd_dev, @@ -4307,9 +4349,8 @@ out: return ret; } -static int rbd_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int rbd_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { struct work_struct *work = blk_mq_rq_to_pdu(rq); @@ -4380,12 +4421,17 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + /* + * disk_release() expects a queue ref from add_disk() and will + * put it. Hold an extra ref until add_disk() is called. + */ + WARN_ON(!blk_get_queue(q)); disk->queue = q; - q->queuedata = rbd_dev; rbd_dev->disk = disk; @@ -5625,6 +5671,7 @@ static int rbd_add_parse_args(const char *buf, rbd_opts->read_only = RBD_READ_ONLY_DEFAULT; rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; rbd_opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; + rbd_opts->exclusive = RBD_EXCLUSIVE_DEFAULT; copts = ceph_parse_options(options, mon_addrs, mon_addrs + mon_addrs_size - 1, @@ -5683,6 +5730,33 @@ again: return ret; } +static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) +{ + down_write(&rbd_dev->lock_rwsem); + if (__rbd_is_lock_owner(rbd_dev)) + rbd_unlock(rbd_dev); + up_write(&rbd_dev->lock_rwsem); +} + +static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) +{ + if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { + rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); + return -EINVAL; + } + + /* FIXME: "rbd map --exclusive" should be in interruptible */ + down_read(&rbd_dev->lock_rwsem); + rbd_wait_state_locked(rbd_dev); + up_read(&rbd_dev->lock_rwsem); + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + rbd_warn(rbd_dev, "failed to acquire exclusive lock"); + return -EROFS; + } + + return 0; +} + /* * An rbd format 2 image has a unique identifier, distinct from the * name given to it by the user. Internally, that identifier is @@ -5874,6 +5948,15 @@ out_err: return ret; } +static void rbd_dev_device_release(struct rbd_device *rbd_dev) +{ + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + rbd_dev_mapping_clear(rbd_dev); + rbd_free_disk(rbd_dev); + if (!single_major) + unregister_blkdev(rbd_dev->major, rbd_dev->name); +} + /* * rbd_dev->header_rwsem must be locked for write and will be unlocked * upon return. @@ -5909,26 +5992,13 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); - dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); - ret = device_add(&rbd_dev->dev); + ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); if (ret) goto err_out_mapping; - /* Everything's ready. Announce the disk to the world. 
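The blk_get_queue() comment above encodes an ordering rule worth spelling out: disk_release() puts a queue reference that add_disk() is expected to have taken, so a driver that sets disk->queue before add_disk() must hold its own extra reference across that window and drop it only afterwards. The pairing, distilled into one illustrative helper (not part of the patch; in rbd the two halves live in rbd_init_disk() and do_rbd_add()):

#include <linux/blkdev.h>
#include <linux/genhd.h>

static int announce_disk_sketch(struct gendisk *disk,
				struct request_queue *q)
{
	if (!blk_get_queue(q))	/* extra ref, held across add_disk() */
		return -ENODEV;
	disk->queue = q;
	add_disk(disk);
	blk_put_queue(q);	/* add_disk() took the ref disk_release() puts */
	return 0;
}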
*/ - set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); up_write(&rbd_dev->header_rwsem); - - spin_lock(&rbd_dev_list_lock); - list_add_tail(&rbd_dev->node, &rbd_dev_list); - spin_unlock(&rbd_dev_list_lock); - - add_disk(rbd_dev->disk); - pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name, - (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT, - rbd_dev->header.features); - - return ret; + return 0; err_out_mapping: rbd_dev_mapping_clear(rbd_dev); @@ -5963,11 +6033,11 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) static void rbd_dev_image_release(struct rbd_device *rbd_dev) { rbd_dev_unprobe(rbd_dev); + if (rbd_dev->opts) + rbd_unregister_watch(rbd_dev); rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; - - rbd_dev_destroy(rbd_dev); } /* @@ -6127,22 +6197,43 @@ static ssize_t do_rbd_add(struct bus_type *bus, rbd_dev->mapping.read_only = read_only; rc = rbd_dev_device_setup(rbd_dev); - if (rc) { - /* - * rbd_unregister_watch() can't be moved into - * rbd_dev_image_release() without refactoring, see - * commit 1f3ef78861ac. - */ - rbd_unregister_watch(rbd_dev); - rbd_dev_image_release(rbd_dev); - goto out; + if (rc) + goto err_out_image_probe; + + if (rbd_dev->opts->exclusive) { + rc = rbd_add_acquire_lock(rbd_dev); + if (rc) + goto err_out_device_setup; } + /* Everything's ready. Announce the disk to the world. */ + + rc = device_add(&rbd_dev->dev); + if (rc) + goto err_out_image_lock; + + add_disk(rbd_dev->disk); + /* see rbd_init_disk() */ + blk_put_queue(rbd_dev->disk->queue); + + spin_lock(&rbd_dev_list_lock); + list_add_tail(&rbd_dev->node, &rbd_dev_list); + spin_unlock(&rbd_dev_list_lock); + + pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name, + (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT, + rbd_dev->header.features); rc = count; out: module_put(THIS_MODULE); return rc; +err_out_image_lock: + rbd_dev_image_unlock(rbd_dev); +err_out_device_setup: + rbd_dev_device_release(rbd_dev); +err_out_image_probe: + rbd_dev_image_release(rbd_dev); err_out_rbd_dev: rbd_dev_destroy(rbd_dev); err_out_client: @@ -6170,21 +6261,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, return do_rbd_add(bus, buf, count); } -static void rbd_dev_device_release(struct rbd_device *rbd_dev) -{ - rbd_free_disk(rbd_dev); - - spin_lock(&rbd_dev_list_lock); - list_del_init(&rbd_dev->node); - spin_unlock(&rbd_dev_list_lock); - - clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); - device_del(&rbd_dev->dev); - rbd_dev_mapping_clear(rbd_dev); - if (!single_major) - unregister_blkdev(rbd_dev->major, rbd_dev->name); -} - static void rbd_dev_remove_parent(struct rbd_device *rbd_dev) { while (rbd_dev->parent) { @@ -6202,6 +6278,7 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev) } rbd_assert(second); rbd_dev_image_release(second); + rbd_dev_destroy(second); first->parent = NULL; first->parent_overlap = 0; @@ -6270,21 +6347,16 @@ static ssize_t do_rbd_remove(struct bus_type *bus, blk_set_queue_dying(rbd_dev->disk->queue); } - down_write(&rbd_dev->lock_rwsem); - if (__rbd_is_lock_owner(rbd_dev)) - rbd_unlock(rbd_dev); - up_write(&rbd_dev->lock_rwsem); - rbd_unregister_watch(rbd_dev); + del_gendisk(rbd_dev->disk); + spin_lock(&rbd_dev_list_lock); + list_del_init(&rbd_dev->node); + spin_unlock(&rbd_dev_list_lock); + device_del(&rbd_dev->dev); - /* - * Don't free anything from rbd_dev->disk until after all - * notifies are completely processed. 
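The reworked do_rbd_add() above now tears down in strict reverse order of setup, and do_rbd_remove() below reuses the same helpers. The pairing, summarized from the error labels:

/*
 * setup step                   undone by
 * rbd_dev_image_probe()    ->  rbd_dev_image_release()  (err_out_image_probe)
 * rbd_dev_device_setup()   ->  rbd_dev_device_release() (err_out_device_setup)
 * rbd_add_acquire_lock()   ->  rbd_dev_image_unlock()   (err_out_image_lock)
 * device_add()/add_disk()  ->  device_del()/del_gendisk() (remove path only)
 */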
Otherwise - * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting - * in a potential use after free of rbd_dev->disk or rbd_dev. - */ + rbd_dev_image_unlock(rbd_dev); rbd_dev_device_release(rbd_dev); rbd_dev_image_release(rbd_dev); - + rbd_dev_destroy(rbd_dev); return count; } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 61b3ffa..ba4809c 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -343,8 +343,8 @@ static void start_request(struct floppy_state *fs) req->rq_disk->disk_name, req->cmd, (long)blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); - swim3_dbg(" errors=%d current_nr_sectors=%u\n", - req->errors, blk_rq_cur_sectors(req)); + swim3_dbg(" current_nr_sectors=%u\n", + blk_rq_cur_sectors(req)); #endif if (blk_rq_pos(req) >= fs->total_secs) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2d82901..553cc4c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -111,7 +111,7 @@ static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr, return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); struct virtio_blk *vblk = req->q->queuedata; @@ -119,7 +119,7 @@ static inline void virtblk_scsi_reques_done(struct request *req) sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); - req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); + sreq->result = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); } static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, @@ -144,7 +144,7 @@ static inline int virtblk_add_req_scsi(struct virtqueue *vq, { return -EIO; } -static inline void virtblk_scsi_reques_done(struct request *req) +static inline void virtblk_scsi_request_done(struct request *req) { } #define virtblk_ioctl NULL @@ -175,19 +175,15 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, static inline void virtblk_request_done(struct request *req) { struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); - int error = virtblk_result(vbr); switch (req_op(req)) { case REQ_OP_SCSI_IN: case REQ_OP_SCSI_OUT: - virtblk_scsi_reques_done(req); - break; - case REQ_OP_DRV_IN: - req->errors = (error != 0); + virtblk_scsi_request_done(req); break; } - blk_mq_end_request(req, error); + blk_mq_end_request(req, virtblk_result(vbr)); } static void virtblk_done(struct virtqueue *vq) @@ -205,7 +201,7 @@ static void virtblk_done(struct virtqueue *vq) while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { struct request *req = blk_mq_rq_from_pdu(vbr); - blk_mq_complete_request(req, req->errors); + blk_mq_complete_request(req); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) @@ -310,7 +306,8 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) if (err) goto out; - err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); + err = virtblk_result(blk_mq_rq_to_pdu(req)); out: blk_put_request(req); return err; @@ -455,8 +452,7 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. 
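The rbd hunk earlier and the virtblk_init_request() hunk just below both move to the new blk-mq ->init_request() prototype: the opaque 'data' pointer and the 'request_idx' argument are gone, and the driver context now comes from the tag set itself. The shape of a converted hook (my_dev and my_pdu are illustrative names, not from the patch):

#include <linux/blk-mq.h>

struct my_dev;

struct my_pdu {
	struct my_dev *dev;
};

static int my_init_request(struct blk_mq_tag_set *set, struct request *rq,
			   unsigned int hctx_idx, unsigned int numa_node)
{
	struct my_pdu *pdu = blk_mq_rq_to_pdu(rq);

	pdu->dev = set->driver_data;	/* was the 'void *data' argument */
	return 0;
}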
*/ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - &desc); + err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) goto out; @@ -576,11 +572,10 @@ static const struct device_attribute dev_attr_cache_type_rw = __ATTR(cache_type, S_IRUGO|S_IWUSR, virtblk_cache_type_show, virtblk_cache_type_store); -static int virtblk_init_request(void *data, struct request *rq, - unsigned int hctx_idx, unsigned int request_idx, - unsigned int numa_node) +static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { - struct virtio_blk *vblk = data; + struct virtio_blk *vblk = set->driver_data; struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); #ifdef CONFIG_VIRTIO_BLK_SCSI diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index abed296..3945963 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -115,6 +115,15 @@ struct split_bio { atomic_t pending; }; +struct blkif_req { + int error; +}; + +static inline struct blkif_req *blkif_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + static DEFINE_MUTEX(blkfront_mutex); static const struct block_device_operations xlvbd_block_fops; @@ -907,8 +916,14 @@ out_busy: return BLK_MQ_RQ_QUEUE_BUSY; } +static void blkif_complete_rq(struct request *rq) +{ + blk_mq_end_request(rq, blkif_req(rq)->error); +} + static const struct blk_mq_ops blkfront_mq_ops = { .queue_rq = blkif_queue_rq, + .complete = blkif_complete_rq, }; static void blkif_set_queue_limits(struct blkfront_info *info) @@ -969,7 +984,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, info->tag_set.queue_depth = BLK_RING_SIZE(info); info->tag_set.numa_node = NUMA_NO_NODE; info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; - info->tag_set.cmd_size = 0; + info->tag_set.cmd_size = sizeof(struct blkif_req); info->tag_set.driver_data = info; if (blk_mq_alloc_tag_set(&info->tag_set)) @@ -1543,7 +1558,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; - int error; if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) return IRQ_HANDLED; @@ -1587,37 +1601,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 
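xen-blkfront now keeps per-request status in a PDU behind the request (the cmd_size change above) instead of threading a local error through to completion, which is what lets blk_mq_complete_request() lose its error argument here and in virtio_blk. The two halves of the pattern, distilled as a sketch (the real code is in the hunks around this point):

#include <linux/blk-mq.h>

struct pdu_sketch {
	int error;
};

/* interrupt side: record status in the PDU, then hand off to blk-mq */
static void irq_side_sketch(struct request *rq, bool ok)
{
	struct pdu_sketch *p = blk_mq_rq_to_pdu(rq);

	p->error = ok ? 0 : -EIO;
	blk_mq_complete_request(rq);	/* ends up in ->complete */
}

/* ->complete side: runs in the proper context and ends the request */
static void complete_side_sketch(struct request *rq)
{
	struct pdu_sketch *p = blk_mq_rq_to_pdu(rq);

	blk_mq_end_request(rq, p->error);
}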
0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } - blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - error = -EOPNOTSUPP; + blkif_req(req)->error = -EOPNOTSUPP; } - if (unlikely(error)) { - if (error == -EOPNOTSUPP) - error = 0; + if (unlikely(blkif_req(req)->error)) { + if (blkif_req(req)->error == -EOPNOTSUPP) + blkif_req(req)->error = 0; info->feature_fua = 0; info->feature_flush = 0; xlvbd_flush(info); @@ -1629,11 +1642,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req, error); break; default: BUG(); } + + blk_mq_complete_request(req); } rinfo->ring.rsp_cons = i; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1710b06..debee95 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -45,6 +45,8 @@ static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +static void zram_free_page(struct zram *zram, size_t index); + static inline bool init_done(struct zram *zram) { return zram->disksize; @@ -55,53 +57,70 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } +static unsigned long zram_get_handle(struct zram *zram, u32 index) +{ + return zram->table[index].handle; +} + +static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) +{ + zram->table[index].handle = handle; +} + /* flag operations require table entry bit_spin_lock() being held */ -static int zram_test_flag(struct zram_meta *meta, u32 index, +static int zram_test_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - return meta->table[index].value & BIT(flag); + return zram->table[index].value & BIT(flag); } -static void zram_set_flag(struct zram_meta *meta, u32 index, +static void zram_set_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - meta->table[index].value |= BIT(flag); + zram->table[index].value |= BIT(flag); } -static void zram_clear_flag(struct zram_meta *meta, u32 index, +static void zram_clear_flag(struct zram *zram, u32 index, enum zram_pageflags flag) { - meta->table[index].value &= ~BIT(flag); + zram->table[index].value &= ~BIT(flag); } -static inline void zram_set_element(struct zram_meta *meta, u32 index, +static inline void zram_set_element(struct zram *zram, u32 index, unsigned long element) { - meta->table[index].element = element; + zram->table[index].element = element; } -static inline void zram_clear_element(struct zram_meta *meta, u32 index) +static unsigned long 
zram_get_element(struct zram *zram, u32 index) { - meta->table[index].element = 0; + return zram->table[index].element; } -static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) +static size_t zram_get_obj_size(struct zram *zram, u32 index) { - return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); + return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); } -static void zram_set_obj_size(struct zram_meta *meta, +static void zram_set_obj_size(struct zram *zram, u32 index, size_t size) { - unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; + unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; - meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; + zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; } +#if PAGE_SIZE != 4096 static inline bool is_partial_io(struct bio_vec *bvec) { return bvec->bv_len != PAGE_SIZE; } +#else +static inline bool is_partial_io(struct bio_vec *bvec) +{ + return false; +} +#endif static void zram_revalidate_disk(struct zram *zram) { @@ -137,8 +156,7 @@ static inline bool valid_io_request(struct zram *zram, static void update_position(u32 *index, int *offset, struct bio_vec *bvec) { - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; + *index += (*offset + bvec->bv_len) / PAGE_SIZE; *offset = (*offset + bvec->bv_len) % PAGE_SIZE; } @@ -177,31 +195,21 @@ static bool page_same_filled(void *ptr, unsigned long *element) { unsigned int pos; unsigned long *page; + unsigned long val; page = (unsigned long *)ptr; + val = page[0]; - for (pos = 0; pos < PAGE_SIZE / sizeof(*page) - 1; pos++) { - if (page[pos] != page[pos + 1]) + for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { + if (val != page[pos]) return false; } - *element = page[pos]; + *element = val; return true; } -static void handle_same_page(struct bio_vec *bvec, unsigned long element) -{ - struct page *page = bvec->bv_page; - void *user_mem; - - user_mem = kmap_atomic(page); - zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element); - kunmap_atomic(user_mem); - - flush_dcache_page(page); -} - static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -254,9 +262,8 @@ static ssize_t mem_used_max_store(struct device *dev, down_read(&zram->init_lock); if (init_done(zram)) { - struct zram_meta *meta = zram->meta; atomic_long_set(&zram->stats.max_used_pages, - zs_get_total_pages(meta->mem_pool)); + zs_get_total_pages(zram->mem_pool)); } up_read(&zram->init_lock); @@ -329,7 +336,6 @@ static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta; down_read(&zram->init_lock); if (!init_done(zram)) { @@ -337,8 +343,7 @@ static ssize_t compact_store(struct device *dev, return -EINVAL; } - meta = zram->meta; - zs_compact(meta->mem_pool); + zs_compact(zram->mem_pool); up_read(&zram->init_lock); return len; @@ -375,8 +380,8 @@ static ssize_t mm_stat_show(struct device *dev, down_read(&zram->init_lock); if (init_done(zram)) { - mem_used = zs_get_total_pages(zram->meta->mem_pool); - zs_pool_stats(zram->meta->mem_pool, &pool_stats); + mem_used = zs_get_total_pages(zram->mem_pool); + zs_pool_stats(zram->mem_pool, &pool_stats); } orig_size = atomic64_read(&zram->stats.pages_stored); @@ -417,56 +422,89 @@ static DEVICE_ATTR_RO(io_stat); static DEVICE_ATTR_RO(mm_stat); static DEVICE_ATTR_RO(debug_stat); -static void zram_meta_free(struct zram_meta *meta, u64 disksize) +static void 
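Two smaller points in the same hunk: is_partial_io() now compiles to constant false when PAGE_SIZE is 4096, so the bounce-buffer paths vanish on most builds, and page_same_filled() compares every word against the first instead of chaining neighbour comparisons. The accessors above also make the slot layout explicit - size and flags share one word, and a flag bit doubles as the slot lock (see zram_slot_lock() below). The packing, restated from zram_get_obj_size()/zram_set_obj_size():

/*
 * zram_table_entry.value:
 *   bits [0, ZRAM_FLAG_SHIFT)    compressed object size
 *   bits [ZRAM_FLAG_SHIFT, ...)  flags, including ZRAM_ACCESS (used as
 *                                a bit spinlock) and ZRAM_SAME
 */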
zram_slot_lock(struct zram *zram, u32 index) +{ + bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); +} + +static void zram_slot_unlock(struct zram *zram, u32 index) +{ + bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); +} + +static bool zram_same_page_read(struct zram *zram, u32 index, + struct page *page, + unsigned int offset, unsigned int len) +{ + zram_slot_lock(zram, index); + if (unlikely(!zram_get_handle(zram, index) || + zram_test_flag(zram, index, ZRAM_SAME))) { + void *mem; + + zram_slot_unlock(zram, index); + mem = kmap_atomic(page); + zram_fill_page(mem + offset, len, + zram_get_element(zram, index)); + kunmap_atomic(mem); + return true; + } + zram_slot_unlock(zram, index); + + return false; +} + +static bool zram_same_page_write(struct zram *zram, u32 index, + struct page *page) +{ + unsigned long element; + void *mem = kmap_atomic(page); + + if (page_same_filled(mem, &element)) { + kunmap_atomic(mem); + /* Free memory associated with this sector now. */ + zram_slot_lock(zram, index); + zram_free_page(zram, index); + zram_set_flag(zram, index, ZRAM_SAME); + zram_set_element(zram, index, element); + zram_slot_unlock(zram, index); + + atomic64_inc(&zram->stats.same_pages); + return true; + } + kunmap_atomic(mem); + + return false; +} + +static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; size_t index; /* Free all pages that are still in this zram device */ - for (index = 0; index < num_pages; index++) { - unsigned long handle = meta->table[index].handle; - /* - * No memory is allocated for same element filled pages. - * Simply clear same page flag. - */ - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) - continue; - - zs_free(meta->mem_pool, handle); - } + for (index = 0; index < num_pages; index++) + zram_free_page(zram, index); - zs_destroy_pool(meta->mem_pool); - vfree(meta->table); - kfree(meta); + zs_destroy_pool(zram->mem_pool); + vfree(zram->table); } -static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) +static bool zram_meta_alloc(struct zram *zram, u64 disksize) { size_t num_pages; - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); - - if (!meta) - return NULL; num_pages = disksize >> PAGE_SHIFT; - meta->table = vzalloc(num_pages * sizeof(*meta->table)); - if (!meta->table) { - pr_err("Error allocating zram address table\n"); - goto out_error; - } + zram->table = vzalloc(num_pages * sizeof(*zram->table)); + if (!zram->table) + return false; - meta->mem_pool = zs_create_pool(pool_name); - if (!meta->mem_pool) { - pr_err("Error creating memory pool\n"); - goto out_error; + zram->mem_pool = zs_create_pool(zram->disk->disk_name); + if (!zram->mem_pool) { + vfree(zram->table); + return false; } - return meta; - -out_error: - vfree(meta->table); - kfree(meta); - return NULL; + return true; } /* @@ -476,16 +514,15 @@ out_error: */ static void zram_free_page(struct zram *zram, size_t index) { - struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; + unsigned long handle = zram_get_handle(zram, index); /* * No memory is allocated for same element filled pages. * Simply clear same page flag. 
*/ - if (zram_test_flag(meta, index, ZRAM_SAME)) { - zram_clear_flag(meta, index, ZRAM_SAME); - zram_clear_element(meta, index); + if (zram_test_flag(zram, index, ZRAM_SAME)) { + zram_clear_flag(zram, index, ZRAM_SAME); + zram_set_element(zram, index, 0); atomic64_dec(&zram->stats.same_pages); return; } @@ -493,179 +530,111 @@ static void zram_free_page(struct zram *zram, size_t index) if (!handle) return; - zs_free(meta->mem_pool, handle); + zs_free(zram->mem_pool, handle); - atomic64_sub(zram_get_obj_size(meta, index), + atomic64_sub(zram_get_obj_size(zram, index), &zram->stats.compr_data_size); atomic64_dec(&zram->stats.pages_stored); - meta->table[index].handle = 0; - zram_set_obj_size(meta, index, 0); + zram_set_handle(zram, index, 0); + zram_set_obj_size(zram, index, 0); } -static int zram_decompress_page(struct zram *zram, char *mem, u32 index) +static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) { - int ret = 0; - unsigned char *cmem; - struct zram_meta *meta = zram->meta; + int ret; unsigned long handle; unsigned int size; + void *src, *dst; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); - handle = meta->table[index].handle; - size = zram_get_obj_size(meta, index); - - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) { - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - zram_fill_page(mem, PAGE_SIZE, meta->table[index].element); + if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) return 0; - } - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); + zram_slot_lock(zram, index); + handle = zram_get_handle(zram, index); + size = zram_get_obj_size(zram, index); + + src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { - copy_page(mem, cmem); + dst = kmap_atomic(page); + memcpy(dst, src, PAGE_SIZE); + kunmap_atomic(dst); + ret = 0; } else { struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - ret = zcomp_decompress(zstrm, cmem, size, mem); + dst = kmap_atomic(page); + ret = zcomp_decompress(zstrm, src, size, dst); + kunmap_atomic(dst); zcomp_stream_put(zram->comp); } - zs_unmap_object(meta->mem_pool, handle); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zs_unmap_object(zram->mem_pool, handle); + zram_slot_unlock(zram, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret)) { + if (unlikely(ret)) pr_err("Decompression failed! 
err=%d, page=%u\n", ret, index); - return ret; - } - return 0; + return ret; } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) + u32 index, int offset) { int ret; struct page *page; - unsigned char *user_mem, *uncmem = NULL; - struct zram_meta *meta = zram->meta; - page = bvec->bv_page; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); - if (unlikely(!meta->table[index].handle) || - zram_test_flag(meta, index, ZRAM_SAME)) { - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - handle_same_page(bvec, meta->table[index].element); - return 0; + page = bvec->bv_page; + if (is_partial_io(bvec)) { + /* Use a temporary buffer to decompress the page */ + page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); + if (!page) + return -ENOMEM; } - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - if (is_partial_io(bvec)) - /* Use a temporary buffer to decompress the page */ - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); + ret = zram_decompress_page(zram, page, index); + if (unlikely(ret)) + goto out; - user_mem = kmap_atomic(page); - if (!is_partial_io(bvec)) - uncmem = user_mem; + if (is_partial_io(bvec)) { + void *dst = kmap_atomic(bvec->bv_page); + void *src = kmap_atomic(page); - if (!uncmem) { - pr_err("Unable to allocate temp memory\n"); - ret = -ENOMEM; - goto out_cleanup; + memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); + kunmap_atomic(src); + kunmap_atomic(dst); } - - ret = zram_decompress_page(zram, uncmem, index); - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret)) - goto out_cleanup; - +out: if (is_partial_io(bvec)) - memcpy(user_mem + bvec->bv_offset, uncmem + offset, - bvec->bv_len); + __free_page(page); - flush_dcache_page(page); - ret = 0; -out_cleanup: - kunmap_atomic(user_mem); - if (is_partial_io(bvec)) - kfree(uncmem); return ret; } -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset) +static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm, + struct page *page, + unsigned long *out_handle, unsigned int *out_comp_len) { - int ret = 0; - unsigned int clen; - unsigned long handle = 0; - struct page *page; - unsigned char *user_mem, *cmem, *src, *uncmem = NULL; - struct zram_meta *meta = zram->meta; - struct zcomp_strm *zstrm = NULL; + int ret; + unsigned int comp_len; + void *src; unsigned long alloced_pages; - unsigned long element; - - page = bvec->bv_page; - if (is_partial_io(bvec)) { - /* - * This is a partial IO. We need to read the full page - * before to write the changes. - */ - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); - if (!uncmem) { - ret = -ENOMEM; - goto out; - } - ret = zram_decompress_page(zram, uncmem, index); - if (ret) - goto out; - } + unsigned long handle = 0; compress_again: - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) { - memcpy(uncmem + offset, user_mem + bvec->bv_offset, - bvec->bv_len); - kunmap_atomic(user_mem); - user_mem = NULL; - } else { - uncmem = user_mem; - } - - if (page_same_filled(uncmem, &element)) { - if (user_mem) - kunmap_atomic(user_mem); - /* Free memory associated with this sector now. 
*/ - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); - zram_free_page(zram, index); - zram_set_flag(meta, index, ZRAM_SAME); - zram_set_element(meta, index, element); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); - - atomic64_inc(&zram->stats.same_pages); - ret = 0; - goto out; - } - - zstrm = zcomp_stream_get(zram->comp); - ret = zcomp_compress(zstrm, uncmem, &clen); - if (!is_partial_io(bvec)) { - kunmap_atomic(user_mem); - user_mem = NULL; - uncmem = NULL; - } + src = kmap_atomic(page); + ret = zcomp_compress(*zstrm, src, &comp_len); + kunmap_atomic(src); if (unlikely(ret)) { pr_err("Compression failed! err=%d\n", ret); - goto out; + if (handle) + zs_free(zram->mem_pool, handle); + return ret; } - src = zstrm->buffer; - if (unlikely(clen > max_zpage_size)) { - clen = PAGE_SIZE; - if (is_partial_io(bvec)) - src = uncmem; - } + if (unlikely(comp_len > max_zpage_size)) + comp_len = PAGE_SIZE; /* * handle allocation has 2 paths: @@ -681,71 +650,121 @@ compress_again: * from the slow path and handle has already been allocated. */ if (!handle) - handle = zs_malloc(meta->mem_pool, clen, + handle = zs_malloc(zram->mem_pool, comp_len, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | __GFP_HIGHMEM | __GFP_MOVABLE); if (!handle) { zcomp_stream_put(zram->comp); - zstrm = NULL; - atomic64_inc(&zram->stats.writestall); - - handle = zs_malloc(meta->mem_pool, clen, + handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); + *zstrm = zcomp_stream_get(zram->comp); if (handle) goto compress_again; - - pr_err("Error allocating memory for compressed page: %u, size=%u\n", - index, clen); - ret = -ENOMEM; - goto out; + return -ENOMEM; } - alloced_pages = zs_get_total_pages(meta->mem_pool); + alloced_pages = zs_get_total_pages(zram->mem_pool); update_used_max(zram, alloced_pages); if (zram->limit_pages && alloced_pages > zram->limit_pages) { - zs_free(meta->mem_pool, handle); - ret = -ENOMEM; - goto out; + zs_free(zram->mem_pool, handle); + return -ENOMEM; } - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); + *out_handle = handle; + *out_comp_len = comp_len; + return 0; +} + +static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) +{ + int ret; + unsigned long handle; + unsigned int comp_len; + void *src, *dst; + struct zcomp_strm *zstrm; + struct page *page = bvec->bv_page; - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { + if (zram_same_page_write(zram, index, page)) + return 0; + + zstrm = zcomp_stream_get(zram->comp); + ret = zram_compress(zram, &zstrm, page, &handle, &comp_len); + if (ret) { + zcomp_stream_put(zram->comp); + return ret; + } + + dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); + + src = zstrm->buffer; + if (comp_len == PAGE_SIZE) src = kmap_atomic(page); - copy_page(cmem, src); + memcpy(dst, src, comp_len); + if (comp_len == PAGE_SIZE) kunmap_atomic(src); - } else { - memcpy(cmem, src, clen); - } zcomp_stream_put(zram->comp); - zstrm = NULL; - zs_unmap_object(meta->mem_pool, handle); + zs_unmap_object(zram->mem_pool, handle); /* * Free memory associated with this sector * before overwriting unused sectors. 
*/ - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_slot_lock(zram, index); zram_free_page(zram, index); - - meta->table[index].handle = handle; - zram_set_obj_size(meta, index, clen); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_set_handle(zram, index, handle); + zram_set_obj_size(zram, index, comp_len); + zram_slot_unlock(zram, index); /* Update stats */ - atomic64_add(clen, &zram->stats.compr_data_size); + atomic64_add(comp_len, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.pages_stored); + return 0; +} + +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset) +{ + int ret; + struct page *page = NULL; + void *src; + struct bio_vec vec; + + vec = *bvec; + if (is_partial_io(bvec)) { + void *dst; + /* + * This is a partial IO. We need to read the full page + * before to write the changes. + */ + page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); + if (!page) + return -ENOMEM; + + ret = zram_decompress_page(zram, page, index); + if (ret) + goto out; + + src = kmap_atomic(bvec->bv_page); + dst = kmap_atomic(page); + memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); + kunmap_atomic(dst); + kunmap_atomic(src); + + vec.bv_page = page; + vec.bv_len = PAGE_SIZE; + vec.bv_offset = 0; + } + + ret = __zram_bvec_write(zram, &vec, index); out: - if (zstrm) - zcomp_stream_put(zram->comp); if (is_partial_io(bvec)) - kfree(uncmem); + __free_page(page); return ret; } @@ -758,7 +777,6 @@ static void zram_bio_discard(struct zram *zram, u32 index, int offset, struct bio *bio) { size_t n = bio->bi_iter.bi_size; - struct zram_meta *meta = zram->meta; /* * zram manages data in physical block size units. Because logical block @@ -779,9 +797,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, } while (n >= PAGE_SIZE) { - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_slot_lock(zram, index); zram_free_page(zram, index); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_slot_unlock(zram, index); atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; @@ -801,6 +819,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (!is_write) { atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset); + flush_dcache_page(bvec->bv_page); } else { atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset); @@ -840,34 +859,21 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) } bio_for_each_segment(bvec, bio, iter) { - int max_transfer_size = PAGE_SIZE - offset; - - if (bvec.bv_len > max_transfer_size) { - /* - * zram_bvec_rw() can only make operation on a single - * zram page. Split the bio vector. 
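The old two-piece split here assumed a bio_vec never spans more than two pages; the replacement just below chunks each segment page by page, which is also what lets the patch drop blk_queue_split() and the max_sectors/chunk_sectors clamps further down. A distilled version of the loop, where process_chunk() is a hypothetical stand-in for zram_bvec_rw():

#include <linux/bio.h>
#include <linux/kernel.h>

static int process_chunk(struct bio_vec *bv, u32 index, int offset); /* hypothetical */

static int chunk_bvec_sketch(struct bio_vec *bvec, u32 index, int offset)
{
	struct bio_vec bv = *bvec;
	unsigned int unwritten = bvec->bv_len;

	do {
		/* clamp each step to the space left in the current page */
		bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten);
		if (process_chunk(&bv, index, offset) < 0)
			return -EIO;

		bv.bv_offset += bv.bv_len;
		unwritten -= bv.bv_len;
		index += (offset + bv.bv_len) / PAGE_SIZE;
		offset = (offset + bv.bv_len) % PAGE_SIZE;
	} while (unwritten);

	return 0;
}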
- */ - struct bio_vec bv; - - bv.bv_page = bvec.bv_page; - bv.bv_len = max_transfer_size; - bv.bv_offset = bvec.bv_offset; + struct bio_vec bv = bvec; + unsigned int unwritten = bvec.bv_len; + do { + bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, + unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio))) < 0) + op_is_write(bio_op(bio))) < 0) goto out; - bv.bv_len = bvec.bv_len - max_transfer_size; - bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index + 1, 0, - op_is_write(bio_op(bio))) < 0) - goto out; - } else - if (zram_bvec_rw(zram, &bvec, index, offset, - op_is_write(bio_op(bio))) < 0) - goto out; + bv.bv_offset += bv.bv_len; + unwritten -= bv.bv_len; - update_position(&index, &offset, &bvec); + update_position(&index, &offset, &bv); + } while (unwritten); } bio_endio(bio); @@ -884,8 +890,6 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) { struct zram *zram = queue->queuedata; - blk_queue_split(queue, &bio, queue->bio_split); - if (!valid_io_request(zram, bio->bi_iter.bi_sector, bio->bi_iter.bi_size)) { atomic64_inc(&zram->stats.invalid_io); @@ -904,14 +908,12 @@ static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; - struct zram_meta *meta; zram = bdev->bd_disk->private_data; - meta = zram->meta; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_slot_lock(zram, index); zram_free_page(zram, index); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_slot_unlock(zram, index); atomic64_inc(&zram->stats.notify_free); } @@ -932,7 +934,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, } index = sector >> SECTORS_PER_PAGE_SHIFT; - offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; + offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; bv.bv_page = page; bv.bv_len = PAGE_SIZE; @@ -955,7 +957,6 @@ out: static void zram_reset_device(struct zram *zram) { - struct zram_meta *meta; struct zcomp *comp; u64 disksize; @@ -968,12 +969,8 @@ static void zram_reset_device(struct zram *zram) return; } - meta = zram->meta; comp = zram->comp; disksize = zram->disksize; - - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; set_capacity(zram->disk, 0); @@ -981,7 +978,8 @@ static void zram_reset_device(struct zram *zram) up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ - zram_meta_free(meta, disksize); + zram_meta_free(zram, disksize); + memset(&zram->stats, 0, sizeof(zram->stats)); zcomp_destroy(comp); } @@ -990,7 +988,6 @@ static ssize_t disksize_store(struct device *dev, { u64 disksize; struct zcomp *comp; - struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); int err; @@ -998,10 +995,18 @@ static ssize_t disksize_store(struct device *dev, if (!disksize) return -EINVAL; + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Cannot change disksize for initialized device\n"); + err = -EBUSY; + goto out_unlock; + } + disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(zram->disk->disk_name, disksize); - if (!meta) - return -ENOMEM; + if (!zram_meta_alloc(zram, disksize)) { + err = -ENOMEM; + goto out_unlock; + } comp = zcomp_create(zram->compressor); if (IS_ERR(comp)) { @@ -1011,14 +1016,6 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - down_write(&zram->init_lock); - if (init_done(zram)) { - pr_info("Cannot change disksize for initialized device\n"); - err = -EBUSY; - goto 
out_destroy_comp; - } - - zram->meta = meta; zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); @@ -1027,11 +1024,10 @@ static ssize_t disksize_store(struct device *dev, return len; -out_destroy_comp: - up_write(&zram->init_lock); - zcomp_destroy(comp); out_free_meta: - zram_meta_free(meta, disksize); + zram_meta_free(zram, disksize); +out_unlock: + up_write(&zram->init_lock); return err; } @@ -1193,8 +1189,6 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; - zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; - zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); @@ -1219,7 +1213,6 @@ static int zram_add(void) goto out_free_disk; } strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); - zram->meta = NULL; pr_info("Added device: %s\n", zram->disk->disk_name); return device_id; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index caeff51..e34e44d 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -92,13 +92,9 @@ struct zram_stats { atomic64_t writestall; /* no. of write slow paths */ }; -struct zram_meta { +struct zram { struct zram_table_entry *table; struct zs_pool *mem_pool; -}; - -struct zram { - struct zram_meta *meta; struct zcomp *comp; struct gendisk *disk; /* Prevent concurrent execution of device init */ |
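One-line fix in zram_rw_page() above deserves a closing note: '<<' binds tighter than '&', so the unparenthesized expression masked the sector against a shifted mask instead of shifting the masked sector. A standalone demonstration in userspace C (values assume 512-byte sectors and 4K pages, i.e. SECTORS_PER_PAGE = 8 and SECTOR_SHIFT = 9):

#include <stdio.h>

int main(void)
{
	unsigned long sector = 9;

	/* buggy:  sector & ((8 - 1) << 9)  ->  9 & 3584  ->  0   */
	printf("buggy: %lu\n", sector & (8 - 1) << 9);

	/* fixed: (sector & (8 - 1)) << 9  ->  1 << 9     ->  512 */
	printf("fixed: %lu\n", (sector & (8 - 1)) << 9);

	return 0;
}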