diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 51 | ||||
-rw-r--r-- | block/blk-lib.c | 104 | ||||
-rw-r--r-- | block/blk-merge.c | 53 | ||||
-rw-r--r-- | block/blk-settings.c | 16 | ||||
-rw-r--r-- | block/blk-sysfs.c | 44 | ||||
-rw-r--r-- | block/blk-tag.c | 6 | ||||
-rw-r--r-- | block/blk.h | 5 | ||||
-rw-r--r-- | block/elevator.c | 6 | ||||
-rw-r--r-- | block/ioctl.c | 27 |
9 files changed, 236 insertions, 76 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index d2da641..a33870b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) /* * A queue starts its life with bypass turned on to avoid * unnecessary bypass on/off overhead and nasty surprises during - * init. The initial bypass will be finished at the end of - * blk_init_allocated_queue(). + * init. The initial bypass will be finished when the queue is + * registered by blk_register_queue(). */ q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); @@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; - q->queue_flags = QUEUE_FLAG_DEFAULT; + q->queue_flags |= QUEUE_FLAG_DEFAULT; /* Override internal queue lock with supplied lock pointer */ if (lock) @@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) return NULL; - - blk_queue_congestion_threshold(q); - - /* all done, end the initial bypass */ - blk_queue_bypass_end(q); return q; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { + if (likely(bio_is_rw(bio) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { + ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { + err = -EOPNOTSUPP; + goto end_io; + } + + if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { err = -EOPNOTSUPP; goto end_io; } @@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request); */ void submit_bio(int rw, struct bio *bio) { - int count = bio_sectors(bio); - bio->bi_rw |= rw; /* * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. */ - if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { + if (bio_has_data(bio)) { + unsigned int count; + + if (unlikely(rw & REQ_WRITE_SAME)) + count = bdev_logical_block_size(bio->bi_bdev) >> 9; + else + count = bio_sectors(bio); + if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->cmd_flags & REQ_DISCARD) + if (!rq_mergeable(rq)) return 0; - if (blk_rq_sectors(rq) > queue_max_sectors(q) || - blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) + if (req->cmd_type == REQ_TYPE_FS) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ @@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, blk_rq_init(NULL, rq); __rq_for_each_bio(bio_src, rq_src) { - bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); + bio = bio_clone_bioset(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; - __bio_clone(bio, bio_src); - - if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask, bs)) - goto free_and_out; - if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; @@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, free_and_out: if (bio) - bio_free(bio, bs); + bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761..9373b58 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, EXPORT_SYMBOL(blkdev_issue_discard); /** + * blkdev_issue_write_same - queue a write same operation + * @bdev: target blockdev + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * @page: page containing data to write + * + * Description: + * Issue a write same request for the sectors in question. + */ +int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, + struct page *page) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_write_same_sectors; + struct bio_batch bb; + struct bio *bio; + int ret = 0; + + if (!q) + return -ENXIO; + + max_write_same_sectors = q->limits.max_write_same_sectors; + + if (max_write_same_sectors == 0) + return -EOPNOTSUPP; + + atomic_set(&bb.done, 1); + bb.flags = 1 << BIO_UPTODATE; + bb.wait = &wait; + + while (nr_sects) { + bio = bio_alloc(gfp_mask, 1); + if (!bio) { + ret = -ENOMEM; + break; + } + + bio->bi_sector = sector; + bio->bi_end_io = bio_batch_end_io; + bio->bi_bdev = bdev; + bio->bi_private = &bb; + bio->bi_vcnt = 1; + bio->bi_io_vec->bv_page = page; + bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + + if (nr_sects > max_write_same_sectors) { + bio->bi_size = max_write_same_sectors << 9; + nr_sects -= max_write_same_sectors; + sector += max_write_same_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + + atomic_inc(&bb.done); + submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); + } + + /* Wait for bios in-flight */ + if (!atomic_dec_and_test(&bb.done)) + wait_for_completion(&wait); + + if (!test_bit(BIO_UPTODATE, &bb.flags)) + ret = -ENOTSUPP; + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_write_same); + +/** * blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue * @sector: start sector @@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard); * Generate and issue number of bios with zerofiled pages. */ -int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, +int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask) { int ret; @@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, return ret; } + +/** + * blkdev_issue_zeroout - zero-fill a block range + * @bdev: blockdev to write + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * + * Description: + * Generate and issue number of bios with zerofiled pages. + */ + +int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask) +{ + if (bdev_write_same(bdev)) { + unsigned char bdn[BDEVNAME_SIZE]; + + if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, + ZERO_PAGE(0))) + return 0; + + bdevname(bdev, bdn); + pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); + } + + return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); +} EXPORT_SYMBOL(blkdev_issue_zeroout); diff --git a/block/blk-merge.c b/block/blk-merge.c index e76279e..936a110 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -275,14 +275,8 @@ no_merge: int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { - unsigned short max_sectors; - - if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) - max_sectors = queue_max_hw_sectors(q); - else - max_sectors = queue_max_sectors(q); - - - if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { + if (blk_rq_sectors(req) + bio_sectors(bio) > + blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, /* * Will it become too large? */ - if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) + if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > + blk_rq_get_max_sectors(req)) return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; @@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (!rq_mergeable(req) || !rq_mergeable(next)) return 0; - /* - * Don't merge file system requests and discard requests - */ - if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) - return 0; - - /* - * Don't merge discard requests and secure discard requests - */ - if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) + if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) return 0; /* @@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; + if (req->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(req->bio, next->bio)) + return 0; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, bool blk_rq_merge_ok(struct request *rq, struct bio *bio) { - if (!rq_mergeable(rq)) + if (!rq_mergeable(rq) || !bio_mergeable(bio)) return false; - /* don't merge file system requests and discard requests */ - if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) - return false; - - /* don't merge discard requests and secure discard requests */ - if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) + if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) return false; /* different data direction or already started, don't merge */ @@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_integrity(bio) != blk_integrity_rq(rq)) return false; + /* must be using the same buffer */ + if (rq->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(rq->bio, bio)) + return false; + return true; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a678..779bb76 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_segments = USHRT_MAX; lim->max_hw_sectors = UINT_MAX; lim->max_sectors = UINT_MAX; + lim->max_write_same_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, EXPORT_SYMBOL(blk_queue_max_discard_sectors); /** + * blk_queue_max_write_same_sectors - set max sectors for a single write same + * @q: the request queue for the device + * @max_write_same_sectors: maximum number of sectors to write per command + **/ +void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors) +{ + q->limits.max_write_same_sectors = max_write_same_sectors; +} +EXPORT_SYMBOL(blk_queue_max_write_same_sectors); + +/** * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + t->max_write_same_sectors = min(t->max_write_same_sectors, + b->max_write_same_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9628b29..ce62046 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page) static ssize_t queue_var_store(unsigned long *var, const char *page, size_t count) { - char *p = (char *) page; + int err; + unsigned long v; + + err = strict_strtoul(page, 10, &v); + if (err || v > UINT_MAX) + return -EINVAL; + + *var = v; - *var = simple_strtoul(p, &p, 10); return count; } @@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) return -EINVAL; ret = queue_var_store(&nr, page, count); + if (ret < 0) + return ret; + if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; @@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) unsigned long ra_kb; ssize_t ret = queue_var_store(&ra_kb, page, count); + if (ret < 0) + return ret; + q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); return ret; @@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(queue_discard_zeroes_data(q), page); } +static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_write_same_sectors << 9); +} + + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) page_kb = 1 << (PAGE_CACHE_SHIFT - 10); ssize_t ret = queue_var_store(&max_sectors_kb, page, count); + if (ret < 0) + return ret; + if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) return -EINVAL; @@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, unsigned long nm; ssize_t ret = queue_var_store(&nm, page, count); + if (ret < 0) + return ret; + spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_NOMERGES, q); queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); @@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) unsigned long val; ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + spin_lock_irq(q->queue_lock); if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); @@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { .show = queue_discard_zeroes_data_show, }; +static struct queue_sysfs_entry queue_write_same_max_entry = { + .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, + .show = queue_write_same_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_show_nonrot, @@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = { &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, @@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk) if (WARN_ON(!q)) return -ENXIO; + /* + * Initialization must be complete by now. Finish the initial + * bypass from queue allocation. + */ + blk_queue_bypass_end(q); + ret = blk_trace_init_sysfs(dev); if (ret) return ret; diff --git a/block/blk-tag.c b/block/blk-tag.c index 4af6f5c..cc345e1 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth, tags = __blk_queue_init_tags(q, depth); if (!tags) - goto fail; + return -ENOMEM; + } else if (q->queue_tags) { rc = blk_queue_resize_tags(q, depth); if (rc) @@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth, queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); INIT_LIST_HEAD(&q->tag_busy_list); return 0; -fail: - kfree(tags); - return -ENOMEM; } EXPORT_SYMBOL(blk_queue_init_tags); diff --git a/block/blk.h b/block/blk.h index 2a0ea32..ca51543 100644 --- a/block/blk.h +++ b/block/blk.h @@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started, and - * c) it's a file system request or a discard request + * c) it's a file system request */ static inline int blk_do_io_stat(struct request *rq) { return rq->rq_disk && (rq->cmd_flags & REQ_IO_STAT) && - (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)); + (rq->cmd_type == REQ_TYPE_FS); } /* diff --git a/block/elevator.c b/block/elevator.c index 6a55d41..9b1d42b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ - if (rq->cmd_type == REQ_TYPE_FS || - (rq->cmd_flags & REQ_DISCARD)) { + if (rq->cmd_type == REQ_TYPE_FS) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) if (elv_attempt_insert_merge(q, rq)) break; case ELEVATOR_INSERT_SORT: - BUG_ON(rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_DISCARD)); + BUG_ON(rq->cmd_type != REQ_TYPE_FS); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { diff --git a/block/ioctl.c b/block/ioctl.c index 4a85096..a31d91d9b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } +static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, + uint64_t len) +{ + if (start & 511) + return -EINVAL; + if (len & 511) + return -EINVAL; + start >>= 9; + len >>= 9; + + if (start + len > (i_size_read(bdev->bd_inode) >> 9)) + return -EINVAL; + + return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return blk_ioctl_discard(bdev, range[0], range[1], cmd == BLKSECDISCARD); } + case BLKZEROOUT: { + uint64_t range[2]; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + return blk_ioctl_zeroout(bdev, range[0], range[1]); + } case HDIO_GETGEO: { struct hd_geometry geo; |