From 74d46992e0d9dee7f1f376de0d56d31614c8a17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:32 +0200 Subject: block: replace bi_bdev with a gendisk pointer and partitions index This way we don't need a block_device structure to submit I/O. The block_device has different life time rules from the gendisk and request_queue and is usually only available when the block device node is open. Other callers need to explicitly create one (e.g. the lightnvm passthrough code, or the new nvme multipathing code). For the actual I/O path all that we need is the gendisk, which exists once per block device. But given that the block layer also does partition remapping we additionally need a partition index, which is used for said remapping in generic_make_request. Note that all the block drivers generally want request_queue or sometimes the gendisk, so this removes a layer of indirection all over the stack. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 18 ++++----- block/bio.c | 10 ++--- block/blk-core.c | 100 +++++++++++++++++++++++++------------------------- block/blk-flush.c | 2 +- block/blk-lib.c | 8 ++-- block/blk-merge.c | 2 +- block/blk-zoned.c | 4 +- 7 files changed, 70 insertions(+), 74 deletions(-) (limited to 'block') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5fa9a740fd99a..fc71e61728696 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -146,7 +146,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, iv = bip->bip_vec + bip->bip_vcnt; if (bip->bip_vcnt && - bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), + bvec_gap_to_prev(bio->bi_disk->queue, &bip->bip_vec[bip->bip_vcnt - 1], offset)) return 0; @@ -190,7 +190,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, static blk_status_t bio_integrity_process(struct bio *bio, struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) { - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); struct blk_integrity_iter iter; struct bvec_iter bviter; struct bio_vec bv; @@ -199,7 +199,7 @@ static blk_status_t bio_integrity_process(struct bio *bio, void *prot_buf = page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; + iter.disk_name = bio->bi_disk->disk_name; iter.interval = 1 << bi->interval_exp; iter.seed = proc_iter->bi_sector; iter.prot_buf = prot_buf; @@ -236,8 +236,8 @@ static blk_status_t bio_integrity_process(struct bio *bio, bool bio_integrity_prep(struct bio *bio) { struct bio_integrity_payload *bip; - struct blk_integrity *bi; - struct request_queue *q; + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); + struct request_queue *q = bio->bi_disk->queue; void *buf; unsigned long start, end; unsigned int len, nr_pages; @@ -245,11 +245,9 @@ bool bio_integrity_prep(struct bio *bio) unsigned int intervals; blk_status_t status; - bi = bdev_get_integrity(bio->bi_bdev); if (!bi) return true; - q = bdev_get_queue(bio->bi_bdev); if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) return true; @@ -354,7 +352,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); struct bvec_iter iter = bio->bi_iter; /* @@ -411,7 +409,7 @@ bool __bio_integrity_endio(struct bio *bio) void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); bip->bip_iter.bi_sector += bytes_done >> 9; @@ -428,7 +426,7 @@ EXPORT_SYMBOL(bio_integrity_advance); void bio_integrity_trim(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); } diff --git a/block/bio.c b/block/bio.c index ecd1a9c7a3016..6745759028da5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -593,10 +593,10 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* - * most users will be overriding ->bi_bdev with a new target, + * most users will be overriding ->bi_disk with a new target, * so we don't set nor calculate new physical/hw segment counts here */ - bio->bi_bdev = bio_src->bi_bdev; + bio->bi_disk = bio_src->bi_disk; bio_set_flag(bio, BIO_CLONED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; @@ -681,7 +681,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); if (!bio) return NULL; - bio->bi_bdev = bio_src->bi_bdev; + bio->bi_disk = bio_src->bi_disk; bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; @@ -1830,8 +1830,8 @@ again: goto again; } - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, + if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { + trace_block_bio_complete(bio->bi_disk->queue, bio, blk_status_to_errno(bio->bi_status)); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/blk-core.c b/block/blk-core.c index d579501f24ba7..fc1af9097dff8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1910,40 +1910,15 @@ out_unlock: return BLK_QC_T_NONE; } -/* - * If bio->bi_dev is a partition, remap the location - */ -static inline void blk_partition_remap(struct bio *bio) -{ - struct block_device *bdev = bio->bi_bdev; - - /* - * Zone reset does not include bi_size so bio_sectors() is always 0. - * Include a test for the reset op code and perform the remap if needed. - */ - if (bdev != bdev->bd_contains && - (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)) { - struct hd_struct *p = bdev->bd_part; - - bio->bi_iter.bi_sector += p->start_sect; - bio->bi_bdev = bdev->bd_contains; - - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, - bio->bi_iter.bi_sector - p->start_sect); - } -} - static void handle_bad_sector(struct bio *bio) { char b[BDEVNAME_SIZE]; printk(KERN_INFO "attempt to access beyond end of device\n"); printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", - bdevname(bio->bi_bdev, b), - bio->bi_opf, + bio_devname(bio, b), bio->bi_opf, (unsigned long long)bio_end_sector(bio), - (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); + (long long)get_capacity(bio->bi_disk)); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -1981,6 +1956,38 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ +/* + * Remap block n of partition p to block n+start(p) of the disk. + */ +static inline int blk_partition_remap(struct bio *bio) +{ + struct hd_struct *p; + int ret = 0; + + /* + * Zone reset does not include bi_size so bio_sectors() is always 0. + * Include a test for the reset op code and perform the remap if needed. + */ + if (!bio->bi_partno || + (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)) + return 0; + + rcu_read_lock(); + p = __disk_get_part(bio->bi_disk, bio->bi_partno); + if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) { + bio->bi_iter.bi_sector += p->start_sect; + bio->bi_partno = 0; + trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector - p->start_sect); + } else { + printk("%s: fail for partition %d\n", __func__, bio->bi_partno); + ret = -EIO; + } + rcu_read_unlock(); + + return ret; +} + /* * Check whether this bio extends beyond the end of the device. */ @@ -1992,7 +1999,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; /* Test device or partition size, when known. */ - maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; + maxsector = get_capacity(bio->bi_disk); if (maxsector) { sector_t sector = bio->bi_iter.bi_sector; @@ -2017,20 +2024,18 @@ generic_make_request_checks(struct bio *bio) int nr_sectors = bio_sectors(bio); blk_status_t status = BLK_STS_IOERR; char b[BDEVNAME_SIZE]; - struct hd_struct *part; might_sleep(); if (bio_check_eod(bio, nr_sectors)) goto end_io; - q = bdev_get_queue(bio->bi_bdev); + q = bio->bi_disk->queue; if (unlikely(!q)) { printk(KERN_ERR "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", - bdevname(bio->bi_bdev, b), - (long long) bio->bi_iter.bi_sector); + bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); goto end_io; } @@ -2042,17 +2047,11 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) goto not_supported; - part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_iter.bi_size) || - should_fail_request(&part_to_disk(part)->part0, - bio->bi_iter.bi_size)) + if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) goto end_io; - /* - * If this device has partitions, remap block n - * of partition p to block n+start(p) of the disk. - */ - blk_partition_remap(bio); + if (blk_partition_remap(bio)) + goto end_io; if (bio_check_eod(bio, nr_sectors)) goto end_io; @@ -2081,16 +2080,16 @@ generic_make_request_checks(struct bio *bio) goto not_supported; break; case REQ_OP_WRITE_SAME: - if (!bdev_write_same(bio->bi_bdev)) + if (!q->limits.max_write_same_sectors) goto not_supported; break; case REQ_OP_ZONE_REPORT: case REQ_OP_ZONE_RESET: - if (!bdev_is_zoned(bio->bi_bdev)) + if (!blk_queue_is_zoned(q)) goto not_supported; break; case REQ_OP_WRITE_ZEROES: - if (!bdev_write_zeroes_sectors(bio->bi_bdev)) + if (!q->limits.max_write_zeroes_sectors) goto not_supported; break; default: @@ -2197,7 +2196,7 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; do { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct request_queue *q = bio->bi_disk->queue; if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) { struct bio_list lower, same; @@ -2215,7 +2214,7 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&lower); bio_list_init(&same); while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) - if (q == bdev_get_queue(bio->bi_bdev)) + if (q == bio->bi_disk->queue) bio_list_add(&same, bio); else bio_list_add(&lower, bio); @@ -2258,7 +2257,7 @@ blk_qc_t submit_bio(struct bio *bio) unsigned int count; if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) - count = bdev_logical_block_size(bio->bi_bdev) >> 9; + count = queue_logical_block_size(bio->bi_disk->queue); else count = bio_sectors(bio); @@ -2275,8 +2274,7 @@ blk_qc_t submit_bio(struct bio *bio) current->comm, task_pid_nr(current), op_is_write(bio_op(bio)) ? "WRITE" : "READ", (unsigned long long)bio->bi_iter.bi_sector, - bdevname(bio->bi_bdev, b), - count); + bio_devname(bio, b), count); } } @@ -3049,8 +3047,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; - if (bio->bi_bdev) - rq->rq_disk = bio->bi_bdev->bd_disk; + if (bio->bi_disk) + rq->rq_disk = bio->bi_disk; } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE diff --git a/block/blk-flush.c b/block/blk-flush.c index ed5fe322abba5..83b7d5b41c790 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -525,7 +525,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); diff --git a/block/blk-lib.c b/block/blk-lib.c index 3fe0aec905972..e01adb5145b3a 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -77,7 +77,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; @@ -168,7 +168,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, while (nr_sects) { bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; @@ -241,7 +241,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, while (nr_sects) { bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE_ZEROES; if (flags & BLKDEV_ZERO_NOUNMAP) bio->bi_opf |= REQ_NOUNMAP; @@ -323,7 +323,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { diff --git a/block/blk-merge.c b/block/blk-merge.c index 05f116bfb99da..aa524cad5bea3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -786,7 +786,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; /* must be same device and not a special request */ - if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) + if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq)) return false; /* only merge integrity protected bio into ditto rq */ diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 3bd15d8095b10..ff57fb51b3380 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -116,7 +116,7 @@ int blkdev_report_zones(struct block_device *bdev, if (!bio) return -ENOMEM; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blk_zone_start(q, sector); bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); @@ -234,7 +234,7 @@ int blkdev_reset_zones(struct block_device *bdev, bio = bio_alloc(gfp_mask, 0); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); ret = submit_bio_wait(bio); -- cgit v1.2.3