From 6fca6a611c27f1f0d90fbe1cc3c229dbf8c09e48 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 28 May 2014 08:08:02 -0600
Subject: blk-mq: add helper to insert requests from irq context

Both the cache flush state machine and the SCSI midlayer want to submit
requests from irq context, and the current per-request requeue_work
unfortunately causes corruption due to sharing with the csd field for
flushes.  Replace them with a per-request_queue list of requests to be
requeued.

Based on an earlier test by Ming Lei.

Signed-off-by: Christoph Hellwig
Reported-by: Ming Lei
Tested-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-flush.c      | 16 ++++---------
 block/blk-mq.c         | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/blk-mq.h |  2 ++
 include/linux/blkdev.h |  5 +++-
 4 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ec7a224d67335..ef608b35d9be2 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,21 +130,13 @@ static void blk_flush_restore_request(struct request *rq)
        blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_run(struct work_struct *work)
-{
-       struct request *rq;
-
-       rq = container_of(work, struct request, requeue_work);
-
-       memset(&rq->csd, 0, sizeof(rq->csd));
-       blk_mq_insert_request(rq, false, true, false);
-}
-
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
        if (rq->q->mq_ops) {
-               INIT_WORK(&rq->requeue_work, mq_flush_run);
-               kblockd_schedule_work(&rq->requeue_work);
+               struct request_queue *q = rq->q;
+
+               blk_mq_add_to_requeue_list(rq, add_front);
+               blk_mq_kick_requeue_list(q);
                return false;
        } else {
                if (add_front)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 010b878d53b38..67066ecc79c06 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -516,10 +516,68 @@ void blk_mq_requeue_request(struct request *rq)
        blk_clear_rq_complete(rq);
 
        BUG_ON(blk_queued_rq(rq));
-       blk_mq_insert_request(rq, true, true, false);
+       blk_mq_add_to_requeue_list(rq, true);
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
+static void blk_mq_requeue_work(struct work_struct *work)
+{
+       struct request_queue *q =
+               container_of(work, struct request_queue, requeue_work);
+       LIST_HEAD(rq_list);
+       struct request *rq, *next;
+       unsigned long flags;
+
+       spin_lock_irqsave(&q->requeue_lock, flags);
+       list_splice_init(&q->requeue_list, &rq_list);
+       spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+       list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+               if (!(rq->cmd_flags & REQ_SOFTBARRIER))
+                       continue;
+
+               rq->cmd_flags &= ~REQ_SOFTBARRIER;
+               list_del_init(&rq->queuelist);
+               blk_mq_insert_request(rq, true, false, false);
+       }
+
+       while (!list_empty(&rq_list)) {
+               rq = list_entry(rq_list.next, struct request, queuelist);
+               list_del_init(&rq->queuelist);
+               blk_mq_insert_request(rq, false, false, false);
+       }
+
+       blk_mq_run_queues(q, false);
+}
+
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
+{
+       struct request_queue *q = rq->q;
+       unsigned long flags;
+
+       /*
+        * We abuse this flag that is otherwise used by the I/O scheduler to
+        * request head insertion from the workqueue.
+        */
+       BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
+
+       spin_lock_irqsave(&q->requeue_lock, flags);
+       if (at_head) {
+               rq->cmd_flags |= REQ_SOFTBARRIER;
+               list_add(&rq->queuelist, &q->requeue_list);
+       } else {
+               list_add_tail(&rq->queuelist, &q->requeue_list);
+       }
+       spin_unlock_irqrestore(&q->requeue_lock, flags);
+}
+EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
+
+void blk_mq_kick_requeue_list(struct request_queue *q)
+{
+       kblockd_schedule_work(&q->requeue_work);
+}
+EXPORT_SYMBOL(blk_mq_kick_requeue_list);
+
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
        return tags->rqs[tag];
@@ -1812,6 +1870,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
        q->sg_reserved_size = INT_MAX;
 
+       INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+       INIT_LIST_HEAD(&q->requeue_list);
+       spin_lock_init(&q->requeue_lock);
+
        if (q->nr_hw_queues > 1)
                blk_queue_make_request(q, blk_mq_make_request);
        else
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5b171fbe95c5e..b9a74a386dbcd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -172,6 +172,8 @@ void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6bc011a09e828..913f1c2d3be0b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -99,7 +99,6 @@ struct request {
        struct list_head queuelist;
        union {
                struct call_single_data csd;
-               struct work_struct requeue_work;
                unsigned long fifo_time;
        };
 
@@ -463,6 +462,10 @@ struct request_queue {
        struct request          *flush_rq;
        spinlock_t              mq_flush_lock;
 
+       struct list_head        requeue_list;
+       spinlock_t              requeue_lock;
+       struct work_struct      requeue_work;
+
        struct mutex            sysfs_lock;
 
        int                     bypass_depth;
-- 
cgit v1.2.3
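
Usage note: the pattern the new helpers enable is the one blk_flush_queue_rq() switches to
above: add the request to the per-queue requeue list, then kick the list so kblockd re-inserts
the requests and runs the queue from process context. Below is a minimal sketch of a
hypothetical irq-context caller; example_requeue_from_irq() and its surrounding driver are
illustrative only and not part of this patch.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/*
 * Illustrative only: requeue a request from irq context, e.g. from a
 * completion handler that decides the command must be retried.  This
 * mirrors what blk_flush_queue_rq() does after this patch.
 */
static void example_requeue_from_irq(struct request *rq, bool at_head)
{
        struct request_queue *q = rq->q;

        /* Only takes q->requeue_lock with irqs saved, so safe in irq context. */
        blk_mq_add_to_requeue_list(rq, at_head);

        /* Defers re-insertion to kblockd, which runs the queue later. */
        blk_mq_kick_requeue_list(q);
}

REQ_SOFTBARRIER is only borrowed to remember, across the irq-to-workqueue handoff, which
requests asked for head insertion; blk_mq_requeue_work() clears it again before re-inserting.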