blk-mq: merge bio into sw queue before plugging
author Ming Lei <ming.lei@redhat.com>
Fri, 26 May 2017 11:53:19 +0000 (19:53 +0800)
committer Jens Axboe <axboe@fb.com>
Fri, 26 May 2017 20:12:03 +0000 (14:12 -0600)
Before blk-mq was introduced, I/O was merged in the elevator before
being put onto the plug list, but blk-mq changed that order and makes
merging into the sw queue basically impossible. As a result, sequential
I/O throughput is observed to drop by about 10%~20% on virtio-blk in
the test[1] when mq-deadline isn't used.

This patch moves per-sw-queue bio merging to before plugging, as
blk_queue_bio() does, which fixes the performance regression in this
situation.
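
In other words, in blk_mq_make_request() the sw-queue merge attempt now
sits right after the I/O scheduler merge attempt and before a request
is allocated; a simplified sketch of the resulting flow (not the full
code, see the diff below):

	if (blk_mq_sched_bio_merge(q, bio))
		return BLK_QC_T_NONE;	/* merged via I/O scheduler */

	if (blk_mq_merge_bio(q, bio))
		return BLK_QC_T_NONE;	/* merged into current sw queue */

	/* otherwise allocate a request, then plug or dispatch as before */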

[1]. test script:
sudo fio --direct=1 --size=128G --bsrange=4k-4k --runtime=40 --numjobs=16 --ioengine=libaio --iodepth=64 --group_reporting=1 --filename=/dev/vdb --name=virtio_blk-test-$RW --rw=$RW --output-format=json

RW=read or write

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
block/blk-mq.c

index f2224ffd225da8acb9b4775a19125f015cc6ab0a..fd8244cf50a433f0c4080c7bc5480173f323ee2a 100644
@@ -1427,30 +1427,30 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
                !blk_queue_nomerges(hctx->queue);
 }
 
-static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
-                                        struct blk_mq_ctx *ctx,
-                                        struct request *rq, struct bio *bio)
+/* attempt to merge bio into current sw queue */
+static inline bool blk_mq_merge_bio(struct request_queue *q, struct bio *bio)
 {
-       if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
-               blk_mq_bio_to_request(rq, bio);
-               spin_lock(&ctx->lock);
-insert_rq:
-               __blk_mq_insert_request(hctx, rq, false);
-               spin_unlock(&ctx->lock);
-               return false;
-       } else {
-               struct request_queue *q = hctx->queue;
+       bool ret = false;
+       struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
+       if (hctx_allow_merges(hctx) && bio_mergeable(bio)) {
                spin_lock(&ctx->lock);
-               if (!blk_mq_attempt_merge(q, ctx, bio)) {
-                       blk_mq_bio_to_request(rq, bio);
-                       goto insert_rq;
-               }
-
+               ret = blk_mq_attempt_merge(q, ctx, bio);
                spin_unlock(&ctx->lock);
-               __blk_mq_finish_request(hctx, ctx, rq);
-               return true;
        }
+
+       blk_mq_put_ctx(ctx);
+       return ret;
+}
+
+static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
+                                  struct blk_mq_ctx *ctx,
+                                  struct request *rq)
+{
+       spin_lock(&ctx->lock);
+       __blk_mq_insert_request(hctx, rq, false);
+       spin_unlock(&ctx->lock);
 }
 
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1549,6 +1549,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
        if (blk_mq_sched_bio_merge(q, bio))
                return BLK_QC_T_NONE;
 
+       if (blk_mq_merge_bio(q, bio))
+               return BLK_QC_T_NONE;
+
        wb_acct = wbt_wait(q->rq_wb, bio, NULL);
 
        trace_block_getrq(q, bio, bio->bi_opf);
@@ -1630,11 +1633,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                blk_mq_put_ctx(data.ctx);
                blk_mq_bio_to_request(rq, bio);
                blk_mq_sched_insert_request(rq, false, true, true, true);
-       } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
+       } else {
                blk_mq_put_ctx(data.ctx);
+               blk_mq_bio_to_request(rq, bio);
+               blk_mq_queue_io(data.hctx, data.ctx, rq);
                blk_mq_run_hw_queue(data.hctx, true);
-       } else
-               blk_mq_put_ctx(data.ctx);
+       }
 
        return cookie;
 }