From: Jens Axboe Date: Tue, 20 May 2014 17:49:02 +0000 (-0600) Subject: blk-mq: allow changing of queue depth through sysfs X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=e3a2b3f931f59d5284abd13faf8bded726884ffd;p=openwrt%2Fstaging%2Fblogic.git blk-mq: allow changing of queue depth through sysfs For request_fn based devices, the block layer exports a 'nr_requests' file through sysfs to allow adjusting of queue depth on the fly. Currently this returns -EINVAL for blk-mq, since it's not wired up. Wire this up for blk-mq, so that it now also always dynamic adjustments of the allowed queue depth for any given block device managed by blk-mq. Signed-off-by: Jens Axboe --- diff --git a/block/blk-core.c b/block/blk-core.c index a6bd3e702201..fe81e19099a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags) __freed_request(rl, sync ^ 1); } +int blk_update_nr_requests(struct request_queue *q, unsigned int nr) +{ + struct request_list *rl; + + spin_lock_irq(q->queue_lock); + q->nr_requests = nr; + blk_queue_congestion_threshold(q); + + /* congestion isn't cgroup aware and follows root blkcg for now */ + rl = &q->root_rl; + + if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_SYNC); + else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_SYNC); + + if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_ASYNC); + else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_ASYNC); + + blk_queue_for_each_rl(rl, q) { + if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { + blk_set_rl_full(rl, BLK_RW_SYNC); + } else { + blk_clear_rl_full(rl, BLK_RW_SYNC); + wake_up(&rl->wait[BLK_RW_SYNC]); + } + + if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { + blk_set_rl_full(rl, BLK_RW_ASYNC); + } else { + blk_clear_rl_full(rl, BLK_RW_ASYNC); + wake_up(&rl->wait[BLK_RW_ASYNC]); + } + } + + spin_unlock_irq(q->queue_lock); + return 0; +} + /* * Determine if elevator data should be initialized when allocating the * request associated with @bio. diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e6b3fbae9862..f6dea968b710 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) } /* - * If a previously busy queue goes inactive, potential waiters could now - * be allowed to queue. Wake them up and check. + * Wakeup all potentially sleeping on normal (non-reserved) tags */ -void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) +static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) { - struct blk_mq_tags *tags = hctx->tags; struct blk_mq_bitmap_tags *bt; int i, wake_index; - if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) - return; - - atomic_dec(&tags->active_queues); - - /* - * Will only throttle depth on non-reserved tags - */ bt = &tags->bitmap_tags; wake_index = bt->wake_index; for (i = 0; i < BT_WAIT_QUEUES; i++) { @@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) } } +/* + * If a previously busy queue goes inactive, potential waiters could now + * be allowed to queue. Wake them up and check. + */ +void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_tags *tags = hctx->tags; + + if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return; + + atomic_dec(&tags->active_queues); + + blk_mq_tag_wakeup_all(tags); +} + /* * For shared tag users, we track the number of currently active users * and attempt to provide a fair share of the tag depth for each of them. @@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) return bt->depth - used; } +static void bt_update_count(struct blk_mq_bitmap_tags *bt, + unsigned int depth) +{ + unsigned int tags_per_word = 1U << bt->bits_per_word; + unsigned int map_depth = depth; + + if (depth) { + int i; + + for (i = 0; i < bt->map_nr; i++) { + bt->map[i].depth = min(map_depth, tags_per_word); + map_depth -= bt->map[i].depth; + } + } + + bt->wake_cnt = BT_WAIT_BATCH; + if (bt->wake_cnt > depth / 4) + bt->wake_cnt = max(1U, depth / 4); + + bt->depth = depth; +} + static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, int node, bool reserved) { @@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, * condition. */ if (depth) { - unsigned int nr, i, map_depth, tags_per_word; + unsigned int nr, tags_per_word; tags_per_word = (1 << bt->bits_per_word); @@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, return -ENOMEM; bt->map_nr = nr; - map_depth = depth; - for (i = 0; i < nr; i++) { - bt->map[i].depth = min(map_depth, tags_per_word); - map_depth -= tags_per_word; - } } bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); @@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, for (i = 0; i < BT_WAIT_QUEUES; i++) init_waitqueue_head(&bt->bs[i].wait); - bt->wake_cnt = BT_WAIT_BATCH; - if (bt->wake_cnt > depth / 4) - bt->wake_cnt = max(1U, depth / 4); - - bt->depth = depth; + bt_update_count(bt, depth); return 0; } @@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag) *tag = prandom_u32() % depth; } +int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) +{ + tdepth -= tags->nr_reserved_tags; + if (tdepth > tags->nr_tags) + return -EINVAL; + + /* + * Don't need (or can't) update reserved tags here, they remain + * static and should never need resizing. + */ + bt_update_count(&tags->bitmap_tags, tdepth); + blk_mq_tag_wakeup_all(tags); + return 0; +} + ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) { char *orig_page = page; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index e144f68ec45f..e7ff5ceeeb97 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); +extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); enum { BLK_MQ_TAG_CACHE_MIN = 1, diff --git a/block/blk-mq.c b/block/blk-mq.c index 0fbef7e9bef1..7b71ab1b1536 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_free_tag_set); +int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) +{ + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_hw_ctx *hctx; + int i, ret; + + if (!set || nr > set->queue_depth) + return -EINVAL; + + ret = 0; + queue_for_each_hw_ctx(q, hctx, i) { + ret = blk_mq_tag_update_depth(hctx->tags, nr); + if (ret) + break; + } + + if (!ret) + q->nr_requests = nr; + + return ret; +} + void blk_mq_disable_hotplug(void) { mutex_lock(&all_q_mutex); diff --git a/block/blk-mq.h b/block/blk-mq.h index 5e5a378962b7..7db4fe4bd002 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); +int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); /* * CPU hotplug helpers diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 7500f876dae4..4d6811ac13fd 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) static ssize_t queue_requests_store(struct request_queue *q, const char *page, size_t count) { - struct request_list *rl; unsigned long nr; - int ret; + int ret, err; - if (!q->request_fn) + if (!q->request_fn && !q->mq_ops) return -EINVAL; ret = queue_var_store(&nr, page, count); @@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) if (nr < BLKDEV_MIN_RQ) nr = BLKDEV_MIN_RQ; - spin_lock_irq(q->queue_lock); - q->nr_requests = nr; - blk_queue_congestion_threshold(q); - - /* congestion isn't cgroup aware and follows root blkcg for now */ - rl = &q->root_rl; - - if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, BLK_RW_SYNC); - else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, BLK_RW_SYNC); - - if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, BLK_RW_ASYNC); - else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, BLK_RW_ASYNC); - - blk_queue_for_each_rl(rl, q) { - if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { - blk_set_rl_full(rl, BLK_RW_SYNC); - } else { - blk_clear_rl_full(rl, BLK_RW_SYNC); - wake_up(&rl->wait[BLK_RW_SYNC]); - } - - if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { - blk_set_rl_full(rl, BLK_RW_ASYNC); - } else { - blk_clear_rl_full(rl, BLK_RW_ASYNC); - wake_up(&rl->wait[BLK_RW_ASYNC]); - } - } + if (q->request_fn) + err = blk_update_nr_requests(q, nr); + else + err = blk_mq_update_nr_requests(q, nr); + + if (err) + return err; - spin_unlock_irq(q->queue_lock); return ret; } diff --git a/block/blk.h b/block/blk.h index 95cab70000e3..45385e9abf6f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } +extern int blk_update_nr_requests(struct request_queue *, unsigned int); + /* * Contribute to IO statistics IFF: * diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a06ca7b5ea05..f45424453338 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -63,7 +63,7 @@ struct blk_mq_hw_ctx { struct blk_mq_tag_set { struct blk_mq_ops *ops; unsigned int nr_hw_queues; - unsigned int queue_depth; + unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ int numa_node;