blk-mq: improve tag waiting setup for non-shared tags
If we run out of driver tags, we currently treat shared and non-shared tags the same - both cases hook into the tag waitqueue. This is a bit more costly than it needs to be on unshared tags, since we have to both grab the hctx lock, and the waitqueue lock (and disable interrupts). For the non-shared case, we can simply mark the queue as needing a restart. Split blk_mq_dispatch_wait_add() to account for both cases, and rename it to blk_mq_mark_tag_wait() to better reflect what it does now. Without this patch, shared and non-shared performance is about the same with 4 fio thread hammering on a single null_blk device (~410K, at 75% sys). With the patch, the shared case is the same, but the non-shared tags case runs at 431K at 71% sys. Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
15f7b41f70
commit
f906a6a0f4
@ -1006,44 +1006,68 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
|
||||
struct request *rq)
|
||||
/*
|
||||
* Mark us waiting for a tag. For shared tags, this involves hooking us into
|
||||
* the tag wakeups. For non-shared tags, we can simply mark us nedeing a
|
||||
* restart. For both caes, take care to check the condition again after
|
||||
* marking us as waiting.
|
||||
*/
|
||||
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
struct blk_mq_hw_ctx *this_hctx = *hctx;
|
||||
wait_queue_entry_t *wait = &this_hctx->dispatch_wait;
|
||||
bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0;
|
||||
struct sbq_wait_state *ws;
|
||||
wait_queue_entry_t *wait;
|
||||
bool ret;
|
||||
|
||||
if (!list_empty_careful(&wait->entry))
|
||||
return false;
|
||||
if (!shared_tags) {
|
||||
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state))
|
||||
set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state);
|
||||
} else {
|
||||
wait = &this_hctx->dispatch_wait;
|
||||
if (!list_empty_careful(&wait->entry))
|
||||
return false;
|
||||
|
||||
spin_lock(&this_hctx->lock);
|
||||
if (!list_empty(&wait->entry)) {
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return false;
|
||||
spin_lock(&this_hctx->lock);
|
||||
if (!list_empty(&wait->entry)) {
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
|
||||
add_wait_queue(&ws->wait, wait);
|
||||
}
|
||||
|
||||
ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx);
|
||||
add_wait_queue(&ws->wait, wait);
|
||||
|
||||
/*
|
||||
* It's possible that a tag was freed in the window between the
|
||||
* allocation failure and adding the hardware queue to the wait
|
||||
* queue.
|
||||
*/
|
||||
if (!blk_mq_get_driver_tag(rq, hctx, false)) {
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return false;
|
||||
}
|
||||
ret = blk_mq_get_driver_tag(rq, hctx, false);
|
||||
|
||||
/*
|
||||
* We got a tag, remove ourselves from the wait queue to ensure
|
||||
* someone else gets the wakeup.
|
||||
*/
|
||||
spin_lock_irq(&ws->wait.lock);
|
||||
list_del_init(&wait->entry);
|
||||
spin_unlock_irq(&ws->wait.lock);
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return true;
|
||||
if (!shared_tags) {
|
||||
/*
|
||||
* Don't clear RESTART here, someone else could have set it.
|
||||
* At most this will cost an extra queue run.
|
||||
*/
|
||||
return ret;
|
||||
} else {
|
||||
if (!ret) {
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* We got a tag, remove ourselves from the wait queue to ensure
|
||||
* someone else gets the wakeup.
|
||||
*/
|
||||
spin_lock_irq(&ws->wait.lock);
|
||||
list_del_init(&wait->entry);
|
||||
spin_unlock_irq(&ws->wait.lock);
|
||||
spin_unlock(&this_hctx->lock);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
@ -1076,10 +1100,15 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
|
||||
* before we add this entry back on the dispatch list,
|
||||
* we'll re-run it below.
|
||||
*/
|
||||
if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
|
||||
if (!blk_mq_mark_tag_wait(&hctx, rq)) {
|
||||
if (got_budget)
|
||||
blk_mq_put_dispatch_budget(hctx);
|
||||
no_tag = true;
|
||||
/*
|
||||
* For non-shared tags, the RESTART check
|
||||
* will suffice.
|
||||
*/
|
||||
if (hctx->flags & BLK_MQ_F_TAG_SHARED)
|
||||
no_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user