LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>, Robert Elliott <Elliott@hp.com>,
	Ming Lei <ming.lei@canonical.com>,
	Sagi Grimberg <sagig@mellanox.com>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH] blk-mq: Avoid that I/O hangs in bt_get()
Date: Mon, 06 Oct 2014 14:27:03 +0200	[thread overview]
Message-ID: <54328A97.3030201@acm.org> (raw)

Ensure that bt_clear_tag() increments bs->wait_cnt if one or more
threads are waiting for a tag. Remove a superfluous
waitqueue_active() call from blk_mq_tag_wakeup_all(). This patch
avoids that bt_get() hangs as follows if the number of hardware
contexts is below the number of CPU threads:

INFO: task fio:6739 blocked for more than 120 seconds.
      Not tainted 3.17.0-rc7+ #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
fio             D ffff88085fcd2740     0  6739   6688 0x00000000
 ffff8807fb96f830 0000000000000086 ffff8808350eb000 ffff8807fb96ffd8
 0000000000012740 0000000000012740 ffff88085b885000 ffff8808350eb000
 ffff88085fcd2fe0 ffff8807fb96f920 ffff88080061ddc8 ffffe8ffffcc4600
Call Trace:
 [<ffffffff8142f52d>] io_schedule+0x9d/0x130
 [<ffffffff812016bf>] bt_get+0xef/0x180
 [<ffffffff8107f440>] ? prepare_to_wait_event+0x110/0x110
 [<ffffffff81201a0f>] blk_mq_get_tag+0x9f/0xd0
 [<ffffffff811fdedb>] __blk_mq_alloc_request+0x1b/0x200
 [<ffffffff811ff3bc>] blk_mq_map_request+0x15c/0x1b0
 [<ffffffff8120079e>] blk_mq_make_request+0x6e/0x270
 [<ffffffff8110a99f>] ? mempool_alloc+0x4f/0x130
 [<ffffffff811f3af0>] generic_make_request+0xc0/0x110
 [<ffffffff811f3bab>] submit_bio+0x6b/0x140
 [<ffffffff81110e4b>] ? set_page_dirty_lock+0x2b/0x40
 [<ffffffff811eea57>] ? bio_set_pages_dirty+0x47/0x60
 [<ffffffff811907d0>] do_blockdev_direct_IO+0x2080/0x3220
 [<ffffffff8118bac0>] ? I_BDEV+0x10/0x10
 [<ffffffff811919bc>] __blockdev_direct_IO+0x4c/0x50
 [<ffffffff8118bac0>] ? I_BDEV+0x10/0x10
 [<ffffffff8118c1de>] blkdev_direct_IO+0x4e/0x50
 [<ffffffff8118bac0>] ? I_BDEV+0x10/0x10
 [<ffffffff81109a13>] generic_file_read_iter+0x513/0x5e0
 [<ffffffff8119d8a7>] ? kiocb_free+0x37/0x40
 [<ffffffff8118c660>] ? ioctl_by_bdev+0x40/0x40
 [<ffffffff8118c697>] blkdev_read_iter+0x37/0x40
 [<ffffffff8119e6b4>] aio_run_iocb+0x1e4/0x3c0
 [<ffffffff8114dfa6>] ? kmem_cache_alloc+0xd6/0x3d0
 [<ffffffff8114e045>] ? kmem_cache_alloc+0x175/0x3d0
 [<ffffffff8119f5cc>] do_io_submit+0x11c/0x490
 [<ffffffff8119f950>] SyS_io_submit+0x10/0x20
 [<ffffffff81432fd2>] system_call_fastpath+0x16/0x1b

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Robert Elliott <Elliott@hp.com>
Cc: <stable@vger.kernel.org>
---
 block/blk-mq-tag.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index b5088f0..08d3b1c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -80,8 +80,7 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
-		if (waitqueue_active(&bs->wait))
-			wake_up(&bs->wait);
+		wake_up(&bs->wait);
 
 		wake_index = bt_index_inc(wake_index);
 	}
@@ -346,20 +345,20 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	 */
 	clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
 
-	bs = bt_wake_ptr(bt);
-	if (!bs)
-		return;
-
-	wait_cnt = atomic_dec_return(&bs->wait_cnt);
-	if (wait_cnt == 0) {
-wake:
-		atomic_add(bt->wake_cnt, &bs->wait_cnt);
-		bt_index_atomic_inc(&bt->wake_index);
-		wake_up(&bs->wait);
-	} else if (wait_cnt < 0) {
-		wait_cnt = atomic_inc_return(&bs->wait_cnt);
-		if (!wait_cnt)
-			goto wake;
+	for (;;) {
+		bs = bt_wake_ptr(bt);
+		if (!bs)
+			return;
+
+		wait_cnt = atomic_dec_return(&bs->wait_cnt);
+		if (unlikely(wait_cnt < 0))
+			wait_cnt = atomic_inc_return(&bs->wait_cnt);
+		if (wait_cnt == 0) {
+			atomic_add(bt->wake_cnt, &bs->wait_cnt);
+			bt_index_atomic_inc(&bt->wake_index);
+			wake_up(&bs->wait);
+			return;
+		}
 	}
 }
 
-- 
1.8.4.5


             reply	other threads:[~2014-10-06 12:27 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-06 12:27 Bart Van Assche [this message]
2014-10-06 17:40 ` Jens Axboe
2014-10-06 18:53   ` Jens Axboe
2014-10-07  8:46     ` Bart Van Assche
2014-10-07 14:44       ` Jens Axboe
2014-11-06 13:41       ` Bart Van Assche
2014-12-08 14:55         ` Bart Van Assche
2014-12-08 16:49           ` Jens Axboe
2014-12-08 17:59             ` Bart Van Assche

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54328A97.3030201@acm.org \
    --to=bvanassche@acm.org \
    --cc=Elliott@hp.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.lei@canonical.com \
    --cc=sagig@mellanox.com \
    --subject='Re: [PATCH] blk-mq: Avoid that I/O hangs in bt_get()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).