* [PATCH v4 1/6] blk-mq: export two symbols to get request by tag
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-07 14:01 ` [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req() Yu Kuai
` (4 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
nbd has a defect: blk_mq_tag_to_rq() might return a freed request in
nbd_read_stat(). Fixing this entirely inside the nbd driver would
require a new mechanism and would be rather complicated.
Thus export blk_mq_find_and_get_req() and blk_mq_put_rq_ref() so that
blk_mq_find_and_get_req() can replace blk_mq_tag_to_rq(). It makes sure
the returned request is not freed while the reference is held, and more
checking can then be done while 'cmd->lock' is held.
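For reference, the body of blk_mq_find_and_get_req() is not changed by this
patch; roughly (the existing code in block/blk-mq-tag.c), it pins the request
under tags->lock before returning it, and the caller is expected to drop that
reference with blk_mq_put_rq_ref():

struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
					unsigned int bitnr)
{
	struct request *rq;
	unsigned long flags;

	/* tags->lock keeps the lookup and the refcount grab atomic */
	spin_lock_irqsave(&tags->lock, flags);
	rq = tags->rqs[bitnr];
	if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
		rq = NULL;
	spin_unlock_irqrestore(&tags->lock, flags);
	return rq;
}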
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-mq-tag.c | 5 +++--
block/blk-mq.c | 1 +
include/linux/blk-mq.h | 3 +++
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 86f87346232a..b4f66b75b4d1 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -200,8 +200,8 @@ struct bt_iter_data {
bool reserved;
};
-static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
- unsigned int bitnr)
+struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+ unsigned int bitnr)
{
struct request *rq;
unsigned long flags;
@@ -213,6 +213,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
spin_unlock_irqrestore(&tags->lock, flags);
return rq;
}
+EXPORT_SYMBOL(blk_mq_find_and_get_req);
static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 08626cb0534c..5113aa3788a2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -916,6 +916,7 @@ void blk_mq_put_rq_ref(struct request *rq)
else if (refcount_dec_and_test(&rq->ref))
__blk_mq_free_request(rq);
}
+EXPORT_SYMBOL(blk_mq_put_rq_ref);
static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 13ba1861e688..03e02990609d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -637,4 +637,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio);
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
struct lock_class_key *key);
+void blk_mq_put_rq_ref(struct request *rq);
+struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+ unsigned int bitnr);
#endif
--
2.31.1
* [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
2021-09-07 14:01 ` [PATCH v4 1/6] blk-mq: export two symbols to get request by tag Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-08 7:30 ` Ming Lei
2021-09-07 14:01 ` [PATCH v4 3/6] nbd: don't handle response without a corresponding request message Yu Kuai
` (3 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
blk_mq_tag_to_rq() is only guaranteed to return a valid request when
both of the following steps happen, in order:
1) client sends the request message to the server first
submit_bio
...
 blk_mq_get_tag
...
  blk_mq_get_driver_tag
...
   nbd_queue_rq
    nbd_handle_cmd
     nbd_send_cmd
2) client receives the response message from the server
recv_work
 nbd_read_stat
  blk_mq_tag_to_rq
If step 1) is missing, blk_mq_tag_to_rq() will return a stale request,
which might already have been freed. Thus convert to
blk_mq_find_and_get_req(), which makes sure the returned request is not
freed while the reference is held. However, some problems remain if the
request is already started; these will be fixed in the next patch.
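For comparison, the plain tag lookup takes no reference at all; roughly
(the existing code in block/blk-mq.c), it is just an array read, so nothing
prevents the returned request from being freed or recycled:

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	if (tag < tags->nr_tags) {
		/* no reference is taken, only a speculative load */
		prefetch(tags->rqs[tag]);
		return tags->rqs[tag];
	}
	return NULL;
}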
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 5170a630778d..920da390635c 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -718,12 +718,13 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
- req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
- blk_mq_unique_tag_to_tag(tag));
+ req = blk_mq_find_and_get_req(nbd->tag_set.tags[hwq],
+ blk_mq_unique_tag_to_tag(tag));
if (!req || !blk_mq_request_started(req)) {
dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
tag, req);
- return ERR_PTR(-ENOENT);
+ ret = -ENOENT;
+ goto put_req;
}
trace_nbd_header_received(req, handle);
cmd = blk_mq_rq_to_pdu(req);
@@ -785,6 +786,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
out:
trace_nbd_payload_received(req, handle);
mutex_unlock(&cmd->lock);
+put_req:
+ if (req)
+ blk_mq_put_rq_ref(req);
return ret ? ERR_PTR(ret) : cmd;
}
--
2.31.1
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-07 14:01 ` [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req() Yu Kuai
@ 2021-09-08 7:30 ` Ming Lei
2021-09-08 7:37 ` yukuai (C)
0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2021-09-08 7:30 UTC (permalink / raw)
To: Yu Kuai; +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On Tue, Sep 07, 2021 at 10:01:50PM +0800, Yu Kuai wrote:
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 5170a630778d..920da390635c 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -718,12 +718,13 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
> tag = nbd_handle_to_tag(handle);
> hwq = blk_mq_unique_tag_to_hwq(tag);
> if (hwq < nbd->tag_set.nr_hw_queues)
> - req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
> - blk_mq_unique_tag_to_tag(tag));
> + req = blk_mq_find_and_get_req(nbd->tag_set.tags[hwq],
> + blk_mq_unique_tag_to_tag(tag));
> if (!req || !blk_mq_request_started(req)) {
> dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
> tag, req);
> - return ERR_PTR(-ENOENT);
> + ret = -ENOENT;
> + goto put_req;
> }
> trace_nbd_header_received(req, handle);
> cmd = blk_mq_rq_to_pdu(req);
> @@ -785,6 +786,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
> out:
> trace_nbd_payload_received(req, handle);
> mutex_unlock(&cmd->lock);
> +put_req:
> + if (req)
> + blk_mq_put_rq_ref(req);
> return ret ? ERR_PTR(ret) : cmd;
After the request's refcnt is dropped, it can be freed immediately, and
then a stale command is returned to the caller.
Thanks,
Ming
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-08 7:30 ` Ming Lei
@ 2021-09-08 7:37 ` yukuai (C)
2021-09-08 8:00 ` Ming Lei
0 siblings, 1 reply; 14+ messages in thread
From: yukuai (C) @ 2021-09-08 7:37 UTC (permalink / raw)
To: Ming Lei; +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On 2021/09/08 15:30, Ming Lei wrote:
>> +put_req:
>> + if (req)
>> + blk_mq_put_rq_ref(req);
>> return ret ? ERR_PTR(ret) : cmd;
>
> After the request's refcnt is dropped, it can be freed immediately, then
> one stale command is returned to caller.
Hi, Ming
You're right that this patch leaves the problem open. Please take a look
at patches 3 and 4; the problem should be fixed once they are applied.
Thanks,
Kuai
>
> Thanks,
> Ming
>
> .
>
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-08 7:37 ` yukuai (C)
@ 2021-09-08 8:00 ` Ming Lei
2021-09-08 8:29 ` yukuai (C)
0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2021-09-08 8:00 UTC (permalink / raw)
To: yukuai (C); +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On Wed, Sep 08, 2021 at 03:37:06PM +0800, yukuai (C) wrote:
> On 2021/09/08 15:30, Ming Lei wrote:
>
> > > +put_req:
> > > + if (req)
> > > + blk_mq_put_rq_ref(req);
> > > return ret ? ERR_PTR(ret) : cmd;
> >
> > After the request's refcnt is dropped, it can be freed immediately, then
> > one stale command is returned to caller.
>
> Hi, Ming
>
> It's right this patch leave this problem. Please take a look at patch 3
> and patch 4, the problem should be fixed with these patches.
I don't see it addressed in patches 3 & 4, and it is an obvious fault in
patch 2, so please fix it from the beginning by moving the refcnt drop
into recv_work().
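Something along these lines in recv_work() would keep the reference until
the command is really done with (just a sketch, error handling omitted):

	cmd = nbd_read_stat(nbd, args->index);
	if (IS_ERR(cmd))
		break;

	rq = blk_mq_rq_from_pdu(cmd);
	blk_mq_complete_request(rq);
	/* only now drop the reference taken by blk_mq_find_and_get_req() */
	blk_mq_put_rq_ref(rq);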
BTW, the approach in patch 3 looks fine; it is very similar to SCSI's
handling.
Thanks,
Ming
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-08 8:00 ` Ming Lei
@ 2021-09-08 8:29 ` yukuai (C)
2021-09-08 9:27 ` Ming Lei
0 siblings, 1 reply; 14+ messages in thread
From: yukuai (C) @ 2021-09-08 8:29 UTC (permalink / raw)
To: Ming Lei; +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On 2021/09/08 16:00, Ming Lei wrote:
> On Wed, Sep 08, 2021 at 03:37:06PM +0800, yukuai (C) wrote:
>> On 2021/09/08 15:30, Ming Lei wrote:
>>
>>>> +put_req:
>>>> + if (req)
>>>> + blk_mq_put_rq_ref(req);
>>>> return ret ? ERR_PTR(ret) : cmd;
>>>
>>> After the request's refcnt is dropped, it can be freed immediately, then
>>> one stale command is returned to caller.
>>
>> Hi, Ming
>>
>> It's right this patch leave this problem. Please take a look at patch 3
>> and patch 4, the problem should be fixed with these patches.
>
> Not see it is addressed in patch 3 & 4, and it is one obvious fault in
> patch 2, please fix it from beginning by moving the refcnt drop
> into recv_work().
Hi, Ming
With patches 3 & 4:
If nbd_read_stat() returns a valid cmd, the refcnt cannot drop to 0
before blk_mq_complete_request() in recv_work().
If nbd_read_stat() fails, it is not a problem if the request is freed
immediately when the refcnt is dropped in nbd_read_stat().
That's why I said the problem will be fixed.
BTW, if we move the refcnt drop into recv_work(), we can only do that
when nbd_read_stat() returns a valid cmd. If we get a valid rq but fail
one of the subsequent checks in nbd_read_stat(), it's better to drop the
refcnt in nbd_read_stat() itself.
>
> BTW, the approach in patch 3 looks fine, which is very similar with
> SCSI's handling.
Thanks for taking time reviewing these patches.
Kuai
>
> Thanks,
> Ming
>
> .
>
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-08 8:29 ` yukuai (C)
@ 2021-09-08 9:27 ` Ming Lei
2021-09-08 11:03 ` yukuai (C)
0 siblings, 1 reply; 14+ messages in thread
From: Ming Lei @ 2021-09-08 9:27 UTC (permalink / raw)
To: yukuai (C); +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On Wed, Sep 08, 2021 at 04:29:57PM +0800, yukuai (C) wrote:
> On 2021/09/08 16:00, Ming Lei wrote:
> > On Wed, Sep 08, 2021 at 03:37:06PM +0800, yukuai (C) wrote:
> > > On 2021/09/08 15:30, Ming Lei wrote:
> > >
> > > > > +put_req:
> > > > > + if (req)
> > > > > + blk_mq_put_rq_ref(req);
> > > > > return ret ? ERR_PTR(ret) : cmd;
> > > >
> > > > After the request's refcnt is dropped, it can be freed immediately, then
> > > > one stale command is returned to caller.
> > >
> > > Hi, Ming
> > >
> > > It's right this patch leave this problem. Please take a look at patch 3
> > > and patch 4, the problem should be fixed with these patches.
> >
> > Not see it is addressed in patch 3 & 4, and it is one obvious fault in
> > patch 2, please fix it from beginning by moving the refcnt drop
> > into recv_work().
>
> Hi, Ming
>
> With patch 3 & 4:
>
> if nbd_read_stat() return a valid cmd, then the refcnt should not drop
> to 0 before blk_mq_complete_request() in recv_work().
Couldn't the valid cmd still be timed out right between nbd_read_stat()
and the call to blk_mq_complete_request()?
Yeah, that issue is addressed by patch 4, so please put patch 2 after
patches 3 & 4, and I suggest adding a comment on why the request ref
won't drop to zero in recv_work().
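Something like this above the blk_mq_complete_request() call would do
(wording is only a sketch):

	/*
	 * Once nbd_read_stat() has cleared NBD_CMD_INFLIGHT under
	 * cmd->lock, neither the timeout handler nor nbd_clear_req()
	 * will complete this request, so its reference cannot drop to
	 * zero before blk_mq_complete_request() below.
	 */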
>
> if nbd_read_stat() failed, it won't be a problem if the request is freed
> immediately when refcnt is dropped in nbd_read_stat().
>
> That's why I said that the problem will be fixed.
>
> BTW, if we move the refcnt drop into recv_work, we can only do that if
> nbd_read_stat() return a valid cmd. If we get a valid rq and failed in
> the following checkings in nbd_read_stat(), it's better to drop the
> refcnt in nbd_read_stat().
The usual pattern is to drop the refcnt on when the object isn't used
any more, so it is perfectly fine to hold the ref until that time.
Thanks,
Ming
* Re: [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req()
2021-09-08 9:27 ` Ming Lei
@ 2021-09-08 11:03 ` yukuai (C)
0 siblings, 0 replies; 14+ messages in thread
From: yukuai (C) @ 2021-09-08 11:03 UTC (permalink / raw)
To: Ming Lei; +Cc: axboe, josef, linux-block, linux-kernel, nbd, yi.zhang
On 2021/09/08 17:27, Ming Lei wrote:
> On Wed, Sep 08, 2021 at 04:29:57PM +0800, yukuai (C) wrote:
>> On 2021/09/08 16:00, Ming Lei wrote:
>>> On Wed, Sep 08, 2021 at 03:37:06PM +0800, yukuai (C) wrote:
>>>> On 2021/09/08 15:30, Ming Lei wrote:
>>>>
>>>>>> +put_req:
>>>>>> + if (req)
>>>>>> + blk_mq_put_rq_ref(req);
>>>>>> return ret ? ERR_PTR(ret) : cmd;
>>>>>
>>>>> After the request's refcnt is dropped, it can be freed immediately, then
>>>>> one stale command is returned to caller.
>>>>
>>>> Hi, Ming
>>>>
>>>> It's right this patch leave this problem. Please take a look at patch 3
>>>> and patch 4, the problem should be fixed with these patches.
>>>
>>> Not see it is addressed in patch 3 & 4, and it is one obvious fault in
>>> patch 2, please fix it from beginning by moving the refcnt drop
>>> into recv_work().
>>
>> Hi, Ming
>>
>> With patch 3 & 4:
>>
>> if nbd_read_stat() return a valid cmd, then the refcnt should not drop
>> to 0 before blk_mq_complete_request() in recv_work().
>
> The valid cmd won't be timed out just between nbd_read_stat() and
> calling blk_mq_complete_request()?
>
> Yeah, the issue is addressed by patch 4, then please put 2 after
> 3 & 4, and suggest to add comment why request ref won't drop to zero
> in recv_work().
Hi, Ming
Thanks for the advice, I will do this in the next iteration.
Best regards
Kuai
>
>>
>> if nbd_read_stat() failed, it won't be a problem if the request is freed
>> immediately when refcnt is dropped in nbd_read_stat().
>>
>> That's why I said that the problem will be fixed.
>>
>> BTW, if we move the refcnt drop into recv_work, we can only do that if
>> nbd_read_stat() return a valid cmd. If we get a valid rq and failed in
>> the following checkings in nbd_read_stat(), it's better to drop the
>> refcnt in nbd_read_stat().
>
> The usual pattern is to drop the refcnt on when the object isn't used
> any more, so it is perfectly fine to hold the ref until that time.
>
>
> Thanks,
> Ming
>
> .
>
* [PATCH v4 3/6] nbd: don't handle response without a corresponding request message
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
2021-09-07 14:01 ` [PATCH v4 1/6] blk-mq: export two symbols to get request by tag Yu Kuai
2021-09-07 14:01 ` [PATCH v4 2/6] nbd: convert to use blk_mq_find_and_get_req() Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-07 14:01 ` [PATCH v4 4/6] nbd: make sure request completion won't concurrent Yu Kuai
` (2 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
While handling a response message from the server, nbd_read_stat() will
try to get the request by tag and then complete it. However, this is
problematic if nbd hasn't sent the corresponding request message yet:
t1                      t2
submit_bio
 nbd_queue_rq
  blk_mq_start_request
                        recv_work
                         nbd_read_stat
                          blk_mq_find_and_get_req
                         blk_mq_complete_request
 nbd_send_cmd
Thus add a new cmd flag 'NBD_CMD_INFLIGHT': it is set once nbd_send_cmd()
succeeds and checked in nbd_read_stat().
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 920da390635c..521a8d913741 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -126,6 +126,12 @@ struct nbd_device {
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_queue_rq() succeeds, and will be checked and
+ * cleared on completion. Both setting and clearing of the flag are protected
+ * by cmd->lock.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -400,6 +406,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -730,6 +737,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
+ tag, cmd->status, cmd->flags);
+ ret = -ENOENT;
+ goto out;
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -833,6 +846,7 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
+ __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
mutex_unlock(&cmd->lock);
blk_mq_complete_request(req);
@@ -968,7 +982,13 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
- if (ret == -EAGAIN) {
+ /*
+ * Access to this flag is protected by cmd->lock, thus it's safe to set
+ * the flag after nbd_send_cmd() succeeds in sending the request to the server.
+ */
+ if (!ret)
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ else if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
--
2.31.1
* [PATCH v4 4/6] nbd: make sure request completion won't concurrent
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
` (2 preceding siblings ...)
2021-09-07 14:01 ` [PATCH v4 3/6] nbd: don't handle response without a corresponding request message Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-07 14:01 ` [PATCH v4 5/6] nbd: check sock index in nbd_read_stat() Yu Kuai
2021-09-07 14:01 ` [PATCH v4 6/6] nbd: don't start request if nbd_queue_rq() failed Yu Kuai
5 siblings, 0 replies; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
Commit cddce0116058 ("nbd: Aovid double completion of a request")
tried to fix the case where nbd_clear_que() and recv_work() complete a
request concurrently. However, the problem still exists:
t1                      t2                      t3
nbd_disconnect_and_put
 flush_workqueue
                        recv_work
                         blk_mq_complete_request
                          blk_mq_complete_request_remote -> this is true
                           WRITE_ONCE(rq->state, MQ_RQ_COMPLETE)
                            blk_mq_raise_softirq
                                                blk_done_softirq
                                                 blk_complete_reqs
                                                  nbd_complete_rq
                                                   blk_mq_end_request
                                                    blk_mq_free_request
                                                     WRITE_ONCE(rq->state, MQ_RQ_IDLE)
 nbd_clear_que
  blk_mq_tagset_busy_iter
   nbd_clear_req
                                                      __blk_mq_free_request
                                                       blk_mq_put_tag
    blk_mq_complete_request -> complete again
There are three places where a request can be completed in nbd:
recv_work(), nbd_clear_que() and nbd_xmit_timeout(). Since they all
hold cmd->lock before completing the request, the double completion is
easy to avoid by setting and checking a cmd flag.
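Condensed, each of those completion paths ends up with the same guard
(sketch assembled from the diffs in patches 3 and 4; the early-return
value differs per caller):

	mutex_lock(&cmd->lock);
	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
		/* somebody else owns (or already did) the completion */
		mutex_unlock(&cmd->lock);
		return;
	}
	/* ... per-path handling under cmd->lock ... */
	mutex_unlock(&cmd->lock);
	blk_mq_complete_request(req);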
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 521a8d913741..6e22e80a5488 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -406,7 +406,11 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
- __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return BLK_EH_DONE;
+ }
+
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -846,7 +850,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
- __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return true;
+ }
mutex_unlock(&cmd->lock);
blk_mq_complete_request(req);
--
2.31.1
* [PATCH v4 5/6] nbd: check sock index in nbd_read_stat()
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
` (3 preceding siblings ...)
2021-09-07 14:01 ` [PATCH v4 4/6] nbd: make sure request completion won't concurrent Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-07 14:01 ` [PATCH v4 6/6] nbd: don't start request if nbd_queue_rq() failed Yu Kuai
5 siblings, 0 replies; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
The sock on which the client sends the request in nbd_send_cmd() and the
sock on which it receives the reply in nbd_read_stat() should be the same.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6e22e80a5488..807c8cbccaae 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -747,6 +747,10 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
+ if (cmd->index != index) {
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)",
+ tag, index, cmd->index);
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
--
2.31.1
* [PATCH v4 6/6] nbd: don't start request if nbd_queue_rq() failed
2021-09-07 14:01 [PATCH v4 0/6] handle unexpected message from server Yu Kuai
` (4 preceding siblings ...)
2021-09-07 14:01 ` [PATCH v4 5/6] nbd: check sock index in nbd_read_stat() Yu Kuai
@ 2021-09-07 14:01 ` Yu Kuai
2021-09-09 6:41 ` Christoph Hellwig
5 siblings, 1 reply; 14+ messages in thread
From: Yu Kuai @ 2021-09-07 14:01 UTC (permalink / raw)
To: axboe, josef, ming.lei; +Cc: linux-block, linux-kernel, nbd, yukuai3, yi.zhang
Currently, the block layer calls blk_mq_end_request() if nbd_queue_rq()
fails, so starting the request in that situation is useless.
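The calling side in the block layer looks roughly like this (simplified
sketch of blk_mq_dispatch_rq_list()); on an error return from ->queue_rq()
the request is ended directly, so it never needs to have been started:

	ret = q->mq_ops->queue_rq(hctx, &bd);
	switch (ret) {
	case BLK_STS_OK:
		queued++;
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		/* put the request back and retry later */
		...
		break;
	default:
		errors++;
		blk_mq_end_request(rq, ret);	/* request is finished here */
		break;
	}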
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/block/nbd.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 807c8cbccaae..122e49ae86fb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -938,7 +938,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
if (!refcount_inc_not_zero(&nbd->config_refs)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Socks array is empty\n");
- blk_mq_start_request(req);
return -EINVAL;
}
config = nbd->config;
@@ -947,7 +946,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EINVAL;
}
cmd->status = BLK_STS_OK;
@@ -971,7 +969,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
*/
sock_shutdown(nbd);
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EIO;
}
goto again;
--
2.31.1