Netdev Archive on lore.kernel.org help / color / mirror / Atom feed
* [net-next] tipc: fix a deadlock when flushing scheduled work @ 2020-09-05 4:45 Hoang Huu Le 2020-09-06 17:56 ` Jakub Kicinski 2020-09-07 6:17 ` [net-next v2] " Hoang Huu Le 0 siblings, 2 replies; 5+ messages in thread From: Hoang Huu Le @ 2020-09-05 4:45 UTC (permalink / raw) To: ying.xue, netdev; +Cc: jmaloy, maloy, syzbot+d5aa7e0385f6a5d0f4fd In commit fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode"), we tried to make sure the tipc_net_finalize_work work item had finished if it was enqueued. But calling flush_scheduled_work() does not affect only that work item; it also flushes every other scheduled work item. This has turned out to be overkill and caused a deadlock, as syzbot reported: ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted ------------------------------------------------------ kworker/u4:6/349 is trying to acquire lock: ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 but task is already holding lock: ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 [...] Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pernet_ops_rwsem); lock(&sb->s_type->i_mutex_key#13); lock(pernet_ops_rwsem); lock((wq_completion)events); *** DEADLOCK *** [...] To fix the original issue, we replace the above call by introducing a bit flag. When a namespace is cleaned up, the bit flag is set to zero and: - the tipc_net_finalize function just returns immediately. - tipc_net_finalize_work is not enqueued into the scheduled work queue. 
Reported-by: syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com Fixes: fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode") Acked-by: Jon Maloy <jmaloy@redhat.com> Signed-off-by: Hoang Huu Le <hoang.h.le@dektech.com.au> --- net/tipc/core.c | 8 ++++---- net/tipc/core.h | 1 + net/tipc/net.c | 10 +++++++++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 37d8695548cf..5e7bb768f45c 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; + test_and_set_bit_lock(0, &tn->net_exit_flag); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_detach_loopback(net); tipc_net_stop(net); - /* Make sure the tipc_net_finalize_work stopped - * before releasing the resources. 
- */ - flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) static void __net_exit tipc_pernet_pre_exit(struct net *net) { + struct tipc_net *tn = tipc_net(net); + + clear_bit_unlock(0, &tn->net_exit_flag); tipc_node_pre_cleanup_net(net); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 631d83c9705f..aa75882dd932 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -143,6 +143,7 @@ struct tipc_net { /* TX crypto handler */ struct tipc_crypto *crypto_tx; #endif + unsigned long net_exit_flag; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/net.c b/net/tipc/net.c index 85400e4242de..8ad5b9ad89c0 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); + if (unlikely(!test_bit(0, &tn->net_exit_flag))) + return; + if (cmpxchg(&tn->node_addr, 0, addr)) return; tipc_set_node_addr(net, addr); @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) void tipc_sched_net_finalize(struct net *net, u32 addr) { - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + struct tipc_net *tn = tipc_net(net); + struct tipc_net_work *fwork; + + if (unlikely(!test_bit(0, &tn->net_exit_flag))) + return; + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); if (!fwork) return; INIT_WORK(&fwork->work, tipc_net_finalize_work); -- 2.25.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [net-next] tipc: fix a deadlock when flushing scheduled work 2020-09-05 4:45 [net-next] tipc: fix a deadlock when flushing scheduled work Hoang Huu Le @ 2020-09-06 17:56 ` Jakub Kicinski 2020-09-07 8:22 ` Hoang Huu Le 2020-09-07 6:17 ` [net-next v2] " Hoang Huu Le 1 sibling, 1 reply; 5+ messages in thread From: Jakub Kicinski @ 2020-09-06 17:56 UTC (permalink / raw) To: Hoang Huu Le; +Cc: ying.xue, netdev, jmaloy, maloy, syzbot+d5aa7e0385f6a5d0f4fd On Sat, 5 Sep 2020 11:45:18 +0700 Hoang Huu Le wrote: > In the commit fdeba99b1e58 > ("tipc: fix use-after-free in tipc_bcast_get_mode"), we're trying > to make sure the tipc_net_finalize_work work item finished if it > enqueued. But calling flush_scheduled_work() is not just affecting > above work item but either any scheduled work. This has turned out > to be overkill and caused to deadlock as syzbot reported: > > ====================================================== > WARNING: possible circular locking dependency detected > 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted > ------------------------------------------------------ > kworker/u4:6/349 is trying to acquire lock: > ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 > > but task is already holding lock: > ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 > > [...] > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(pernet_ops_rwsem); > lock(&sb->s_type->i_mutex_key#13); > lock(pernet_ops_rwsem); > lock((wq_completion)events); > > *** DEADLOCK *** > [...] > > To fix the original issue, we replace above calling by introducing > a bit flag. When a namespace cleaned-up, bit flag is set to zero and: > - tipc_net_finalize functionial just does return immediately. > - tipc_net_finalize_work does not enqueue into the scheduled work queue. Is struct tipc_net not going to be freed right after tipc_exit_net() returns? 
In that case you'd be back to UAF if the flag is in this structure. > @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) > tipc_detach_loopback(net); > tipc_net_stop(net); > > - /* Make sure the tipc_net_finalize_work stopped > - * before releasing the resources. > - */ > - flush_scheduled_work(); > tipc_bcast_stop(net); > tipc_nametbl_stop(net); > tipc_sk_rht_destroy(net); > @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) > > static void __net_exit tipc_pernet_pre_exit(struct net *net) > { > + struct tipc_net *tn = tipc_net(net); > + > + clear_bit_unlock(0, &tn->net_exit_flag); > tipc_node_pre_cleanup_net(net); > } > > diff --git a/net/tipc/core.h b/net/tipc/core.h > index 631d83c9705f..aa75882dd932 100644 > --- a/net/tipc/core.h > +++ b/net/tipc/core.h > @@ -143,6 +143,7 @@ struct tipc_net { > /* TX crypto handler */ > struct tipc_crypto *crypto_tx; > #endif > + unsigned long net_exit_flag; > }; > > static inline struct tipc_net *tipc_net(struct net *net) > diff --git a/net/tipc/net.c b/net/tipc/net.c > index 85400e4242de..8ad5b9ad89c0 100644 > --- a/net/tipc/net.c > +++ b/net/tipc/net.c > @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) > { > struct tipc_net *tn = tipc_net(net); > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > + return; > + > if (cmpxchg(&tn->node_addr, 0, addr)) > return; > tipc_set_node_addr(net, addr); > @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) > > void tipc_sched_net_finalize(struct net *net, u32 addr) > { > - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > + struct tipc_net *tn = tipc_net(net); > + struct tipc_net_work *fwork; > + > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > + return; > > + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > if (!fwork) > return; > INIT_WORK(&fwork->work, tipc_net_finalize_work); ^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [net-next] tipc: fix a deadlock when flushing scheduled work 2020-09-06 17:56 ` Jakub Kicinski @ 2020-09-07 8:22 ` Hoang Huu Le 0 siblings, 0 replies; 5+ messages in thread From: Hoang Huu Le @ 2020-09-07 8:22 UTC (permalink / raw) To: Jakub Kicinski Cc: ying.xue, netdev, jmaloy, maloy, syzbot+d5aa7e0385f6a5d0f4fd > -----Original Message----- > From: Jakub Kicinski <kuba@kernel.org> > Sent: Monday, September 7, 2020 12:57 AM > To: Hoang Huu Le <hoang.h.le@dektech.com.au> > Cc: ying.xue@windriver.com; netdev@vger.kernel.org; jmaloy@redhat.com; maloy@donjonn.com; > syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com > Subject: Re: [net-next] tipc: fix a deadlock when flushing scheduled work > > On Sat, 5 Sep 2020 11:45:18 +0700 Hoang Huu Le wrote: > > In the commit fdeba99b1e58 > > ("tipc: fix use-after-free in tipc_bcast_get_mode"), we're trying > > to make sure the tipc_net_finalize_work work item finished if it > > enqueued. But calling flush_scheduled_work() is not just affecting > > above work item but either any scheduled work. This has turned out > > to be overkill and caused to deadlock as syzbot reported: > > > > ====================================================== > > WARNING: possible circular locking dependency detected > > 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted > > ------------------------------------------------------ > > kworker/u4:6/349 is trying to acquire lock: > > ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 > > > > but task is already holding lock: > > ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 > > > > [...] > > Possible unsafe locking scenario: > > > > CPU0 CPU1 > > ---- ---- > > lock(pernet_ops_rwsem); > > lock(&sb->s_type->i_mutex_key#13); > > lock(pernet_ops_rwsem); > > lock((wq_completion)events); > > > > *** DEADLOCK *** > > [...] 
> > > > To fix the original issue, we replace above calling by introducing > > a bit flag. When a namespace cleaned-up, bit flag is set to zero and: > > - tipc_net_finalize functionial just does return immediately. > > - tipc_net_finalize_work does not enqueue into the scheduled work queue. > > Is struct tipc_net not going to be freed right after tipc_exit_net() > returns? In that case you'd be back to UAF if the flag is in this > structure. > I rework the fix with version 2. In there, I use cancel_work_sync() API to cancel the specific tipc_net_finalize_work work. > > @@ -110,10 +111,6 @@ static void __net_exit tipc_exit_net(struct net *net) > > tipc_detach_loopback(net); > > tipc_net_stop(net); > > > > - /* Make sure the tipc_net_finalize_work stopped > > - * before releasing the resources. > > - */ > > - flush_scheduled_work(); > > tipc_bcast_stop(net); > > tipc_nametbl_stop(net); > > tipc_sk_rht_destroy(net); > > @@ -124,6 +121,9 @@ static void __net_exit tipc_exit_net(struct net *net) > > > > static void __net_exit tipc_pernet_pre_exit(struct net *net) > > { > > + struct tipc_net *tn = tipc_net(net); > > + > > + clear_bit_unlock(0, &tn->net_exit_flag); > > tipc_node_pre_cleanup_net(net); > > } > > > > diff --git a/net/tipc/core.h b/net/tipc/core.h > > index 631d83c9705f..aa75882dd932 100644 > > --- a/net/tipc/core.h > > +++ b/net/tipc/core.h > > @@ -143,6 +143,7 @@ struct tipc_net { > > /* TX crypto handler */ > > struct tipc_crypto *crypto_tx; > > #endif > > + unsigned long net_exit_flag; > > }; > > > > static inline struct tipc_net *tipc_net(struct net *net) > > diff --git a/net/tipc/net.c b/net/tipc/net.c > > index 85400e4242de..8ad5b9ad89c0 100644 > > --- a/net/tipc/net.c > > +++ b/net/tipc/net.c > > @@ -132,6 +132,9 @@ static void tipc_net_finalize(struct net *net, u32 addr) > > { > > struct tipc_net *tn = tipc_net(net); > > > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > > + return; > > + > > if (cmpxchg(&tn->node_addr, 0, addr)) > > return; > > 
tipc_set_node_addr(net, addr); > > @@ -153,8 +156,13 @@ static void tipc_net_finalize_work(struct work_struct *work) > > > > void tipc_sched_net_finalize(struct net *net, u32 addr) > > { > > - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > > + struct tipc_net *tn = tipc_net(net); > > + struct tipc_net_work *fwork; > > + > > + if (unlikely(!test_bit(0, &tn->net_exit_flag))) > > + return; > > > > + fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); > > if (!fwork) > > return; > > INIT_WORK(&fwork->work, tipc_net_finalize_work); ^ permalink raw reply [flat|nested] 5+ messages in thread
* [net-next v2] tipc: fix a deadlock when flushing scheduled work 2020-09-05 4:45 [net-next] tipc: fix a deadlock when flushing scheduled work Hoang Huu Le 2020-09-06 17:56 ` Jakub Kicinski @ 2020-09-07 6:17 ` Hoang Huu Le 2020-09-07 15:05 ` Xue, Ying 1 sibling, 1 reply; 5+ messages in thread From: Hoang Huu Le @ 2020-09-07 6:17 UTC (permalink / raw) To: ying.xue, netdev; +Cc: jmaloy, maloy, syzbot+d5aa7e0385f6a5d0f4fd In commit fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode"), we tried to make sure the tipc_net_finalize_work work item had finished if it was enqueued. But calling flush_scheduled_work() does not affect only that work item; it also flushes every other scheduled work item. This has turned out to be overkill and caused a deadlock, as syzbot reported: ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted ------------------------------------------------------ kworker/u4:6/349 is trying to acquire lock: ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 but task is already holding lock: ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 [...] Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pernet_ops_rwsem); lock(&sb->s_type->i_mutex_key#13); lock(pernet_ops_rwsem); lock((wq_completion)events); *** DEADLOCK *** [...] v1: To fix the original issue, we replace the above call by introducing a bit flag. When a namespace is cleaned up, the bit flag is set to zero and: - the tipc_net_finalize function just returns immediately. - tipc_net_finalize_work is not enqueued into the scheduled work queue. v2: Use the cancel_work_sync() helper to make sure that ONLY tipc_net_finalize_work() is stopped before the bcbase object is released. 
Reported-by: syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com Fixes: fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode") Acked-by: Jon Maloy <jmaloy@redhat.com> Signed-off-by: Hoang Huu Le <hoang.h.le@dektech.com.au> --- net/tipc/core.c | 9 +++++---- net/tipc/core.h | 8 ++++++++ net/tipc/net.c | 20 +++++--------------- net/tipc/net.h | 1 + 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 37d8695548cf..c2ff42900b53 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; + INIT_WORK(&tn->final_work.work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -107,13 +108,13 @@ static int __net_init tipc_init_net(struct net *net) static void __net_exit tipc_exit_net(struct net *net) { + struct tipc_net *tn = tipc_net(net); + tipc_detach_loopback(net); + /* Make sure the tipc_net_finalize_work() finished */ + cancel_work_sync(&tn->final_work.work); tipc_net_stop(net); - /* Make sure the tipc_net_finalize_work stopped - * before releasing the resources. 
- */ - flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 631d83c9705f..1d57a4d3b05e 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,6 +90,12 @@ extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; +struct tipc_net_work { + struct work_struct work; + struct net *net; + u32 addr; +}; + struct tipc_net { u8 node_id[NODE_ID_LEN]; u32 node_addr; @@ -143,6 +149,8 @@ struct tipc_net { /* TX crypto handler */ struct tipc_crypto *crypto_tx; #endif + /* Work item for net finalize */ + struct tipc_net_work final_work; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/net.c b/net/tipc/net.c index 85400e4242de..0bb2323201da 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -105,12 +105,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -struct tipc_net_work { - struct work_struct work; - struct net *net; - u32 addr; -}; - static void tipc_net_finalize(struct net *net, u32 addr); int tipc_net_init(struct net *net, u8 *node_id, u32 addr) @@ -142,25 +136,21 @@ static void tipc_net_finalize(struct net *net, u32 addr) TIPC_CLUSTER_SCOPE, 0, addr); } -static void tipc_net_finalize_work(struct work_struct *work) +void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net_work *fwork; fwork = container_of(work, struct tipc_net_work, work); tipc_net_finalize(fwork->net, fwork->addr); - kfree(fwork); } void tipc_sched_net_finalize(struct net *net, u32 addr) { - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + struct tipc_net *tn = tipc_net(net); - if (!fwork) - return; - INIT_WORK(&fwork->work, tipc_net_finalize_work); - fwork->net = net; - fwork->addr = addr; - schedule_work(&fwork->work); + tn->final_work.net = net; + tn->final_work.addr = addr; + schedule_work(&tn->final_work.work); } void 
tipc_net_stop(struct net *net) diff --git a/net/tipc/net.h b/net/tipc/net.h index 6740d97c706e..d0c91d2df20a 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -42,6 +42,7 @@ extern const struct nla_policy tipc_nl_net_policy[]; int tipc_net_init(struct net *net, u8 *node_id, u32 addr); +void tipc_net_finalize_work(struct work_struct *work); void tipc_sched_net_finalize(struct net *net, u32 addr); void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); -- 2.25.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* RE: [net-next v2] tipc: fix a deadlock when flushing scheduled work 2020-09-07 6:17 ` [net-next v2] " Hoang Huu Le @ 2020-09-07 15:05 ` Xue, Ying 0 siblings, 0 replies; 5+ messages in thread From: Xue, Ying @ 2020-09-07 15:05 UTC (permalink / raw) To: Hoang Huu Le, netdev; +Cc: jmaloy, maloy, syzbot+d5aa7e0385f6a5d0f4fd [-- Attachment #1: Type: text/plain, Size: 6320 bytes --] I don't think we need to make so many lines of changes. Please take a look at the attached patch which was created to fix the original deadlock: syzbot+6ea1f7a8df64596ef4d7@syzkaller.appspotmail.com. If you think the attached solution is fine, please consider to rebase it on the latest net-next tree. -----Original Message----- From: Hoang Huu Le <hoang.h.le@dektech.com.au> Sent: Monday, September 7, 2020 2:17 PM To: Xue, Ying <Ying.Xue@windriver.com>; netdev@vger.kernel.org Cc: jmaloy@redhat.com; maloy@donjonn.com; syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com Subject: [net-next v2] tipc: fix a deadlock when flushing scheduled work In the commit fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode"), we're trying to make sure the tipc_net_finalize_work work item finished if it enqueued. But calling flush_scheduled_work() is not just affecting above work item but either any scheduled work. This has turned out to be overkill and caused to deadlock as syzbot reported: ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc2-next-20200828-syzkaller #0 Not tainted ------------------------------------------------------ kworker/u4:6/349 is trying to acquire lock: ffff8880aa063d38 ((wq_completion)events){+.+.}-{0:0}, at: flush_workqueue+0xe1/0x13e0 kernel/workqueue.c:2777 but task is already holding lock: ffffffff8a879430 (pernet_ops_rwsem){++++}-{3:3}, at: cleanup_net+0x9b/0xb10 net/core/net_namespace.c:565 [...] 
Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(pernet_ops_rwsem); lock(&sb->s_type->i_mutex_key#13); lock(pernet_ops_rwsem); lock((wq_completion)events); *** DEADLOCK *** [...] v1: To fix the original issue, we replace above calling by introducing a bit flag. When a namespace cleaned-up, bit flag is set to zero and: - tipc_net_finalize functionial just does return immediately. - tipc_net_finalize_work does not enqueue into the scheduled work queue. v2: Use cancel_work_sync() helper to make sure ONLY the tipc_net_finalize_work() stopped before releasing bcbase object. Reported-by: syzbot+d5aa7e0385f6a5d0f4fd@syzkaller.appspotmail.com Fixes: fdeba99b1e58 ("tipc: fix use-after-free in tipc_bcast_get_mode") Acked-by: Jon Maloy <jmaloy@redhat.com> Signed-off-by: Hoang Huu Le <hoang.h.le@dektech.com.au> --- net/tipc/core.c | 9 +++++---- net/tipc/core.h | 8 ++++++++ net/tipc/net.c | 20 +++++--------------- net/tipc/net.h | 1 + 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/net/tipc/core.c b/net/tipc/core.c index 37d8695548cf..c2ff42900b53 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -60,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->trial_addr = 0; tn->addr_trial_end = 0; tn->capabilities = TIPC_NODE_CAPABILITIES; + INIT_WORK(&tn->final_work.work, tipc_net_finalize_work); memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; @@ -107,13 +108,13 @@ static int __net_init tipc_init_net(struct net *net) static void __net_exit tipc_exit_net(struct net *net) { + struct tipc_net *tn = tipc_net(net); + tipc_detach_loopback(net); + /* Make sure the tipc_net_finalize_work() finished */ + cancel_work_sync(&tn->final_work.work); tipc_net_stop(net); - /* Make sure the tipc_net_finalize_work stopped - * before releasing the resources. 
- */ - flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 631d83c9705f..1d57a4d3b05e 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,6 +90,12 @@ extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; +struct tipc_net_work { + struct work_struct work; + struct net *net; + u32 addr; +}; + struct tipc_net { u8 node_id[NODE_ID_LEN]; u32 node_addr; @@ -143,6 +149,8 @@ struct tipc_net { /* TX crypto handler */ struct tipc_crypto *crypto_tx; #endif + /* Work item for net finalize */ + struct tipc_net_work final_work; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/net.c b/net/tipc/net.c index 85400e4242de..0bb2323201da 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -105,12 +105,6 @@ * - A local spin_lock protecting the queue of subscriber events. */ -struct tipc_net_work { - struct work_struct work; - struct net *net; - u32 addr; -}; - static void tipc_net_finalize(struct net *net, u32 addr); int tipc_net_init(struct net *net, u8 *node_id, u32 addr) @@ -142,25 +136,21 @@ static void tipc_net_finalize(struct net *net, u32 addr) TIPC_CLUSTER_SCOPE, 0, addr); } -static void tipc_net_finalize_work(struct work_struct *work) +void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net_work *fwork; fwork = container_of(work, struct tipc_net_work, work); tipc_net_finalize(fwork->net, fwork->addr); - kfree(fwork); } void tipc_sched_net_finalize(struct net *net, u32 addr) { - struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + struct tipc_net *tn = tipc_net(net); - if (!fwork) - return; - INIT_WORK(&fwork->work, tipc_net_finalize_work); - fwork->net = net; - fwork->addr = addr; - schedule_work(&fwork->work); + tn->final_work.net = net; + tn->final_work.addr = addr; + schedule_work(&tn->final_work.work); } void 
tipc_net_stop(struct net *net) diff --git a/net/tipc/net.h b/net/tipc/net.h index 6740d97c706e..d0c91d2df20a 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -42,6 +42,7 @@ extern const struct nla_policy tipc_nl_net_policy[]; int tipc_net_init(struct net *net, u8 *node_id, u32 addr); +void tipc_net_finalize_work(struct work_struct *work); void tipc_sched_net_finalize(struct net *net, u32 addr); void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); -- 2.25.1 [-- Attachment #2: 0001-tipc-fix-use-after-free-Read-in-tipc_bcast_get_mode.patch --] [-- Type: application/octet-stream, Size: 6020 bytes --] From be6fc632e788b2165988cebb977edef1c538ad64 Mon Sep 17 00:00:00 2001 From: Ying Xue <ying.xue@windriver.com> Date: Sun, 16 Aug 2020 21:29:57 +0800 Subject: [PATCH] tipc: fix use-after-free Read in tipc_bcast_get_mode syzbot found the following issue on: =========================================================== BUG: KASAN: use-after-free in tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 Read of size 1 at addr ffff88805e6b3571 by task kworker/0:6/3850 CPU: 0 PID: 3850 Comm: kworker/0:6 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x18f/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x436 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 tipc_node_broadcast+0x9e/0xcc0 net/tipc/node.c:1744 tipc_nametbl_publish+0x60b/0x970 net/tipc/name_table.c:752 tipc_net_finalize net/tipc/net.c:141 [inline] tipc_net_finalize+0x1fa/0x310 net/tipc/net.c:131 tipc_net_finalize_work+0x55/0x80 net/tipc/net.c:150 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 
kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Allocated by task 8062: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:494 kmem_cache_alloc_trace+0x14f/0x2d0 mm/slab.c:3551 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] tipc_bcast_init+0x21e/0x7b0 net/tipc/bcast.c:689 tipc_init_net+0x4f6/0x5c0 net/tipc/core.c:85 ops_init+0xaf/0x470 net/core/net_namespace.c:151 setup_net+0x2d8/0x850 net/core/net_namespace.c:341 copy_net_ns+0x2cf/0x5e0 net/core/net_namespace.c:482 create_new_namespaces+0x3f6/0xb10 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231 ksys_unshare+0x36c/0x9a0 kernel/fork.c:2979 __do_sys_unshare kernel/fork.c:3047 [inline] __se_sys_unshare kernel/fork.c:3045 [inline] __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3045 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 8843: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0xf5/0x140 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x103/0x2c0 mm/slab.c:3757 tipc_bcast_stop+0x1b0/0x2f0 net/tipc/bcast.c:721 tipc_exit_net+0x24/0x270 net/tipc/core.c:112 ops_exit_list+0xb0/0x160 net/core/net_namespace.c:186 cleanup_net+0x4ea/0xa00 net/core/net_namespace.c:603 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:291 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff88805e6b3500 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 113 bytes inside of 128-byte region [ffff88805e6b3500, ffff88805e6b3580) The buggy address belongs to the page: 
page:ffffea000179acc0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002375c48 ffff8880aa001550 ffff8880aa000700 raw: 0000000000000000 ffff88805e6b3000 0000000100000010 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88805e6b3400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88805e6b3480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > ffff88805e6b3500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88805e6b3580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88805e6b3600: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc ================================================================== Commit adba75be0d23 ("tipc: fix lockdep warning when reinitilaizing sockets") introduced an asynchronous method to finalize TIPC net in work context, however, it caused a race condition: even after "tn->bcbase" instance has been released in tipc_bcast_stop() when tipc namespace is destroyed through tipc_exit_net(), the instance may be accessed in tipc_bcast_get_mode() because tipc_bcast_get_mode() is asynchronously called by tipc_net_finalize_work() in work context. In order to eliminate the race condition, we ensure the finalizing tipc net work must be completed by calling cancel_work_sync() before tipc_bcast_stop(). 
Reported-by: syzbot+6ea1f7a8df64596ef4d7@syzkaller.appspotmail.com Fixes: adba75be0d23 ("tipc: fix lockdep warning when reinitilaizing sockets") Signed-off-by: Ying Xue <ying.xue@windriver.com> --- net/tipc/core.h | 3 +++ net/tipc/net.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/tipc/core.h b/net/tipc/core.h index 631d83c9705f..64f0207d2943 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -139,6 +139,9 @@ struct tipc_net { /* Tracing of node internal messages */ struct packet_type loopback_pt; + /* Finalize net worker */ + struct work_struct *fwork; + #ifdef CONFIG_TIPC_CRYPTO /* TX crypto handler */ struct tipc_crypto *crypto_tx; diff --git a/net/tipc/net.c b/net/tipc/net.c index 85400e4242de..925474f625e0 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -160,6 +160,7 @@ void tipc_sched_net_finalize(struct net *net, u32 addr) INIT_WORK(&fwork->work, tipc_net_finalize_work); fwork->net = net; fwork->addr = addr; + net->fwork = &fwork->work; schedule_work(&fwork->work); } @@ -173,6 +174,8 @@ void tipc_net_stop(struct net *net) tipc_node_stop(net); rtnl_unlock(); + cancel_work_sync(net->fwork); + pr_info("Left network mode\n"); } -- 2.26.1 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-09-07 15:23 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2020-09-05 4:45 [net-next] tipc: fix a deadlock when flushing scheduled work Hoang Huu Le 2020-09-06 17:56 ` Jakub Kicinski 2020-09-07 8:22 ` Hoang Huu Le 2020-09-07 6:17 ` [net-next v2] " Hoang Huu Le 2020-09-07 15:05 ` Xue, Ying
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).