Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Karsten Graul <kgraul@linux.ibm.com>
To: Wen Gu <guwen@linux.alibaba.com>, davem@davemloft.net, kuba@kernel.org
Cc: linux-s390@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH net 1/3] net/smc: Resolve the race between link group access and termination
Date: Mon, 10 Jan 2022 13:25:49 +0100	[thread overview]
Message-ID: <3525a4cd-1bc7-1008-910b-fb89597cc10a@linux.ibm.com> (raw)
In-Reply-To: <1641806784-93141-2-git-send-email-guwen@linux.alibaba.com>

On 10/01/2022 10:26, Wen Gu wrote:
> We encountered some crashes caused by the race between the access
> and the termination of link groups.
> 
<snip>
> 
> diff --git a/net/smc/smc.h b/net/smc/smc.h
> index 1a4fc1c..3d0b8e3 100644
> --- a/net/smc/smc.h
> +++ b/net/smc/smc.h
> @@ -221,6 +221,7 @@ struct smc_connection {
>  						 */
>  	u64			peer_token;	/* SMC-D token of peer */
>  	u8			killed : 1;	/* abnormal termination */
> +	u8			freed : 1;	/* normal termiation */
>  	u8			out_of_sync : 1; /* out of sync with peer */
>  };
>  
> diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
> index cd3c3b8..26a113d 100644
> --- a/net/smc/smc_core.c
> +++ b/net/smc/smc_core.c
> @@ -186,6 +186,7 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
>  			conn->alert_token_local = 0;
>  	}
>  	smc_lgr_add_alert_token(conn);
> +	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
>  	conn->lgr->conns_num++;
>  	return 0;
>  }
> @@ -218,7 +219,6 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
>  		__smc_lgr_unregister_conn(conn);
>  	}
>  	write_unlock_bh(&lgr->conns_lock);
> -	conn->lgr = NULL;
>  }
>  
>  int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
> @@ -749,6 +749,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
>  	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
>  	lnk->link_id = smcr_next_link_id(lgr);
>  	lnk->lgr = lgr;
> +	smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
>  	lnk->link_idx = link_idx;
>  	smc_ibdev_cnt_inc(lnk);
>  	smcr_copy_dev_info_to_link(lnk);
> @@ -841,6 +842,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
>  	lgr->terminating = 0;
>  	lgr->freeing = 0;
>  	lgr->vlan_id = ini->vlan_id;
> +	refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
>  	mutex_init(&lgr->sndbufs_lock);
>  	mutex_init(&lgr->rmbs_lock);
>  	rwlock_init(&lgr->conns_lock);
> @@ -1120,8 +1122,22 @@ void smc_conn_free(struct smc_connection *conn)
>  {
>  	struct smc_link_group *lgr = conn->lgr;
>  
> -	if (!lgr)
> +	if (!lgr || conn->freed)
> +		/* The connection has never been registered in a
> +		 * link group, or has already been freed.
> +		 *
> +		 * Check to ensure that the refcnt of link group
> +		 * won't be put incorrectly.

I would delete the second sentence here, its obvious enough.

> +		 */
>  		return;
> +
> +	conn->freed = 1;
> +	if (!conn->alert_token_local)
> +		/* The connection was registered in a link group
> +		 * defore, but now it is unregistered from it.

'before' ... But would maybe the following be more exact:

'Connection already unregistered from link group.'


We still review the patches...

> +		 */
> +		goto lgr_put;
> +
>  	if (lgr->is_smcd) {
>  		if (!list_empty(&lgr->list))
>  			smc_ism_unset_conn(conn);
> @@ -1138,6 +1154,8 @@ void smc_conn_free(struct smc_connection *conn)
>  
>  	if (!lgr->conns_num)
>  		smc_lgr_schedule_free_work(lgr);
> +lgr_put:
> +	smc_lgr_put(lgr); /* lgr_hold in smc_lgr_register_conn() */
>  }
>  
>  /* unregister a link from a buf_desc */
> @@ -1209,6 +1227,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log)
>  	smc_ib_destroy_queue_pair(lnk);
>  	smc_ib_dealloc_protection_domain(lnk);
>  	smc_wr_free_link_mem(lnk);
> +	smc_lgr_put(lnk->lgr); /* lgr_hold in smcr_link_init() */
>  	smc_ibdev_cnt_dec(lnk);
>  	put_device(&lnk->smcibdev->ibdev->dev);
>  	smcibdev = lnk->smcibdev;
> @@ -1283,6 +1302,15 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
>  	__smc_lgr_free_bufs(lgr, true);
>  }
>  
> +/* won't be freed until no one accesses to lgr anymore */
> +static void __smc_lgr_free(struct smc_link_group *lgr)
> +{
> +	smc_lgr_free_bufs(lgr);
> +	if (!lgr->is_smcd)
> +		smc_wr_free_lgr_mem(lgr);
> +	kfree(lgr);
> +}
> +
>  /* remove a link group */
>  static void smc_lgr_free(struct smc_link_group *lgr)
>  {
> @@ -1298,7 +1326,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
>  		smc_llc_lgr_clear(lgr);
>  	}
>  
> -	smc_lgr_free_bufs(lgr);
>  	destroy_workqueue(lgr->tx_wq);
>  	if (lgr->is_smcd) {
>  		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
> @@ -1306,11 +1333,21 @@ static void smc_lgr_free(struct smc_link_group *lgr)
>  		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
>  			wake_up(&lgr->smcd->lgrs_deleted);
>  	} else {
> -		smc_wr_free_lgr_mem(lgr);
>  		if (!atomic_dec_return(&lgr_cnt))
>  			wake_up(&lgrs_deleted);
>  	}
> -	kfree(lgr);
> +	smc_lgr_put(lgr); /* theoretically last lgr_put */
> +}
> +
> +void smc_lgr_hold(struct smc_link_group *lgr)
> +{
> +	refcount_inc(&lgr->refcnt);
> +}
> +
> +void smc_lgr_put(struct smc_link_group *lgr)
> +{
> +	if (refcount_dec_and_test(&lgr->refcnt))
> +		__smc_lgr_free(lgr);
>  }
>  
>  static void smc_sk_wake_ups(struct smc_sock *smc)
> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> index 73d0c35..edbd401 100644
> --- a/net/smc/smc_core.h
> +++ b/net/smc/smc_core.h
> @@ -249,6 +249,7 @@ struct smc_link_group {
>  	u8			terminating : 1;/* lgr is terminating */
>  	u8			freeing : 1;	/* lgr is being freed */
>  
> +	refcount_t		refcnt;		/* lgr reference count */
>  	bool			is_smcd;	/* SMC-R or SMC-D */
>  	u8			smc_version;
>  	u8			negotiated_eid[SMC_MAX_EID_LEN];
> @@ -470,6 +471,8 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev,
>  
>  void smc_lgr_cleanup_early(struct smc_link_group *lgr);
>  void smc_lgr_terminate_sched(struct smc_link_group *lgr);
> +void smc_lgr_hold(struct smc_link_group *lgr);
> +void smc_lgr_put(struct smc_link_group *lgr);
>  void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
>  void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
>  void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,

-- 
Karsten

  reply	other threads:[~2022-01-10 12:26 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-10  9:26 [PATCH net 0/3] net/smc: Fixes for race in smc link group termination Wen Gu
2022-01-10  9:26 ` [PATCH net 1/3] net/smc: Resolve the race between link group access and termination Wen Gu
2022-01-10 12:25   ` Karsten Graul [this message]
2022-01-10 13:56     ` Wen Gu
2022-01-11  8:23   ` Karsten Graul
2022-01-11 15:36     ` Wen Gu
2022-01-11 15:46       ` Karsten Graul
2022-01-11 15:51         ` Wen Gu
2022-01-10  9:26 ` [PATCH net 2/3] net/smc: Introduce a new conn->lgr validity check helper Wen Gu
2022-01-10  9:26 ` [PATCH net 3/3] net/smc: Resolve the race between SMC-R link access and clear Wen Gu
2022-01-11  8:40   ` Karsten Graul
2022-01-11 15:49     ` Wen Gu
2022-01-11 16:02       ` Karsten Graul
2022-01-11 16:44         ` Wen Gu
2022-01-11 17:42           ` Karsten Graul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3525a4cd-1bc7-1008-910b-fb89597cc10a@linux.ibm.com \
    --to=kgraul@linux.ibm.com \
    --cc=davem@davemloft.net \
    --cc=guwen@linux.alibaba.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH net 1/3] net/smc: Resolve the race between link group access and termination' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).