Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Yunsheng Lin <linyunsheng@huawei.com>
To: <davem@davemloft.net>, <kuba@kernel.org>
Cc: <alexander.duyck@gmail.com>, <linux@armlinux.org.uk>,
	<mw@semihalf.com>, <linuxarm@openeuler.org>,
	<yisen.zhuang@huawei.com>, <salil.mehta@huawei.com>,
	<thomas.petazzoni@bootlin.com>, <hawk@kernel.org>,
	<ilias.apalodimas@linaro.org>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <john.fastabend@gmail.com>,
	<akpm@linux-foundation.org>, <peterz@infradead.org>,
	<will@kernel.org>, <willy@infradead.org>, <vbabka@suse.cz>,
	<fenghua.yu@intel.com>, <guro@fb.com>, <peterx@redhat.com>,
	<feng.tang@intel.com>, <jgg@ziepe.ca>, <mcroce@microsoft.com>,
	<hughd@google.com>, <jonathan.lemon@gmail.com>, <alobakin@pm.me>,
	<willemb@google.com>, <wenxu@ucloud.cn>,
	<cong.wang@bytedance.com>, <haokexin@gmail.com>,
	<nogikh@google.com>, <elver@google.com>, <yhs@fb.com>,
	<kpsingh@kernel.org>, <andrii@kernel.org>, <kafai@fb.com>,
	<songliubraving@fb.com>, <netdev@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <bpf@vger.kernel.org>
Subject: Re: [Linuxarm] [PATCH rfc v3 3/4] page_pool: add page recycling support based on elevated refcnt
Date: Mon, 12 Jul 2021 17:28:32 +0800	[thread overview]
Message-ID: <7342ad1a-f272-f599-2ce4-e8019acbcbcb@huawei.com> (raw)
In-Reply-To: <1626081581-54524-5-git-send-email-linyunsheng@huawei.com>

Please ignore this one, the title name has been changed to:
"page_pool: add frag page recycling support in page pool".

On 2021/7/12 17:19, Yunsheng Lin wrote:
> Currently page pool only supports page recycling when
> there is only one user of the page, and the split-page
> reusing implemented in most drivers can not use the
> page pool, as the ping-pong way of reusing requires the
> elevated refcnt support.
> 
> Those reusing or recycling schemes have the below limitations:
> 1. page from page pool can only be used by one user in order
>    for the page recycling to happen.
> 2. Ping-pong way of reusing in most drivers does not support
>    multi desc using different parts of the same page in order
>    to save memory.
> 
> So add elevated refcnt support in page pool in order to
> overcome the above limitations.
> 
> This is a preparation to support allocating page frag in page
> pool.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/net/page_pool.h |  22 ++++++++-
>  net/core/page_pool.c    | 121 ++++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 129 insertions(+), 14 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 84cd972..d9a736f 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -45,7 +45,10 @@
>  					* Please note DMA-sync-for-CPU is still
>  					* device driver responsibility
>  					*/
> -#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
> +#define PP_FLAG_PAGE_FRAG	BIT(2)	/* for page frag feature */
> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP |\
> +				 PP_FLAG_DMA_SYNC_DEV |\
> +				 PP_FLAG_PAGE_FRAG)
>  
>  /*
>   * Fast allocation side cache array/stack
> @@ -88,6 +91,9 @@ struct page_pool {
>  	unsigned long defer_warn;
>  
>  	u32 pages_state_hold_cnt;
> +	unsigned int frag_offset;
> +	int frag_bias;
> +	struct page *frag_page;
>  
>  	/*
>  	 * Data structure for allocation side
> @@ -137,6 +143,20 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
>  	return page_pool_alloc_pages(pool, gfp);
>  }
>  
> +struct page *page_pool_alloc_frag(struct page_pool *pool,
> +				  unsigned int *offset,
> +				  unsigned int size,
> +				  gfp_t gfp);
> +
> +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
> +						    unsigned int *offset,
> +						    unsigned int size)
> +{
> +	gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
> +
> +	return page_pool_alloc_frag(pool, offset, size, gfp);
> +}
> +
>  /* get the stored dma direction. A driver might decide to treat this locally and
>   * avoid the extra cache line from page_pool to determine the direction
>   */
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 1abefc6..9f518dc 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -24,6 +24,8 @@
>  #define DEFER_TIME (msecs_to_jiffies(1000))
>  #define DEFER_WARN_INTERVAL (60 * HZ)
>  
> +#define BIAS_MAX	(PAGE_SIZE - 1)
> +
>  static int page_pool_init(struct page_pool *pool,
>  			  const struct page_pool_params *params)
>  {
> @@ -304,6 +306,33 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
>  	return page;
>  }
>  
> +/* nr could be negative */
> +static int page_pool_atomic_add_bias(struct page *page, int nr)
> +{
> +	unsigned long *bias_ptr = page_pool_pagecnt_bias_ptr(page);
> +	unsigned long old_bias = READ_ONCE(*bias_ptr);
> +	unsigned long new_bias;
> +
> +	do {
> +		int bias = (int)(old_bias & ~PAGE_MASK);
> +
> +		/* Warn when page_pool_dev_alloc_pages() is called
> +		 * with PP_FLAG_PAGE_FRAG flag in driver.
> +		 */
> +		WARN_ON(!bias);
> +
> +		/* already the last user */
> +		if (!(bias + nr))
> +			return 0;
> +
> +		new_bias = old_bias + nr;
> +	} while (!try_cmpxchg(bias_ptr, &old_bias, new_bias));
> +
> +	WARN_ON((new_bias & PAGE_MASK) != (old_bias & PAGE_MASK));
> +
> +	return new_bias & ~PAGE_MASK;
> +}
> +
>  /* For using page_pool replace: alloc_pages() API calls, but provide
>   * synchronization guarantee for allocation side.
>   */
> @@ -425,6 +454,11 @@ static __always_inline struct page *
>  __page_pool_put_page(struct page_pool *pool, struct page *page,
>  		     unsigned int dma_sync_size, bool allow_direct)
>  {
> +	/* It is not the last user for the page frag case */
> +	if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
> +	    page_pool_atomic_add_bias(page, -1))
> +		return NULL;
> +
>  	/* This allocator is optimized for the XDP mode that uses
>  	 * one-frame-per-page, but have fallbacks that act like the
>  	 * regular page allocator APIs.
> @@ -448,19 +482,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
>  		/* Page found as candidate for recycling */
>  		return page;
>  	}
> -	/* Fallback/non-XDP mode: API user have elevated refcnt.
> -	 *
> -	 * Many drivers split up the page into fragments, and some
> -	 * want to keep doing this to save memory and do refcnt based
> -	 * recycling. Support this use case too, to ease drivers
> -	 * switching between XDP/non-XDP.
> -	 *
> -	 * In-case page_pool maintains the DMA mapping, API user must
> -	 * call page_pool_put_page once.  In this elevated refcnt
> -	 * case, the DMA is unmapped/released, as driver is likely
> -	 * doing refcnt based recycle tricks, meaning another process
> -	 * will be invoking put_page.
> -	 */
> +
>  	/* Do not replace this with page_pool_return_page() */
>  	page_pool_release_page(pool, page);
>  	put_page(page);
> @@ -517,6 +539,77 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
>  }
>  EXPORT_SYMBOL(page_pool_put_page_bulk);
>  
> +/* Use BIAS_RESERVE to avoid the frag page being recycled back
> + * to the page pool while the frag page is still in pool->frag_page
> + * waiting for more users. As the minimum align size for DMA seems
> + * to be 32, we support a max size of 2047 * 32 for a 4K page size.
> + */
> +#define BIAS_RESERVE		((int)(BIAS_MAX / 2 + 1))
> +#define BIAS_NEGATIVE_RESERVE	(0 - BIAS_RESERVE)
> +
> +static struct page *page_pool_drain_frag(struct page_pool *pool,
> +					 struct page *page)
> +{
> +	/* page pool is not the last user */
> +	if (page_pool_atomic_add_bias(page, pool->frag_bias +
> +				      BIAS_NEGATIVE_RESERVE))
> +		return NULL;
> +	else
> +		return page;
> +}
> +
> +static void page_pool_free_frag(struct page_pool *pool)
> +{
> +	struct page *page = pool->frag_page;
> +
> +	if (!page ||
> +	    page_pool_atomic_add_bias(page, pool->frag_bias +
> +				      BIAS_NEGATIVE_RESERVE))
> +		return;
> +
> +	page_pool_return_page(pool, page);
> +	pool->frag_page = NULL;
> +}
> +
> +struct page *page_pool_alloc_frag(struct page_pool *pool,
> +				  unsigned int *offset,
> +				  unsigned int size,
> +				  gfp_t gfp)
> +{
> +	unsigned int max_size = PAGE_SIZE << pool->p.order;
> +	unsigned int frag_offset = pool->frag_offset;
> +	struct page *frag_page = pool->frag_page;
> +
> +	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
> +		    size > max_size))
> +		return NULL;
> +
> +	size = ALIGN(size, dma_get_cache_alignment());
> +
> +	if (frag_page && frag_offset + size > max_size)
> +		frag_page = page_pool_drain_frag(pool, frag_page);
> +
> +	if (!frag_page) {
> +		frag_page = page_pool_alloc_pages(pool, gfp);
> +		if (unlikely(!frag_page)) {
> +			pool->frag_page = NULL;
> +			return NULL;
> +		}
> +
> +		pool->frag_page = frag_page;
> +		pool->frag_bias = 0;
> +		frag_offset = 0;
> +		page_pool_set_pagecnt_bias(frag_page, BIAS_RESERVE);
> +	}
> +
> +	pool->frag_bias++;
> +	*offset = frag_offset;
> +	pool->frag_offset = frag_offset + size;
> +
> +	return frag_page;
> +}
> +EXPORT_SYMBOL(page_pool_alloc_frag);
> +
>  static void page_pool_empty_ring(struct page_pool *pool)
>  {
>  	struct page *page;
> @@ -622,6 +715,8 @@ void page_pool_destroy(struct page_pool *pool)
>  	if (!page_pool_put(pool))
>  		return;
>  
> +	page_pool_free_frag(pool);
> +
>  	if (!page_pool_release(pool))
>  		return;
>  
> 

  reply	other threads:[~2021-07-12  9:28 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-12  9:19 [PATCH rfc v3 0/4] add frag page support in page pool Yunsheng Lin
2021-07-12  9:19 ` [PATCH rfc v3 1/4] page_pool: keep pp info as long as page pool owns the page Yunsheng Lin
2021-07-12  9:19 ` [PATCH rfc v3 2/4] page_pool: add interface for getting and setting pagecnt_bias Yunsheng Lin
2021-07-12  9:19 ` [PATCH rfc v3 3/4] page_pool: add frag page recycling support in page pool Yunsheng Lin
2021-07-12  9:19 ` [PATCH rfc v3 3/4] page_pool: add page recycling support based on elevated refcnt Yunsheng Lin
2021-07-12  9:28   ` Yunsheng Lin [this message]
2021-07-12  9:19 ` [PATCH rfc v3 4/4] net: hns3: support skb's frag page recycling based on page pool Yunsheng Lin
2021-07-12 10:39 ` [Linuxarm] [PATCH rfc v3 0/4] add frag page support in " Yunsheng Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7342ad1a-f272-f599-2ce4-e8019acbcbcb@huawei.com \
    --to=linyunsheng@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.duyck@gmail.com \
    --cc=alobakin@pm.me \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cong.wang@bytedance.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=elver@google.com \
    --cc=feng.tang@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=guro@fb.com \
    --cc=haokexin@gmail.com \
    --cc=hawk@kernel.org \
    --cc=hughd@google.com \
    --cc=ilias.apalodimas@linaro.org \
    --cc=jgg@ziepe.ca \
    --cc=john.fastabend@gmail.com \
    --cc=jonathan.lemon@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=linuxarm@openeuler.org \
    --cc=mcroce@microsoft.com \
    --cc=mw@semihalf.com \
    --cc=netdev@vger.kernel.org \
    --cc=nogikh@google.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=salil.mehta@huawei.com \
    --cc=songliubraving@fb.com \
    --cc=thomas.petazzoni@bootlin.com \
    --cc=vbabka@suse.cz \
    --cc=wenxu@ucloud.cn \
    --cc=will@kernel.org \
    --cc=willemb@google.com \
    --cc=willy@infradead.org \
    --cc=yhs@fb.com \
    --cc=yisen.zhuang@huawei.com \
    --subject='Re: [Linuxarm] [PATCH rfc v3 3/4] page_pool: add page recycling support based on elevated refcnt' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).