LKML Archive on lore.kernel.org
From: Yunsheng Lin <linyunsheng@huawei.com>
To: <davem@davemloft.net>, <kuba@kernel.org>
Cc: <alexander.duyck@gmail.com>, <linux@armlinux.org.uk>,
	<mw@semihalf.com>, <linuxarm@openeuler.org>,
	<yisen.zhuang@huawei.com>, <salil.mehta@huawei.com>,
	<thomas.petazzoni@bootlin.com>, <hawk@kernel.org>,
	<ilias.apalodimas@linaro.org>, <ast@kernel.org>,
	<daniel@iogearbox.net>, <john.fastabend@gmail.com>,
	<akpm@linux-foundation.org>, <peterz@infradead.org>,
	<will@kernel.org>, <willy@infradead.org>, <vbabka@suse.cz>,
	<fenghua.yu@intel.com>, <guro@fb.com>, <peterx@redhat.com>,
	<feng.tang@intel.com>, <jgg@ziepe.ca>, <mcroce@microsoft.com>,
	<hughd@google.com>, <jonathan.lemon@gmail.com>, <alobakin@pm.me>,
	<willemb@google.com>, <wenxu@ucloud.cn>,
	<cong.wang@bytedance.com>, <haokexin@gmail.com>,
	<nogikh@google.com>, <elver@google.com>, <yhs@fb.com>,
	<kpsingh@kernel.org>, <andrii@kernel.org>, <kafai@fb.com>,
	<songliubraving@fb.com>, <netdev@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <bpf@vger.kernel.org>,
	<chenhao288@hisilicon.com>, <edumazet@google.com>,
	<yoshfuji@linux-ipv6.org>, <dsahern@kernel.org>,
	<memxor@gmail.com>, <linux@rempel-privat.de>,
	<atenart@kernel.org>, <weiwan@google.com>, <ap420073@gmail.com>,
	<arnd@arndb.de>, <mathew.j.martineau@linux.intel.com>,
	<aahringo@redhat.com>, <ceggers@arri.de>, <yangbo.lu@nxp.com>,
	<fw@strlen.de>, <xiangxia.m.yue@gmail.com>,
	<linmiaohe@huawei.com>
Subject: [PATCH RFC 1/7] page_pool: refactor the page pool to support multi alloc context
Date: Wed, 18 Aug 2021 11:32:17 +0800	[thread overview]
Message-ID: <1629257542-36145-2-git-send-email-linyunsheng@huawei.com> (raw)
In-Reply-To: <1629257542-36145-1-git-send-email-linyunsheng@huawei.com>

Currently the page pool assumes that the caller MUST guarantee safe,
non-concurrent access to the alloc cache, e.g. the softirq context
used for rx.

This patch refactors the page pool to support multiple allocation
contexts, in order to enable tx recycling in the page pool
(tx means 'socket to netdev' here).
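
As an illustration of how a tx-side user might drive the refactored
interface, the sketch below embeds a caller-owned pp_alloc_cache and
hands it to __page_pool_alloc_pages(). It is purely hypothetical: the
context struct and helpers are not part of this patch, and the caller
is still assumed to serialize access to its own cache:

#include <net/page_pool.h>

/* Hypothetical tx-side context; not part of this patch. */
struct my_tx_ctx {
	struct page_pool *pool;		/* pool shared with the rx side */
	struct pp_alloc_cache alloc;	/* caller-owned cache, zeroed at init */
};

static struct page *my_tx_alloc_page(struct my_tx_ctx *ctx)
{
	/* The caller serializes use of ctx->alloc; the pool's ptr_ring
	 * is protected by its own consumer/producer locks.
	 */
	return __page_pool_alloc_pages(ctx->pool, &ctx->alloc, GFP_KERNEL);
}

static void my_tx_ctx_teardown(struct my_tx_ctx *ctx)
{
	/* Return pages still held in the caller-owned cache to the pool. */
	page_pool_empty_alloc_cache_once(ctx->pool, &ctx->alloc);
}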

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/net/page_pool.h | 10 ++++++
 net/core/page_pool.c    | 86 +++++++++++++++++++++++++++----------------------
 2 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index a408240..8d4ae4b 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -135,6 +135,9 @@ struct page_pool {
 };
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
+struct page *__page_pool_alloc_pages(struct page_pool *pool,
+				     struct pp_alloc_cache *alloc,
+				     gfp_t gfp);
 
 static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
 {
@@ -155,6 +158,13 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
 	return page_pool_alloc_frag(pool, offset, size, gfp);
 }
 
+struct page *page_pool_drain_frag(struct page_pool *pool, struct page *page,
+				  long drain_count);
+void page_pool_free_frag(struct page_pool *pool, struct page *page,
+			 long drain_count);
+void page_pool_empty_alloc_cache_once(struct page_pool *pool,
+				      struct pp_alloc_cache *alloc);
+
 /* get the stored dma direction. A driver might decide to treat this locally and
  * avoid the extra cache line from page_pool to determine the direction
  */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e140905..7194dcc 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -110,7 +110,8 @@ EXPORT_SYMBOL(page_pool_create);
 static void page_pool_return_page(struct page_pool *pool, struct page *page);
 
 noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
+						 struct pp_alloc_cache *alloc)
 {
 	struct ptr_ring *r = &pool->ring;
 	struct page *page;
@@ -140,7 +141,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 			break;
 
 		if (likely(page_to_nid(page) == pref_nid)) {
-			pool->alloc.cache[pool->alloc.count++] = page;
+			alloc->cache[alloc->count++] = page;
 		} else {
 			/* NUMA mismatch;
 			 * (1) release 1 page to page-allocator and
@@ -151,27 +152,28 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 			page = NULL;
 			break;
 		}
-	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
+	} while (alloc->count < PP_ALLOC_CACHE_REFILL);
 
 	/* Return last page */
-	if (likely(pool->alloc.count > 0))
-		page = pool->alloc.cache[--pool->alloc.count];
+	if (likely(alloc->count > 0))
+		page = alloc->cache[--alloc->count];
 
 	spin_unlock(&r->consumer_lock);
 	return page;
 }
 
 /* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static struct page *__page_pool_get_cached(struct page_pool *pool,
+					   struct pp_alloc_cache *alloc)
 {
 	struct page *page;
 
 	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
-	if (likely(pool->alloc.count)) {
+	if (likely(alloc->count)) {
 		/* Fast-path */
-		page = pool->alloc.cache[--pool->alloc.count];
+		page = alloc->cache[--alloc->count];
 	} else {
-		page = page_pool_refill_alloc_cache(pool);
+		page = page_pool_refill_alloc_cache(pool, alloc);
 	}
 
 	return page;
@@ -252,6 +254,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 /* slow path */
 noinline
 static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
+						 struct pp_alloc_cache *alloc,
 						 gfp_t gfp)
 {
 	const int bulk = PP_ALLOC_CACHE_REFILL;
@@ -265,13 +268,13 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 		return __page_pool_alloc_page_order(pool, gfp);
 
 	/* Unnecessary as alloc cache is empty, but guarantees zero count */
-	if (unlikely(pool->alloc.count > 0))
-		return pool->alloc.cache[--pool->alloc.count];
+	if (unlikely(alloc->count > 0))
+		return alloc->cache[--alloc->count];
 
 	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
-	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
+	memset(alloc->cache, 0, sizeof(void *) * bulk);
 
-	nr_pages = alloc_pages_bulk_array(gfp, bulk, pool->alloc.cache);
+	nr_pages = alloc_pages_bulk_array(gfp, bulk, alloc->cache);
 	if (unlikely(!nr_pages))
 		return NULL;
 
@@ -279,7 +282,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	 * page element have not been (possibly) DMA mapped.
 	 */
 	for (i = 0; i < nr_pages; i++) {
-		page = pool->alloc.cache[i];
+		page = alloc->cache[i];
 		if ((pp_flags & PP_FLAG_DMA_MAP) &&
 		    unlikely(!page_pool_dma_map(pool, page))) {
 			put_page(page);
@@ -287,7 +290,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 		}
 
 		page_pool_set_pp_info(pool, page);
-		pool->alloc.cache[pool->alloc.count++] = page;
+		alloc->cache[alloc->count++] = page;
 		/* Track how many pages are held 'in-flight' */
 		pool->pages_state_hold_cnt++;
 		trace_page_pool_state_hold(pool, page,
@@ -295,8 +298,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	}
 
 	/* Return last page */
-	if (likely(pool->alloc.count > 0))
-		page = pool->alloc.cache[--pool->alloc.count];
+	if (likely(alloc->count > 0))
+		page = alloc->cache[--alloc->count];
 	else
 		page = NULL;
 
@@ -307,19 +310,27 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 /* For using page_pool replace: alloc_pages() API calls, but provide
  * synchronization guarantee for allocation side.
  */
-struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+struct page *__page_pool_alloc_pages(struct page_pool *pool,
+				     struct pp_alloc_cache *alloc,
+				     gfp_t gfp)
 {
 	struct page *page;
 
 	/* Fast-path: Get a page from cache */
-	page = __page_pool_get_cached(pool);
+	page = __page_pool_get_cached(pool, alloc);
 	if (page)
 		return page;
 
 	/* Slow-path: cache empty, do real allocation */
-	page = __page_pool_alloc_pages_slow(pool, gfp);
+	page = __page_pool_alloc_pages_slow(pool, alloc, gfp);
 	return page;
 }
+EXPORT_SYMBOL(__page_pool_alloc_pages);
+
+struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
+{
+	return __page_pool_alloc_pages(pool, &pool->alloc, gfp);
+}
 EXPORT_SYMBOL(page_pool_alloc_pages);
 
 /* Calculate distance between two u32 values, valid if distance is below 2^(31)
@@ -522,11 +533,9 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 EXPORT_SYMBOL(page_pool_put_page_bulk);
 
-static struct page *page_pool_drain_frag(struct page_pool *pool,
-					 struct page *page)
+struct page *page_pool_drain_frag(struct page_pool *pool, struct page *page,
+				  long drain_count)
 {
-	long drain_count = BIAS_MAX - pool->frag_users;
-
 	/* Some user is still using the page frag */
 	if (likely(page_pool_atomic_sub_frag_count_return(page,
 							  drain_count)))
@@ -543,13 +552,9 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
 	return NULL;
 }
 
-static void page_pool_free_frag(struct page_pool *pool)
+void page_pool_free_frag(struct page_pool *pool, struct page *page,
+			 long drain_count)
 {
-	long drain_count = BIAS_MAX - pool->frag_users;
-	struct page *page = pool->frag_page;
-
-	pool->frag_page = NULL;
-
 	if (!page ||
 	    page_pool_atomic_sub_frag_count_return(page, drain_count))
 		return;
@@ -572,7 +577,8 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
 	*offset = pool->frag_offset;
 
 	if (page && *offset + size > max_size) {
-		page = page_pool_drain_frag(pool, page);
+		page = page_pool_drain_frag(pool, page,
+					    BIAS_MAX - pool->frag_users);
 		if (page)
 			goto frag_reset;
 	}
@@ -628,26 +634,26 @@ static void page_pool_free(struct page_pool *pool)
 	kfree(pool);
 }
 
-static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
+void page_pool_empty_alloc_cache_once(struct page_pool *pool,
+				      struct pp_alloc_cache *alloc)
 {
 	struct page *page;
 
-	if (pool->destroy_cnt)
-		return;
-
 	/* Empty alloc cache, assume caller made sure this is
 	 * no-longer in use, and page_pool_alloc_pages() cannot be
 	 * call concurrently.
 	 */
-	while (pool->alloc.count) {
-		page = pool->alloc.cache[--pool->alloc.count];
+	while (alloc->count) {
+		page = alloc->cache[--alloc->count];
 		page_pool_return_page(pool, page);
 	}
 }
 
 static void page_pool_scrub(struct page_pool *pool)
 {
-	page_pool_empty_alloc_cache_once(pool);
+	if (!pool->destroy_cnt)
+		page_pool_empty_alloc_cache_once(pool, &pool->alloc);
+
 	pool->destroy_cnt++;
 
 	/* No more consumers should exist, but producers could still
@@ -705,7 +711,9 @@ void page_pool_destroy(struct page_pool *pool)
 	if (!page_pool_put(pool))
 		return;
 
-	page_pool_free_frag(pool);
+	page_pool_free_frag(pool, pool->frag_page,
+			    BIAS_MAX - pool->frag_users);
+	pool->frag_page = NULL;
 
 	if (!page_pool_release(pool))
 		return;
-- 
2.7.4



Thread overview: 23+ messages
2021-08-18  3:32 [PATCH RFC 0/7] add socket to netdev page frag recycling support Yunsheng Lin
2021-08-18  3:32 ` Yunsheng Lin [this message]
2021-08-18  3:32 ` [PATCH RFC 2/7] skbuff: add interface to manipulate frag count for tx recycling Yunsheng Lin
2021-08-18  3:32 ` [PATCH RFC 3/7] net: add NAPI api to register and retrieve the page pool ptr Yunsheng Lin
2021-08-18  3:32 ` [PATCH RFC 4/7] net: pfrag_pool: add pfrag pool support based on page pool Yunsheng Lin
2021-08-18  3:32 ` [PATCH RFC 5/7] sock: support refilling pfrag from pfrag_pool Yunsheng Lin
2021-08-18  3:32 ` [PATCH RFC 6/7] net: hns3: support tx recycling in the hns3 driver Yunsheng Lin
2021-08-18  8:57 ` [PATCH RFC 0/7] add socket to netdev page frag recycling support Eric Dumazet
2021-08-18  9:36   ` Yunsheng Lin
2021-08-23  9:25     ` [Linuxarm] " Yunsheng Lin
2021-08-23 15:04       ` Eric Dumazet
2021-08-24  8:03         ` Yunsheng Lin
2021-08-25 16:29         ` David Ahern
2021-08-25 16:32           ` Eric Dumazet
2021-08-25 16:38             ` David Ahern
2021-08-25 17:24               ` Eric Dumazet
2021-08-26  4:05                 ` David Ahern
2021-08-18 22:05 ` David Ahern
2021-08-19  8:18   ` Yunsheng Lin
2021-08-20 14:35     ` David Ahern
2021-08-23  3:32       ` Yunsheng Lin
2021-08-24  3:34         ` David Ahern
2021-08-24  8:41           ` Yunsheng Lin
