Netdev Archive on lore.kernel.org
* [PATCH net-next] net: mvneta: try to use in-irq pp cache in mvneta_txq_bufs_free
From: Lorenzo Bianconi @ 2020-09-25 10:01 UTC
To: netdev; +Cc: davem, kuba, lorenzo.bianconi, brouer, echaudro, thomas.petazzoni
Try to recycle the xdp tx buffer into the in-irq page_pool cache if
mvneta_txq_bufs_free is executed in the NAPI context.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 14df3aec285d..646fbf4ed638 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1831,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
/* Free tx queue skbuffs */
static void mvneta_txq_bufs_free(struct mvneta_port *pp,
struct mvneta_tx_queue *txq, int num,
- struct netdev_queue *nq)
+ struct netdev_queue *nq, bool napi)
{
unsigned int bytes_compl = 0, pkts_compl = 0;
int i;
@@ -1854,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
dev_kfree_skb_any(buf->skb);
} else if (buf->type == MVNETA_TYPE_XDP_TX ||
buf->type == MVNETA_TYPE_XDP_NDO) {
- xdp_return_frame(buf->xdpf);
+ if (napi)
+ xdp_return_frame_rx_napi(buf->xdpf);
+ else
+ xdp_return_frame(buf->xdpf);
}
}
@@ -1872,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
if (!tx_done)
return;
- mvneta_txq_bufs_free(pp, txq, tx_done, nq);
+ mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
txq->count -= tx_done;
@@ -2859,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
int tx_done = txq->count;
- mvneta_txq_bufs_free(pp, txq, tx_done, nq);
+ mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
/* reset txq */
txq->count = 0;
--
2.26.2
* Re: [PATCH net-next] net: mvneta: try to use in-irq pp cache in mvneta_txq_bufs_free
From: Jesper Dangaard Brouer @ 2020-09-25 10:52 UTC
To: Lorenzo Bianconi
Cc: netdev, davem, kuba, lorenzo.bianconi, echaudro,
thomas.petazzoni, brouer
On Fri, 25 Sep 2020 12:01:32 +0200
Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> Try to recycle the xdp tx buffer into the in-irq page_pool cache if
> mvneta_txq_bufs_free is executed in the NAPI context.
NACK - I don't think this is safe. That is also why I gave the
function the rx_napi postfix. The page_pool->alloc.cache is associated
with the driver's RX-queue. The xdp_frames that get freed could be
coming from a remote driver that uses page_pool. This remote driver's
RX-queue processing can run concurrently on a different CPU than this
driver's TXQ-cleanup.
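A simplified sketch of the two return paths (conceptual only, not
verbatim kernel code; frame_owner_pool() is a hypothetical helper)
illustrates why the rx_napi variant is only safe from the context of
the RX queue that owns the pool:

    /* Conceptual illustration of the two page_pool recycle paths. */
    static void sketch_return(struct xdp_frame *xdpf, bool napi_direct)
    {
            struct page *page = virt_to_head_page(xdpf->data);
            struct page_pool *pool = frame_owner_pool(xdpf); /* hypothetical */

            if (napi_direct) {
                    /* Lockless per-pool cache: only safe on the CPU running
                     * the RX queue that owns this pool. A frame freed during
                     * TXQ-cleanup may belong to another device's pool whose
                     * RX queue is running concurrently on another CPU.
                     */
                    pool->alloc.cache[pool->alloc.count++] = page;
            } else {
                    /* Ring-based path: safe from any context. */
                    ptr_ring_produce(&pool->ring, page);
            }
    }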
If you want to speed this up, I instead suggest that you add an
xdp_return_frame_bulk API.
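For illustration, a rough sketch of what such a bulk-return API could
look like (the names and queue size below are assumptions for the
sketch, not an existing kernel interface):

    #define XDP_FRAME_BULK_SIZE     16

    struct xdp_frame_bulk {
            int count;
            void *xa;                       /* mem allocator shared by queued frames */
            void *q[XDP_FRAME_BULK_SIZE];   /* pages waiting to be released */
    };

    /* Queue xdpf for release; flush when the batch is full or when the
     * frame belongs to a different allocator than the queued ones.
     */
    void xdp_return_frame_bulk(struct xdp_frame *xdpf,
                               struct xdp_frame_bulk *bq);

    /* Release everything currently queued in bq in one go. */
    void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);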
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
> 1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
> index 14df3aec285d..646fbf4ed638 100644
> --- a/drivers/net/ethernet/marvell/mvneta.c
> +++ b/drivers/net/ethernet/marvell/mvneta.c
> @@ -1831,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
> /* Free tx queue skbuffs */
> static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> struct mvneta_tx_queue *txq, int num,
> - struct netdev_queue *nq)
> + struct netdev_queue *nq, bool napi)
> {
> unsigned int bytes_compl = 0, pkts_compl = 0;
> int i;
> @@ -1854,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> dev_kfree_skb_any(buf->skb);
> } else if (buf->type == MVNETA_TYPE_XDP_TX ||
> buf->type == MVNETA_TYPE_XDP_NDO) {
> - xdp_return_frame(buf->xdpf);
> + if (napi)
> + xdp_return_frame_rx_napi(buf->xdpf);
> + else
> + xdp_return_frame(buf->xdpf);
> }
> }
>
> @@ -1872,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
> if (!tx_done)
> return;
>
> - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
>
> txq->count -= tx_done;
>
> @@ -2859,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
> struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
> int tx_done = txq->count;
>
> - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
>
> /* reset txq */
> txq->count = 0;
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
* Re: [PATCH net-next] net: mvneta: try to use in-irq pp cache in mvneta_txq_bufs_free
From: Lorenzo Bianconi @ 2020-09-25 11:29 UTC
To: Jesper Dangaard Brouer
Cc: Lorenzo Bianconi, Network Development, David S. Miller,
Jakub Kicinski, Eelco Chaudron, thomas.petazzoni
>
> On Fri, 25 Sep 2020 12:01:32 +0200
> Lorenzo Bianconi <lorenzo@kernel.org> wrote:
>
> > Try to recycle the xdp tx buffer into the in-irq page_pool cache if
> > mvneta_txq_bufs_free is executed in the NAPI context.
>
> NACK - I don't think this is safe. That is also why I gave the
> function the rx_napi postfix. The page_pool->alloc.cache is associated
> with the driver's RX-queue. The xdp_frames that get freed could be
> coming from a remote driver that uses page_pool. This remote driver's
> RX-queue processing can run concurrently on a different CPU than this
> driver's TXQ-cleanup.
ack, right. What about if we do it just for the XDP_TX use case? Like:
if (napi && buf->type == MVNETA_TYPE_XDP_TX)
xdp_return_frame_rx_napi(buf->xdpf);
else
xdp_return_frame(buf->xdpf);
In this way we are sure the packet is coming from the local page_pool.
>
> If you want to speed this up, I instead suggest that you add an
> xdp_return_frame_bulk API.
I will look at it
Regards,
Lorenzo
>
>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> > drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
> > 1 file changed, 7 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
> > index 14df3aec285d..646fbf4ed638 100644
> > --- a/drivers/net/ethernet/marvell/mvneta.c
> > +++ b/drivers/net/ethernet/marvell/mvneta.c
> > @@ -1831,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
> > /* Free tx queue skbuffs */
> > static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > struct mvneta_tx_queue *txq, int num,
> > - struct netdev_queue *nq)
> > + struct netdev_queue *nq, bool napi)
> > {
> > unsigned int bytes_compl = 0, pkts_compl = 0;
> > int i;
> > @@ -1854,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > dev_kfree_skb_any(buf->skb);
> > } else if (buf->type == MVNETA_TYPE_XDP_TX ||
> > buf->type == MVNETA_TYPE_XDP_NDO) {
> > - xdp_return_frame(buf->xdpf);
> > + if (napi)
> > + xdp_return_frame_rx_napi(buf->xdpf);
> > + else
> > + xdp_return_frame(buf->xdpf);
> > }
> > }
> >
> > @@ -1872,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
> > if (!tx_done)
> > return;
> >
> > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
> >
> > txq->count -= tx_done;
> >
> > @@ -2859,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
> > struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
> > int tx_done = txq->count;
> >
> > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
> >
> > /* reset txq */
> > txq->count = 0;
>
>
>
> --
> Best regards,
> Jesper Dangaard Brouer
> MSc.CS, Principal Kernel Engineer at Red Hat
> LinkedIn: http://www.linkedin.com/in/brouer
>
* Re: [PATCH net-next] net: mvneta: try to use in-irq pp cache in mvneta_txq_bufs_free
From: Jesper Dangaard Brouer @ 2020-09-25 12:09 UTC
To: Lorenzo Bianconi
Cc: Lorenzo Bianconi, Network Development, David S. Miller,
Jakub Kicinski, Eelco Chaudron, thomas.petazzoni, brouer
On Fri, 25 Sep 2020 13:29:00 +0200
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
> >
> > On Fri, 25 Sep 2020 12:01:32 +0200
> > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> >
> > > Try to recycle the xdp tx buffer into the in-irq page_pool cache if
> > > mvneta_txq_bufs_free is executed in the NAPI context.
> >
> > NACK - I don't think this is safe. That is also why I gave the
> > function the rx_napi postfix. The page_pool->alloc.cache is associated
> > with the driver's RX-queue. The xdp_frames that get freed could be
> > coming from a remote driver that uses page_pool. This remote driver's
> > RX-queue processing can run concurrently on a different CPU than this
> > driver's TXQ-cleanup.
>
> ack, right. What about if we do it just for the XDP_TX use case? Like:
>
> if (napi && buf->type == MVNETA_TYPE_XDP_TX)
> xdp_return_frame_rx_napi(buf->xdpf);
> else
> xdp_return_frame(buf->xdpf);
>
> In this way we are sure the packet is coming from the local page_pool.
Yes, in that case XDP_TX should be safe.
> >
> > If you want to speed this up, I instead suggest that you add an
> > xdp_return_frame_bulk API.
>
> I will look at it
Great!
Notice that bulk return should be easy/obvious in most drivers, as they
(like mvneta in mvneta_txq_bufs_free()) have a loop that processes
several TXQ completions.
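As a sketch, reusing the hypothetical bulk API above, the TXQ-cleanup
loop could batch the returns roughly like this:

    struct xdp_frame_bulk bq;

    bq.xa = NULL;
    bq.count = 0;

    for (i = 0; i < num; i++) {
            struct mvneta_tx_buf *buf = &txq->buf[txq->txq_get_index];

            mvneta_txq_inc_get(txq);
            /* ... skb case handled as before ... */
            if (buf->type == MVNETA_TYPE_XDP_TX ||
                buf->type == MVNETA_TYPE_XDP_NDO)
                    xdp_return_frame_bulk(buf->xdpf, &bq);
    }

    /* One flush per completion batch instead of one return per frame. */
    xdp_flush_frame_bulk(&bq);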
I did a quick test on mlx5 with xdp_redirect_map and the perf report
shows __xdp_return calls as the top #1 overhead.
# Overhead CPU Symbol
# ........ ... ....................................
#
8.46% 003 [k] __xdp_return
6.41% 003 [k] dma_direct_map_page
4.65% 003 [k] bpf_xdp_redirect_map
4.58% 003 [k] dma_direct_unmap_page
4.04% 003 [k] xdp_do_redirect
3.53% 003 [k] __page_pool_put_page
3.27% 003 [k] dma_direct_sync_single_for_cpu
2.63% 003 [k] dev_map_enqueue
2.28% 003 [k] page_pool_refill_alloc_cache
1.69% 003 [k] bq_enqueue.isra.0
1.15% 003 [k] _raw_spin_lock
0.92% 003 [k] xdp_return_frame
Thus, there will be a benefit from implementing a bulk return, also
for your XDP_TX case, as the overhead in __xdp_return also exists for
the rx_napi variant.
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > ---
> > > drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
> > > 1 file changed, 7 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
> > > index 14df3aec285d..646fbf4ed638 100644
> > > --- a/drivers/net/ethernet/marvell/mvneta.c
> > > +++ b/drivers/net/ethernet/marvell/mvneta.c
> > > @@ -1831,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
> > > /* Free tx queue skbuffs */
> > > static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > > struct mvneta_tx_queue *txq, int num,
> > > - struct netdev_queue *nq)
> > > + struct netdev_queue *nq, bool napi)
> > > {
> > > unsigned int bytes_compl = 0, pkts_compl = 0;
> > > int i;
> > > @@ -1854,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > > dev_kfree_skb_any(buf->skb);
> > > } else if (buf->type == MVNETA_TYPE_XDP_TX ||
> > > buf->type == MVNETA_TYPE_XDP_NDO) {
> > > - xdp_return_frame(buf->xdpf);
> > > + if (napi)
> > > + xdp_return_frame_rx_napi(buf->xdpf);
> > > + else
> > > + xdp_return_frame(buf->xdpf);
> > > }
> > > }
> > >
> > > @@ -1872,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
> > > if (!tx_done)
> > > return;
> > >
> > > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
> > >
> > > txq->count -= tx_done;
> > >
> > > @@ -2859,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
> > > struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
> > > int tx_done = txq->count;
> > >
> > > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
> > >
> > > /* reset txq */
> > > txq->count = 0;
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
* Re: [PATCH net-next] net: mvneta: try to use in-irq pp cache in mvneta_txq_bufs_free
From: Lorenzo Bianconi @ 2020-09-25 13:05 UTC
To: Jesper Dangaard Brouer
Cc: Lorenzo Bianconi, Network Development, David S. Miller,
Jakub Kicinski, Eelco Chaudron, thomas.petazzoni
> On Fri, 25 Sep 2020 13:29:00 +0200
> Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
>
> > >
> > > On Fri, 25 Sep 2020 12:01:32 +0200
> > > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> > >
> > > > Try to recycle the xdp tx buffer into the in-irq page_pool cache if
> > > > mvneta_txq_bufs_free is executed in the NAPI context.
> > >
> > > NACK - I don't think this is safe. That is also why I gave the
> > > function the rx_napi postfix. The page_pool->alloc.cache is associated
> > > with the driver's RX-queue. The xdp_frames that get freed could be
> > > coming from a remote driver that uses page_pool. This remote driver's
> > > RX-queue processing can run concurrently on a different CPU than this
> > > driver's TXQ-cleanup.
> >
> > ack, right. What about if we do it just for the XDP_TX use case? Like:
> >
> > if (napi && buf->type == MVNETA_TYPE_XDP_TX)
> > xdp_return_frame_rx_napi(buf->xdpf);
> > else
> > xdp_return_frame(buf->xdpf);
> >
> > In this way we are sure the packet is coming from the local page_pool.
>
> Yes, in that case XDP_TX should be safe.
>
> > >
> > > If you want to speed this up, I instead suggest that you add an
> > > xdp_return_frame_bulk API.
> >
> > I will look at it
>
> Great!
>
> Notice that bulk return should be easy/obvious in most drivers, as they
> (like mvneta in mvneta_txq_bufs_free()) have a loop that processes
> several TXQ completions.
>
> I did a quick test on mlx5 with xdp_redirect_map and the perf report
> shows __xdp_return calls as the top #1 overhead.
>
> # Overhead CPU Symbol
> # ........ ... ....................................
> #
> 8.46% 003 [k] __xdp_return
> 6.41% 003 [k] dma_direct_map_page
> 4.65% 003 [k] bpf_xdp_redirect_map
> 4.58% 003 [k] dma_direct_unmap_page
> 4.04% 003 [k] xdp_do_redirect
> 3.53% 003 [k] __page_pool_put_page
> 3.27% 003 [k] dma_direct_sync_single_for_cpu
> 2.63% 003 [k] dev_map_enqueue
> 2.28% 003 [k] page_pool_refill_alloc_cache
> 1.69% 003 [k] bq_enqueue.isra.0
> 1.15% 003 [k] _raw_spin_lock
> 0.92% 003 [k] xdp_return_frame
>
> Thus, there will be a benefit from implementing a bulk return, also
> for your XDP_TX case, as the overhead in __xdp_return also exists for
> the rx_napi variant.
ack, I will post a v2 limiting xdp_return_frame_rx_napi to just the
XDP_TX use-case and then I will look at implementing an xdp bulk return.
Regards,
Lorenzo
>
>
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > ---
> > > > drivers/net/ethernet/marvell/mvneta.c | 11 +++++++----
> > > > 1 file changed, 7 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
> > > > index 14df3aec285d..646fbf4ed638 100644
> > > > --- a/drivers/net/ethernet/marvell/mvneta.c
> > > > +++ b/drivers/net/ethernet/marvell/mvneta.c
> > > > @@ -1831,7 +1831,7 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
> > > > /* Free tx queue skbuffs */
> > > > static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > > > struct mvneta_tx_queue *txq, int num,
> > > > - struct netdev_queue *nq)
> > > > + struct netdev_queue *nq, bool napi)
> > > > {
> > > > unsigned int bytes_compl = 0, pkts_compl = 0;
> > > > int i;
> > > > @@ -1854,7 +1854,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
> > > > dev_kfree_skb_any(buf->skb);
> > > > } else if (buf->type == MVNETA_TYPE_XDP_TX ||
> > > > buf->type == MVNETA_TYPE_XDP_NDO) {
> > > > - xdp_return_frame(buf->xdpf);
> > > > + if (napi)
> > > > + xdp_return_frame_rx_napi(buf->xdpf);
> > > > + else
> > > > + xdp_return_frame(buf->xdpf);
> > > > }
> > > > }
> > > >
> > > > @@ -1872,7 +1875,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
> > > > if (!tx_done)
> > > > return;
> > > >
> > > > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > > > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, true);
> > > >
> > > > txq->count -= tx_done;
> > > >
> > > > @@ -2859,7 +2862,7 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
> > > > struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
> > > > int tx_done = txq->count;
> > > >
> > > > - mvneta_txq_bufs_free(pp, txq, tx_done, nq);
> > > > + mvneta_txq_bufs_free(pp, txq, tx_done, nq, false);
> > > >
> > > > /* reset txq */
> > > > txq->count = 0;
>
> --
> Best regards,
> Jesper Dangaard Brouer
> MSc.CS, Principal Kernel Engineer at Red Hat
> LinkedIn: http://www.linkedin.com/in/brouer
>