LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 21/28] netvm: skb processing
Date: Wed, 20 Feb 2008 15:46:31 +0100 [thread overview]
Message-ID: <20080220150307.968389000@chello.nl> (raw)
In-Reply-To: <20080220144610.548202000@chello.nl>
[-- Attachment #1: netvm.patch --]
[-- Type: text/plain, Size: 4836 bytes --]
In order to make sure emergency packets receive all memory needed to proceed,
ensure processing of emergency SKBs happens under PF_MEMALLOC.
Use the (new) sk_backlog_rcv() wrapper to ensure this for backlog processing.
Skip taps, since those are user-space again.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/net/sock.h | 5 ++++
net/core/dev.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++------
net/core/sock.c | 18 ++++++++++++++++
3 files changed, 76 insertions(+), 6 deletions(-)
Index: linux-2.6/net/core/dev.c
===================================================================
--- linux-2.6.orig/net/core/dev.c
+++ linux-2.6/net/core/dev.c
@@ -2004,6 +2004,30 @@ out:
}
#endif
+/*
+ * Filter the protocols for which the reserves are adequate.
+ *
+ * Before adding a protocol make sure that it is either covered by the existing
+ * reserves, or add reserves covering the memory need of the new protocol's
+ * packet processing.
+ */
+static int skb_emergency_protocol(struct sk_buff *skb)
+{
+ if (skb_emergency(skb))
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_ARP):
+ case __constant_htons(ETH_P_IP):
+ case __constant_htons(ETH_P_IPV6):
+ case __constant_htons(ETH_P_8021Q):
+ break;
+
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
@@ -2025,10 +2049,23 @@ int netif_receive_skb(struct sk_buff *sk
struct net_device *orig_dev;
int ret = NET_RX_DROP;
__be16 type;
+ unsigned long pflags = current->flags;
+
+ /* Emergency skb are special, they should
+ * - be delivered to SOCK_MEMALLOC sockets only
+ * - stay away from userspace
+ * - have bounded memory usage
+ *
+ * Use PF_MEMALLOC as a poor man's memory pool - the grouping kind.
+ * This saves us from propagating the allocation context down to all
+ * allocation sites.
+ */
+ if (skb_emergency(skb))
+ current->flags |= PF_MEMALLOC;
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ goto out;
if (!skb->tstamp.tv64)
net_timestamp(skb);
@@ -2039,7 +2076,7 @@ int netif_receive_skb(struct sk_buff *sk
orig_dev = skb_bond(skb);
if (!orig_dev)
- return NET_RX_DROP;
+ goto out;
__get_cpu_var(netdev_rx_stat).total++;
@@ -2058,6 +2095,9 @@ int netif_receive_skb(struct sk_buff *sk
}
#endif
+ if (skb_emergency(skb))
+ goto skip_taps;
+
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
@@ -2066,19 +2106,23 @@ int netif_receive_skb(struct sk_buff *sk
}
}
+skip_taps:
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto out;
+ goto unlock;
ncls:
#endif
+ if (!skb_emergency_protocol(skb))
+ goto drop;
+
skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto out;
+ goto unlock;
skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto out;
+ goto unlock;
type = skb->protocol;
list_for_each_entry_rcu(ptype,
@@ -2094,6 +2138,7 @@ ncls:
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
+drop:
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
@@ -2101,8 +2146,10 @@ ncls:
ret = NET_RX_DROP;
}
-out:
+unlock:
rcu_read_unlock();
+out:
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return ret;
}
Index: linux-2.6/include/net/sock.h
===================================================================
--- linux-2.6.orig/include/net/sock.h
+++ linux-2.6/include/net/sock.h
@@ -512,8 +512,13 @@ static inline void sk_add_backlog(struct
skb->next = NULL;
}
+extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+
static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
+ if (skb_emergency(skb))
+ return __sk_backlog_rcv(sk, skb);
+
return sk->sk_backlog_rcv(sk, skb);
}
Index: linux-2.6/net/core/sock.c
===================================================================
--- linux-2.6.orig/net/core/sock.c
+++ linux-2.6/net/core/sock.c
@@ -319,6 +319,24 @@ int sk_clear_memalloc(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+#ifdef CONFIG_NETVM
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+ unsigned long pflags = current->flags;
+
+ /* these should have been dropped before queueing */
+ BUG_ON(!sk_has_memalloc(sk));
+
+ current->flags |= PF_MEMALLOC;
+ ret = sk->sk_backlog_rcv(sk, skb);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+ return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
+#endif
+
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
struct timeval tv;
--
next prev parent reply other threads:[~2008-02-20 15:27 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-20 14:46 [PATCH 00/28] Swap over NFS -v16 Peter Zijlstra
2008-02-20 14:46 ` [PATCH 01/28] mm: gfp_to_alloc_flags() Peter Zijlstra
2008-02-20 14:46 ` [PATCH 02/28] mm: tag reseve pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 03/28] mm: slb: add knowledge of reserve pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 04/28] mm: kmem_estimate_pages() Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 05/28] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 06/28] mm: serialize access to min_free_kbytes Peter Zijlstra
2008-02-20 14:46 ` [PATCH 07/28] mm: emergency pool Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 08/28] mm: system wide ALLOC_NO_WATERMARK Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 09/28] mm: __GFP_MEMALLOC Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 10/28] mm: memory reserve management Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 11/28] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2008-02-20 14:46 ` [PATCH 12/28] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2008-02-20 14:46 ` [PATCH 13/28] net: packet split receive api Peter Zijlstra
2008-02-20 14:46 ` [PATCH 14/28] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2008-02-20 14:46 ` [PATCH 15/28] netvm: network reserve infrastructure Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-24 6:52 ` Mike Snitzer
2008-02-20 14:46 ` [PATCH 16/28] netvm: INET reserves Peter Zijlstra
2008-02-20 14:46 ` [PATCH 17/28] netvm: hook skb allocation to reserves Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 18/28] netvm: filter emergency skbs Peter Zijlstra
2008-02-20 14:46 ` [PATCH 19/28] netvm: prevent a stream specific deadlock Peter Zijlstra
2008-02-20 14:46 ` [PATCH 20/28] netfilter: NF_QUEUE vs emergency skbs Peter Zijlstra
2008-02-20 14:46 ` Peter Zijlstra [this message]
2008-02-20 14:46 ` [PATCH 22/28] mm: add support for non block device backed swap files Peter Zijlstra
2008-02-20 16:30 ` Randy Dunlap
2008-02-20 16:46 ` Peter Zijlstra
2008-02-26 12:45 ` Miklos Szeredi
2008-02-26 12:58 ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 23/28] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 24/28] nfs: remove mempools Peter Zijlstra
2008-02-20 14:46 ` [PATCH 25/28] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 26/28] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2008-02-20 14:46 ` [PATCH 27/28] nfs: enable swap on NFS Peter Zijlstra
2008-02-20 14:46 ` [PATCH 28/28] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2008-02-23 8:06 ` [PATCH 00/28] Swap over NFS -v16 Andrew Morton
2008-02-26 6:03 ` Neil Brown
2008-02-26 10:50 ` Peter Zijlstra
2008-02-26 12:00 ` Peter Zijlstra
2008-02-26 15:29 ` Miklos Szeredi
2008-02-26 15:41 ` Peter Zijlstra
2008-02-26 15:43 ` Peter Zijlstra
2008-02-26 15:47 ` Miklos Szeredi
2008-02-26 17:56 ` Andrew Morton
2008-02-27 5:51 ` Neil Brown
2008-02-27 7:58 ` Peter Zijlstra
2008-02-27 8:05 ` Pekka Enberg
2008-02-27 8:14 ` Peter Zijlstra
2008-02-27 8:33 ` Peter Zijlstra
2008-02-27 8:43 ` Pekka J Enberg
2008-02-29 11:51 ` Peter Zijlstra
2008-02-29 11:58 ` Pekka Enberg
2008-02-29 12:18 ` Peter Zijlstra
2008-02-29 12:29 ` Pekka Enberg
2008-02-29 1:29 ` Neil Brown
2008-02-29 10:21 ` Peter Zijlstra
2008-03-02 22:18 ` Neil Brown
2008-03-02 23:33 ` Peter Zijlstra
2008-03-03 23:41 ` Neil Brown
2008-03-04 10:28 ` Peter Zijlstra
[not found] ` <1837 <1204626509.6241.39.camel@lappy>
2008-03-07 3:33 ` Neil Brown
2008-03-07 11:17 ` Peter Zijlstra
2008-03-07 11:55 ` Peter Zijlstra
2008-03-10 5:15 ` Neil Brown
2008-03-10 9:17 ` Peter Zijlstra
2008-03-14 5:22 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080220150307.968389000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=netdev@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=trond.myklebust@fys.uio.no \
--subject='Re: [PATCH 21/28] netvm: skb processing' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).