LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 16/28] netvm: INET reserves.
Date: Wed, 20 Feb 2008 15:46:26 +0100 [thread overview]
Message-ID: <20080220150307.332560000@chello.nl> (raw)
In-Reply-To: <20080220144610.548202000@chello.nl>
[-- Attachment #1: netvm-reserve-inet.patch --]
[-- Type: text/plain, Size: 12009 bytes --]
Add reserves for INET.
The two big users seem to be the route cache and ip-fragment cache.
Reserve the route cache under the generic RX reserve; its usage is bounded by
the high reclaim watermark, and thus does not need further accounting.
Reserve the ip-fragment caches under the SKB data reserve; these add to the
SKB RX limit. By ensuring we can at least receive as much data as fits in
the reassembly line we avoid fragment attack deadlocks.
Use proc conv() routines to update these limits and return -ENOMEM to user
space.
Adds to the reserve tree:
total network reserve
network TX reserve
protocol TX pages
network RX reserve
+ IPv6 route cache
+ IPv4 route cache
SKB data reserve
+ IPv6 fragment cache
+ IPv4 fragment cache
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
net/ipv4/ip_fragment.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
net/ipv4/route.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
net/ipv6/reassembly.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
net/ipv6/route.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++--
4 files changed, 252 insertions(+), 8 deletions(-)
Index: linux-2.6/net/ipv4/ip_fragment.c
===================================================================
--- linux-2.6.orig/net/ipv4/ip_fragment.c
+++ linux-2.6/net/ipv4/ip_fragment.c
@@ -44,6 +44,7 @@
#include <linux/udp.h>
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/reserve.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -591,17 +592,72 @@ int ip_defrag(struct sk_buff *skb, u32 u
return -ENOMEM;
}
+static struct mem_reserve ipv4_frag_reserve;
+
#ifdef CONFIG_SYSCTL
+static int ipv4_frag_bytes;
+
+static int proc_dointvec_fragment(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_bytes, ret;
+
+ if (!write)
+ ipv4_frag_bytes = init_net.ipv4.frags.high_thresh;
+ old_bytes = ipv4_frag_bytes;
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&ipv4_frag_reserve,
+ ipv4_frag_bytes);
+ if (!ret)
+ init_net.ipv4.frags.high_thresh = ipv4_frag_bytes;
+ else
+ ipv4_frag_bytes = old_bytes;
+ }
+
+ return ret;
+}
+
+static int sysctl_intvec_fragment(struct ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int old_bytes, ret;
+ int write = (newval && newlen);
+
+ if (!write)
+ ipv4_frag_bytes = init_net.ipv4.frags.high_thresh;
+ old_bytes = ipv4_frag_bytes;
+
+ ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&ipv4_frag_reserve,
+ ipv4_frag_bytes);
+ if (!ret)
+ init_net.ipv4.frags.high_thresh = ipv4_frag_bytes;
+ else
+ ipv4_frag_bytes = old_bytes;
+ }
+
+ return ret;
+}
+
static int zero;
static struct ctl_table ip4_frags_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
.procname = "ipfrag_high_thresh",
- .data = &init_net.ipv4.frags.high_thresh,
+ .data = &ipv4_frag_bytes,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_fragment,
+ .strategy = &sysctl_intvec_fragment,
},
{
.ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
@@ -736,6 +792,11 @@ void __init ipfrag_init(void)
ip4_frags.frag_expire = ip_expire;
ip4_frags.secret_interval = 10 * 60 * HZ;
inet_frags_init(&ip4_frags);
+
+ mem_reserve_init(&ipv4_frag_reserve, "IPv4 fragment cache",
+ &net_skb_reserve);
+ mem_reserve_kmalloc_set(&ipv4_frag_reserve,
+ init_net.ipv4.frags.high_thresh);
}
EXPORT_SYMBOL(ip_defrag);
Index: linux-2.6/net/ipv6/reassembly.c
===================================================================
--- linux-2.6.orig/net/ipv6/reassembly.c
+++ linux-2.6/net/ipv6/reassembly.c
@@ -43,6 +43,7 @@
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/skbuff.h>
+#include <linux/reserve.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -628,15 +629,70 @@ static struct inet6_protocol frag_protoc
.flags = INET6_PROTO_NOPOLICY,
};
+static struct mem_reserve ipv6_frag_reserve;
+
#ifdef CONFIG_SYSCTL
+static int ipv6_frag_bytes;
+
+static int proc_dointvec_fragment(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_bytes, ret;
+
+ if (!write)
+ ipv6_frag_bytes = init_net.ipv6.frags.high_thresh;
+ old_bytes = ipv6_frag_bytes;
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&ipv6_frag_reserve,
+ ipv6_frag_bytes);
+ if (!ret)
+ init_net.ipv6.frags.high_thresh = ipv6_frag_bytes;
+ else
+ ipv6_frag_bytes = old_bytes;
+ }
+
+ return ret;
+}
+
+static int sysctl_intvec_fragment(struct ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int old_bytes, ret;
+ int write = (newval && newlen);
+
+ if (!write)
+ ipv6_frag_bytes = init_net.ipv6.frags.high_thresh;
+ old_bytes = ipv6_frag_bytes;
+
+ ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmalloc_set(&ipv6_frag_reserve,
+ ipv6_frag_bytes);
+ if (!ret)
+ init_net.ipv6.frags.high_thresh = ipv6_frag_bytes;
+ else
+ ipv6_frag_bytes = old_bytes;
+ }
+
+ return ret;
+}
+
static struct ctl_table ip6_frags_ctl_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
.procname = "ip6frag_high_thresh",
- .data = &init_net.ipv6.frags.high_thresh,
+ .data = &ipv6_frag_bytes,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_fragment,
+ .strategy = &sysctl_intvec_fragment,
},
{
.ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
@@ -758,6 +814,11 @@ int __init ipv6_frag_init(void)
ip6_frags.frag_expire = ip6_frag_expire;
ip6_frags.secret_interval = 10 * 60 * HZ;
inet_frags_init(&ip6_frags);
+
+ mem_reserve_init(&ipv6_frag_reserve, "IPv6 fragment cache",
+ &net_skb_reserve);
+ mem_reserve_kmalloc_set(&ipv6_frag_reserve,
+ init_net.ipv6.frags.high_thresh);
out:
return ret;
}
Index: linux-2.6/net/ipv4/route.c
===================================================================
--- linux-2.6.orig/net/ipv4/route.c
+++ linux-2.6/net/ipv4/route.c
@@ -109,6 +109,7 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
+#include <linux/reserve.h>
#define RT_FL_TOS(oldflp) \
((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -2794,6 +2795,8 @@ void ip_rt_multicast_event(struct in_dev
rt_cache_flush(0);
}
+static struct mem_reserve ipv4_route_reserve;
+
#ifdef CONFIG_SYSCTL
static int flush_delay;
@@ -2827,6 +2830,58 @@ static int ipv4_sysctl_rtcache_flush_str
return 0;
}
+static int ipv4_route_size;
+
+static int proc_dointvec_route(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_size, ret;
+
+ if (!write)
+ ipv4_route_size = ip_rt_max_size;
+ old_size = ipv4_route_size;
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv4_route_reserve,
+ ipv4_dst_ops.kmem_cachep, ipv4_route_size);
+ if (!ret)
+ ip_rt_max_size = ipv4_route_size;
+ else
+ ipv4_route_size = old_size;
+ }
+
+ return ret;
+}
+
+static int sysctl_intvec_route(struct ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int old_size, ret;
+ int write = (newval && newlen);
+
+ if (!write)
+ ipv4_route_size = ip_rt_max_size;
+ old_size = ipv4_route_size;
+
+ ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv4_route_reserve,
+ ipv4_dst_ops.kmem_cachep, ipv4_route_size);
+ if (!ret)
+ ip_rt_max_size = ipv4_route_size;
+ else
+ ipv4_route_size = old_size;
+ }
+
+ return ret;
+}
+
ctl_table ipv4_route_table[] = {
{
.ctl_name = NET_IPV4_ROUTE_FLUSH,
@@ -2848,10 +2903,11 @@ ctl_table ipv4_route_table[] = {
{
.ctl_name = NET_IPV4_ROUTE_MAX_SIZE,
.procname = "max_size",
- .data = &ip_rt_max_size,
+ .data = &ipv4_route_size,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_route,
+ .strategy = &sysctl_intvec_route,
},
{
/* Deprecated. Use gc_min_interval_ms */
@@ -3026,6 +3082,11 @@ int __init ip_rt_init(void)
ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
ip_rt_max_size = (rt_hash_mask + 1) * 16;
+ mem_reserve_init(&ipv4_route_reserve, "IPv4 route cache",
+ &net_rx_reserve);
+ mem_reserve_kmem_cache_set(&ipv4_route_reserve,
+ ipv4_dst_ops.kmem_cachep, ip_rt_max_size);
+
devinet_init();
ip_fib_init();
Index: linux-2.6/net/ipv6/route.c
===================================================================
--- linux-2.6.orig/net/ipv6/route.c
+++ linux-2.6/net/ipv6/route.c
@@ -38,6 +38,7 @@
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
+#include <linux/reserve.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
@@ -2391,6 +2392,8 @@ static inline void ipv6_route_proc_fini(
}
#endif /* CONFIG_PROC_FS */
+static struct mem_reserve ipv6_route_reserve;
+
#ifdef CONFIG_SYSCTL
static
@@ -2406,6 +2409,58 @@ int ipv6_sysctl_rtcache_flush(ctl_table
return -EINVAL;
}
+static int ipv6_route_size;
+
+static int proc_dointvec_route(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_size, ret;
+
+ if (!write)
+ ipv6_route_size = ip6_rt_max_size;
+ old_size = ipv6_route_size;
+
+ ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv6_route_reserve,
+ ip6_dst_ops.kmem_cachep, ipv6_route_size);
+ if (!ret)
+ ip6_rt_max_size = ipv6_route_size;
+ else
+ ipv6_route_size = old_size;
+ }
+
+ return ret;
+}
+
+static int sysctl_intvec_route(struct ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval, size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
+{
+ int old_size, ret;
+ int write = (newval && newlen);
+
+ if (!write)
+ ipv6_route_size = ip6_rt_max_size;
+ old_size = ipv6_route_size;
+
+ ret = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+
+ if (!ret && write) {
+ ret = mem_reserve_kmem_cache_set(&ipv6_route_reserve,
+ ip6_dst_ops.kmem_cachep, ipv6_route_size);
+ if (!ret)
+ ip6_rt_max_size = ipv6_route_size;
+ else
+ ipv6_route_size = old_size;
+ }
+
+ return ret;
+}
+
ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
@@ -2425,10 +2480,11 @@ ctl_table ipv6_route_table_template[] =
{
.ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
.procname = "max_size",
- .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
+ .data = &ipv6_route_size,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_route,
+ .strategy = &sysctl_intvec_route,
},
{
.ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
@@ -2519,6 +2575,11 @@ int __init ip6_route_init(void)
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
+ mem_reserve_init(&ipv6_route_reserve, "IPv6 route cache",
+ &net_rx_reserve);
+ mem_reserve_kmem_cache_set(&ipv6_route_reserve,
+ ip6_dst_ops.kmem_cachep, ip6_rt_max_size);
+
ret = fib6_init();
if (ret)
goto out_kmem_cache;
--
next prev parent reply other threads:[~2008-02-20 15:31 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-20 14:46 [PATCH 00/28] Swap over NFS -v16 Peter Zijlstra
2008-02-20 14:46 ` [PATCH 01/28] mm: gfp_to_alloc_flags() Peter Zijlstra
2008-02-20 14:46 ` [PATCH 02/28] mm: tag reseve pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 03/28] mm: slb: add knowledge of reserve pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 04/28] mm: kmem_estimate_pages() Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 05/28] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 06/28] mm: serialize access to min_free_kbytes Peter Zijlstra
2008-02-20 14:46 ` [PATCH 07/28] mm: emergency pool Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 08/28] mm: system wide ALLOC_NO_WATERMARK Peter Zijlstra
2008-02-23 8:05 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 09/28] mm: __GFP_MEMALLOC Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 10/28] mm: memory reserve management Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 11/28] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2008-02-20 14:46 ` [PATCH 12/28] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2008-02-20 14:46 ` [PATCH 13/28] net: packet split receive api Peter Zijlstra
2008-02-20 14:46 ` [PATCH 14/28] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2008-02-20 14:46 ` [PATCH 15/28] netvm: network reserve infrastructure Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-24 6:52 ` Mike Snitzer
2008-02-20 14:46 ` Peter Zijlstra [this message]
2008-02-20 14:46 ` [PATCH 17/28] netvm: hook skb allocation to reserves Peter Zijlstra
2008-02-23 8:06 ` Andrew Morton
2008-02-20 14:46 ` [PATCH 18/28] netvm: filter emergency skbs Peter Zijlstra
2008-02-20 14:46 ` [PATCH 19/28] netvm: prevent a stream specific deadlock Peter Zijlstra
2008-02-20 14:46 ` [PATCH 20/28] netfilter: NF_QUEUE vs emergency skbs Peter Zijlstra
2008-02-20 14:46 ` [PATCH 21/28] netvm: skb processing Peter Zijlstra
2008-02-20 14:46 ` [PATCH 22/28] mm: add support for non block device backed swap files Peter Zijlstra
2008-02-20 16:30 ` Randy Dunlap
2008-02-20 16:46 ` Peter Zijlstra
2008-02-26 12:45 ` Miklos Szeredi
2008-02-26 12:58 ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 23/28] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 24/28] nfs: remove mempools Peter Zijlstra
2008-02-20 14:46 ` [PATCH 25/28] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2008-02-20 14:46 ` [PATCH 26/28] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2008-02-20 14:46 ` [PATCH 27/28] nfs: enable swap on NFS Peter Zijlstra
2008-02-20 14:46 ` [PATCH 28/28] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2008-02-23 8:06 ` [PATCH 00/28] Swap over NFS -v16 Andrew Morton
2008-02-26 6:03 ` Neil Brown
2008-02-26 10:50 ` Peter Zijlstra
2008-02-26 12:00 ` Peter Zijlstra
2008-02-26 15:29 ` Miklos Szeredi
2008-02-26 15:41 ` Peter Zijlstra
2008-02-26 15:43 ` Peter Zijlstra
2008-02-26 15:47 ` Miklos Szeredi
2008-02-26 17:56 ` Andrew Morton
2008-02-27 5:51 ` Neil Brown
2008-02-27 7:58 ` Peter Zijlstra
2008-02-27 8:05 ` Pekka Enberg
2008-02-27 8:14 ` Peter Zijlstra
2008-02-27 8:33 ` Peter Zijlstra
2008-02-27 8:43 ` Pekka J Enberg
2008-02-29 11:51 ` Peter Zijlstra
2008-02-29 11:58 ` Pekka Enberg
2008-02-29 12:18 ` Peter Zijlstra
2008-02-29 12:29 ` Pekka Enberg
2008-02-29 1:29 ` Neil Brown
2008-02-29 10:21 ` Peter Zijlstra
2008-03-02 22:18 ` Neil Brown
2008-03-02 23:33 ` Peter Zijlstra
2008-03-03 23:41 ` Neil Brown
2008-03-04 10:28 ` Peter Zijlstra
[not found] ` <1837 <1204626509.6241.39.camel@lappy>
2008-03-07 3:33 ` Neil Brown
2008-03-07 11:17 ` Peter Zijlstra
2008-03-07 11:55 ` Peter Zijlstra
2008-03-10 5:15 ` Neil Brown
2008-03-10 9:17 ` Peter Zijlstra
2008-03-14 5:22 ` Neil Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080220150307.332560000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=netdev@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=trond.myklebust@fys.uio.no \
--subject='Re: [PATCH 16/28] netvm: INET reserves.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).