LKML Archive on lore.kernel.org
* [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
@ 2015-02-23 15:26 Imre Palik
2015-02-23 16:06 ` Florian Westphal
0 siblings, 1 reply; 13+ messages in thread
From: Imre Palik @ 2015-02-23 15:26 UTC (permalink / raw)
To: bridge, Stephen Hemminger
Cc: David S. Miller, netdev, linux-kernel, fw, Palik, Imre, Anthony Liguori
From: "Palik, Imre" <imrep@amazon.de>
The netfilter code is written with flexibility rather than performance in
mind. So when all we want is to pass packets between different interfaces,
the performance penalty of hitting the netfilter code can be considerable,
even when all firewalling is disabled for the bridge.
This change makes it possible to disable netfilter on a per-bridge basis.
In the case of interest to us, this can lead to a speedup of more than 15%
compared to the case where only bridge-iptables is disabled.
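The new knob sits next to the existing bridge attributes in sysfs, so
(assuming a bridge named br0) it can be toggled like this:

  echo 0 > /sys/class/net/br0/bridge/call_nf    # bypass netfilter on br0
  echo 1 > /sys/class/net/br0/bridge/call_nf    # netfilter hooks run again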
Cc: Anthony Liguori <aliguori@amazon.com>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
net/bridge/br_device.c | 2 ++
net/bridge/br_forward.c | 23 +++++++++++++++++++++--
net/bridge/br_input.c | 4 ++--
net/bridge/br_private.h | 1 +
net/bridge/br_sysfs_br.c | 23 +++++++++++++++++++++++
5 files changed, 49 insertions(+), 4 deletions(-)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ffd379d..6fb0343 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -381,6 +381,8 @@ void br_dev_setup(struct net_device *dev)
ether_addr_copy(br->group_addr, eth_reserved_addr_base);
+ br->call_nf = 1;
+
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a..aa944d0 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -81,6 +81,26 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
br_forward_finish);
}
+static int br_nf_do_forward(struct sk_buff *skb, struct net_device *indev)
+{
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+
+ p = br_port_get_rcu(indev);
+ if (!p) {
+ kfree_skb(skb);
+ return NF_DROP;
+ }
+ br = p->br;
+ if (!br->call_nf) {
+ skb_push(skb, ETH_HLEN);
+ return dev_queue_xmit(skb);
+ } else {
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev,
+ skb->dev, br_forward_finish);
+ }
+}
+
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
struct net_device *indev;
@@ -98,8 +118,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
skb->dev = to->dev;
skb_forward_csum(skb);
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
- br_forward_finish);
+ br_nf_do_forward(skb, indev);
}
/* called with rcu_read_lock */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2aa7be..04af67f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -299,8 +299,8 @@ forward:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
- br_handle_frame_finish);
+ NF_HOOK_COND(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev,
+ NULL, br_handle_frame_finish, p->br->call_nf);
break;
default:
drop:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de09199..a27f4ad 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -219,6 +219,7 @@ struct net_bridge
bool nf_call_ip6tables;
bool nf_call_arptables;
#endif
+ bool call_nf;
u16 group_fwd_mask;
u16 group_fwd_mask_required;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc5..b5061592 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -344,6 +344,28 @@ static ssize_t flush_store(struct device *d,
}
static DEVICE_ATTR_WO(flush);
+static ssize_t call_nf_show(
+ struct device *d, struct device_attribute *attr, char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%u\n", br->call_nf);
+}
+
+static int set_call_nf(struct net_bridge *br, unsigned long val)
+{
+ br->call_nf = val ? true : false;
+ return 0;
+}
+
+static ssize_t call_nf_store(
+ struct device *d, struct device_attribute *attr, const char *buf,
+ size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_call_nf);
+}
+static DEVICE_ATTR(call_nf, S_IRUGO | S_IWUSR,
+ call_nf_show, call_nf_store);
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
@@ -763,6 +785,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_gc_timer.attr,
&dev_attr_group_addr.attr,
&dev_attr_flush.attr,
+ &dev_attr_call_nf.attr,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
&dev_attr_multicast_router.attr,
&dev_attr_multicast_snooping.attr,
--
1.7.9.5
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-02-23 15:26 [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter Imre Palik
@ 2015-02-23 16:06 ` Florian Westphal
2015-02-26 10:19 ` Imre Palik
2015-02-26 21:17 ` Felix Fietkau
0 siblings, 2 replies; 13+ messages in thread
From: Florian Westphal @ 2015-02-23 16:06 UTC (permalink / raw)
To: Imre Palik
Cc: bridge, Stephen Hemminger, David S. Miller, netdev, linux-kernel,
fw, Palik, Imre, Anthony Liguori
Imre Palik <imrep.amz@gmail.com> wrote:
> The netfilter code is written with flexibility rather than performance in
> mind. So when all we want is to pass packets between different interfaces,
> the performance penalty of hitting the netfilter code can be considerable,
> even when all firewalling is disabled for the bridge.
>
> This change makes it possible to disable netfilter on a per-bridge basis.
> In the case of interest to us, this can lead to a speedup of more than 15%
> compared to the case where only bridge-iptables is disabled.
I wonder what the speed difference is between no-rules (i.e., we hit the jump
label in NF_HOOK), one single (ebtables) accept-all rule, and this patch, for
the call_nf==false case.
I guess your 15% speedup figure is coming from ebtables' O(n) rule
evaluation overhead? If yes, how many rules are we talking about?
Iff that's true, then the 'better' (I know, it won't help you) solution
would be to use nftables bridgeport-based verdict maps...
If that's still too much overhead, then we clearly need to do *something*...
Thanks,
Florian
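For reference, the "jump label" mentioned above is the static-key test that
guards every NF_HOOK invocation; a simplified sketch, modelled on the
nf_hooks_active() helper from kernels of that era (exact names and details
vary by version):

static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
#ifdef HAVE_JUMP_LABEL
	if (__builtin_constant_p(pf) && __builtin_constant_p(hook))
		/* compiled down to a patched NOP while no hook is registered */
		return static_key_false(&nf_hooks_needed[pf][hook]);
#endif
	return !list_empty(&nf_hooks[pf][hook]);
}

With no rules loaded the key stays disabled, so NF_HOOK falls through to the
okfn() continuation without touching any netfilter data structures.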
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-02-23 16:06 ` Florian Westphal
@ 2015-02-26 10:19 ` Imre Palik
2015-02-26 16:34 ` David Miller
2015-02-26 21:17 ` Felix Fietkau
1 sibling, 1 reply; 13+ messages in thread
From: Imre Palik @ 2015-02-26 10:19 UTC (permalink / raw)
To: Florian Westphal, Imre Palik
Cc: bridge, Stephen Hemminger, David S. Miller, netdev, linux-kernel,
Anthony Liguori
On 02/23/15 17:06, Florian Westphal wrote:
> Imre Palik <imrep.amz@gmail.com> wrote:
>> The netfilter code is written with flexibility rather than performance in
>> mind. So when all we want is to pass packets between different interfaces,
>> the performance penalty of hitting the netfilter code can be considerable,
>> even when all firewalling is disabled for the bridge.
>>
>> This change makes it possible to disable netfilter on a per-bridge basis.
>> In the case of interest to us, this can lead to a speedup of more than 15%
>> compared to the case where only bridge-iptables is disabled.
>
> I wonder what the speed difference is between no-rules (i.e., we hit the jump
> label in NF_HOOK), one single (ebtables) accept-all rule, and this patch, for
> the call_nf==false case.
ebtables is completely empty:
# ebtables -L
Bridge table: filter
Bridge chain: INPUT, entries: 0, policy: ACCEPT
Bridge chain: FORWARD, entries: 0, policy: ACCEPT
Bridge chain: OUTPUT, entries: 0, policy: ACCEPT
On some bridges I have iptables rules, but on the critical bridges I am running with iptables disabled.
> I guess your 15% speedup figure is coming from ebtables' O(n) rule
> evaluation overhead? If yes, how many rules are we talking about?
If you are looking for peculiarities in my setup, then here they are:
I am on 4k pages, and perf is not working :-(
(I am trying to fix those too, but that is far from being low-hanging fruit.)
So my guess would be that the packet pipeline doesn't fit in the cache/TLB.
> Iff that's true, then the 'better' (I know, it won't help you) solution
> would be to use nftables bridgeport-based verdict maps...
>
> If that's still too much overhead, then we clearly need to do *something*...
>
> Thanks,
> Florian
>
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-02-26 10:19 ` Imre Palik
@ 2015-02-26 16:34 ` David Miller
2015-03-06 10:34 ` Imre Palik
0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2015-02-26 16:34 UTC (permalink / raw)
To: imrep; +Cc: fw, imrep.amz, bridge, stephen, netdev, linux-kernel, aliguori
From: Imre Palik <imrep@amazon.de>
Date: Thu, 26 Feb 2015 11:19:25 +0100
> If you are looking for peculiarities in my setup, then here they are:
> I am on 4k pages, and perf is not working :-(
> (I am trying to fix those too, but that is far from being low-hanging fruit.)
> So my guess would be that the packet pipeline doesn't fit in the cache/TLB.
Pure speculation until you can actually use perf to measure these
things.
And I don't want to apply patches which were designed based upon
pure speculation.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-02-23 16:06 ` Florian Westphal
2015-02-26 10:19 ` Imre Palik
@ 2015-02-26 21:17 ` Felix Fietkau
1 sibling, 0 replies; 13+ messages in thread
From: Felix Fietkau @ 2015-02-26 21:17 UTC (permalink / raw)
To: Florian Westphal, Imre Palik
Cc: bridge, Stephen Hemminger, David S. Miller, netdev, linux-kernel,
Palik, Imre, Anthony Liguori
On 2015-02-24 05:06, Florian Westphal wrote:
> Imre Palik <imrep.amz@gmail.com> wrote:
>> The netfilter code is written with flexibility rather than performance in
>> mind. So when all we want is to pass packets between different interfaces,
>> the performance penalty of hitting the netfilter code can be considerable,
>> even when all firewalling is disabled for the bridge.
>>
>> This change makes it possible to disable netfilter on a per-bridge basis.
>> In the case of interest to us, this can lead to a speedup of more than 15%
>> compared to the case where only bridge-iptables is disabled.
>
> I wonder what the speed difference is between no-rules (i.e., we hit the jump
> label in NF_HOOK), one single (ebtables) accept-all rule, and this patch, for
> the call_nf==false case.
>
> I guess your 15% speedup figure is coming from ebtables' O(n) rule
> evaluation overhead? If yes, how many rules are we talking about?
>
> Iff that's true, then the 'better' (I know, it won't help you) solution
> would be to use nftables bridgeport-based verdict maps...
>
> If that's still too much overhead, then we clearly need to do *something*...
I work with MIPS-based routers that typically only have 32 or 64 KB of
Dcache. I've had quite a bit of 'fun' working on optimizing netfilter on
these systems. I've done a lot of measurements using oprofile (going to
use perf on my next run).
On these devices, even without netfilter compiled in, the data
structures and code are already way too big for the hot path to fit in
the Dcache (not to mention Icache). This problem has typically gotten a
little bit worse with every new kernel release, aside from just a few
exceptions.
This means that in the hot path, any unnecessary memory access to packet
data (especially IP headers) or to some degree also extra data
structures for netfilter, ebtables, etc. has a significant and visible
performance impact. The impact of the memory accesses is orders of
magnitude bigger than the pure cycles used for running the actual code.
In OpenWrt, I made similar hacks a long time ago, and on the system I
tested on, the speedup was even bigger than 15%, probably closer to 30%.
By the way, this was also with a completely empty ruleset.
Maybe there's a way to get reasonable performance by optimizing NF_HOOK;
however, I'd like to remind you guys that if we have to fetch some
netfilter/nftables/ebtables data structures and run part of the table
processing code on a system where no rules are present (or ebtables
functionality is otherwise not needed for a particular bridge), then
performance is going to suck - at least on most small scale embedded
devices.
Based on that, I support the general approach taken by this patch, at
least until somebody has shown that a better approach is feasible.
- Felix
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-02-26 16:34 ` David Miller
@ 2015-03-06 10:34 ` Imre Palik
2015-03-06 14:29 ` Pablo Neira Ayuso
2015-03-06 17:49 ` David Miller
0 siblings, 2 replies; 13+ messages in thread
From: Imre Palik @ 2015-03-06 10:34 UTC (permalink / raw)
To: David Miller, imrep
Cc: fw, bridge, stephen, netdev, linux-kernel, aliguori, nbd
On 02/26/15 17:34, David Miller wrote:
> From: Imre Palik <imrep@amazon.de>
> Date: Thu, 26 Feb 2015 11:19:25 +0100
>
>> If you are looking for peculiarities in my setup, then here they are:
>> I am on 4k pages, and perf is not working :-(
>> (I am trying to fix those too, but that is far from being low-hanging fruit.)
>> So my guess would be that the packet pipeline doesn't fit in the cache/TLB.
>
> Pure speculation until you can actually use perf to measure these
> things.
>
> And I don't want to apply patches which were designed based upon
> pure speculation.
>
I did performance measurements in the following way:
I removed those pieces of the packet pipeline that I don't necessarily need, one by one, and then measured their effect on small-packet performance.
This was the only part that produced a considerable effect.
The pure speculation was about why the effect is a more than 15% increase in packet throughput, although the avoided code path contains far less than 15% of the code in the packet pipeline. It seems Felix Fietkau profiled similar changes and found my guess well founded.
Now could anybody explain to me what else is wrong with my patch? I have run out of ideas for what to improve.
Thanks
Imre
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-03-06 10:34 ` Imre Palik
@ 2015-03-06 14:29 ` Pablo Neira Ayuso
2015-03-06 16:37 ` Florian Westphal
2015-03-06 17:49 ` David Miller
1 sibling, 1 reply; 13+ messages in thread
From: Pablo Neira Ayuso @ 2015-03-06 14:29 UTC (permalink / raw)
To: Imre Palik
Cc: David Miller, imrep, fw, bridge, stephen, netdev, linux-kernel,
aliguori, nbd
On Fri, Mar 06, 2015 at 11:34:29AM +0100, Imre Palik wrote:
> On 02/26/15 17:34, David Miller wrote:
> > From: Imre Palik <imrep@amazon.de>
> > Date: Thu, 26 Feb 2015 11:19:25 +0100
> >
> >> If you are looking for peculiarities in my setup, then here they are:
> >> I am on 4k pages, and perf is not working :-(
> >> (I am trying to fix those too, but that is far from being low-hanging fruit.)
> >> So my guess would be that the packet pipeline doesn't fit in the cache/TLB.
> >
> > Pure speculation until you can actually use perf to measure these
> > things.
> >
> > And I don't want to apply patches which were designed based upon
> > pure speculation.
> >
>
> I did performance measurements in the following way:
>
> I removed those pieces of the packet pipeline that I don't necessarily
> need, one by one, and then measured their effect on small-packet
> performance.
>
> This was the only part that produced a considerable effect.
>
> The pure speculation was about why the effect is a more than 15%
> increase in packet throughput, although the avoided code path
> contains far less than 15% of the code in the packet pipeline. It seems
> Felix Fietkau profiled similar changes and found my guess well
> founded.
>
> Now could anybody explain to me what else is wrong with my patch?
We have to come up with a more generic solution for this.
These sysfs tweaks you're proposing look to me like an obscure way to
tune this.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-03-06 14:29 ` Pablo Neira Ayuso
@ 2015-03-06 16:37 ` Florian Westphal
2018-03-09 15:31 ` David Woodhouse
0 siblings, 1 reply; 13+ messages in thread
From: Florian Westphal @ 2015-03-06 16:37 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: Imre Palik, David Miller, imrep, fw, bridge, stephen, netdev,
linux-kernel, aliguori, nbd
Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Fri, Mar 06, 2015 at 11:34:29AM +0100, Imre Palik wrote:
> > On 02/26/15 17:34, David Miller wrote:
> > > From: Imre Palik <imrep@amazon.de>
> > > Date: Thu, 26 Feb 2015 11:19:25 +0100
> > >
> > >> If you are looking for peculiarities in my setup, then here they are:
> > >> I am on 4k pages, and perf is not working :-(
> > >> (I am trying to fix those too, but that is far from being low-hanging fruit.)
> > >> So my guess would be that the packet pipeline doesn't fit in the cache/TLB.
> > >
> > > Pure speculation until you can actually use perf to measure these
> > > things.
> > >
> > > And I don't want to apply patches which were designed based upon
> > > pure speculation.
> > >
> >
> > I did performance measurements in the following way:
> >
> > I removed those pieces of the packet pipeline that I don't necessarily
> > need, one by one, and then measured their effect on small-packet
> > performance.
> >
> > This was the only part that produced a considerable effect.
> >
> > The pure speculation was about why the effect is a more than 15%
> > increase in packet throughput, although the avoided code path
> > contains far less than 15% of the code in the packet pipeline. It seems
> > Felix Fietkau profiled similar changes and found my guess well
> > founded.
> >
> > Now could anybody explain to me what else is wrong with my patch?
>
> We have to come up with a more generic solution for this.
Jiri Benc suggested allowing netfilter hooks to be attached, e.g. via a tc
action; maybe that would be an option worth investigating.
Then you could for instance add filtering rules only to the bridge port
that needs it.
> These sysfs tweaks you're proposing look to me like an obscure way to
> tune this.
I agree; adding more tunables isn't all that helpful. In the past this
has only helped to prolong the problem.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-03-06 10:34 ` Imre Palik
2015-03-06 14:29 ` Pablo Neira Ayuso
@ 2015-03-06 17:49 ` David Miller
1 sibling, 0 replies; 13+ messages in thread
From: David Miller @ 2015-03-06 17:49 UTC (permalink / raw)
To: imrep.amz; +Cc: imrep, fw, bridge, stephen, netdev, linux-kernel, aliguori, nbd
From: Imre Palik <imrep.amz@gmail.com>
Date: Fri, 06 Mar 2015 11:34:29 +0100
> I removed those pieces of the packet pipeline that I don't necessarily
> need, one by one, and then measured their effect on small-packet
> performance.
>
> This was the only part that produced a considerable effect.
>
> The pure speculation was about why the effect is a more than 15%
> increase in packet throughput, although the avoided code path
> contains far less than 15% of the code in the packet pipeline. It seems
> Felix Fietkau profiled similar changes and found my guess well
> founded.
Yes, and that's the part being left out: the "why".
That's part of what I expect in the justification.
Look, we're not doing things this way. We need to find a clean
and generic way to make the netfilter hooks as cheap as possible
when netfilter rules are not in use.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2015-03-06 16:37 ` Florian Westphal
@ 2018-03-09 15:31 ` David Woodhouse
2018-03-09 15:57 ` David Miller
2018-03-09 16:26 ` Florian Westphal
0 siblings, 2 replies; 13+ messages in thread
From: David Woodhouse @ 2018-03-09 15:31 UTC (permalink / raw)
To: Florian Westphal, Pablo Neira Ayuso
Cc: David Miller, rga, bridge, stephen, netdev, linux-kernel, aliguori, nbd
On Fri, 2015-03-06 at 17:37 +0100, Florian Westphal wrote:
>
> > > I did performance measurements in the following way:
> > >
> > > I removed those pieces of the packet pipeline that I don't necessarily
> > > need, one by one, and then measured their effect on small-packet
> > > performance.
> > >
> > > This was the only part that produced a considerable effect.
> > >
> > > The pure speculation was about why the effect is a more than 15%
> > > increase in packet throughput, although the avoided code path
> > > contains way less code than 15% of the code in the packet pipeline. It seems
> > > Felix Fietkau profiled similar changes and found my guess well
> > > founded.
> > >
> > > Now could anybody explain to me what else is wrong with my patch?
> >
> > We have to come up with a more generic solution for this.
>
> Jiri Benc suggested allowing netfilter hooks to be attached, e.g. via a tc
> action; maybe that would be an option worth investigating.
>
> Then you could for instance add filtering rules only to the bridge port
> that needs it.
>
> > These sysfs tweaks you're proposing look to me like an obscure way to
> > tune this.
>
> I agree; adding more tunables isn't all that helpful. In the past this
> has only helped to prolong the problem.
How feasible would it be to make it completely dynamic?
A given hook could automatically disable itself (for a given device) if
the result of running it the first time was *tautologically* false for
that device (i.e. regardless of the packet itself, or anything else).
The hook would need to be automatically re-enabled if the rule chain
ever changes (and might subsequently disable itself again).
Is that something that's worth exploring for the general case?
Eschewing a 15% speedup on the basis that "well, even though we've had
three of these already for a decade, we're worried that adding a fourth
might open the floodgates to further patches" does seem a little odd to
me, FWIW.
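To make the idea concrete, here is a rough sketch of what such a
self-disabling hook could look like; every identifier below is hypothetical,
invented purely for illustration, and not taken from any existing kernel API:

#include <linux/jump_label.h>
#include <linux/notifier.h>

/* Hypothetical: a hook slot that turns itself off once a run proves it is
 * a no-op for the device, and is re-armed when the ruleset changes. */
struct auto_nf_hook {
	struct static_key	active;		/* patched out when provably a no-op */
	struct notifier_block	ruleset_nb;	/* fires on rule-chain updates */
};

static int auto_nf_ruleset_event(struct notifier_block *nb,
				 unsigned long event, void *data)
{
	struct auto_nf_hook *h = container_of(nb, struct auto_nf_hook,
					      ruleset_nb);

	/* Rules changed: run the hook again until it once more proves
	 * tautologically false, at which point it disables itself. */
	if (!static_key_enabled(&h->active))
		static_key_slow_inc(&h->active);
	return NOTIFY_OK;
}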
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2018-03-09 15:31 ` David Woodhouse
@ 2018-03-09 15:57 ` David Miller
2018-03-09 16:15 ` David Woodhouse
2018-03-09 16:26 ` Florian Westphal
1 sibling, 1 reply; 13+ messages in thread
From: David Miller @ 2018-03-09 15:57 UTC (permalink / raw)
To: dwmw2
Cc: fw, pablo, rga, bridge, stephen, netdev, linux-kernel, aliguori, nbd
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 09 Mar 2018 15:31:15 +0000
> Eschewing a 15% speedup on the basis that "well, even though we've had
> three of these already for a decade, we're worried that adding a fourth
> might open the floodgates to further patches" does seem a little odd to
> me, FWIW.
The cost we are dealing with is a fundamental one which is a result of
the hook design.
Indirect calls are killer.
Indirect calls are even more killer now in the age of Spectre and
retpolines.
I definitely would rather see the fundamental issue addressed rather
than poking at it randomly with knobs for this case and that.
Thank you.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2018-03-09 15:57 ` David Miller
@ 2018-03-09 16:15 ` David Woodhouse
0 siblings, 0 replies; 13+ messages in thread
From: David Woodhouse @ 2018-03-09 16:15 UTC (permalink / raw)
To: David Miller
Cc: fw, pablo, rga, bridge, stephen, netdev, linux-kernel, aliguori, nbd
On Fri, 2018-03-09 at 10:57 -0500, David Miller wrote:
> From: David Woodhouse <dwmw2@infradead.org>
> Date: Fri, 09 Mar 2018 15:31:15 +0000
>
> > Eschewing a 15% speedup on the basis that "well, even though we've had
> > three of these already for a decade, we're worried that adding a fourth
> > might open the floodgates to further patches" does seem a little odd to
> > me, FWIW.
>
> The cost we are dealing with is a fundamental one which is a result of
> the hook design.
>
> Indirect calls are killer.
>
> Indirect calls are even more killer now in the age of Spectre and
> retpolines.
Imre's 15% measurement was, obviously, before that. We should redo it
and confirm the numbers.
> I definitely would rather see the fundamental issue addressed rather
> than poking at it randomly with knobs for this case and that.
Yeah. What do you think of the suggestion I made — that a given hook
should automatically disable itself if it tautologically does nothing?
Coupled with notifiers for when the rules change, to re-enable the
hooks again? I confess I don't even know how realistic that is.
* Re: [RFC PATCH v2] bridge: make it possible for packets to traverse the bridge without hitting netfilter
2018-03-09 15:31 ` David Woodhouse
2018-03-09 15:57 ` David Miller
@ 2018-03-09 16:26 ` Florian Westphal
1 sibling, 0 replies; 13+ messages in thread
From: Florian Westphal @ 2018-03-09 16:26 UTC (permalink / raw)
To: David Woodhouse
Cc: Florian Westphal, Pablo Neira Ayuso, David Miller, rga, bridge,
stephen, netdev, linux-kernel, aliguori, nbd
David Woodhouse <dwmw2@infradead.org> wrote:
>
>
> On Fri, 2015-03-06 at 17:37 +0100, Florian Westphal wrote:
> >
> > > > I did performance measurements in the following way:
> > > >
> > > > I removed those pieces of the packet pipeline that I don't necessarily
> > > > need, one by one, and then measured their effect on small-packet
> > > > performance.
> > > >
> > > > This was the only part that produced a considerable effect.
> > > >
> > > > The pure speculation was about why the effect is a more than 15%
> > > > increase in packet throughput, although the avoided code path
> > > > contains far less than 15% of the code in the packet pipeline. It seems
> > > > Felix Fietkau profiled similar changes and found my guess well
> > > > founded.
> > > >
> > > > Now could anybody explain to me what else is wrong with my patch?
> > >
> > > We have to come up with a more generic solution for this.
> >
> > Jiri Benc suggested allowing netfilter hooks to be attached, e.g. via a tc
> > action; maybe that would be an option worth investigating.
> >
> > Then you could for instance add filtering rules only to the bridge port
> > that needs it.
> >
> > > These sysfs tweaks you're proposing look to me like an obscure way to
> > > tune this.
> >
> > I agree; adding more tunables isn't all that helpful. In the past this
> > has only helped to prolong the problem.
>
> How feasible would it be to make it completely dynamic?
>
> A given hook could automatically disable itself (for a given device) if
> the result of running it the first time was *tautologically* false for
> that device (i.e. regardless of the packet itself, or anything else).
>
> The hook would need to be automatically re-enabled if the rule chain
> ever changes (and might subsequently disable itself again).
>
> Is that something that's worth exploring for the general case?
AF_BRIDGE hooks sit in the net namespace, so it's enough for
one bridge to request filtering to bring in the hook overhead for all
bridges in the same netns.
Alternatives:
- place the bridges that need filtering in different netns
- use tc ingress for filtering
- use the nftables ingress hook for filtering (it sits in almost the same
  location as the tc ingress hook) to attach the ruleset to those bridge
  ports that need packet filtering.
(The original request came from a user with tons of bridges where only
a single bridge needed filtering.)
One alternative I see is to place the bridge hooks into the
bridge device (the net_bridge struct, which is in the netdev private area).
But, as you already mentioned, we would need to annotate the hooks
to figure out which device(s) they are for.
This sounds rather fragile to me, so I would just use nft ingress:
#!/sbin/nft -f
table netdev ingress {
	chain in_public {
		type filter hook ingress device eth0 priority 0;
		ip saddr 192.168.0.1 counter
	}
}