Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation
@ 2021-07-20 20:35 Vadim Fedorenko
  2021-07-20 20:35 ` [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err Vadim Fedorenko
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Vadim Fedorenko @ 2021-07-20 20:35 UTC (permalink / raw)
  To: David Ahern, Willem de Bruijn, Paolo Abeni, Xin Long
  Cc: Jakub Kicinski, David S. Miller, netdev, Vadim Fedorenko

Bug 213669 uncovered regression in PMTU discovery for UDP-encapsulated
routes and some incorrect usage in udp tunnel fields. This series fixes
problems and also adds such case for selftests

v3:
 - update checking logic to account SCTP use case
v2:
 - remove refactor code that was in first patch
 - move checking logic to __udp{4,6}_lib_err_encap
 - add more tests, especially routed configuration

Vadim Fedorenko (2):
  udp: check encap socket in __udp_lib_err
  selftests: net: add ESP-in-UDP PMTU test

 net/ipv4/udp.c                        |  25 ++-
 net/ipv6/udp.c                        |  25 ++-
 tools/testing/selftests/net/nettest.c |  55 ++++++-
 tools/testing/selftests/net/pmtu.sh   | 212 +++++++++++++++++++++++++-
 4 files changed, 298 insertions(+), 19 deletions(-)

-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err
  2021-07-20 20:35 [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation Vadim Fedorenko
@ 2021-07-20 20:35 ` Vadim Fedorenko
  2021-07-21 14:08   ` Xin Long
  2021-07-20 20:35 ` [PATCH net v3 2/2] selftests: net: add ESP-in-UDP PMTU test Vadim Fedorenko
  2021-07-21 16:00 ` [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation patchwork-bot+netdevbpf
  2 siblings, 1 reply; 5+ messages in thread
From: Vadim Fedorenko @ 2021-07-20 20:35 UTC (permalink / raw)
  To: David Ahern, Willem de Bruijn, Paolo Abeni, Xin Long
  Cc: Jakub Kicinski, David S. Miller, netdev, Vadim Fedorenko

Commit d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err")
added checks for encapsulated sockets but it broke cases when there is
no implementation of encap_err_lookup for encapsulation, i.e. ESP in
UDP encapsulation. Fix it by calling encap_err_lookup only if socket
implements this method otherwise treat it as legal socket.

Fixes: d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err")
Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
---
 net/ipv4/udp.c | 25 +++++++++++++++++++------
 net/ipv6/udp.c | 25 +++++++++++++++++++------
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 62cd4cd52e84..1a742b710e54 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
 					 const struct iphdr *iph,
 					 struct udphdr *uh,
 					 struct udp_table *udptable,
+					 struct sock *sk,
 					 struct sk_buff *skb, u32 info)
 {
+	int (*lookup)(struct sock *sk, struct sk_buff *skb);
 	int network_offset, transport_offset;
-	struct sock *sk;
+	struct udp_sock *up;
 
 	network_offset = skb_network_offset(skb);
 	transport_offset = skb_transport_offset(skb);
@@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
 	/* Transport header needs to point to the UDP header */
 	skb_set_transport_header(skb, iph->ihl << 2);
 
+	if (sk) {
+		up = udp_sk(sk);
+
+		lookup = READ_ONCE(up->encap_err_lookup);
+		if (lookup && lookup(sk, skb))
+			sk = NULL;
+
+		goto out;
+	}
+
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
 			       iph->saddr, uh->dest, skb->dev->ifindex, 0,
 			       udptable, NULL);
 	if (sk) {
-		int (*lookup)(struct sock *sk, struct sk_buff *skb);
-		struct udp_sock *up = udp_sk(sk);
+		up = udp_sk(sk);
 
 		lookup = READ_ONCE(up->encap_err_lookup);
 		if (!lookup || lookup(sk, skb))
 			sk = NULL;
 	}
 
+out:
 	if (!sk)
 		sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
 
@@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
 			       iph->saddr, uh->source, skb->dev->ifindex,
 			       inet_sdif(skb), udptable, NULL);
+
 	if (!sk || udp_sk(sk)->encap_type) {
 		/* No socket for error: try tunnels before discarding */
-		sk = ERR_PTR(-ENOENT);
 		if (static_branch_unlikely(&udp_encap_needed_key)) {
-			sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
+			sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
 						  info);
 			if (!sk)
 				return 0;
-		}
+		} else
+			sk = ERR_PTR(-ENOENT);
 
 		if (IS_ERR(sk)) {
 			__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0cc7ba531b34..c5e15e94bb00 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
 					 const struct ipv6hdr *hdr, int offset,
 					 struct udphdr *uh,
 					 struct udp_table *udptable,
+					 struct sock *sk,
 					 struct sk_buff *skb,
 					 struct inet6_skb_parm *opt,
 					 u8 type, u8 code, __be32 info)
 {
+	int (*lookup)(struct sock *sk, struct sk_buff *skb);
 	int network_offset, transport_offset;
-	struct sock *sk;
+	struct udp_sock *up;
 
 	network_offset = skb_network_offset(skb);
 	transport_offset = skb_transport_offset(skb);
@@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
 	/* Transport header needs to point to the UDP header */
 	skb_set_transport_header(skb, offset);
 
+	if (sk) {
+		up = udp_sk(sk);
+
+		lookup = READ_ONCE(up->encap_err_lookup);
+		if (lookup && lookup(sk, skb))
+			sk = NULL;
+
+		goto out;
+	}
+
 	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
 			       &hdr->saddr, uh->dest,
 			       inet6_iif(skb), 0, udptable, skb);
 	if (sk) {
-		int (*lookup)(struct sock *sk, struct sk_buff *skb);
-		struct udp_sock *up = udp_sk(sk);
+		up = udp_sk(sk);
 
 		lookup = READ_ONCE(up->encap_err_lookup);
 		if (!lookup || lookup(sk, skb))
 			sk = NULL;
 	}
 
+out:
 	if (!sk) {
 		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
 							offset, info));
@@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
 			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
+
 	if (!sk || udp_sk(sk)->encap_type) {
 		/* No socket for error: try tunnels before discarding */
-		sk = ERR_PTR(-ENOENT);
 		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
 			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
-						  udptable, skb,
+						  udptable, sk, skb,
 						  opt, type, code, info);
 			if (!sk)
 				return 0;
-		}
+		} else
+			sk = ERR_PTR(-ENOENT);
 
 		if (IS_ERR(sk)) {
 			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net v3 2/2] selftests: net: add ESP-in-UDP PMTU test
  2021-07-20 20:35 [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation Vadim Fedorenko
  2021-07-20 20:35 ` [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err Vadim Fedorenko
@ 2021-07-20 20:35 ` Vadim Fedorenko
  2021-07-21 16:00 ` [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation patchwork-bot+netdevbpf
  2 siblings, 0 replies; 5+ messages in thread
From: Vadim Fedorenko @ 2021-07-20 20:35 UTC (permalink / raw)
  To: David Ahern, Willem de Bruijn, Paolo Abeni, Xin Long
  Cc: Jakub Kicinski, David S. Miller, netdev, Vadim Fedorenko

The case of ESP in UDP encapsulation was not covered before. Add
cases of local changes of MTU and difference on routed path.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
---
 tools/testing/selftests/net/nettest.c |  55 ++++++-
 tools/testing/selftests/net/pmtu.sh   | 212 +++++++++++++++++++++++++-
 2 files changed, 260 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 6365c7fd1262..bd6288302094 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -11,9 +11,11 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <linux/tcp.h>
+#include <linux/udp.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#include <netinet/ip.h>
 #include <netdb.h>
 #include <fcntl.h>
 #include <libgen.h>
@@ -27,6 +29,10 @@
 #include <time.h>
 #include <errno.h>
 
+#include <linux/xfrm.h>
+#include <linux/ipsec.h>
+#include <linux/pfkeyv2.h>
+
 #ifndef IPV6_UNICAST_IF
 #define IPV6_UNICAST_IF         76
 #endif
@@ -114,6 +120,9 @@ struct sock_args {
 		struct in_addr  in;
 		struct in6_addr in6;
 	} expected_raddr;
+
+	/* ESP in UDP encap test */
+	int use_xfrm;
 };
 
 static int server_mode;
@@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args)
 	return 0;
 }
 
+static int config_xfrm_policy(int sd, struct sock_args *args)
+{
+	struct xfrm_userpolicy_info policy = {};
+	int type = UDP_ENCAP_ESPINUDP;
+	int xfrm_af = IP_XFRM_POLICY;
+	int level = SOL_IP;
+
+	if (args->type != SOCK_DGRAM) {
+		log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
+		return 1;
+	}
+
+	policy.action = XFRM_POLICY_ALLOW;
+	policy.sel.family = args->version;
+	if (args->version == AF_INET6) {
+		xfrm_af = IPV6_XFRM_POLICY;
+		level = SOL_IPV6;
+	}
+
+	policy.dir = XFRM_POLICY_OUT;
+	if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+		return 1;
+
+	policy.dir = XFRM_POLICY_IN;
+	if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+		return 1;
+
+	if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) {
+		log_err_errno("Failed to set xfrm encap");
+		return 1;
+	}
+
+	return 0;
+}
+
 static int lsock_init(struct sock_args *args)
 {
 	long flags;
@@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args)
 	if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
 		log_err_errno("Failed to set close-on-exec flag");
 
+	if (args->use_xfrm && config_xfrm_policy(sd, args)) {
+		log_err_errno("Failed to set xfrm policy");
+		goto err;
+	}
+
 out:
 	return sd;
 
@@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 	return client_status;
 }
 
-#define GETOPT_STR  "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq"
+#define GETOPT_STR  "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
 
 static void print_usage(char *prog)
 {
@@ -1795,6 +1844,7 @@ static void print_usage(char *prog)
 	"    -D|R          datagram (D) / raw (R) socket (default stream)\n"
 	"    -l addr       local address to bind to in server mode\n"
 	"    -c addr       local address to bind to in client mode\n"
+	"    -x            configure XFRM policy on socket\n"
 	"\n"
 	"    -d dev        bind socket to given device name\n"
 	"    -I dev        bind socket to given device name - server mode\n"
@@ -1966,6 +2016,9 @@ int main(int argc, char *argv[])
 		case 'q':
 			quiet = 1;
 			break;
+		case 'x':
+			args.use_xfrm = 1;
+			break;
 		default:
 			print_usage(argv[0]);
 			return 1;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 64cd2e23c568..ac67bd78081a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -118,6 +118,16 @@
 #	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
 #	changes alone won't affect PMTU
 #
+# - pmtu_vti4_udp_exception
+#       Same as pmtu_vti4_exception, but using ESP-in-UDP
+#
+# - pmtu_vti4_udp_routed_exception
+#       Set up vti tunnel on top of veth connected through routing namespace and
+#	add xfrm states and policies with ESP-in-UDP encapsulation. Check that
+#	route exception is not created if link layer MTU is not exceeded, then
+#	lower MTU on second part of routed environment and check that exception
+#	is created with the expected PMTU.
+#
 # - pmtu_vti6_exception
 #	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is
@@ -125,6 +135,13 @@
 #	decrease and increase MTU of tunnel, checking that route exception PMTU
 #	changes accordingly
 #
+# - pmtu_vti6_udp_exception
+#       Same as pmtu_vti6_exception, but using ESP-in-UDP
+#
+# - pmtu_vti6_udp_routed_exception
+#	Same as pmtu_vti6_udp_routed_exception but with routing between vti
+#	endpoints
+#
 # - pmtu_vti4_default_mtu
 #	Set up vti4 tunnel on top of veth, in two namespaces with matching
 #	endpoints. Check that MTU assigned to vti interface is the MTU of the
@@ -224,6 +241,10 @@ tests="
 	pmtu_ipv6_ipv6_exception	IPv6 over IPv6: PMTU exceptions		1
 	pmtu_vti6_exception		vti6: PMTU exceptions			0
 	pmtu_vti4_exception		vti4: PMTU exceptions			0
+	pmtu_vti6_udp_exception		vti6: PMTU exceptions (ESP-in-UDP)	0
+	pmtu_vti4_udp_exception		vti4: PMTU exceptions (ESP-in-UDP)	0
+	pmtu_vti6_udp_routed_exception	vti6: PMTU exceptions, routed (ESP-in-UDP)	0
+	pmtu_vti4_udp_routed_exception	vti4: PMTU exceptions, routed (ESP-in-UDP)	0
 	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
 	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
 	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
@@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}"
 ns_c="ip netns exec ${NS_C}"
 ns_r1="ip netns exec ${NS_R1}"
 ns_r2="ip netns exec ${NS_R2}"
-
 # Addressing and routing for tests with routers: four network segments, with
 # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
 # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
@@ -279,7 +299,6 @@ routes="
 	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
 	B	default			${prefix6}:${b_r1}::2
 "
-
 USE_NH="no"
 #	ns	family	nh id	   destination		gateway
 nexthops="
@@ -326,6 +345,7 @@ dummy6_mask="64"
 
 err_buf=
 tcpdump_pids=
+nettest_pids=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -548,6 +568,14 @@ setup_vti6() {
 	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
 }
 
+setup_vti4routed() {
+	setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
+}
+
+setup_vti6routed() {
+	setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
 setup_vxlan_or_geneve() {
 	type="${1}"
 	a_addr="${2}"
@@ -619,18 +647,36 @@ setup_xfrm() {
 	proto=${1}
 	veth_a_addr="${2}"
 	veth_b_addr="${3}"
+	encap=${4}
 
-	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
-	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
+	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
 	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
 	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
 
-	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
+	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
 	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
 	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
 }
 
+setup_nettest_xfrm() {
+	which nettest >/dev/null
+	if [ $? -ne 0 ]; then
+        	echo "'nettest' command not found; skipping tests"
+	        return 1
+	fi
+
+	[ ${1} -eq 6 ] && proto="-6" || proto=""
+	port=${2}
+
+	run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+	nettest_pids="${nettest_pids} $!"
+
+	run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+	nettest_pids="${nettest_pids} $!"
+}
+
 setup_xfrm4() {
 	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
 }
@@ -639,6 +685,26 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_xfrm4udp() {
+	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+	setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udp() {
+	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+	setup_nettest_xfrm 6 4500
+}
+
+setup_xfrm4udprouted() {
+	setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
+	setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udprouted() {
+	setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
+	setup_nettest_xfrm 6 4500
+}
+
 setup_routing_old() {
 	for i in ${routes}; do
 		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
@@ -823,6 +889,11 @@ cleanup() {
 	done
 	tcpdump_pids=
 
+	for pid in ${nettest_pids}; do
+		kill ${pid}
+	done
+	nettest_pids=
+
 	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
@@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() {
 	return ${fail}
 }
 
+test_pmtu_vti4_udp_exception() {
+	setup namespaces veth vti4 xfrm4udp || return $ksft_skip
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+	mtu "${ns_a}" veth_a ${veth_mtu}
+	mtu "${ns_b}" veth_b ${veth_mtu}
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now exceed link layer MTU by one byte, check that exception is created
+	# with the right PMTU value
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
+}
+
+test_pmtu_vti6_udp_exception() {
+	setup namespaces veth vti6 xfrm6udp || return $ksft_skip
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
+	fail=0
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}" veth_a 4000
+	mtu "${ns_b}" veth_b 4000
+	mtu "${ns_a}" vti6_a 5000
+	mtu "${ns_b}" vti6_b 5000
+	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
+
+	# Decrease tunnel MTU, check for PMTU decrease in route exception
+	mtu "${ns_a}" vti6_a 3000
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
+
+	# Increase tunnel MTU, check for PMTU increase in route exception
+	mtu "${ns_a}" vti6_a 9000
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
+
+	return ${fail}
+}
+
+test_pmtu_vti4_udp_routed_exception() {
+	setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
+	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
+	      "${ns_a}" vti4_a       "${ns_b}" vti4_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+	# with the right PMTU value
+        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+}
+
+test_pmtu_vti6_udp_routed_exception() {
+	setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
+	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
+	      "${ns_a}" vti6_a       "${ns_b}" vti6_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 40))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 48))
+
+        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+	# mtu "${ns_a}" vti6_a ${vti_mtu}
+	# mtu "${ns_b}" vti6_b ${vti_mtu}
+
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
+
+	# Check that exception was not created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+	# with the right PMTU value
+        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+
+}
+
 test_pmtu_vti4_default_mtu() {
 	setup namespaces veth vti4 || return $ksft_skip
 
-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err
  2021-07-20 20:35 ` [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err Vadim Fedorenko
@ 2021-07-21 14:08   ` Xin Long
  0 siblings, 0 replies; 5+ messages in thread
From: Xin Long @ 2021-07-21 14:08 UTC (permalink / raw)
  To: Vadim Fedorenko
  Cc: David Ahern, Willem de Bruijn, Paolo Abeni, Jakub Kicinski,
	David S. Miller, network dev

On Tue, Jul 20, 2021 at 4:35 PM Vadim Fedorenko <vfedorenko@novek.ru> wrote:
>
> Commit d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err")
> added checks for encapsulated sockets but it broke cases when there is
> no implementation of encap_err_lookup for encapsulation, i.e. ESP in
> UDP encapsulation. Fix it by calling encap_err_lookup only if socket
> implements this method otherwise treat it as legal socket.
>
> Fixes: d26796ae5894 ("udp: check udp sock encap_type in __udp_lib_err")
> Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
> ---
>  net/ipv4/udp.c | 25 +++++++++++++++++++------
>  net/ipv6/udp.c | 25 +++++++++++++++++++------
>  2 files changed, 38 insertions(+), 12 deletions(-)
>
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 62cd4cd52e84..1a742b710e54 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
>                                          const struct iphdr *iph,
>                                          struct udphdr *uh,
>                                          struct udp_table *udptable,
> +                                        struct sock *sk,
>                                          struct sk_buff *skb, u32 info)
>  {
> +       int (*lookup)(struct sock *sk, struct sk_buff *skb);
>         int network_offset, transport_offset;
> -       struct sock *sk;
> +       struct udp_sock *up;
>
>         network_offset = skb_network_offset(skb);
>         transport_offset = skb_transport_offset(skb);
> @@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
>         /* Transport header needs to point to the UDP header */
>         skb_set_transport_header(skb, iph->ihl << 2);
>
> +       if (sk) {
> +               up = udp_sk(sk);
> +
> +               lookup = READ_ONCE(up->encap_err_lookup);
> +               if (lookup && lookup(sk, skb))
> +                       sk = NULL;
> +
> +               goto out;
> +       }
> +
>         sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
>                                iph->saddr, uh->dest, skb->dev->ifindex, 0,
>                                udptable, NULL);
>         if (sk) {
> -               int (*lookup)(struct sock *sk, struct sk_buff *skb);
> -               struct udp_sock *up = udp_sk(sk);
> +               up = udp_sk(sk);
>
>                 lookup = READ_ONCE(up->encap_err_lookup);
>                 if (!lookup || lookup(sk, skb))
>                         sk = NULL;
>         }
>
> +out:
>         if (!sk)
>                 sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
>
> @@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
>         sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
>                                iph->saddr, uh->source, skb->dev->ifindex,
>                                inet_sdif(skb), udptable, NULL);
> +
>         if (!sk || udp_sk(sk)->encap_type) {
>                 /* No socket for error: try tunnels before discarding */
> -               sk = ERR_PTR(-ENOENT);
>                 if (static_branch_unlikely(&udp_encap_needed_key)) {
> -                       sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
> +                       sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
>                                                   info);
>                         if (!sk)
>                                 return 0;
> -               }
> +               } else
> +                       sk = ERR_PTR(-ENOENT);
>
>                 if (IS_ERR(sk)) {
>                         __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 0cc7ba531b34..c5e15e94bb00 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
>                                          const struct ipv6hdr *hdr, int offset,
>                                          struct udphdr *uh,
>                                          struct udp_table *udptable,
> +                                        struct sock *sk,
>                                          struct sk_buff *skb,
>                                          struct inet6_skb_parm *opt,
>                                          u8 type, u8 code, __be32 info)
>  {
> +       int (*lookup)(struct sock *sk, struct sk_buff *skb);
>         int network_offset, transport_offset;
> -       struct sock *sk;
> +       struct udp_sock *up;
>
>         network_offset = skb_network_offset(skb);
>         transport_offset = skb_transport_offset(skb);
> @@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
>         /* Transport header needs to point to the UDP header */
>         skb_set_transport_header(skb, offset);
>
> +       if (sk) {
> +               up = udp_sk(sk);
> +
> +               lookup = READ_ONCE(up->encap_err_lookup);
> +               if (lookup && lookup(sk, skb))
> +                       sk = NULL;
> +
> +               goto out;
> +       }
> +
>         sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
>                                &hdr->saddr, uh->dest,
>                                inet6_iif(skb), 0, udptable, skb);
>         if (sk) {
> -               int (*lookup)(struct sock *sk, struct sk_buff *skb);
> -               struct udp_sock *up = udp_sk(sk);
> +               up = udp_sk(sk);
>
>                 lookup = READ_ONCE(up->encap_err_lookup);
>                 if (!lookup || lookup(sk, skb))
>                         sk = NULL;
>         }
>
> +out:
>         if (!sk) {
>                 sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
>                                                         offset, info));
> @@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
>
>         sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
>                                inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
> +
>         if (!sk || udp_sk(sk)->encap_type) {
>                 /* No socket for error: try tunnels before discarding */
> -               sk = ERR_PTR(-ENOENT);
>                 if (static_branch_unlikely(&udpv6_encap_needed_key)) {
>                         sk = __udp6_lib_err_encap(net, hdr, offset, uh,
> -                                                 udptable, skb,
> +                                                 udptable, sk, skb,
>                                                   opt, type, code, info);
>                         if (!sk)
>                                 return 0;
> -               }
> +               } else
> +                       sk = ERR_PTR(-ENOENT);
>
>                 if (IS_ERR(sk)) {
>                         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
> --
> 2.18.4
>
Reviewed-by: Xin Long <lucien.xin@gmail.com>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation
  2021-07-20 20:35 [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation Vadim Fedorenko
  2021-07-20 20:35 ` [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err Vadim Fedorenko
  2021-07-20 20:35 ` [PATCH net v3 2/2] selftests: net: add ESP-in-UDP PMTU test Vadim Fedorenko
@ 2021-07-21 16:00 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-07-21 16:00 UTC (permalink / raw)
  To: Vadim Fedorenko; +Cc: dsahern, willemb, pabeni, lucien.xin, kuba, davem, netdev

Hello:

This series was applied to netdev/net.git (refs/heads/master):

On Tue, 20 Jul 2021 23:35:27 +0300 you wrote:
> Bug 213669 uncovered regression in PMTU discovery for UDP-encapsulated
> routes and some incorrect usage in udp tunnel fields. This series fixes
> problems and also adds such case for selftests
> 
> v3:
>  - update checking logic to account SCTP use case
> v2:
>  - remove refactor code that was in first patch
>  - move checking logic to __udp{4,6}_lib_err_encap
>  - add more tests, especially routed configuration
> 
> [...]

Here is the summary with links:
  - [net,v3,1/2] udp: check encap socket in __udp_lib_err
    https://git.kernel.org/netdev/net/c/9bfce73c8921
  - [net,v3,2/2] selftests: net: add ESP-in-UDP PMTU test
    https://git.kernel.org/netdev/net/c/ece1278a9b81

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-07-21 16:00 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-20 20:35 [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation Vadim Fedorenko
2021-07-20 20:35 ` [PATCH net v3 1/2] udp: check encap socket in __udp_lib_err Vadim Fedorenko
2021-07-21 14:08   ` Xin Long
2021-07-20 20:35 ` [PATCH net v3 2/2] selftests: net: add ESP-in-UDP PMTU test Vadim Fedorenko
2021-07-21 16:00 ` [PATCH net v3 0/2] Fix PMTU for ESP-in-UDP encapsulation patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).