Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Andrii Nakryiko <andriin@fb.com>
To: <bpf@vger.kernel.org>, <netdev@vger.kernel.org>, <ast@fb.com>,
	<daniel@iogearbox.net>
Cc: <andrii.nakryiko@gmail.com>, <kernel-team@fb.com>,
	Andrii Nakryiko <andriin@fb.com>
Subject: [PATCH v2 bpf-next 14/14] selftests/bpf: convert cls_redirect selftest to use __noinline
Date: Mon, 31 Aug 2020 18:50:03 -0700	[thread overview]
Message-ID: <20200901015003.2871861-15-andriin@fb.com> (raw)
In-Reply-To: <20200901015003.2871861-1-andriin@fb.com>

As one of the most complicated and close-to-real-world programs, cls_redirect
is a good candidate to excercise libbpf's logic of handling bpf2bpf calls. So
convert it to explicit __noinline for majority of functions except few most
basic ones. If those few functions are inlined, verifier starts to complain
about program instruction limit of 1mln instructions being exceeded, most
probably due to instruction overhead of doing a sub-program call.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 .../selftests/bpf/progs/test_cls_redirect.c   | 99 ++++++++++---------
 1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f0b72e86bee5..c3fc410f7d9c 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -125,7 +125,7 @@ typedef struct buf {
 	uint8_t *const tail;
 } buf_t;
 
-static size_t buf_off(const buf_t *buf)
+static __always_inline size_t buf_off(const buf_t *buf)
 {
 	/* Clang seems to optimize constructs like
 	 *    a - b + c
@@ -145,7 +145,7 @@ static size_t buf_off(const buf_t *buf)
 	return off;
 }
 
-static bool buf_copy(buf_t *buf, void *dst, size_t len)
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
 {
 	if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
 		return false;
@@ -155,7 +155,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
 	return true;
 }
 
-static bool buf_skip(buf_t *buf, const size_t len)
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
 {
 	/* Check whether off + len is valid in the non-linear part. */
 	if (buf_off(buf) + len > buf->skb->len) {
@@ -173,7 +173,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
  * If scratch is not NULL, the function will attempt to load non-linear
  * data via bpf_skb_load_bytes. On success, scratch is returned.
  */
-static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
 {
 	if (buf->head + len > buf->tail) {
 		if (scratch == NULL) {
@@ -188,7 +188,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
 	return ptr;
 }
 
-static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+static __noinline bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
 {
 	if (ipv4->ihl <= 5) {
 		return true;
@@ -197,13 +197,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
 	return buf_skip(buf, (ipv4->ihl - 5) * 4);
 }
 
-static bool ipv4_is_fragment(const struct iphdr *ip)
+static __noinline bool ipv4_is_fragment(const struct iphdr *ip)
 {
 	uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
 	return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
 }
 
-static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
 {
 	struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
 	if (ipv4 == NULL) {
@@ -222,7 +222,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
 }
 
 /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
-static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+static __noinline bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
 {
 	if (!buf_copy(pkt, ports, sizeof(*ports))) {
 		return false;
@@ -237,7 +237,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
 	return true;
 }
 
-static uint16_t pkt_checksum_fold(uint32_t csum)
+static __noinline uint16_t pkt_checksum_fold(uint32_t csum)
 {
 	/* The highest reasonable value for an IPv4 header
 	 * checksum requires two folds, so we just do that always.
@@ -247,7 +247,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
 	return (uint16_t)~csum;
 }
 
-static void pkt_ipv4_checksum(struct iphdr *iph)
+static __noinline void pkt_ipv4_checksum(struct iphdr *iph)
 {
 	iph->check = 0;
 
@@ -268,10 +268,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
 	iph->check = pkt_checksum_fold(acc);
 }
 
-static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
-					    const struct ipv6hdr *ipv6,
-					    uint8_t *upper_proto,
-					    bool *is_fragment)
+static __noinline
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+				     const struct ipv6hdr *ipv6,
+				     uint8_t *upper_proto,
+				     bool *is_fragment)
 {
 	/* We understand five extension headers.
 	 * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
@@ -336,7 +337,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
  * scratch is allocated on the stack. However, this usage should be safe since
  * it's the callers stack after all.
  */
-static inline __attribute__((__always_inline__)) struct ipv6hdr *
+static __always_inline struct ipv6hdr *
 pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
 	       bool *is_fragment)
 {
@@ -354,20 +355,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
 
 /* Global metrics, per CPU
  */
-struct bpf_map_def metrics_map SEC("maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(unsigned int),
-	.value_size = sizeof(metrics_t),
-	.max_entries = 1,
-};
-
-static metrics_t *get_global_metrics(void)
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, unsigned int);
+	__type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static __noinline metrics_t *get_global_metrics(void)
 {
 	uint64_t key = 0;
 	return bpf_map_lookup_elem(&metrics_map, &key);
 }
 
-static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+static __noinline ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
 {
 	const int payload_off =
 		sizeof(*encap) +
@@ -388,8 +389,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
 	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
 }
 
-static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
-			      struct in_addr *next_hop, metrics_t *metrics)
+static __noinline ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+					 struct in_addr *next_hop, metrics_t *metrics)
 {
 	metrics->forwarded_packets_total_gre++;
 
@@ -509,8 +510,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
 	return bpf_redirect(skb->ifindex, 0);
 }
 
-static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
-				 struct in_addr *next_hop, metrics_t *metrics)
+static __noinline ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+					    struct in_addr *next_hop, metrics_t *metrics)
 {
 	/* swap L2 addresses */
 	/* This assumes that packets are received from a router.
@@ -546,7 +547,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
 	return bpf_redirect(skb->ifindex, 0);
 }
 
-static ret_t skip_next_hops(buf_t *pkt, int n)
+static __noinline ret_t skip_next_hops(buf_t *pkt, int n)
 {
 	switch (n) {
 	case 1:
@@ -566,8 +567,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
  * pkt is positioned just after the variable length GLB header
  * iff the call is successful.
  */
-static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
-			  struct in_addr *next_hop)
+static __noinline ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+				     struct in_addr *next_hop)
 {
 	if (encap->unigue.next_hop > encap->unigue.hop_count) {
 		return TC_ACT_SHOT;
@@ -601,8 +602,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
  * return value, and calling code works while still being "generic" to
  * IPv4 and IPv6.
  */
-static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
-			   uint64_t iphlen, uint16_t sport, uint16_t dport)
+static __noinline uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+				      uint64_t iphlen, uint16_t sport, uint16_t dport)
 {
 	switch (iphlen) {
 	case sizeof(struct iphdr): {
@@ -630,9 +631,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
 	}
 }
 
-static verdict_t classify_tcp(struct __sk_buff *skb,
-			      struct bpf_sock_tuple *tuple, uint64_t tuplen,
-			      void *iph, struct tcphdr *tcp)
+static __noinline verdict_t classify_tcp(struct __sk_buff *skb,
+					 struct bpf_sock_tuple *tuple, uint64_t tuplen,
+					 void *iph, struct tcphdr *tcp)
 {
 	struct bpf_sock *sk =
 		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -663,8 +664,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
 	return UNKNOWN;
 }
 
-static verdict_t classify_udp(struct __sk_buff *skb,
-			      struct bpf_sock_tuple *tuple, uint64_t tuplen)
+static __noinline verdict_t classify_udp(struct __sk_buff *skb,
+					 struct bpf_sock_tuple *tuple, uint64_t tuplen)
 {
 	struct bpf_sock *sk =
 		bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -681,9 +682,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
 	return UNKNOWN;
 }
 
-static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
-			       struct bpf_sock_tuple *tuple, uint64_t tuplen,
-			       metrics_t *metrics)
+static __noinline verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+					  struct bpf_sock_tuple *tuple, uint64_t tuplen,
+					  metrics_t *metrics)
 {
 	switch (proto) {
 	case IPPROTO_TCP:
@@ -698,7 +699,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
 	}
 }
 
-static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+static __noinline verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
 {
 	struct icmphdr icmp;
 	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
@@ -745,7 +746,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
 			     sizeof(tuple.ipv4), metrics);
 }
 
-static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+static __noinline verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
 {
 	struct icmp6hdr icmp6;
 	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
@@ -797,8 +798,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
 			     metrics);
 }
 
-static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
-			     metrics_t *metrics)
+static __noinline verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+					metrics_t *metrics)
 {
 	metrics->l4_protocol_packets_total_tcp++;
 
@@ -819,8 +820,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
 	return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
 }
 
-static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
-			     metrics_t *metrics)
+static __noinline verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+					metrics_t *metrics)
 {
 	metrics->l4_protocol_packets_total_udp++;
 
@@ -837,7 +838,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
 	return classify_udp(pkt->skb, &tuple, tuplen);
 }
 
-static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+static __noinline verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
 {
 	metrics->l3_protocol_packets_total_ipv4++;
 
@@ -874,7 +875,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
 	}
 }
 
-static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+static __noinline verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
 {
 	metrics->l3_protocol_packets_total_ipv6++;
 
-- 
2.24.1


  parent reply	other threads:[~2020-09-01  1:50 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-01  1:49 [PATCH v2 bpf-next 00/14] Add libbpf full support for BPF-to-BPF calls Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 01/14] libbpf: ensure ELF symbols table is found before further ELF processing Andrii Nakryiko
2020-09-02  5:43   ` John Fastabend
2020-09-01  1:49 ` [PATCH v2 bpf-next 02/14] libbpf: parse multi-function sections into multiple BPF programs Andrii Nakryiko
2020-09-02  6:08   ` John Fastabend
2020-09-01  1:49 ` [PATCH v2 bpf-next 03/14] libbpf: support CO-RE relocations for multi-prog sections Andrii Nakryiko
2020-09-02  6:14   ` John Fastabend
2020-09-01  1:49 ` [PATCH v2 bpf-next 04/14] libbpf: make RELO_CALL work for multi-prog sections and sub-program calls Andrii Nakryiko
2020-09-02  5:36   ` Alexei Starovoitov
2020-09-02 19:52     ` Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 05/14] libbpf: implement generalized .BTF.ext func/line info adjustment Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 06/14] libbpf: add multi-prog section support for struct_ops Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 07/14] selftests/bpf: add selftest for multi-prog sections and bpf-to-bpf calls Andrii Nakryiko
2020-09-02  5:41   ` Alexei Starovoitov
2020-09-02 19:57     ` Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 08/14] tools/bpftool: replace bpf_program__title() with bpf_program__section_name() Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 09/14] selftests/bpf: don't use deprecated libbpf APIs Andrii Nakryiko
2020-09-01  1:49 ` [PATCH v2 bpf-next 10/14] libbpf: deprecate notion of BPF program "title" in favor of "section name" Andrii Nakryiko
2020-09-01  1:50 ` [PATCH v2 bpf-next 11/14] selftests/bpf: turn fexit_bpf2bpf into test with subtests Andrii Nakryiko
2020-09-01  1:50 ` [PATCH v2 bpf-next 12/14] selftests/bpf: convert pyperf, strobemeta, and l4lb_noinline to __noinline Andrii Nakryiko
2020-09-02  5:45   ` Alexei Starovoitov
2020-09-02 19:58     ` Andrii Nakryiko
2020-09-01  1:50 ` [PATCH v2 bpf-next 13/14] selftests/bpf: modernize xdp_noinline test w/ skeleton and __noinline Andrii Nakryiko
2020-09-01  1:50 ` Andrii Nakryiko [this message]
2020-09-02  5:46   ` [PATCH v2 bpf-next 14/14] selftests/bpf: convert cls_redirect selftest to use __noinline Alexei Starovoitov
2020-09-02 19:58     ` Andrii Nakryiko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200901015003.2871861-15-andriin@fb.com \
    --to=andriin@fb.com \
    --cc=andrii.nakryiko@gmail.com \
    --cc=ast@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH v2 bpf-next 14/14] selftests/bpf: convert cls_redirect selftest to use __noinline' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).