LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
@ 2021-09-14 14:35 Zhongya Yan
  2021-09-20 16:54 ` Cong Wang
  0 siblings, 1 reply; 6+ messages in thread
From: Zhongya Yan @ 2021-09-14 14:35 UTC (permalink / raw)
  To: edumazet, rostedt, brendan.d.gregg
  Cc: netdev, linux-kernel, kuba, mingo, davem, yoshfuji, dsahern, yhs,
	2228598786, Zhongya Yan

When we wanted to trace the use of the `tcp_drop(struct sock *sk, struct sk_buff *skb)` function, we didn't know why `tcp` was deleting `skb'. To solve this problem, I updated the function `tcp_drop(struct sock *sk, struct sk_buff *skb, int field, const char *reason)`.
This way you can understand the reason for the deletion based on the prompt message.
`field`: represents the SNMP-related value
`reason`: represents the reason why `tcp` deleted the current `skb`, and contains some hints.
Of course, if you want to know more about the reason for updating the current function, you can check: https://www.brendangregg.com/blog/2018-05-31/linux-tcpdrop.html

Signed-off-by: Zhongya Yan <yan2228598786@gmail.com>
---
 include/trace/events/tcp.h |  62 ++++++++++++++++++
 net/ipv4/tcp_input.c       | 126 +++++++++++++++++++++++--------------
 2 files changed, 142 insertions(+), 46 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 521059d8dc0a..68bbe8741ce8 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -371,6 +371,68 @@ DEFINE_EVENT(tcp_event_skb, tcp_bad_csum,
 	TP_ARGS(skb)
 );
 
+TRACE_EVENT(tcp_drop,
+		TP_PROTO(struct sock *sk, struct sk_buff *skb, int field, const char *reason),
+
+		TP_ARGS(sk, skb, field, reason),
+
+		TP_STRUCT__entry(
+			__array(__u8, saddr, sizeof(struct sockaddr_in6))
+			__array(__u8, daddr, sizeof(struct sockaddr_in6))
+			__field(__u16, sport)
+			__field(__u16, dport)
+			__field(__u32, mark)
+			__field(__u16, data_len)
+			__field(__u32, snd_nxt)
+			__field(__u32, snd_una)
+			__field(__u32, snd_cwnd)
+			__field(__u32, ssthresh)
+			__field(__u32, snd_wnd)
+			__field(__u32, srtt)
+			__field(__u32, rcv_wnd)
+			__field(__u64, sock_cookie)
+			__field(int, field)
+			__string(reason, reason)
+			),
+
+		TP_fast_assign(
+				const struct tcphdr *th = (const struct tcphdr *)skb->data;
+				const struct inet_sock *inet = inet_sk(sk);
+				const struct tcp_sock *tp = tcp_sk(sk);
+
+				memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+				memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+				TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+				__entry->sport = ntohs(inet->inet_sport);
+				__entry->dport = ntohs(inet->inet_dport);
+				__entry->mark = skb->mark;
+
+				__entry->data_len = skb->len - __tcp_hdrlen(th);
+				__entry->snd_nxt = tp->snd_nxt;
+				__entry->snd_una = tp->snd_una;
+				__entry->snd_cwnd = tp->snd_cwnd;
+				__entry->snd_wnd = tp->snd_wnd;
+				__entry->rcv_wnd = tp->rcv_wnd;
+				__entry->ssthresh = tcp_current_ssthresh(sk);
+				__entry->srtt = tp->srtt_us >> 3;
+				__entry->sock_cookie = sock_gen_cookie(sk);
+				__entry->field = field;
+
+				__assign_str(reason, reason);
+		),
+
+		TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x \
+				snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u \
+				sock_cookie=%llx field=%d reason=%s",
+				__entry->saddr, __entry->daddr, __entry->mark,
+				__entry->data_len, __entry->snd_nxt, __entry->snd_una,
+				__entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
+				__entry->srtt, __entry->rcv_wnd, __entry->sock_cookie,
+				__entry->field, __get_str(reason))
+);
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f7bd7ae7d7a..1cebdcafb00f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4675,8 +4675,10 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
 	return res;
 }
 
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+static void tcp_drop(struct sock *sk, struct sk_buff *skb,
+		int field, const char *reason)
 {
+	trace_tcp_drop(sk, skb, field, reason);
 	sk_drops_add(sk, skb);
 	__kfree_skb(skb);
 }
@@ -4708,7 +4710,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			tcp_drop(sk, skb);
+			tcp_drop(sk, skb, LINUX_MIB_TCPOFOQUEUE, "TCP queue error");
 			continue;
 		}
 
@@ -4764,7 +4766,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
 		sk->sk_data_ready(sk);
-		tcp_drop(sk, skb);
+		tcp_drop(sk, skb, LINUX_MIB_TCPOFODROP, "TCP rmem failed");
 		return;
 	}
 
@@ -4827,7 +4829,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				tcp_drop(sk, skb);
+				tcp_drop(sk, skb, LINUX_MIB_TCPOFOMERGE, "TCP bits are present");
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4846,7 +4848,9 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				tcp_drop(sk, skb1);
+				tcp_drop(sk, skb1, LINUX_MIB_TCPOFOMERGE,
+						"TCP replace(skb.seq eq skb1.seq)");
+
 				goto merge_right;
 			}
 		} else if (tcp_ooo_try_coalesce(sk, skb1,
@@ -4874,7 +4878,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 				 TCP_SKB_CB(skb1)->end_seq);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-		tcp_drop(sk, skb1);
+		tcp_drop(sk, skb1, LINUX_MIB_TCPOFOMERGE, "TCP useless other segments");
 	}
 	/* If there is no skb after us, we are the last_skb ! */
 	if (!skb1)
@@ -5010,7 +5014,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 			sk->sk_data_ready(sk);
-			goto drop;
+			tcp_drop(sk, skb, LINUX_MIB_TCPRCVQDROP, "TCP rmem failed");
+			goto end;
 		}
 
 		eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5050,8 +5055,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 out_of_window:
 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 		inet_csk_schedule_ack(sk);
-drop:
-		tcp_drop(sk, skb);
+		tcp_drop(sk, skb, LINUX_MIB_TCPZEROWINDOWDROP, "TCP out of order or zero window");
+end:
 		return;
 	}
 
@@ -5308,7 +5313,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
 		goal -= rb_to_skb(node)->truesize;
-		tcp_drop(sk, rb_to_skb(node));
+		tcp_drop(sk, rb_to_skb(node), LINUX_MIB_OFOPRUNED, "TCP drop out-of-order queue");
 		if (!prev || goal <= 0) {
 			sk_mem_reclaim(sk);
 			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5643,7 +5648,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 						  LINUX_MIB_TCPACKSKIPPEDPAWS,
 						  &tp->last_oow_ack_time))
 				tcp_send_dupack(sk, skb);
-			goto discard;
+			tcp_drop(sk, skb, LINUX_MIB_PAWSESTABREJECTED, "TCP PAWS seq first");
+			goto end;
 		}
 		/* Reset is accepted even if it did not pass PAWS. */
 	}
@@ -5666,7 +5672,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 		} else if (tcp_reset_check(sk, skb)) {
 			tcp_reset(sk, skb);
 		}
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_PAWSESTABREJECTED, "TCP check sequence number");
+		goto end;
 	}
 
 	/* Step 2: check RST bit */
@@ -5711,7 +5718,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				tcp_fastopen_active_disable(sk);
 			tcp_send_challenge_ack(sk, skb);
 		}
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_TCPCHALLENGEACK, "TCP check RST bit ");
+		goto end;
 	}
 
 	/* step 3: check security and precedence [ignored] */
@@ -5725,15 +5733,15 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 			TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
 		tcp_send_challenge_ack(sk, skb);
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_TCPSYNCHALLENGE, "TCP check for a SYN");
+		goto end;
 	}
 
 	bpf_skops_parse_hdr(sk, skb);
 
 	return true;
 
-discard:
-	tcp_drop(sk, skb);
+end:
 	return false;
 }
 
@@ -5851,7 +5859,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-				goto discard;
+				tcp_drop(sk, skb, TCP_MIB_INERRS, "TCP header too small");
+				goto end;
 			}
 		} else {
 			int eaten = 0;
@@ -5905,8 +5914,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
 		goto csum_error;
 
-	if (!th->ack && !th->rst && !th->syn)
-		goto discard;
+	if (!th->ack && !th->rst && !th->syn) {
+		tcp_drop(sk, skb, LINUX_MIB_TCPSLOWSTARTRETRANS, "TCP state not in ack|rst|syn");
+		goto end;
+	}
 
 	/*
 	 *	Standard slow path.
@@ -5916,8 +5927,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 		return;
 
 step5:
-	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
-		goto discard;
+	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) {
+		tcp_drop(sk, skb, LINUX_MIB_TCPSACKDISCARD, "TCP ack have not sent yet");
+		goto end;
+	}
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
 
@@ -5935,9 +5948,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	trace_tcp_bad_csum(skb);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+	tcp_drop(sk, skb, TCP_MIB_CSUMERRORS, "TCP csum error");
 
-discard:
-	tcp_drop(sk, skb);
+end:
+	return;
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
@@ -6137,7 +6151,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (th->rst) {
 			tcp_reset(sk, skb);
-			goto discard;
+			tcp_drop(sk, skb, LINUX_MIB_NUM, "TCP reset");
+			goto end;
 		}
 
 		/* rfc793:
@@ -6226,9 +6241,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
+			tcp_drop(sk, skb, LINUX_MIB_TCPFASTOPENACTIVE, "TCP fast open ack error");
 
-discard:
-			tcp_drop(sk, skb);
+end:
 			return 0;
 		} else {
 			tcp_send_ack(sk);
@@ -6301,7 +6316,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 		return -1;
 #else
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_SYNCOOKIESRECV, "TCP syn received error");
+		goto end;
 #endif
 	}
 	/* "fifth, if neither of the SYN or RST bits is set then
@@ -6311,7 +6327,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 discard_and_undo:
 	tcp_clear_options(&tp->rx_opt);
 	tp->rx_opt.mss_clamp = saved_clamp;
-	goto discard;
+	tcp_drop(sk, skb, LINUX_MIB_TCPSACKDISCARD, "TCP not neither of SYN or RST");
+	goto end;
 
 reset_and_undo:
 	tcp_clear_options(&tp->rx_opt);
@@ -6369,18 +6386,23 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 
 	switch (sk->sk_state) {
 	case TCP_CLOSE:
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_TCPABORTONCLOSE, "TCP close");
+		goto end;
 
 	case TCP_LISTEN:
 		if (th->ack)
 			return 1;
 
-		if (th->rst)
-			goto discard;
+		if (th->rst) {
+			tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "TCP rst");
+			goto end;
+		}
 
 		if (th->syn) {
-			if (th->fin)
-				goto discard;
+			if (th->fin) {
+				tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "TCP fin");
+				goto end;
+			}
 			/* It is possible that we process SYN packets from backlog,
 			 * so we need to make sure to disable BH and RCU right there.
 			 */
@@ -6395,7 +6417,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			consume_skb(skb);
 			return 0;
 		}
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "TCP syn");
+		goto end;
 
 	case TCP_SYN_SENT:
 		tp->rx_opt.saw_tstamp = 0;
@@ -6421,12 +6444,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
-			goto discard;
+		if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+			tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "TCP check req error");
+			goto end;
+		}
 	}
 
-	if (!th->ack && !th->rst && !th->syn)
-		goto discard;
+	if (!th->ack && !th->rst && !th->syn) {
+		tcp_drop(sk, skb, LINUX_MIB_LISTENDROPS, "TCP not ack|rst|syn");
+		goto end;
+	}
 
 	if (!tcp_validate_incoming(sk, skb, th, 0))
 		return 0;
@@ -6440,7 +6467,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_state == TCP_SYN_RECV)
 			return 1;	/* send one RST */
 		tcp_send_challenge_ack(sk, skb);
-		goto discard;
+		tcp_drop(sk, skb, LINUX_MIB_TCPCHALLENGEACK, "TCP check ack failed");
+		goto end;
 	}
 	switch (sk->sk_state) {
 	case TCP_SYN_RECV:
@@ -6533,7 +6561,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			inet_csk_reset_keepalive_timer(sk, tmo);
 		} else {
 			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-			goto discard;
+			tcp_drop(sk, skb, LINUX_MIB_TCPABORTONDATA, "TCP fin wait2");
+			goto end;
 		}
 		break;
 	}
@@ -6541,7 +6570,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_CLOSING:
 		if (tp->snd_una == tp->write_seq) {
 			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-			goto discard;
+			tcp_drop(sk, skb, LINUX_MIB_TIMEWAITED, "TCP time wait");
+			goto end;
 		}
 		break;
 
@@ -6549,7 +6579,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (tp->snd_una == tp->write_seq) {
 			tcp_update_metrics(sk);
 			tcp_done(sk);
-			goto discard;
+			tcp_drop(sk, skb, LINUX_MIB_TCPPUREACKS, "TCP last ack");
+			goto end;
 		}
 		break;
 	}
@@ -6566,8 +6597,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			/* If a subflow has been reset, the packet should not
 			 * continue to be processed, drop the packet.
 			 */
-			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
-				goto discard;
+			if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
+				tcp_drop(sk, skb, LINUX_MIB_TCPPUREACKS, "TCP subflow been reset");
+				goto end;
+			}
 			break;
 		}
 		fallthrough;
@@ -6599,9 +6632,10 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (!queued) {
-discard:
-		tcp_drop(sk, skb);
+		tcp_drop(sk, skb, LINUX_MIB_TCPOFOQUEUE, "TCP rcv synsent state process");
 	}
+
+end:
 	return 0;
 }
 EXPORT_SYMBOL(tcp_rcv_state_process);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
  2021-09-14 14:35 [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing Zhongya Yan
@ 2021-09-20 16:54 ` Cong Wang
  2021-09-20 17:15   ` Steven Rostedt
  0 siblings, 1 reply; 6+ messages in thread
From: Cong Wang @ 2021-09-20 16:54 UTC (permalink / raw)
  To: Zhongya Yan
  Cc: Eric Dumazet, Steven Rostedt, Brendan Gregg,
	Linux Kernel Network Developers, LKML, Jakub Kicinski,
	Ingo Molnar, David Miller, Hideaki YOSHIFUJI, dsahern,
	Yonghong Song, 2228598786

On Tue, Sep 14, 2021 at 7:38 AM Zhongya Yan <yan2228598786@gmail.com> wrote:
>
> When we wanted to trace the use of the `tcp_drop(struct sock *sk, struct sk_buff *skb)` function, we didn't know why `tcp` was deleting `skb'. To solve this problem, I updated the function `tcp_drop(struct sock *sk, struct sk_buff *skb, int field, const char *reason)`.
> This way you can understand the reason for the deletion based on the prompt message.
> `field`: represents the SNMP-related value
> `reason`: represents the reason why `tcp` deleted the current `skb`, and contains some hints.
> Of course, if you want to know more about the reason for updating the current function, you can check: https://www.brendangregg.com/blog/2018-05-31/linux-tcpdrop.html

I think you fail to explain why only TCP needs it? This should
be useful for all kinds of drops, not just TCP, therefore you should
consider extending net/core/drop_monitor.c instead of just tcp_drop().

Also, kernel does not have to explain it in strings, those SNMP
counters are already available for user-space, so kernel could
just use SNMP enums and let user-space interpret them. In many
cases, you are just adding strings for those SNMP enums.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
  2021-09-20 16:54 ` Cong Wang
@ 2021-09-20 17:15   ` Steven Rostedt
  2021-09-20 17:20     ` Cong Wang
  0 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2021-09-20 17:15 UTC (permalink / raw)
  To: Cong Wang
  Cc: Zhongya Yan, Eric Dumazet, Brendan Gregg,
	Linux Kernel Network Developers, LKML, Jakub Kicinski,
	Ingo Molnar, David Miller, Hideaki YOSHIFUJI, dsahern,
	Yonghong Song, 2228598786

On Mon, 20 Sep 2021 09:54:02 -0700
Cong Wang <xiyou.wangcong@gmail.com> wrote:

> Also, kernel does not have to explain it in strings, those SNMP
> counters are already available for user-space, so kernel could
> just use SNMP enums and let user-space interpret them. In many
> cases, you are just adding strings for those SNMP enums.

The strings were requested by the networking maintainers.

  https://lore.kernel.org/all/CANn89iJO8jzjFWvJ610TPmKDE8WKi8ojTr_HWXLz5g=4pdQHEA@mail.gmail.com/

-- Steve

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
  2021-09-20 17:15   ` Steven Rostedt
@ 2021-09-20 17:20     ` Cong Wang
  2021-09-20 17:24       ` Steven Rostedt
  0 siblings, 1 reply; 6+ messages in thread
From: Cong Wang @ 2021-09-20 17:20 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Zhongya Yan, Eric Dumazet, Brendan Gregg,
	Linux Kernel Network Developers, LKML, Jakub Kicinski,
	Ingo Molnar, David Miller, Hideaki YOSHIFUJI, David Ahern,
	Yonghong Song, Zhongya Yan

On Mon, Sep 20, 2021 at 10:15 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 20 Sep 2021 09:54:02 -0700
> Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> > Also, kernel does not have to explain it in strings, those SNMP
> > counters are already available for user-space, so kernel could
> > just use SNMP enums and let user-space interpret them. In many
> > cases, you are just adding strings for those SNMP enums.
>
> The strings were requested by the networking maintainers.
>
>   https://lore.kernel.org/all/CANn89iJO8jzjFWvJ610TPmKDE8WKi8ojTr_HWXLz5g=4pdQHEA@mail.gmail.com/

I think you misunderstand my point. Eric's point is hex address
vs. string, which I never disagree. With SNMP enum, user-space
can easily interpret it to string too, so at the end you still get strings
but not from kernel. This would at least save a handful of strings
from vmlinux, especially if we expand it beyond TCP.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
  2021-09-20 17:20     ` Cong Wang
@ 2021-09-20 17:24       ` Steven Rostedt
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2021-09-20 17:24 UTC (permalink / raw)
  To: Cong Wang
  Cc: Zhongya Yan, Eric Dumazet, Brendan Gregg,
	Linux Kernel Network Developers, LKML, Jakub Kicinski,
	Ingo Molnar, David Miller, Hideaki YOSHIFUJI, David Ahern,
	Yonghong Song, Zhongya Yan

On Mon, 20 Sep 2021 10:20:33 -0700
Cong Wang <xiyou.wangcong@gmail.com> wrote:

> > The strings were requested by the networking maintainers.
> >
> >   https://lore.kernel.org/all/CANn89iJO8jzjFWvJ610TPmKDE8WKi8ojTr_HWXLz5g=4pdQHEA@mail.gmail.com/  
> 
> I think you misunderstand my point. Eric's point is hex address
> vs. string, which I never disagree. With SNMP enum, user-space
> can easily interpret it to string too, so at the end you still get strings
> but not from kernel. This would at least save a handful of strings
> from vmlinux, especially if we expand it beyond TCP.

If an enum is saved in the ring buffer, it is also trivial to have it
show a string in the buffer, as there's a way to map enums to strings
on output as well. Which I discussed in that same thread.

-- Steve

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing
@ 2021-09-23  5:31 Zhongya Yan
  0 siblings, 0 replies; 6+ messages in thread
From: Zhongya Yan @ 2021-09-23  5:31 UTC (permalink / raw)
  To: Cong Wang
  Cc: Eric Dumazet, Steven Rostedt, Brendan Gregg, netdev, LKML,
	Jakub Kicinski, Ingo Molnar, David Miller, Hideaki YOSHIFUJI,
	David Ahern, Yonghong Song, Zhongya Yan

On Wed, 22 Sep 2021 23:08:15 -0400

Cong Wang <xiyou.wangcong@gmail.com> wrote:

> I think you fail to explain why only TCP needs it?

Thanks for the suggestion, I will rewrite the description document.

> This should be useful for all kinds of drops, not just TCP, therefore
> you should consider extending net/core/drop_monitor.c instead of
> just tcp_drop().

Currently, I'll look at the code in `net/core/drop_monitor.c` later

 because it only extends tcp.

> Also, kernel does not have to explain it in strings, those SNMP
> counters are already available for user-space, so kernel could
> just use SNMP enums and let user-space interpret them. In many
> cases, you are just adding strings for those SNMP enums.
>
> Thanks.

Since these drops are hardly hot path, why not simply use a string ?
An ENUM will not really help grep games.
--Eric Dumazet

I think `Eric`s point is correct, so strings are used. SNMP is auxiliary,

I can remove it if you find it unfriendly

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-09-23  5:31 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-14 14:35 [PATCH] tcp: tcp_drop adds `SNMP` and `reason` parameter for tracing Zhongya Yan
2021-09-20 16:54 ` Cong Wang
2021-09-20 17:15   ` Steven Rostedt
2021-09-20 17:20     ` Cong Wang
2021-09-20 17:24       ` Steven Rostedt
2021-09-23  5:31 Zhongya Yan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).