LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
@ 2018-04-16 15:33 Yafang Shao
  2018-04-16 15:43 ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Yafang Shao @ 2018-04-16 15:33 UTC (permalink / raw)
  To: davem; +Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel, Yafang Shao

tcp_rcv_space_adjust is called every time data is copied to user space,
introducing a tcp tracepoint for which could show us when the packet is
copied to user.
This could help us figure out whether there's latency in user process.

When a tcp packet arrives, tcp_rcv_established() will be called and with
the existed tracepoint tcp_probe we could get the time when this packet
arrives.
Then this packet will be copied to user, and tcp_rcv_space_adjust will
be called and with this new introduced tracepoint we could get the time
when this packet is copied to user.

	arrives time : user process time    => latency caused by user
	tcp_probe      tcp_rcv_space_adjust

Hence in the prink message, sk is printed as a key to connect these two
tracepoints.

Maybe we could export sockfd in this new tracepoint as well, then we
could connect this new tracepoint with epoll/read/recv* tracepoint, and
finally that could show us the whole lifespan of this packet. But we
could also implement that with pid as these functions are executed in
process context.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/tcp.h | 21 +++++++++++++++------
 net/ipv4/tcp_input.c       |  2 ++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be..65a6d22 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -146,10 +146,11 @@
 			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock=0x%p",
 		  __entry->sport, __entry->dport,
 		  __entry->saddr, __entry->daddr,
-		  __entry->saddr_v6, __entry->daddr_v6)
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->skaddr)
 );
 
 DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
@@ -166,6 +167,13 @@
 	TP_ARGS(sk)
 );
 
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
 TRACE_EVENT(tcp_set_state,
 
 	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
@@ -265,6 +273,7 @@
 	TP_ARGS(sk, skb),
 
 	TP_STRUCT__entry(
+		__field(const void *, skaddr)
 		/* sockaddr_in6 is always bigger than sockaddr_in */
 		__array(__u8, saddr, sizeof(struct sockaddr_in6))
 		__array(__u8, daddr, sizeof(struct sockaddr_in6))
@@ -285,6 +294,8 @@
 		const struct tcp_sock *tp = tcp_sk(sk);
 		const struct inet_sock *inet = inet_sk(sk);
 
+		__entry->skaddr = sk;
+
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
 
@@ -305,13 +316,11 @@
 		__entry->srtt = tp->srtt_us >> 3;
 	),
 
-	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
-		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
-		  "rcv_wnd=%u",
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock=0x%p",
 		  __entry->saddr, __entry->daddr, __entry->mark,
 		  __entry->length, __entry->snd_nxt, __entry->snd_una,
 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-		  __entry->srtt, __entry->rcv_wnd)
+		  __entry->srtt, __entry->rcv_wnd, __entry->skaddr)
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6..4b4d6b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	u32 copied;
 	int time;
 
+	trace_tcp_rcv_space_adjust(sk);
+
 	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-16 15:33 [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust Yafang Shao
@ 2018-04-16 15:43 ` Eric Dumazet
  2018-04-16 16:03   ` Yafang Shao
  2018-04-17 23:44   ` Alexei Starovoitov
  0 siblings, 2 replies; 7+ messages in thread
From: Eric Dumazet @ 2018-04-16 15:43 UTC (permalink / raw)
  To: Yafang Shao, davem; +Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel



On 04/16/2018 08:33 AM, Yafang Shao wrote:
> tcp_rcv_space_adjust is called every time data is copied to user space,
> introducing a tcp tracepoint for which could show us when the packet is
> copied to user.
> This could help us figure out whether there's latency in user process.
> 
> When a tcp packet arrives, tcp_rcv_established() will be called and with
> the existed tracepoint tcp_probe we could get the time when this packet
> arrives.
> Then this packet will be copied to user, and tcp_rcv_space_adjust will
> be called and with this new introduced tracepoint we could get the time
> when this packet is copied to user.
> 
> 	arrives time : user process time    => latency caused by user
> 	tcp_probe      tcp_rcv_space_adjust
> 
> Hence in the prink message, sk is printed as a key to connect these two
> tracepoints.
> 

socket pointer is not a key.

TCP sockets can be reused pretty fast after free.

I suggest you go for cookie instead, this is an unique 64bit identifier.
( sock_gen_cookie() for details )

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-16 15:43 ` Eric Dumazet
@ 2018-04-16 16:03   ` Yafang Shao
  2018-04-17 23:44   ` Alexei Starovoitov
  1 sibling, 0 replies; 7+ messages in thread
From: Yafang Shao @ 2018-04-16 16:03 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Alexey Kuznetsov, yoshfuji, Song Liu, netdev, LKML

On Mon, Apr 16, 2018 at 11:43 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 04/16/2018 08:33 AM, Yafang Shao wrote:
>> tcp_rcv_space_adjust is called every time data is copied to user space,
>> introducing a tcp tracepoint for which could show us when the packet is
>> copied to user.
>> This could help us figure out whether there's latency in user process.
>>
>> When a tcp packet arrives, tcp_rcv_established() will be called and with
>> the existed tracepoint tcp_probe we could get the time when this packet
>> arrives.
>> Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> be called and with this new introduced tracepoint we could get the time
>> when this packet is copied to user.
>>
>>       arrives time : user process time    => latency caused by user
>>       tcp_probe      tcp_rcv_space_adjust
>>
>> Hence in the prink message, sk is printed as a key to connect these two
>> tracepoints.
>>
>
> socket pointer is not a key.
>
> TCP sockets can be reused pretty fast after free.
>
> I suggest you go for cookie instead, this is an unique 64bit identifier.
> ( sock_gen_cookie() for details )
>
>

Thanks for your explanation.
Will have a try.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-16 15:43 ` Eric Dumazet
  2018-04-16 16:03   ` Yafang Shao
@ 2018-04-17 23:44   ` Alexei Starovoitov
  2018-04-18  4:42     ` Yafang Shao
  1 sibling, 1 reply; 7+ messages in thread
From: Alexei Starovoitov @ 2018-04-17 23:44 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Yafang Shao, davem, songliubraving, netdev, linux-kernel

On Mon, Apr 16, 2018 at 08:43:31AM -0700, Eric Dumazet wrote:
> 
> 
> On 04/16/2018 08:33 AM, Yafang Shao wrote:
> > tcp_rcv_space_adjust is called every time data is copied to user space,
> > introducing a tcp tracepoint for which could show us when the packet is
> > copied to user.
> > This could help us figure out whether there's latency in user process.
> > 
> > When a tcp packet arrives, tcp_rcv_established() will be called and with
> > the existed tracepoint tcp_probe we could get the time when this packet
> > arrives.
> > Then this packet will be copied to user, and tcp_rcv_space_adjust will
> > be called and with this new introduced tracepoint we could get the time
> > when this packet is copied to user.
> > 
> > 	arrives time : user process time    => latency caused by user
> > 	tcp_probe      tcp_rcv_space_adjust
> > 
> > Hence in the prink message, sk is printed as a key to connect these two
> > tracepoints.
> > 
> 
> socket pointer is not a key.
> 
> TCP sockets can be reused pretty fast after free.
> 
> I suggest you go for cookie instead, this is an unique 64bit identifier.
> ( sock_gen_cookie() for details )

I think would be even better if the stack would do this sock_gen_cookie()
on its own in some way that user cannnot infere the order.
In many cases we wanted to use socket cookie, but since it's not inited
by default it's kinda useless.
Turning this tracepoint on just to get cookie would be an ugly workaround.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-17 23:44   ` Alexei Starovoitov
@ 2018-04-18  4:42     ` Yafang Shao
  0 siblings, 0 replies; 7+ messages in thread
From: Yafang Shao @ 2018-04-18  4:42 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: Eric Dumazet, David Miller, Song Liu, netdev, LKML

On Wed, Apr 18, 2018 at 7:44 AM, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
> On Mon, Apr 16, 2018 at 08:43:31AM -0700, Eric Dumazet wrote:
>>
>>
>> On 04/16/2018 08:33 AM, Yafang Shao wrote:
>> > tcp_rcv_space_adjust is called every time data is copied to user space,
>> > introducing a tcp tracepoint for which could show us when the packet is
>> > copied to user.
>> > This could help us figure out whether there's latency in user process.
>> >
>> > When a tcp packet arrives, tcp_rcv_established() will be called and with
>> > the existed tracepoint tcp_probe we could get the time when this packet
>> > arrives.
>> > Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> > be called and with this new introduced tracepoint we could get the time
>> > when this packet is copied to user.
>> >
>> >     arrives time : user process time    => latency caused by user
>> >     tcp_probe      tcp_rcv_space_adjust
>> >
>> > Hence in the prink message, sk is printed as a key to connect these two
>> > tracepoints.
>> >
>>
>> socket pointer is not a key.
>>
>> TCP sockets can be reused pretty fast after free.
>>
>> I suggest you go for cookie instead, this is an unique 64bit identifier.
>> ( sock_gen_cookie() for details )
>
> I think would be even better if the stack would do this sock_gen_cookie()
> on its own in some way that user cannnot infere the order.
> In many cases we wanted to use socket cookie, but since it's not inited
> by default it's kinda useless.
> Turning this tracepoint on just to get cookie would be an ugly workaround.
>

Could we init it in sk_alloc() ?
Then in other code paths, for example sock_getsockopt or tracepoints,
we only read the value through a new inline function named
sock_read_cookie().


Thanks
Yafang

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-14  9:48 Yafang Shao
@ 2018-04-14 19:47 ` David Miller
  0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2018-04-14 19:47 UTC (permalink / raw)
  To: laoar.shao; +Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel


The net-next tree is closed, please resubmit this when the merge window
ends and the net-next tree opens back up.

Thank you.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
@ 2018-04-14  9:48 Yafang Shao
  2018-04-14 19:47 ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Yafang Shao @ 2018-04-14  9:48 UTC (permalink / raw)
  To: davem, kuznet, yoshfuji, songliubraving; +Cc: netdev, linux-kernel, Yafang Shao

tcp_rcv_space_adjust is called every time data is copied to user space,
introducing a tcp tracepoint for which could show us when the packet is
copied to user.
This could help us figure out whether there's latency in user process.

When a tcp packet arrives, tcp_rcv_established() will be called and with
the existed tracepoint tcp_probe we could get the time when this packet
arrives.
Then this packet will be copied to user, and tcp_rcv_space_adjust will
be called and with this new introduced tracepoint we could get the time
when this packet is copied to user.

	arrives time : user process time    => latency caused by user
	tcp_probe      tcp_rcv_space_adjust

Hence in the prink message, sk is printed as a key to connect these two
tracepoints.

Maybe we could export sockfd in this new tracepoint as well, then we
could connect this new tracepoint with epoll/read/recv* tracepoint, and
finally that could show us the whole lifespan of this packet. But we
could also implement that with pid as these functions are executed in
process context.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/tcp.h | 21 +++++++++++++++------
 net/ipv4/tcp_input.c       |  2 ++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 878b2be..65a6d22 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -146,10 +146,11 @@
 			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
 	),
 
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock=0x%p",
 		  __entry->sport, __entry->dport,
 		  __entry->saddr, __entry->daddr,
-		  __entry->saddr_v6, __entry->daddr_v6)
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->skaddr)
 );
 
 DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
@@ -166,6 +167,13 @@
 	TP_ARGS(sk)
 );
 
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
 TRACE_EVENT(tcp_set_state,
 
 	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
@@ -265,6 +273,7 @@
 	TP_ARGS(sk, skb),
 
 	TP_STRUCT__entry(
+		__field(const void *, skaddr)
 		/* sockaddr_in6 is always bigger than sockaddr_in */
 		__array(__u8, saddr, sizeof(struct sockaddr_in6))
 		__array(__u8, daddr, sizeof(struct sockaddr_in6))
@@ -285,6 +294,8 @@
 		const struct tcp_sock *tp = tcp_sk(sk);
 		const struct inet_sock *inet = inet_sk(sk);
 
+		__entry->skaddr = sk;
+
 		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
 		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
 
@@ -305,13 +316,11 @@
 		__entry->srtt = tp->srtt_us >> 3;
 	),
 
-	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
-		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
-		  "rcv_wnd=%u",
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock=0x%p",
 		  __entry->saddr, __entry->daddr, __entry->mark,
 		  __entry->length, __entry->snd_nxt, __entry->snd_una,
 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-		  __entry->srtt, __entry->rcv_wnd)
+		  __entry->srtt, __entry->rcv_wnd, __entry->skaddr)
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 367def6..4b4d6b9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	u32 copied;
 	int time;
 
+	trace_tcp_rcv_space_adjust(sk);
+
 	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-04-18  4:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-16 15:33 [PATCH net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust Yafang Shao
2018-04-16 15:43 ` Eric Dumazet
2018-04-16 16:03   ` Yafang Shao
2018-04-17 23:44   ` Alexei Starovoitov
2018-04-18  4:42     ` Yafang Shao
  -- strict thread matches above, loose matches on Subject: below --
2018-04-14  9:48 Yafang Shao
2018-04-14 19:47 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).