LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
@ 2018-04-17 16:36 Yafang Shao
  2018-04-17 17:27 ` Eric Dumazet
  2018-04-17 17:38 ` Song Liu
  0 siblings, 2 replies; 5+ messages in thread
From: Yafang Shao @ 2018-04-17 16:36 UTC (permalink / raw)
  To: eric.dumazet, davem
  Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel, Yafang Shao

tcp_rcv_space_adjust is called every time data is copied to user space,
introducing a tcp tracepoint for which could show us when the packet is
copied to user.
This could help us figure out whether there's latency in user process.

When a tcp packet arrives, tcp_rcv_established() will be called and with
the existed tracepoint tcp_probe we could get the time when this packet
arrives.
Then this packet will be copied to user, and tcp_rcv_space_adjust will
be called and with this new introduced tracepoint we could get the time
when this packet is copied to user.

        arrives time : user process time    => latency caused by user
        tcp_probe      tcp_rcv_space_adjust

Hence in the printk message, sk_cookie is printed as a key to relate
tcp_rcv_space_adjust with tcp_probe.

Maybe we could export sockfd in this new tracepoint as well, then we
could relate this new tracepoint with epoll/read/recv* tracepoints, and
finally that could show us the whole lifespan of this packet. But we
could also implement that with pid as these functions are executed in
process context.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>

---
v1 -> v2: use sk_cookie as key suggested by Eric.
---
 include/trace/events/tcp.h | 33 +++++++++++++++++++++++++++------
 net/ipv4/tcp_input.c       |  2 ++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 3dd6802..814f754 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -10,6 +10,7 @@
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
+#include <linux/sock_diag.h>
 
 #define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
 	do {							\
@@ -125,6 +126,7 @@
 		__array(__u8, daddr, 4)
 		__array(__u8, saddr_v6, 16)
 		__array(__u8, daddr_v6, 16)
+		__field(__u64, sock_cookie)
 	),
 
 	TP_fast_assign(
@@ -144,12 +146,24 @@
 
 		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
 			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+		/*
+		 * sk_cookie is used to identify a socket, with which we could
+		 * relate this tracepoint with other tracepoints,
+		 * i.e. tcp_probe.
+		 * If we needn't this relation, then sk_cookie is useless;
+		 * if we need this relation, then tcp_probe is already set,
+		 * and sk_cookie is already set in tcp_probe, so we could get
+		 * the value directly.
+		 */
+		__entry->sock_cookie = atomic64_read(&sk->sk_cookie);
 	),
 
-	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llu",
 		  __entry->sport, __entry->dport,
 		  __entry->saddr, __entry->daddr,
-		  __entry->saddr_v6, __entry->daddr_v6)
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->sock_cookie)
 );
 
 DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
@@ -166,6 +180,13 @@
 	TP_ARGS(sk)
 );
 
+DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
 TRACE_EVENT(tcp_retransmit_synack,
 
 	TP_PROTO(const struct sock *sk, const struct request_sock *req),
@@ -232,6 +253,7 @@
 		__field(__u32, snd_wnd)
 		__field(__u32, srtt)
 		__field(__u32, rcv_wnd)
+		__field(__u64, sock_cookie)
 	),
 
 	TP_fast_assign(
@@ -256,15 +278,14 @@
 		__entry->rcv_wnd = tp->rcv_wnd;
 		__entry->ssthresh = tcp_current_ssthresh(sk);
 		__entry->srtt = tp->srtt_us >> 3;
+		__entry->sock_cookie = sock_gen_cookie(sk);
 	),
 
-	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
-		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
-		  "rcv_wnd=%u",
+	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llu",
 		  __entry->saddr, __entry->daddr, __entry->mark,
 		  __entry->length, __entry->snd_nxt, __entry->snd_una,
 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
-		  __entry->srtt, __entry->rcv_wnd)
+		  __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
 );
 
 #endif /* _TRACE_TCP_H */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f93687f..43ad468 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	u32 copied;
 	int time;
 
+	trace_tcp_rcv_space_adjust(sk);
+
 	tcp_mstamp_refresh(tp);
 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-17 16:36 [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust Yafang Shao
@ 2018-04-17 17:27 ` Eric Dumazet
  2018-04-17 21:53   ` Yafang Shao
  2018-04-17 17:38 ` Song Liu
  1 sibling, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2018-04-17 17:27 UTC (permalink / raw)
  To: Yafang Shao, eric.dumazet, davem
  Cc: kuznet, yoshfuji, songliubraving, netdev, linux-kernel



On 04/17/2018 09:36 AM, Yafang Shao wrote:
> tcp_rcv_space_adjust is called every time data is copied to user space,
> introducing a tcp tracepoint for which could show us when the packet is
> copied to user.
> This could help us figure out whether there's latency in user process.
> 
> When a tcp packet arrives, tcp_rcv_established() will be called and with
> the existed tracepoint tcp_probe we could get the time when this packet
> arrives.
> Then this packet will be copied to user, and tcp_rcv_space_adjust will
> be called and with this new introduced tracepoint we could get the time
> when this packet is copied to user.
> 
>         arrives time : user process time    => latency caused by user
>         tcp_probe      tcp_rcv_space_adjust

Sorry, I could not parse these :/

> 
> Hence in the printk message, sk_cookie is printed as a key to relate
> tcp_rcv_space_adjust with tcp_probe.
> 
> Maybe we could export sockfd in this new tracepoint as well, then we
> could relate this new tracepoint with epoll/read/recv* tracepoints, and
> finally that could show us the whole lifespan of this packet. But we
> could also implement that with pid as these functions are executed in
> process context.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> 
> ---
> v1 -> v2: use sk_cookie as key suggested by Eric.
> ---
>  include/trace/events/tcp.h | 33 +++++++++++++++++++++++++++------
>  net/ipv4/tcp_input.c       |  2 ++
>  2 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 3dd6802..814f754 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -10,6 +10,7 @@
>  #include <linux/tracepoint.h>
>  #include <net/ipv6.h>
>  #include <net/tcp.h>
> +#include <linux/sock_diag.h>
>  
>  #define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
>  	do {							\
> @@ -125,6 +126,7 @@
>  		__array(__u8, daddr, 4)
>  		__array(__u8, saddr_v6, 16)
>  		__array(__u8, daddr_v6, 16)
> +		__field(__u64, sock_cookie)
>  	),
>  
>  	TP_fast_assign(
> @@ -144,12 +146,24 @@
>  
>  		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
>  			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
> +
> +		/*
> +		 * sk_cookie is used to identify a socket, with which we could
> +		 * relate this tracepoint with other tracepoints,
> +		 * i.e. tcp_probe.
> +		 * If we needn't this relation, then sk_cookie is useless;
> +		 * if we need this relation, then tcp_probe is already set,
> +		 * and sk_cookie is already set in tcp_probe, so we could get
> +		 * the value directly.
> +		 */
> +		__entry->sock_cookie = atomic64_read(&sk->sk_cookie);

Please scrap this comment and simply use the real thing.

	_entry->sock_cookie = sock_gen_cookie(sk);

We build generic events.

Being able to filter many TCP events on one socket cookie will be useful

If you worry about sock_gen_cookie(sk) being too expensive, then we can add an inline helper
for the fast path (when sk_cookie has been already set)

>  	),
>  
> -	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
> +	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llu",


iproute2/ss command uses hexadcimal output for socket cookie. Please use %llx for consistency.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-17 16:36 [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust Yafang Shao
  2018-04-17 17:27 ` Eric Dumazet
@ 2018-04-17 17:38 ` Song Liu
  2018-04-17 21:54   ` Yafang Shao
  1 sibling, 1 reply; 5+ messages in thread
From: Song Liu @ 2018-04-17 17:38 UTC (permalink / raw)
  To: Yafang Shao; +Cc: eric.dumazet, davem, kuznet, yoshfuji, netdev, linux-kernel



> On Apr 17, 2018, at 9:36 AM, Yafang Shao <laoar.shao@gmail.com> wrote:
> 
> tcp_rcv_space_adjust is called every time data is copied to user space,
> introducing a tcp tracepoint for which could show us when the packet is
> copied to user.
> This could help us figure out whether there's latency in user process.
> 
> When a tcp packet arrives, tcp_rcv_established() will be called and with
> the existed tracepoint tcp_probe we could get the time when this packet
> arrives.
> Then this packet will be copied to user, and tcp_rcv_space_adjust will
> be called and with this new introduced tracepoint we could get the time
> when this packet is copied to user.
> 
>        arrives time : user process time    => latency caused by user
>        tcp_probe      tcp_rcv_space_adjust
> 
> Hence in the printk message, sk_cookie is printed as a key to relate
> tcp_rcv_space_adjust with tcp_probe.
> 
> Maybe we could export sockfd in this new tracepoint as well, then we
> could relate this new tracepoint with epoll/read/recv* tracepoints, and
> finally that could show us the whole lifespan of this packet. But we
> could also implement that with pid as these functions are executed in
> process context.
> 
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> 
> ---
> v1 -> v2: use sk_cookie as key suggested by Eric.
> ---
> include/trace/events/tcp.h | 33 +++++++++++++++++++++++++++------
> net/ipv4/tcp_input.c       |  2 ++
> 2 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
> index 3dd6802..814f754 100644
> --- a/include/trace/events/tcp.h
> +++ b/include/trace/events/tcp.h
> @@ -10,6 +10,7 @@
> #include <linux/tracepoint.h>
> #include <net/ipv6.h>
> #include <net/tcp.h>
> +#include <linux/sock_diag.h>
> 
> #define TP_STORE_V4MAPPED(__entry, saddr, daddr)		\
> 	do {							\
> @@ -125,6 +126,7 @@
> 		__array(__u8, daddr, 4)
> 		__array(__u8, saddr_v6, 16)
> 		__array(__u8, daddr_v6, 16)
> +		__field(__u64, sock_cookie)
> 	),
> 
> 	TP_fast_assign(
> @@ -144,12 +146,24 @@
> 
> 		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
> 			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
> +
> +		/*
> +		 * sk_cookie is used to identify a socket, with which we could
> +		 * relate this tracepoint with other tracepoints,
> +		 * i.e. tcp_probe.
> +		 * If we needn't this relation, then sk_cookie is useless;
> +		 * if we need this relation, then tcp_probe is already set,
> +		 * and sk_cookie is already set in tcp_probe, so we could get
> +		 * the value directly.
> +		 */
> +		__entry->sock_cookie = atomic64_read(&sk->sk_cookie);
> 	),
> 
> -	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
> +	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llu",
> 		  __entry->sport, __entry->dport,
> 		  __entry->saddr, __entry->daddr,
> -		  __entry->saddr_v6, __entry->daddr_v6)
> +		  __entry->saddr_v6, __entry->daddr_v6,
> +		  __entry->sock_cookie)
> );
> 
> DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
> @@ -166,6 +180,13 @@
> 	TP_ARGS(sk)
> );
> 
> +DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
> +
> +	TP_PROTO(const struct sock *sk),
> +
> +	TP_ARGS(sk)
> +);
> +
> TRACE_EVENT(tcp_retransmit_synack,
> 
> 	TP_PROTO(const struct sock *sk, const struct request_sock *req),
> @@ -232,6 +253,7 @@
> 		__field(__u32, snd_wnd)
> 		__field(__u32, srtt)
> 		__field(__u32, rcv_wnd)
> +		__field(__u64, sock_cookie)
> 	),
> 
> 	TP_fast_assign(
> @@ -256,15 +278,14 @@
> 		__entry->rcv_wnd = tp->rcv_wnd;
> 		__entry->ssthresh = tcp_current_ssthresh(sk);
> 		__entry->srtt = tp->srtt_us >> 3;
> +		__entry->sock_cookie = sock_gen_cookie(sk);
> 	),
> 
> -	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
> -		  "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
> -		  "rcv_wnd=%u",
> +	TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llu",
> 		  __entry->saddr, __entry->daddr, __entry->mark,
> 		  __entry->length, __entry->snd_nxt, __entry->snd_una,
> 		  __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
> -		  __entry->srtt, __entry->rcv_wnd)
> +		  __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
> );
> 
> #endif /* _TRACE_TCP_H */
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index f93687f..43ad468 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
> 	u32 copied;
> 	int time;
> 
> +	trace_tcp_rcv_space_adjust(sk);
> +
> 	tcp_mstamp_refresh(tp);
> 	time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
> 	if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
> -- 
> 1.8.3.1
> 

If I understand this correctly, you can get all the information you need with
a kprobe on tcp_rcv_space_adjust(). Why is it necessary to introduce a new 
tracepoint? 

Thanks,
Song

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-17 17:27 ` Eric Dumazet
@ 2018-04-17 21:53   ` Yafang Shao
  0 siblings, 0 replies; 5+ messages in thread
From: Yafang Shao @ 2018-04-17 21:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Alexey Kuznetsov, yoshfuji, Song Liu, netdev, LKML

On Wed, Apr 18, 2018 at 1:27 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 04/17/2018 09:36 AM, Yafang Shao wrote:
>> tcp_rcv_space_adjust is called every time data is copied to user space,
>> introducing a tcp tracepoint for which could show us when the packet is
>> copied to user.
>> This could help us figure out whether there's latency in user process.
>>
>> When a tcp packet arrives, tcp_rcv_established() will be called and with
>> the existed tracepoint tcp_probe we could get the time when this packet
>> arrives.
>> Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> be called and with this new introduced tracepoint we could get the time
>> when this packet is copied to user.
>>
>>         arrives time : user process time    => latency caused by user
>>         tcp_probe      tcp_rcv_space_adjust
>
> Sorry, I could not parse these :/
>

Sorry for the poor expression. Will improve it.
I mean with these two tracepoint we could calculate the latency that
if the user process can't process this packet immdiately.

>>
>> Hence in the printk message, sk_cookie is printed as a key to relate
>> tcp_rcv_space_adjust with tcp_probe.
>>
>> Maybe we could export sockfd in this new tracepoint as well, then we
>> could relate this new tracepoint with epoll/read/recv* tracepoints, and
>> finally that could show us the whole lifespan of this packet. But we
>> could also implement that with pid as these functions are executed in
>> process context.
>>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>
>> ---
>> v1 -> v2: use sk_cookie as key suggested by Eric.
>> ---
>>  include/trace/events/tcp.h | 33 +++++++++++++++++++++++++++------
>>  net/ipv4/tcp_input.c       |  2 ++
>>  2 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
>> index 3dd6802..814f754 100644
>> --- a/include/trace/events/tcp.h
>> +++ b/include/trace/events/tcp.h
>> @@ -10,6 +10,7 @@
>>  #include <linux/tracepoint.h>
>>  #include <net/ipv6.h>
>>  #include <net/tcp.h>
>> +#include <linux/sock_diag.h>
>>
>>  #define TP_STORE_V4MAPPED(__entry, saddr, daddr)             \
>>       do {                                                    \
>> @@ -125,6 +126,7 @@
>>               __array(__u8, daddr, 4)
>>               __array(__u8, saddr_v6, 16)
>>               __array(__u8, daddr_v6, 16)
>> +             __field(__u64, sock_cookie)
>>       ),
>>
>>       TP_fast_assign(
>> @@ -144,12 +146,24 @@
>>
>>               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
>>                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
>> +
>> +             /*
>> +              * sk_cookie is used to identify a socket, with which we could
>> +              * relate this tracepoint with other tracepoints,
>> +              * i.e. tcp_probe.
>> +              * If we needn't this relation, then sk_cookie is useless;
>> +              * if we need this relation, then tcp_probe is already set,
>> +              * and sk_cookie is already set in tcp_probe, so we could get
>> +              * the value directly.
>> +              */
>> +             __entry->sock_cookie = atomic64_read(&sk->sk_cookie);
>
> Please scrap this comment and simply use the real thing.
>
>         _entry->sock_cookie = sock_gen_cookie(sk);
>
> We build generic events.
>
> Being able to filter many TCP events on one socket cookie will be useful
>
> If you worry about sock_gen_cookie(sk) being too expensive, then we can add an inline helper
> for the fast path (when sk_cookie has been already set)
>

Will improve.

>>       ),
>>
>> -     TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
>> +     TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llu",
>
>
> iproute2/ss command uses hexadcimal output for socket cookie. Please use %llx for consistency.
>

OK

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust
  2018-04-17 17:38 ` Song Liu
@ 2018-04-17 21:54   ` Yafang Shao
  0 siblings, 0 replies; 5+ messages in thread
From: Yafang Shao @ 2018-04-17 21:54 UTC (permalink / raw)
  To: Song Liu; +Cc: eric.dumazet, davem, kuznet, yoshfuji, netdev, linux-kernel

On Wed, Apr 18, 2018 at 1:38 AM, Song Liu <songliubraving@fb.com> wrote:
>
>
>> On Apr 17, 2018, at 9:36 AM, Yafang Shao <laoar.shao@gmail.com> wrote:
>>
>> tcp_rcv_space_adjust is called every time data is copied to user space,
>> introducing a tcp tracepoint for which could show us when the packet is
>> copied to user.
>> This could help us figure out whether there's latency in user process.
>>
>> When a tcp packet arrives, tcp_rcv_established() will be called and with
>> the existed tracepoint tcp_probe we could get the time when this packet
>> arrives.
>> Then this packet will be copied to user, and tcp_rcv_space_adjust will
>> be called and with this new introduced tracepoint we could get the time
>> when this packet is copied to user.
>>
>>        arrives time : user process time    => latency caused by user
>>        tcp_probe      tcp_rcv_space_adjust
>>
>> Hence in the printk message, sk_cookie is printed as a key to relate
>> tcp_rcv_space_adjust with tcp_probe.
>>
>> Maybe we could export sockfd in this new tracepoint as well, then we
>> could relate this new tracepoint with epoll/read/recv* tracepoints, and
>> finally that could show us the whole lifespan of this packet. But we
>> could also implement that with pid as these functions are executed in
>> process context.
>>
>> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
>>
>> ---
>> v1 -> v2: use sk_cookie as key suggested by Eric.
>> ---
>> include/trace/events/tcp.h | 33 +++++++++++++++++++++++++++------
>> net/ipv4/tcp_input.c       |  2 ++
>> 2 files changed, 29 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
>> index 3dd6802..814f754 100644
>> --- a/include/trace/events/tcp.h
>> +++ b/include/trace/events/tcp.h
>> @@ -10,6 +10,7 @@
>> #include <linux/tracepoint.h>
>> #include <net/ipv6.h>
>> #include <net/tcp.h>
>> +#include <linux/sock_diag.h>
>>
>> #define TP_STORE_V4MAPPED(__entry, saddr, daddr)              \
>>       do {                                                    \
>> @@ -125,6 +126,7 @@
>>               __array(__u8, daddr, 4)
>>               __array(__u8, saddr_v6, 16)
>>               __array(__u8, daddr_v6, 16)
>> +             __field(__u64, sock_cookie)
>>       ),
>>
>>       TP_fast_assign(
>> @@ -144,12 +146,24 @@
>>
>>               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
>>                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
>> +
>> +             /*
>> +              * sk_cookie is used to identify a socket, with which we could
>> +              * relate this tracepoint with other tracepoints,
>> +              * i.e. tcp_probe.
>> +              * If we needn't this relation, then sk_cookie is useless;
>> +              * if we need this relation, then tcp_probe is already set,
>> +              * and sk_cookie is already set in tcp_probe, so we could get
>> +              * the value directly.
>> +              */
>> +             __entry->sock_cookie = atomic64_read(&sk->sk_cookie);
>>       ),
>>
>> -     TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
>> +     TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llu",
>>                 __entry->sport, __entry->dport,
>>                 __entry->saddr, __entry->daddr,
>> -               __entry->saddr_v6, __entry->daddr_v6)
>> +               __entry->saddr_v6, __entry->daddr_v6,
>> +               __entry->sock_cookie)
>> );
>>
>> DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
>> @@ -166,6 +180,13 @@
>>       TP_ARGS(sk)
>> );
>>
>> +DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust,
>> +
>> +     TP_PROTO(const struct sock *sk),
>> +
>> +     TP_ARGS(sk)
>> +);
>> +
>> TRACE_EVENT(tcp_retransmit_synack,
>>
>>       TP_PROTO(const struct sock *sk, const struct request_sock *req),
>> @@ -232,6 +253,7 @@
>>               __field(__u32, snd_wnd)
>>               __field(__u32, srtt)
>>               __field(__u32, rcv_wnd)
>> +             __field(__u64, sock_cookie)
>>       ),
>>
>>       TP_fast_assign(
>> @@ -256,15 +278,14 @@
>>               __entry->rcv_wnd = tp->rcv_wnd;
>>               __entry->ssthresh = tcp_current_ssthresh(sk);
>>               __entry->srtt = tp->srtt_us >> 3;
>> +             __entry->sock_cookie = sock_gen_cookie(sk);
>>       ),
>>
>> -     TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
>> -               "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
>> -               "rcv_wnd=%u",
>> +     TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llu",
>>                 __entry->saddr, __entry->daddr, __entry->mark,
>>                 __entry->length, __entry->snd_nxt, __entry->snd_una,
>>                 __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
>> -               __entry->srtt, __entry->rcv_wnd)
>> +               __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie)
>> );
>>
>> #endif /* _TRACE_TCP_H */
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index f93687f..43ad468 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
>>       u32 copied;
>>       int time;
>>
>> +     trace_tcp_rcv_space_adjust(sk);
>> +
>>       tcp_mstamp_refresh(tp);
>>       time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
>>       if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
>> --
>> 1.8.3.1
>>
>
> If I understand this correctly, you can get all the information you need with
> a kprobe on tcp_rcv_space_adjust(). Why is it necessary to introduce a new
> tracepoint?
>

Tracepoint is less expensive and more cnovinient, that is the same
reason why tcp_probe.c was removed and tcp_probe tracepoint was
introduced.

Thanks
Yafang

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-04-17 21:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-17 16:36 [PATCH v2 net-next] net: introduce a new tracepoint for tcp_rcv_space_adjust Yafang Shao
2018-04-17 17:27 ` Eric Dumazet
2018-04-17 21:53   ` Yafang Shao
2018-04-17 17:38 ` Song Liu
2018-04-17 21:54   ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).