LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Eric Dumazet <edumazet@google.com>
To: "David S . Miller" <davem@davemloft.net>
Cc: netdev <netdev@vger.kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	linux-mm <linux-mm@kvack.org>,
	Ka-Cheong Poon <ka-cheong.poon@oracle.com>,
	Eric Dumazet <edumazet@google.com>,
	Eric Dumazet <eric.dumazet@gmail.com>
Subject: [PATCH v4 net-next 2/2] selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE
Date: Fri, 27 Apr 2018 08:58:09 -0700	[thread overview]
Message-ID: <20180427155809.79094-3-edumazet@google.com> (raw)
In-Reply-To: <20180427155809.79094-1-edumazet@google.com>

After the prior kernel change, mmap() on a TCP socket only reserves a VMA.

We have to use getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...)
to perform the transfer of pages from skbs in the TCP receive queue into such a VMA.

struct tcp_zerocopy_receive {
	__u64 address;		/* in: address of mapping */
	__u32 length;		/* in/out: number of bytes to map/mapped */
	__u32 recv_skip_hint;	/* out: amount of bytes to skip */
};

After a successful getsockopt(...TCP_ZEROCOPY_RECEIVE...), @length contains
the number of bytes that were mapped, and @recv_skip_hint contains the number
of bytes that should be read using conventional read()/recv()/recvmsg() system
calls, to skip a sequence of bytes that cannot be mapped because it is not
properly page aligned.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
---
 tools/testing/selftests/net/tcp_mmap.c | 64 +++++++++++++++-----------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index dea342fe6f4e88b5709d2ac37b2fc9a2a320bf44..77f762780199ff1f69f9f6b3f18e72deddb69f5e 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -76,9 +76,10 @@
 #include <time.h>
 #include <sys/time.h>
 #include <netinet/in.h>
-#include <netinet/tcp.h>
 #include <arpa/inet.h>
 #include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
 
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY    0x4000000
@@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length)
 void *child_thread(void *arg)
 {
 	unsigned long total_mmap = 0, total = 0;
+	struct tcp_zerocopy_receive zc;
 	unsigned long delta_usec;
 	int flags = MAP_SHARED;
 	struct timeval t0, t1;
 	char *buffer = NULL;
-	void *oaddr = NULL;
+	void *addr = NULL;
 	double throughput;
 	struct rusage ru;
 	int lu, fd;
@@ -153,41 +155,46 @@ void *child_thread(void *arg)
 		perror("malloc");
 		goto error;
 	}
+	if (zflg) {
+		addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+		if (addr == (void *)-1)
+			zflg = 0;
+	}
 	while (1) {
 		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
 		int sub;
 
 		poll(&pfd, 1, 10000);
 		if (zflg) {
-			void *naddr;
+			socklen_t zc_len = sizeof(zc);
+			int res;
 
-			naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
-			if (naddr == (void *)-1) {
-				if (errno == EAGAIN) {
-					/* That is if SO_RCVLOWAT is buggy */
-					usleep(1000);
-					continue;
-				}
-				if (errno == EINVAL) {
-					flags = MAP_SHARED;
-					oaddr = NULL;
-					goto fallback;
-				}
-				if (errno != EIO)
-					perror("mmap()");
+			zc.address = (__u64)addr;
+			zc.length = chunk_size;
+			zc.recv_skip_hint = 0;
+			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+					 &zc, &zc_len);
+			if (res == -1)
 				break;
+
+			if (zc.length) {
+				assert(zc.length <= chunk_size);
+				total_mmap += zc.length;
+				if (xflg)
+					hash_zone(addr, zc.length);
+				total += zc.length;
 			}
-			total_mmap += chunk_size;
-			if (xflg)
-				hash_zone(naddr, chunk_size);
-			total += chunk_size;
-			if (!keepflag) {
-				flags |= MAP_FIXED;
-				oaddr = naddr;
+			if (zc.recv_skip_hint) {
+				assert(zc.recv_skip_hint <= chunk_size);
+				lu = read(fd, buffer, zc.recv_skip_hint);
+				if (lu > 0) {
+					if (xflg)
+						hash_zone(buffer, lu);
+					total += lu;
+				}
 			}
 			continue;
 		}
-fallback:
 		sub = 0;
 		while (sub < chunk_size) {
 			lu = read(fd, buffer + sub, chunk_size - sub);
@@ -228,6 +235,8 @@ void *child_thread(void *arg)
 error:
 	free(buffer);
 	close(fd);
+	if (zflg)
+		munmap(addr, chunk_size);
 	pthread_exit(0);
 }
 
@@ -371,7 +380,8 @@ int main(int argc, char *argv[])
 		setup_sockaddr(cfg_family, host, &listenaddr);
 
 		if (mss &&
-		    setsockopt(fdlisten, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+			       &mss, sizeof(mss)) == -1) {
 			perror("setsockopt TCP_MAXSEG");
 			exit(1);
 		}
@@ -402,7 +412,7 @@ int main(int argc, char *argv[])
 	setup_sockaddr(cfg_family, host, &addr);
 
 	if (mss &&
-	    setsockopt(fd, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
 		perror("setsockopt TCP_MAXSEG");
 		exit(1);
 	}
-- 
2.17.0.441.gb46fe60e1d-goog

  parent reply	other threads:[~2018-04-27 15:59 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-27 15:58 [PATCH v4 net-next 0/2] tcp: mmap: rework zerocopy receive Eric Dumazet
2018-04-27 15:58 ` [PATCH v4 net-next 1/2] tcp: add TCP_ZEROCOPY_RECEIVE support for " Eric Dumazet
2018-04-27 15:58 ` Eric Dumazet [this message]
2018-04-30  1:34 ` [PATCH v4 net-next 0/2] tcp: mmap: rework " David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180427155809.79094-3-edumazet@google.com \
    --to=edumazet@google.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=ka-cheong.poon@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=netdev@vger.kernel.org \
    --subject='Re: [PATCH v4 net-next 2/2] selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).