LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	linux-mm <linux-mm@kvack.org>,
	linux-api@vger.kernel.org, oleksandr@redhat.com,
	Suren Baghdasaryan <surenb@google.com>,
	Tim Murray <timmurray@google.com>,
	Daniel Colascione <dancol@google.com>,
	Sandeep Patil <sspatil@google.com>,
	Sonny Rao <sonnyrao@google.com>,
	Brian Geffon <bgeffon@google.com>, Michal Hocko <mhocko@suse.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>,
	John Dias <joaodias@google.com>,
	ktkhai@virtuozzo.com, christian.brauner@ubuntu.com,
	sjpark@amazon.de, Minchan Kim <minchan@kernel.org>
Subject: [PATCH v2 5/5] mm: support both pid and pidfd for process_madvise
Date: Thu, 16 Jan 2020 15:59:53 -0800	[thread overview]
Message-ID: <20200116235953.163318-6-minchan@kernel.org> (raw)
In-Reply-To: <20200116235953.163318-1-minchan@kernel.org>

There is a demand[1] to support pid as well as pidfd for process_madvise
to avoid the unnecessary syscall to get a pidfd if the user has control of
the target process (i.e., they can guarantee the process is not gone
and the pid is not reused, or it is okay to give a hint to the wrong
process).

This patch aims to support both options, like waitid(2). So, the
syscall now looks like this:

	int process_madvise(int which, pid_t pid, void *addr,
		size_t length, int advise, unsigned long flag);

@which is actually idtype_t for the userspace library, and it
currently supports P_PID and P_PIDFD.

[1]  https://lore.kernel.org/linux-mm/9d849087-3359-c4ab-fbec-859e8186c509@virtuozzo.com/
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/pid.h      |  1 +
 include/linux/syscalls.h |  3 ++-
 kernel/exit.c            | 17 -----------------
 kernel/pid.c             | 17 +++++++++++++++++
 mm/madvise.c             | 34 ++++++++++++++++++++++------------
 5 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 998ae7d24450..023d9c3a8edc 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -75,6 +75,7 @@ extern const struct file_operations pidfd_fops;
 struct file;
 
 extern struct pid *pidfd_pid(const struct file *file);
+extern struct pid *pidfd_get_pid(unsigned int fd);
 
 static inline struct pid *get_pid(struct pid *pid)
 {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1b58a11ff49f..27060e59db37 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -877,7 +877,8 @@ asmlinkage long sys_munlockall(void);
 asmlinkage long sys_mincore(unsigned long start, size_t len,
 				unsigned char __user * vec);
 asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
-asmlinkage long sys_process_madvise(int pidfd, unsigned long start,
+
+asmlinkage long sys_process_madvise(int which, pid_t pid, unsigned long start,
 			size_t len, int behavior, unsigned long flags);
 asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			unsigned long prot, unsigned long pgoff,
diff --git a/kernel/exit.c b/kernel/exit.c
index bcbd59888e67..7698843b1411 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1466,23 +1466,6 @@ static long do_wait(struct wait_opts *wo)
 	return retval;
 }
 
-static struct pid *pidfd_get_pid(unsigned int fd)
-{
-	struct fd f;
-	struct pid *pid;
-
-	f = fdget(fd);
-	if (!f.file)
-		return ERR_PTR(-EBADF);
-
-	pid = pidfd_pid(f.file);
-	if (!IS_ERR(pid))
-		get_pid(pid);
-
-	fdput(f);
-	return pid;
-}
-
 static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 			  int options, struct rusage *ru)
 {
diff --git a/kernel/pid.c b/kernel/pid.c
index 2278e249141d..a41a89d5dad2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -496,6 +496,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 	return idr_get_next(&ns->idr, &nr);
 }
 
+struct pid *pidfd_get_pid(unsigned int fd)
+{
+	struct fd f;
+	struct pid *pid;
+
+	f = fdget(fd);
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	pid = pidfd_pid(f.file);
+	if (!IS_ERR(pid))
+		get_pid(pid);
+
+	fdput(f);
+	return pid;
+}
+
 /**
  * pidfd_create() - Create a new pid file descriptor.
  *
diff --git a/mm/madvise.c b/mm/madvise.c
index 89557998d287..2ac62716e5b8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1192,11 +1192,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 	return madvise_common(current, current->mm, start, len_in, behavior);
 }
 
-SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
+SYSCALL_DEFINE6(process_madvise, int, which, pid_t, upid, unsigned long, start,
 		size_t, len_in, int, behavior, unsigned long, flags)
 {
 	int ret;
-	struct fd f;
 	struct pid *pid;
 	struct task_struct *task;
 	struct mm_struct *mm;
@@ -1207,20 +1206,31 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
 	if (!process_madvise_behavior_valid(behavior))
 		return -EINVAL;
 
-	f = fdget(pidfd);
-	if (!f.file)
-		return -EBADF;
+	switch (which) {
+	case P_PID:
+		if (upid <= 0)
+			return -EINVAL;
+
+		pid = find_get_pid(upid);
+		if (!pid)
+			return -ESRCH;
+		break;
+	case P_PIDFD:
+		if (upid < 0)
+			return -EINVAL;
 
-	pid = pidfd_pid(f.file);
-	if (IS_ERR(pid)) {
-		ret = PTR_ERR(pid);
-		goto fdput;
+		pid = pidfd_get_pid(upid);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	task = get_pid_task(pid, PIDTYPE_PID);
 	if (!task) {
 		ret = -ESRCH;
-		goto fdput;
+		goto put_pid;
 	}
 
 	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
@@ -1233,7 +1243,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
 	mmput(mm);
 release_task:
 	put_task_struct(task);
-fdput:
-	fdput(f);
+put_pid:
+	put_pid(pid);
 	return ret;
 }
-- 
2.25.0.rc1.283.g88dfdc4193-goog


      parent reply	other threads:[~2020-01-17  0:00 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-16 23:59 [PATCH v2 0/5] introduce memory hinting API for external process Minchan Kim
2020-01-16 23:59 ` [PATCH v2 1/5] mm: factor out madvise's core functionality Minchan Kim
2020-01-17 10:02   ` Kirill Tkhai
2020-01-17 18:14     ` Minchan Kim
2020-01-16 23:59 ` [PATCH v2 2/5] mm: introduce external memory hinting API Minchan Kim
2020-01-17 11:52   ` Michal Hocko
2020-01-17 15:58     ` Kirill A. Shutemov
2020-01-17 17:32       ` Minchan Kim
2020-01-17 21:26         ` Kirill A. Shutemov
2020-01-18  9:40           ` SeongJae Park
2020-01-19 16:14           ` sspatil
2020-01-20  7:58             ` Michal Hocko
2020-01-20 10:39               ` Kirill Tkhai
2020-01-21 18:32               ` Minchan Kim
2020-01-22  8:28                 ` Michal Hocko
2020-01-22  9:36                   ` SeongJae Park
2020-01-22 10:02                     ` Michal Hocko
2020-01-22 13:28                       ` SeongJae Park
2020-01-23  1:41                   ` Minchan Kim
2020-01-23  9:13                     ` Michal Hocko
2020-01-21 18:11           ` Minchan Kim
2020-01-22 10:44             ` Oleksandr Natalenko
2020-01-23  1:43               ` Minchan Kim
2020-01-23  7:29                 ` Oleksandr Natalenko
2020-01-17 17:25     ` Minchan Kim
2020-01-20  8:03       ` Michal Hocko
2020-01-20 10:24     ` Kirill Tkhai
2020-01-20 11:27       ` Michal Hocko
2020-01-20 12:39         ` Kirill A. Shutemov
2020-01-20 13:24           ` Michal Hocko
2020-01-20 14:21             ` Kirill A. Shutemov
2020-01-20 15:44               ` Michal Hocko
2020-01-21 18:43             ` Minchan Kim
2020-01-16 23:59 ` [PATCH v2 3/5] mm/madvise: employ mmget_still_valid for write lock Minchan Kim
2020-01-16 23:59 ` [PATCH v2 4/5] mm/madvise: allow KSM hints for remote API Minchan Kim
2020-01-17 10:13   ` Kirill Tkhai
2020-01-17 12:34     ` Oleksandr Natalenko
2020-01-21 17:45       ` Minchan Kim
2020-01-16 23:59 ` Minchan Kim [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200116235953.163318-6-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=bgeffon@google.com \
    --cc=christian.brauner@ubuntu.com \
    --cc=dancol@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=joaodias@google.com \
    --cc=ktkhai@virtuozzo.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=oleksandr@redhat.com \
    --cc=shakeelb@google.com \
    --cc=sjpark@amazon.de \
    --cc=sonnyrao@google.com \
    --cc=sspatil@google.com \
    --cc=surenb@google.com \
    --cc=timmurray@google.com \
    --subject='Re: [PATCH v2 5/5] mm: support both pid and pidfd for process_madvise' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).