LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH] mm: rcu-protected get_mm_exe_file()
@ 2015-03-16 13:12 Konstantin Khlebnikov
  2015-03-16 14:07 ` Oleg Nesterov
  0 siblings, 1 reply; 7+ messages in thread
From: Konstantin Khlebnikov @ 2015-03-16 13:12 UTC (permalink / raw)
  To: linux-mm, Andrew Morton, linux-kernel, Davidlohr Bueso
  Cc: Al Viro, Oleg Nesterov

This patch removes mm->mmap_sem from mm->exe_file read side.
Also it kills dup_mm_exe_file() and moves exe_file duplication into
dup_mmap() where both mmap_sems are locked.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c                |    3 +-
 include/linux/fs.h       |    1 +
 include/linux/mm_types.h |    2 +-
 kernel/fork.c            |   56 ++++++++++++++++++++++++++++++----------------
 4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index ee738ea028fa..93c5f89c248b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
 	file = fcheck_files(files, fd);
 	if (file) {
 		/* File object ref couldn't be taken */
-		if ((file->f_mode & mask) ||
-		    !atomic_long_inc_not_zero(&file->f_count))
+		if ((file->f_mode & mask) || !get_file_rcu(file))
 			file = NULL;
 	}
 	rcu_read_unlock();
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1abc7f2a5730..29bc94cfa273 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -847,6 +847,7 @@ static inline struct file *get_file(struct file *f)
 	atomic_long_inc(&f->f_count);
 	return f;
 }
+#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
 #define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
 #define file_count(x)	atomic_long_read(&(x)->f_count)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 590630eb59ba..8d37e26a1007 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -429,7 +429,7 @@ struct mm_struct {
 #endif
 
 	/* store ref to file /proc/<pid>/exe symlink points to */
-	struct file *exe_file;
+	struct file __rcu *exe_file;
 #ifdef CONFIG_MMU_NOTIFIER
 	struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 2113001ceb38..a7c596517bd6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -380,6 +380,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	 */
 	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
+	/* No ordering required: file already has been exposed. */
+	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+
 	mm->total_vm = oldmm->total_vm;
 	mm->shared_vm = oldmm->shared_vm;
 	mm->exec_vm = oldmm->exec_vm;
@@ -505,7 +508,13 @@ static inline void mm_free_pgd(struct mm_struct *mm)
 	pgd_free(mm, mm->pgd);
 }
 #else
-#define dup_mmap(mm, oldmm)	(0)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	down_write(&oldmm->mmap_sem);
+	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+	up_write(&oldmm->mmap_sem);
+	return 0;
+}
 #define mm_alloc_pgd(mm)	(0)
 #define mm_free_pgd(mm)
 #endif /* CONFIG_MMU */
@@ -674,36 +683,47 @@ void mmput(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmput);
 
+/**
+ * set_mm_exe_file - change a reference to the mm's executable file
+ *
+ * This changes mm's executale file (shown as symlink /proc/[pid]/exe).
+ *
+ * Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
+ * Callers prevent concurrent invocations: in mmput() nobody alive left,
+ * in execve task is single-threaded, prctl holds mmap_sem exclusively.
+ */
 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
+	struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
+			!atomic_read(&mm->mm_users) || current->in_execve ||
+			lock_is_held(&mm->mmap_sem));
+
 	if (new_exe_file)
 		get_file(new_exe_file);
-	if (mm->exe_file)
-		fput(mm->exe_file);
-	mm->exe_file = new_exe_file;
+	rcu_assign_pointer(mm->exe_file, new_exe_file);
+	if (old_exe_file)
+		fput(old_exe_file);
 }
 
+/**
+ * get_mm_exe_file - acquire a reference to the mm's executable file
+ *
+ * Returns %NULL if mm has no associated executable file.
+ * User must release file via fput().
+ */
 struct file *get_mm_exe_file(struct mm_struct *mm)
 {
 	struct file *exe_file;
 
-	/* We need mmap_sem to protect against races with removal of exe_file */
-	down_read(&mm->mmap_sem);
-	exe_file = mm->exe_file;
-	if (exe_file)
-		get_file(exe_file);
-	up_read(&mm->mmap_sem);
+	rcu_read_lock();
+	exe_file = rcu_dereference(mm->exe_file);
+	if (exe_file && !get_file_rcu(exe_file))
+		exe_file = NULL;
+	rcu_read_unlock();
 	return exe_file;
 }
 EXPORT_SYMBOL(get_mm_exe_file);
 
-static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
-{
-	/* It's safe to write the exe_file pointer without exe_file_lock because
-	 * this is called during fork when the task is not yet in /proc */
-	newmm->exe_file = get_mm_exe_file(oldmm);
-}
-
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
@@ -865,8 +885,6 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
 
-	dup_mm_exe_file(oldmm, mm);
-
 	err = dup_mmap(mm, oldmm);
 	if (err)
 		goto free_pt;


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 13:12 [PATCH] mm: rcu-protected get_mm_exe_file() Konstantin Khlebnikov
@ 2015-03-16 14:07 ` Oleg Nesterov
  2015-03-16 14:50   ` Davidlohr Bueso
  2015-03-16 16:15   ` Konstantin Khlebnikov
  0 siblings, 2 replies; 7+ messages in thread
From: Oleg Nesterov @ 2015-03-16 14:07 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: linux-mm, Andrew Morton, linux-kernel, Davidlohr Bueso, Al Viro

On 03/16, Konstantin Khlebnikov wrote:
>
> +/**
> + * set_mm_exe_file - change a reference to the mm's executable file
> + *
> + * This changes mm's executale file (shown as symlink /proc/[pid]/exe).
> + *
> + * Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
> + * Callers prevent concurrent invocations: in mmput() nobody alive left,
> + * in execve task is single-threaded, prctl holds mmap_sem exclusively.
> + */
>  void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
>  {
> +	struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
> +			!atomic_read(&mm->mm_users) || current->in_execve ||
> +			lock_is_held(&mm->mmap_sem));
> +
>  	if (new_exe_file)
>  		get_file(new_exe_file);
> -	if (mm->exe_file)
> -		fput(mm->exe_file);
> -	mm->exe_file = new_exe_file;
> +	rcu_assign_pointer(mm->exe_file, new_exe_file);
> +	if (old_exe_file)
> +		fput(old_exe_file);
>  }

Yes, I think this is correct, __fput() does call_rcu(file_free_rcu). And
much better than the new lock ;)

Acked-by: Oleg Nesterov <oleg@redhat.com>



So I think the patch is fine, but personally I dislike the "prctl holds
mmap_sem exclusively" and rcu_dereference_protected().

I mean, I think we can do another cleanup on top of this change.

	1. set_mm_exe_file() should be called by exit/exec only, so
	   it should use

		rcu_dereference_protected(mm->exe_file,
					atomic_read(&mm->mm_users) <= 1);

	2. prctl() should not use it, it can do

	   get_file(new_exe);
	   old_exe = xchg(&mm->exe_file);
	   if (old_exe)
	   	fput(old_exe);

	3. and we can remove down_write(mmap_sem) from prctl paths.

	   Actually we can do this even without xchg() above, but we might
	   want to kill MMF_EXE_FILE_CHANGED and test_and_set_bit() check.

What do you think?

Oleg.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 14:07 ` Oleg Nesterov
@ 2015-03-16 14:50   ` Davidlohr Bueso
  2015-03-16 16:18     ` Konstantin Khlebnikov
  2015-03-16 16:15   ` Konstantin Khlebnikov
  1 sibling, 1 reply; 7+ messages in thread
From: Davidlohr Bueso @ 2015-03-16 14:50 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Konstantin Khlebnikov, linux-mm, Andrew Morton, linux-kernel, Al Viro

On Mon, 2015-03-16 at 15:07 +0100, Oleg Nesterov wrote:
> 	3. and we can remove down_write(mmap_sem) from prctl paths.
> 
> 	   Actually we can do this even without xchg() above, but we might
> 	   want to kill MMF_EXE_FILE_CHANGED and test_and_set_bit() check.

Yeah I was waiting for security folks input about this, otherwise this
still doesn't do it for me as we still have to deal with mmap_sem.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 14:07 ` Oleg Nesterov
  2015-03-16 14:50   ` Davidlohr Bueso
@ 2015-03-16 16:15   ` Konstantin Khlebnikov
  2015-03-16 16:35     ` Oleg Nesterov
  1 sibling, 1 reply; 7+ messages in thread
From: Konstantin Khlebnikov @ 2015-03-16 16:15 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: linux-mm, Andrew Morton, linux-kernel, Davidlohr Bueso, Al Viro

On 16.03.2015 17:07, Oleg Nesterov wrote:
> On 03/16, Konstantin Khlebnikov wrote:
>>
>> +/**
>> + * set_mm_exe_file - change a reference to the mm's executable file
>> + *
>> + * This changes mm's executale file (shown as symlink /proc/[pid]/exe).
>> + *
>> + * Main users are mmput(), sys_execve() and sys_prctl(PR_SET_MM_MAP/EXE_FILE).
>> + * Callers prevent concurrent invocations: in mmput() nobody alive left,
>> + * in execve task is single-threaded, prctl holds mmap_sem exclusively.
>> + */
>>   void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
>>   {
>> +	struct file *old_exe_file = rcu_dereference_protected(mm->exe_file,
>> +			!atomic_read(&mm->mm_users) || current->in_execve ||
>> +			lock_is_held(&mm->mmap_sem));
>> +
>>   	if (new_exe_file)
>>   		get_file(new_exe_file);
>> -	if (mm->exe_file)
>> -		fput(mm->exe_file);
>> -	mm->exe_file = new_exe_file;
>> +	rcu_assign_pointer(mm->exe_file, new_exe_file);
>> +	if (old_exe_file)
>> +		fput(old_exe_file);
>>   }
>
> Yes, I think this is correct, __fput() does call_rcu(file_free_rcu). And
> much better than the new lock ;)
>
> Acked-by: Oleg Nesterov <oleg@redhat.com>
>
>
>
> So I think the patch is fine, but personally I dislike the "prctl holds
> mmap_sem exclusively" and rcu_dereference_protected().
>
> I mean, I think we can do another cleanup on top of this change.
>
> 	1. set_mm_exe_file() should be called by exit/exec only, so
> 	   it should use
>
> 		rcu_dereference_protected(mm->exe_file,
> 					atomic_read(&mm->mm_users) <= 1);
>
> 	2. prctl() should not use it, it can do
>
> 	   get_file(new_exe);
> 	   old_exe = xchg(&mm->exe_file);
> 	   if (old_exe)
> 	   	fput(old_exe);

I think smp_mb() is required before xchg() or
probably this stuff should be hidden inside yet another magic RCU macro
( with two screens of comments =)

>
> 	3. and we can remove down_write(mmap_sem) from prctl paths.
>
> 	   Actually we can do this even without xchg() above, but we might
> 	   want to kill MMF_EXE_FILE_CHANGED and test_and_set_bit() check.
>
> What do you think?
>
> Oleg.
>


-- 
Konstantin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 14:50   ` Davidlohr Bueso
@ 2015-03-16 16:18     ` Konstantin Khlebnikov
  2015-03-16 17:04       ` Davidlohr Bueso
  0 siblings, 1 reply; 7+ messages in thread
From: Konstantin Khlebnikov @ 2015-03-16 16:18 UTC (permalink / raw)
  To: Davidlohr Bueso, Oleg Nesterov
  Cc: linux-mm, Andrew Morton, linux-kernel, Al Viro

On 16.03.2015 17:50, Davidlohr Bueso wrote:
> On Mon, 2015-03-16 at 15:07 +0100, Oleg Nesterov wrote:
>> 	3. and we can remove down_write(mmap_sem) from prctl paths.
>>
>> 	   Actually we can do this even without xchg() above, but we might
>> 	   want to kill MMF_EXE_FILE_CHANGED and test_and_set_bit() check.
>
> Yeah I was waiting for security folks input about this, otherwise this
> still doesn't do it for me as we still have to deal with mmap_sem.
>

Why? mm->flags are updated atomically. mmap_sem isn't required here.

-- 
Konstantin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 16:15   ` Konstantin Khlebnikov
@ 2015-03-16 16:35     ` Oleg Nesterov
  0 siblings, 0 replies; 7+ messages in thread
From: Oleg Nesterov @ 2015-03-16 16:35 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: linux-mm, Andrew Morton, linux-kernel, Davidlohr Bueso, Al Viro

On 03/16, Konstantin Khlebnikov wrote:
>
> On 16.03.2015 17:07, Oleg Nesterov wrote:
>> I mean, I think we can do another cleanup on top of this change.
>>
>> 	1. set_mm_exe_file() should be called by exit/exec only, so
>> 	   it should use
>>
>> 		rcu_dereference_protected(mm->exe_file,
>> 					atomic_read(&mm->mm_users) <= 1);
>>
>> 	2. prctl() should not use it, it can do
>>
>> 	   get_file(new_exe);
>> 	   old_exe = xchg(&mm->exe_file);
>> 	   if (old_exe)
>> 	   	fput(old_exe);
>
> I think smp_mb() is required before xchg() or
> probably this stuff should be hidden inside yet another magic RCU macro
> ( with two screens of comments =)

Not really, xchg() implies mb's on both sides. As any other atomic operation
which returns the result.

(And in fact we do not even need rcu_assign_pointer() in set_mm_exe_file(),
 get_mm_exe_file() could do READ_ONCE() + inc_not_zero(). But this is off-
 topic, and of course rcu_* helpers look better)

Oleg.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: rcu-protected get_mm_exe_file()
  2015-03-16 16:18     ` Konstantin Khlebnikov
@ 2015-03-16 17:04       ` Davidlohr Bueso
  0 siblings, 0 replies; 7+ messages in thread
From: Davidlohr Bueso @ 2015-03-16 17:04 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Oleg Nesterov, linux-mm, Andrew Morton, linux-kernel, Al Viro

On Mon, 2015-03-16 at 19:18 +0300, Konstantin Khlebnikov wrote:
> On 16.03.2015 17:50, Davidlohr Bueso wrote:
> > On Mon, 2015-03-16 at 15:07 +0100, Oleg Nesterov wrote:
> >> 	3. and we can remove down_write(mmap_sem) from prctl paths.
> >>
> >> 	   Actually we can do this even without xchg() above, but we might
> >> 	   want to kill MMF_EXE_FILE_CHANGED and test_and_set_bit() check.
> >
> > Yeah I was waiting for security folks input about this, otherwise this
> > still doesn't do it for me as we still have to deal with mmap_sem.
> >
> 
> Why? mm->flags are updated atomically. mmap_sem isn't required here.

Right, nm I was thinking of the set but Oleg's xchg() idea is obviously
correct and a much better approach to mine. The only thing I'd suggest
is explicitly commenting the lockless get_mm_exe_file callers, if
useful, you can take it from my patch 1.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-03-16 17:04 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-16 13:12 [PATCH] mm: rcu-protected get_mm_exe_file() Konstantin Khlebnikov
2015-03-16 14:07 ` Oleg Nesterov
2015-03-16 14:50   ` Davidlohr Bueso
2015-03-16 16:18     ` Konstantin Khlebnikov
2015-03-16 17:04       ` Davidlohr Bueso
2015-03-16 16:15   ` Konstantin Khlebnikov
2015-03-16 16:35     ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).