Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
* severe proc dentry lock contention
@ 2020-06-18 22:17 Junxiao Bi
  2020-06-18 23:39 ` Matthew Wilcox
  0 siblings, 1 reply; 20+ messages in thread
From: Junxiao Bi @ 2020-06-18 22:17 UTC (permalink / raw)
  To: linux-kernel, linux-fsdevel
  Cc: adobriyan, Matthew Wilcox, Srinivas Eeda, joe.jin

Hi,

When debugging a performance issue, I found that thousands of threads 
exiting around the same time can cause severe spin lock contention on the proc 
dentry "/proc/$parent_process_pid/task/"; that's because each thread needs 
to clean up its pid file from that directory when it exits. Check the following 
standalone test case that simulates the problem, and the perf top result on a v5.7 
kernel. Any idea on how to fix this?


    PerfTop:   48891 irqs/sec  kernel:95.6%  exact: 100.0% lost: 0/0 
drop: 0/0 [4000Hz cycles],  (all, 72 CPUs)
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 


     66.10%  [kernel]                               [k] 
native_queued_spin_lock_slowpath
      1.13%  [kernel]                               [k] _raw_spin_lock
      0.84%  [kernel]                               [k] clear_page_erms
      0.82%  [kernel]                               [k] 
queued_write_lock_slowpath
      0.64%  [kernel]                               [k] proc_task_readdir
      0.61%  [kernel]                               [k] 
find_idlest_group.isra.95
      0.61%  [kernel]                               [k] 
syscall_return_via_sysret
      0.55%  [kernel]                               [k] entry_SYSCALL_64
      0.49%  [kernel]                               [k] memcpy_erms
      0.46%  [kernel]                               [k] update_cfs_group
      0.41%  [kernel]                               [k] get_pid_task
      0.39%  [kernel]                               [k] 
_raw_spin_lock_irqsave
      0.37%  [kernel]                               [k] 
__list_del_entry_valid
      0.34%  [kernel]                               [k] 
get_page_from_freelist
      0.34%  [kernel]                               [k] __d_lookup
      0.32%  [kernel]                               [k] update_load_avg
      0.31%  libc-2.17.so                           [.] get_next_seq
      0.27%  [kernel]                               [k] avc_has_perm_noaudit
      0.26%  [kernel]                               [k] __sched_text_start
      0.25%  [kernel]                               [k] 
selinux_inode_permission
      0.25%  [kernel]                               [k] __slab_free
      0.24%  [kernel]                               [k] detach_entity_cfs_rq
      0.23%  [kernel]                               [k] zap_pte_range
      0.22%  [kernel]                               [k] 
_find_next_bit.constprop.1
      0.22%  libc-2.17.so                           [.] vfprintf
      0.20%  libc-2.17.so                           [.] _int_malloc
      0.19%  [kernel]                               [k] _raw_spin_lock_irq
      0.18%  [kernel]                               [k] rb_erase
      0.18%  [kernel]                               [k] pid_revalidate
      0.18%  [kernel]                               [k] lockref_get_not_dead
      0.18%  [kernel]                               [k] 
__alloc_pages_nodemask
      0.17%  [kernel]                               [k] set_task_cpu
      0.17%  libc-2.17.so                           [.] __strcoll_l
      0.17%  [kernel]                               [k] do_syscall_64
      0.17%  [kernel]                               [k] __vmalloc_node_range
      0.17%  libc-2.17.so                           [.] _IO_vfscanf
      0.17%  [kernel]                               [k] refcount_dec_not_one
      0.15%  [kernel]                               [k] __task_pid_nr_ns
      0.15%  [kernel]                               [k] 
native_irq_return_iret
      0.15%  [kernel]                               [k] free_pcppages_bulk
      0.14%  [kernel]                               [k] kmem_cache_alloc
      0.14%  [kernel]                               [k] link_path_walk
      0.14%  libc-2.17.so                           [.] _int_free
      0.14%  [kernel]                               [k] 
__update_load_avg_cfs_rq
      0.14%  perf.5.7.0-master.20200601.ol7.x86_64  [.] 0x00000000000eac29
      0.13%  [kernel]                               [k] kmem_cache_free
      0.13%  [kernel]                               [k] number
      0.13%  [kernel]                               [k] memset_erms
      0.12%  [kernel]                               [k] proc_pid_status
      0.12%  [kernel]                               [k] __d_lookup_rcu


=========== runme.sh ==========

#!/bin/bash
#
# Reproducer driver: run $prog with $threads threads in a loop; once the
# entry count under /proc/$pid/task exceeds the target, kill -9 the
# process so all threads exit at once (triggering the dentry contention).

threads=${1:-10000}
prog=proc_race
while true; do ./"$prog" "$threads"; done &

while true; do
    # head -1 guards against the ps pipeline matching more than one PID,
    # which would make the unquoted tests below fail with a syntax error.
    pid=`ps aux | grep "$prog" | grep -v grep | awk '{print $2}' | head -1`
    # Quote $pid: an empty value would otherwise break [ -z ... ].
    if [ -z "$pid" ]; then continue; fi
    # 2>/dev/null: the process may exit between the ps and the ls.
    threadnum=`ls -l "/proc/$pid/task" 2>/dev/null | wc -l`
    if [ "$threadnum" -gt "$threads" ]; then
        echo kill "$pid"
        kill -9 "$pid"
    fi
done


===========proc_race.c=========


#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <ctype.h>

/* Report a pthreads-style error (error number passed in 'en', as returned
 * by pthread_create()/pthread_join()) via perror() and exit. */
#define handle_error_en(en, msg) \
     do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)

/* Report an errno-based error via perror() and exit. */
#define handle_error(msg) \
     do { perror(msg); exit(EXIT_FAILURE); } while (0)

/* Per-thread bookkeeping for the spawned test threads. */
struct thread_info {
     pthread_t thread_id;     /* ID returned by pthread_create() */
     int       thread_num;    /* 1-based creation index (set but never read) */
};

/*
 * Thread body: spin forever, sleeping one second every millionth
 * iteration so the threads stay alive cheaply until the whole process
 * is killed from outside (by runme.sh), forcing a mass thread exit.
 *
 * Fixes vs. original: 'i' was read uninitialized (undefined behavior),
 * and the empty parameter list did not match the void *(*)(void *)
 * function type that pthread_create() requires.
 */
static void *child_thread(void *arg)
{
     int i = 0;

     (void)arg;    /* unused; threads are started with a NULL argument */
     while (1) {
         if (!(i++ % 1000000))
             sleep(1);
     }
     return NULL;  /* unreachable; kept to satisfy the thread signature */
}

int main(int argc, char *argv[])
{
     int s, tnum, opt, num_threads;
     struct thread_info *tinfo;
     void *res;

     if (argc == 2)
         num_threads = atoi(argv[1]);
     else
         num_threads = 10000;

     tinfo = calloc(num_threads, sizeof(struct thread_info));
     if (tinfo == NULL)
         handle_error("calloc");


     for (tnum = 0; tnum < num_threads; tnum++) {
         tinfo[tnum].thread_num = tnum + 1;

         s = pthread_create(&tinfo[tnum].thread_id, NULL,
                 &child_thread, NULL);
         if (s != 0)
             handle_error_en(s, "pthread_create");
     }

     for (tnum = 0; tnum < num_threads; tnum++) {
         s = pthread_join(tinfo[tnum].thread_id, &res);
         if (s != 0)
             handle_error_en(s, "pthread_join");

         free(res);
     }

     free(tinfo);
     exit(EXIT_SUCCESS);
}

==========

Thanks,

Junxiao.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: severe proc dentry lock contention
  2020-06-18 22:17 severe proc dentry lock contention Junxiao Bi
@ 2020-06-18 23:39 ` Matthew Wilcox
  2020-06-19  0:02   ` Eric W. Biederman
  0 siblings, 1 reply; 20+ messages in thread
From: Matthew Wilcox @ 2020-06-18 23:39 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: linux-kernel, linux-fsdevel, Matthew Wilcox, Srinivas Eeda,
	joe.jin, Eric W. Biederman

On Thu, Jun 18, 2020 at 03:17:33PM -0700, Junxiao Bi wrote:
> When debugging some performance issue, i found that thousands of threads
> exit around same time could cause a severe spin lock contention on proc
> dentry "/proc/$parent_process_pid/task/", that's because threads needs to
> clean up their pid file from that dir when exit. Check the following
> standalone test case that simulated the case and perf top result on v5.7
> kernel. Any idea on how to fix this?

Thanks, Junxiao.

We've looked at a few different ways of fixing this problem.

Even though the contention is within the dcache, it seems like a usecase
that the dcache shouldn't be optimised for -- generally we do not have
hundreds of CPUs removing dentries from a single directory in parallel.

We could fix this within procfs.  We don't have a great patch yet, but
the current approach we're looking at allows only one thread at a time
to call dput() on any /proc/*/task directory.

We could also look at fixing this within the scheduler.  Only allowing
one CPU to run the threads of an exiting process would fix this particular
problem, but might have other consequences.

I was hoping that 7bc3e6e55acf would fix this, but that patch is in 5.7,
so that hope is ruled out.

> 
>    PerfTop:   48891 irqs/sec  kernel:95.6%  exact: 100.0% lost: 0/0 drop:
> 0/0 [4000Hz cycles],  (all, 72 CPUs)
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> 
> 
>     66.10%  [kernel]                               [k]
> native_queued_spin_lock_slowpath
>      1.13%  [kernel]                               [k] _raw_spin_lock
>      0.84%  [kernel]                               [k] clear_page_erms
>      0.82%  [kernel]                               [k]
> queued_write_lock_slowpath
>      0.64%  [kernel]                               [k] proc_task_readdir
>      0.61%  [kernel]                               [k]
> find_idlest_group.isra.95
>      0.61%  [kernel]                               [k]
> syscall_return_via_sysret
>      0.55%  [kernel]                               [k] entry_SYSCALL_64
>      0.49%  [kernel]                               [k] memcpy_erms
>      0.46%  [kernel]                               [k] update_cfs_group
>      0.41%  [kernel]                               [k] get_pid_task
>      0.39%  [kernel]                               [k]
> _raw_spin_lock_irqsave
>      0.37%  [kernel]                               [k]
> __list_del_entry_valid
>      0.34%  [kernel]                               [k]
> get_page_from_freelist
>      0.34%  [kernel]                               [k] __d_lookup
>      0.32%  [kernel]                               [k] update_load_avg
>      0.31%  libc-2.17.so                           [.] get_next_seq
>      0.27%  [kernel]                               [k] avc_has_perm_noaudit
>      0.26%  [kernel]                               [k] __sched_text_start
>      0.25%  [kernel]                               [k]
> selinux_inode_permission
>      0.25%  [kernel]                               [k] __slab_free
>      0.24%  [kernel]                               [k] detach_entity_cfs_rq
>      0.23%  [kernel]                               [k] zap_pte_range
>      0.22%  [kernel]                               [k]
> _find_next_bit.constprop.1
>      0.22%  libc-2.17.so                           [.] vfprintf
>      0.20%  libc-2.17.so                           [.] _int_malloc
>      0.19%  [kernel]                               [k] _raw_spin_lock_irq
>      0.18%  [kernel]                               [k] rb_erase
>      0.18%  [kernel]                               [k] pid_revalidate
>      0.18%  [kernel]                               [k] lockref_get_not_dead
>      0.18%  [kernel]                               [k]
> __alloc_pages_nodemask
>      0.17%  [kernel]                               [k] set_task_cpu
>      0.17%  libc-2.17.so                           [.] __strcoll_l
>      0.17%  [kernel]                               [k] do_syscall_64
>      0.17%  [kernel]                               [k] __vmalloc_node_range
>      0.17%  libc-2.17.so                           [.] _IO_vfscanf
>      0.17%  [kernel]                               [k] refcount_dec_not_one
>      0.15%  [kernel]                               [k] __task_pid_nr_ns
>      0.15%  [kernel]                               [k]
> native_irq_return_iret
>      0.15%  [kernel]                               [k] free_pcppages_bulk
>      0.14%  [kernel]                               [k] kmem_cache_alloc
>      0.14%  [kernel]                               [k] link_path_walk
>      0.14%  libc-2.17.so                           [.] _int_free
>      0.14%  [kernel]                               [k]
> __update_load_avg_cfs_rq
>      0.14%  perf.5.7.0-master.20200601.ol7.x86_64  [.] 0x00000000000eac29
>      0.13%  [kernel]                               [k] kmem_cache_free
>      0.13%  [kernel]                               [k] number
>      0.13%  [kernel]                               [k] memset_erms
>      0.12%  [kernel]                               [k] proc_pid_status
>      0.12%  [kernel]                               [k] __d_lookup_rcu
> 
> 
> =========== runme.sh ==========
> 
> #!/bin/bash
> 
> threads=${1:-10000}
> prog=proc_race
> while [ 1 ]; do ./$prog $threads; done &
> 
> while [ 1 ]; do
>     pid=`ps aux | grep $prog | grep -v grep| awk '{print $2}'`
>     if [ -z $pid ]; then continue; fi
>     threadnum=`ls -l /proc/$pid/task | wc -l`
>     if [ $threadnum -gt $threads ]; then
>         echo kill $pid
>         kill -9 $pid
>     fi
> done
> 
> 
> ===========proc_race.c=========
> 
> 
> #include <pthread.h>
> #include <string.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <errno.h>
> #include <ctype.h>
> 
> #define handle_error_en(en, msg) \
>     do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
> 
> #define handle_error(msg) \
>     do { perror(msg); exit(EXIT_FAILURE); } while (0)
> 
> struct thread_info {
>     pthread_t thread_id;
>     int       thread_num;
> };
> 
> static void *child_thread()
> {
>     int i;
> 
>     while (1) { if (!(i++ % 1000000)) sleep(1);}
>     return NULL;
> }
> 
> int main(int argc, char *argv[])
> {
>     int s, tnum, opt, num_threads;
>     struct thread_info *tinfo;
>     void *res;
> 
>     if (argc == 2)
>         num_threads = atoi(argv[1]);
>     else
>         num_threads = 10000;
> 
>     tinfo = calloc(num_threads, sizeof(struct thread_info));
>     if (tinfo == NULL)
>         handle_error("calloc");
> 
> 
>     for (tnum = 0; tnum < num_threads; tnum++) {
>         tinfo[tnum].thread_num = tnum + 1;
> 
>         s = pthread_create(&tinfo[tnum].thread_id, NULL,
>                 &child_thread, NULL);
>         if (s != 0)
>             handle_error_en(s, "pthread_create");
>     }
> 
>     for (tnum = 0; tnum < num_threads; tnum++) {
>         s = pthread_join(tinfo[tnum].thread_id, &res);
>         if (s != 0)
>             handle_error_en(s, "pthread_join");
> 
>         free(res);
>     }
> 
>     free(tinfo);
>     exit(EXIT_SUCCESS);
> }
> 
> ==========
> 
> Thanks,
> 
> Junxiao.
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: severe proc dentry lock contention
  2020-06-18 23:39 ` Matthew Wilcox
@ 2020-06-19  0:02   ` Eric W. Biederman
  2020-06-19  0:27     ` Junxiao Bi
  0 siblings, 1 reply; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-19  0:02 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Junxiao Bi, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin

Matthew Wilcox <willy@infradead.org> writes:

> On Thu, Jun 18, 2020 at 03:17:33PM -0700, Junxiao Bi wrote:
>> When debugging some performance issue, i found that thousands of threads
>> exit around same time could cause a severe spin lock contention on proc
>> dentry "/proc/$parent_process_pid/task/", that's because threads needs to
>> clean up their pid file from that dir when exit. Check the following
>> standalone test case that simulated the case and perf top result on v5.7
>> kernel. Any idea on how to fix this?

>
> Thanks, Junxiao.
>
> We've looked at a few different ways of fixing this problem.
>
> Even though the contention is within the dcache, it seems like a usecase
> that the dcache shouldn't be optimised for -- generally we do not have
> hundreds of CPUs removing dentries from a single directory in parallel.
>
> We could fix this within procfs.  We don't have a great patch yet, but
> the current approach we're looking at allows only one thread at a time
> to call dput() on any /proc/*/task directory.
>
> We could also look at fixing this within the scheduler.  Only allowing
> one CPU to run the threads of an exiting process would fix this particular
> problem, but might have other consequences.
>
> I was hoping that 7bc3e6e55acf would fix this, but that patch is in 5.7,
> so that hope is ruled out.

Does anyone know if this problem is new in v5.7?  I am wondering if I
introduced this problem when I refactored the code, or if I simply churned
the code but the issue remains effectively the same.

Can you try only flushing entries when the last thread of the process is
reaped?  I think in practice we would want to be a little more
sophisticated but it is a good test case to see if it solves the issue.

diff --git a/kernel/exit.c b/kernel/exit.c
index cebae77a9664..d56e4eb60bdd 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -152,7 +152,7 @@ void put_task_struct_rcu_user(struct task_struct *task)
 void release_task(struct task_struct *p)
 {
 	struct task_struct *leader;
-	struct pid *thread_pid;
+	struct pid *thread_pid = NULL;
 	int zap_leader;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
@@ -165,7 +165,8 @@ void release_task(struct task_struct *p)
 
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
-	thread_pid = get_pid(p->thread_pid);
+	if (p == p->group_leader)
+		thread_pid = get_pid(p->thread_pid);
 	__exit_signal(p);
 
 	/*
@@ -188,8 +189,10 @@ void release_task(struct task_struct *p)
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_pid(thread_pid);
-	put_pid(thread_pid);
+	if (thread_pid) {
+		proc_flush_pid(thread_pid);
+		put_pid(thread_pid);
+	}
 	release_thread(p);
 	put_task_struct_rcu_user(p);
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: severe proc dentry lock contention
  2020-06-19  0:02   ` Eric W. Biederman
@ 2020-06-19  0:27     ` Junxiao Bi
  2020-06-19  3:30       ` Eric W. Biederman
  2020-06-19 14:09       ` [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries Eric W. Biederman
  0 siblings, 2 replies; 20+ messages in thread
From: Junxiao Bi @ 2020-06-19  0:27 UTC (permalink / raw)
  To: Eric W. Biederman, Matthew Wilcox
  Cc: linux-kernel, linux-fsdevel, Matthew Wilcox, Srinivas Eeda, joe.jin

On 6/18/20 5:02 PM, ebiederm@xmission.com wrote:

> Matthew Wilcox <willy@infradead.org> writes:
>
>> On Thu, Jun 18, 2020 at 03:17:33PM -0700, Junxiao Bi wrote:
>>> When debugging some performance issue, i found that thousands of threads
>>> exit around same time could cause a severe spin lock contention on proc
>>> dentry "/proc/$parent_process_pid/task/", that's because threads needs to
>>> clean up their pid file from that dir when exit. Check the following
>>> standalone test case that simulated the case and perf top result on v5.7
>>> kernel. Any idea on how to fix this?
>> Thanks, Junxiao.
>>
>> We've looked at a few different ways of fixing this problem.
>>
>> Even though the contention is within the dcache, it seems like a usecase
>> that the dcache shouldn't be optimised for -- generally we do not have
>> hundreds of CPUs removing dentries from a single directory in parallel.
>>
>> We could fix this within procfs.  We don't have a great patch yet, but
>> the current approach we're looking at allows only one thread at a time
>> to call dput() on any /proc/*/task directory.
>>
>> We could also look at fixing this within the scheduler.  Only allowing
>> one CPU to run the threads of an exiting process would fix this particular
>> problem, but might have other consequences.
>>
>> I was hoping that 7bc3e6e55acf would fix this, but that patch is in 5.7,
>> so that hope is ruled out.
> Does anyone know if problem new in v5.7?  I am wondering if I introduced
> this problem when I refactored the code or if I simply churned the code
> but the issue remains effectively the same.
It's not a new issue; we have seen it in old kernels like v4.14.
>
> Can you try only flushing entries when the last thread of the process is
> reaped?  I think in practice we would want to be a little more
> sophisticated but it is a good test case to see if it solves the issue.

Thank you. i will try and let you know.

Thanks,

Junxiao.

>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index cebae77a9664..d56e4eb60bdd 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -152,7 +152,7 @@ void put_task_struct_rcu_user(struct task_struct *task)
>   void release_task(struct task_struct *p)
>   {
>   	struct task_struct *leader;
> -	struct pid *thread_pid;
> +	struct pid *thread_pid = NULL;
>   	int zap_leader;
>   repeat:
>   	/* don't need to get the RCU readlock here - the process is dead and
> @@ -165,7 +165,8 @@ void release_task(struct task_struct *p)
>   
>   	write_lock_irq(&tasklist_lock);
>   	ptrace_release_task(p);
> -	thread_pid = get_pid(p->thread_pid);
> +	if (p == p->group_leader)
> +		thread_pid = get_pid(p->thread_pid);
>   	__exit_signal(p);
>   
>   	/*
> @@ -188,8 +189,10 @@ void release_task(struct task_struct *p)
>   	}
>   
>   	write_unlock_irq(&tasklist_lock);
> -	proc_flush_pid(thread_pid);
> -	put_pid(thread_pid);
> +	if (thread_pid) {
> +		proc_flush_pid(thread_pid);
> +		put_pid(thread_pid);
> +	}
>   	release_thread(p);
>   	put_task_struct_rcu_user(p);
>   

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: severe proc dentry lock contention
  2020-06-19  0:27     ` Junxiao Bi
@ 2020-06-19  3:30       ` Eric W. Biederman
  2020-06-19 14:09       ` [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries Eric W. Biederman
  1 sibling, 0 replies; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-19  3:30 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin

Junxiao Bi <junxiao.bi@oracle.com> writes:

> On 6/18/20 5:02 PM, ebiederm@xmission.com wrote:
>
>> Matthew Wilcox <willy@infradead.org> writes:
>>
>>> On Thu, Jun 18, 2020 at 03:17:33PM -0700, Junxiao Bi wrote:
>>>> When debugging some performance issue, i found that thousands of threads
>>>> exit around same time could cause a severe spin lock contention on proc
>>>> dentry "/proc/$parent_process_pid/task/", that's because threads needs to
>>>> clean up their pid file from that dir when exit. Check the following
>>>> standalone test case that simulated the case and perf top result on v5.7
>>>> kernel. Any idea on how to fix this?
>>> Thanks, Junxiao.
>>>
>>> We've looked at a few different ways of fixing this problem.
>>>
>>> Even though the contention is within the dcache, it seems like a usecase
>>> that the dcache shouldn't be optimised for -- generally we do not have
>>> hundreds of CPUs removing dentries from a single directory in parallel.
>>>
>>> We could fix this within procfs.  We don't have a great patch yet, but
>>> the current approach we're looking at allows only one thread at a time
>>> to call dput() on any /proc/*/task directory.
>>>
>>> We could also look at fixing this within the scheduler.  Only allowing
>>> one CPU to run the threads of an exiting process would fix this particular
>>> problem, but might have other consequences.
>>>
>>> I was hoping that 7bc3e6e55acf would fix this, but that patch is in 5.7,
>>> so that hope is ruled out.
>> Does anyone know if problem new in v5.7?  I am wondering if I introduced
>> this problem when I refactored the code or if I simply churned the code
>> but the issue remains effectively the same.
> It's not new issue, we see it in old kernel like v4.14
>>
>> Can you try only flushing entries when the last thread of the process is
>> reaped?  I think in practice we would want to be a little more
>> sophisticated but it is a good test case to see if it solves the issue.
>
> Thank you. i will try and let you know.

Assuming this works, we can remove the hack of always deferring the flush
by only doing it when SIGNAL_GROUP_EXIT is set, i.e. when we know this
thundering herd will appear.

We still need to do something with the list, however.

If your testing works out performance-wise, I will figure out what
we need to make the hack general and safe.

Eric



> Thanks,
>
> Junxiao.
>
>>
>> diff --git a/kernel/exit.c b/kernel/exit.c
>> index cebae77a9664..d56e4eb60bdd 100644
>> --- a/kernel/exit.c
>> +++ b/kernel/exit.c
>> @@ -152,7 +152,7 @@ void put_task_struct_rcu_user(struct task_struct *task)
>>   void release_task(struct task_struct *p)
>>   {
>>   	struct task_struct *leader;
>> -	struct pid *thread_pid;
>> +	struct pid *thread_pid = NULL;
>>   	int zap_leader;
>>   repeat:
>>   	/* don't need to get the RCU readlock here - the process is dead and
>> @@ -165,7 +165,8 @@ void release_task(struct task_struct *p)
>>     	write_lock_irq(&tasklist_lock);
>>   	ptrace_release_task(p);
>> -	thread_pid = get_pid(p->thread_pid);
>> +	if (p == p->group_leader)
>> +		thread_pid = get_pid(p->thread_pid);
>>   	__exit_signal(p);
>>     	/*
>> @@ -188,8 +189,10 @@ void release_task(struct task_struct *p)
>>   	}
>>     	write_unlock_irq(&tasklist_lock);
>> -	proc_flush_pid(thread_pid);
>> -	put_pid(thread_pid);
>> +	if (thread_pid) {
>> +		proc_flush_pid(thread_pid);
>> +		put_pid(thread_pid);
>> +	}
>>   	release_thread(p);
>>   	put_task_struct_rcu_user(p);
>>   

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19  0:27     ` Junxiao Bi
  2020-06-19  3:30       ` Eric W. Biederman
@ 2020-06-19 14:09       ` Eric W. Biederman
  2020-06-19 15:56         ` Junxiao Bi
  2020-06-22  5:33         ` Masahiro Yamada
  1 sibling, 2 replies; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-19 14:09 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin


Junxiao Bi <junxiao.bi@oracle.com> reported:
> When debugging some performance issue, i found that thousands of threads exit
> around same time could cause a severe spin lock contention on proc dentry
> "/proc/$parent_process_pid/task/", that's because threads needs to clean up
> their pid file from that dir when exit.

Matthew Wilcox <willy@infradead.org> reported:
> We've looked at a few different ways of fixing this problem.

The flushing of the proc dentries from the dcache is an optimization,
and is not necessary for correctness.  Eventually cache pressure will
cause the dentries to be freed even if no flushing happens.  Some
light testing when I refactored the proc flushing[1] indicated that at
least the memory footprint is easily measurable.

An optimization that causes a performance problem due to a thundering
herd of threads is no real optimization.

Modify the code to only flush the /proc/<tgid>/ directory when all
threads in a process are killed at once.  This continues to flush
practically everything when the process is reaped as the threads live
under /proc/<tgid>/task/<tid>.

There is a rare possibility that a debugger will access /proc/<tid>/,
which this change will no longer flush, but I believe such accesses
are sufficiently rare to not be observed in practice.

[1] 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
Link: https://lkml.kernel.org/r/54091fc0-ca46-2186-97a8-d1f3c4f3877b@oracle.com
Reported-by: Masahiro Yamada <masahiroy@kernel.org>
Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---

I am still waiting for word on how this affects performance, but this is
a clean version that should avoid the thundering herd problem in
general.


 kernel/exit.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index cebae77a9664..567354550d62 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -151,8 +151,8 @@ void put_task_struct_rcu_user(struct task_struct *task)
 
 void release_task(struct task_struct *p)
 {
+	struct pid *flush_pid = NULL;
 	struct task_struct *leader;
-	struct pid *thread_pid;
 	int zap_leader;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
@@ -165,7 +165,16 @@ void release_task(struct task_struct *p)
 
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
-	thread_pid = get_pid(p->thread_pid);
+
+	/*
+	 * When all of the threads are exiting wait until the end
+	 * and flush everything.
+	 */
+	if (thread_group_leader(p))
+		flush_pid = get_pid(task_tgid(p));
+	else if (!(p->signal->flags & SIGNAL_GROUP_EXIT))
+		flush_pid = get_pid(task_pid(p));
+
 	__exit_signal(p);
 
 	/*
@@ -188,8 +197,10 @@ void release_task(struct task_struct *p)
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_pid(thread_pid);
-	put_pid(thread_pid);
+	if (flush_pid) {
+		proc_flush_pid(flush_pid);
+		put_pid(flush_pid);
+	}
 	release_thread(p);
 	put_task_struct_rcu_user(p);
 
-- 
2.20.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 14:09       ` [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries Eric W. Biederman
@ 2020-06-19 15:56         ` Junxiao Bi
  2020-06-19 17:24           ` Eric W. Biederman
  2020-06-22  5:33         ` Masahiro Yamada
  1 sibling, 1 reply; 20+ messages in thread
From: Junxiao Bi @ 2020-06-19 15:56 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

Hi Eric,

The patch didn't improve lock contention.

    PerfTop:   48925 irqs/sec  kernel:95.6%  exact: 100.0% lost: 0/0 
drop: 0/0 [4000Hz cycles],  (all, 104 CPUs)
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 


     69.66%  [kernel]                                        [k] 
native_queued_spin_lock_slowpath
      1.93%  [kernel]                                        [k] 
_raw_spin_lock
      1.24%  [kernel]                                        [k] 
page_counter_cancel
      0.70%  [kernel]                                        [k] 
do_syscall_64
      0.62%  [kernel]                                        [k] 
find_idlest_group.isra.96
      0.57%  [kernel]                                        [k] 
queued_write_lock_slowpath
      0.56%  [kernel]                                        [k] d_walk
      0.45%  [kernel]                                        [k] 
clear_page_erms
      0.44%  [kernel]                                        [k] 
syscall_return_via_sysret
      0.40%  [kernel]                                        [k] 
entry_SYSCALL_64
      0.38%  [kernel]                                        [k] 
refcount_dec_not_one
      0.37%  [kernel]                                        [k] 
propagate_protected_usage
      0.33%  [kernel]                                        [k] 
unmap_page_range
      0.33%  [kernel]                                        [k] 
select_collect
      0.32%  [kernel]                                        [k] memcpy_erms
      0.30%  [kernel]                                        [k] 
proc_task_readdir
      0.27%  [kernel]                                        [k] 
_raw_spin_lock_irqsave

Thanks,

Junxiao.

On 6/19/20 7:09 AM, ebiederm@xmission.com wrote:
> Junxiao Bi <junxiao.bi@oracle.com> reported:
>> When debugging some performance issue, i found that thousands of threads exit
>> around same time could cause a severe spin lock contention on proc dentry
>> "/proc/$parent_process_pid/task/", that's because threads needs to clean up
>> their pid file from that dir when exit.
> Matthew Wilcox <willy@infradead.org> reported:
>> We've looked at a few different ways of fixing this problem.
> The flushing of the proc dentries from the dcache is an optimization,
> and is not necessary for correctness.  Eventually cache pressure will
> cause the dentries to be freed even if no flushing happens.  Some
> light testing when I refactored the proc flushing[1] indicated that at
> least the memory footprint is easily measurable.
>
> An optimization that causes a performance problem due to a thundering
> herd of threads is no real optimization.
>
> Modify the code to only flush the /proc/<tgid>/ directory when all
> threads in a process are killed at once.  This continues to flush
> practically everything when the process is reaped as the threads live
> under /proc/<tgid>/task/<tid>.
>
> There is a rare possibility that a debugger will access /proc/<tid>/,
> which this change will no longer flush, but I believe such accesses
> are sufficiently rare to not be observed in practice.
>
> [1] 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
> Link: https://lkml.kernel.org/r/54091fc0-ca46-2186-97a8-d1f3c4f3877b@oracle.com
> Reported-by: Masahiro Yamada <masahiroy@kernel.org>
> Reported-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---
>
> I am still waiting for word on how this affects performance, but this is
> a clean version that should avoid the thundering herd problem in
> general.
>
>
>   kernel/exit.c | 19 +++++++++++++++----
>   1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index cebae77a9664..567354550d62 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -151,8 +151,8 @@ void put_task_struct_rcu_user(struct task_struct *task)
>   
>   void release_task(struct task_struct *p)
>   {
> +	struct pid *flush_pid = NULL;
>   	struct task_struct *leader;
> -	struct pid *thread_pid;
>   	int zap_leader;
>   repeat:
>   	/* don't need to get the RCU readlock here - the process is dead and
> @@ -165,7 +165,16 @@ void release_task(struct task_struct *p)
>   
>   	write_lock_irq(&tasklist_lock);
>   	ptrace_release_task(p);
> -	thread_pid = get_pid(p->thread_pid);
> +
> +	/*
> +	 * When all of the threads are exiting wait until the end
> +	 * and flush everything.
> +	 */
> +	if (thread_group_leader(p))
> +		flush_pid = get_pid(task_tgid(p));
> +	else if (!(p->signal->flags & SIGNAL_GROUP_EXIT))
> +		flush_pid = get_pid(task_pid(p));
> +
>   	__exit_signal(p);
>   
>   	/*
> @@ -188,8 +197,10 @@ void release_task(struct task_struct *p)
>   	}
>   
>   	write_unlock_irq(&tasklist_lock);
> -	proc_flush_pid(thread_pid);
> -	put_pid(thread_pid);
> +	if (flush_pid) {
> +		proc_flush_pid(flush_pid);
> +		put_pid(flush_pid);
> +	}
>   	release_thread(p);
>   	put_task_struct_rcu_user(p);
>   

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 15:56         ` Junxiao Bi
@ 2020-06-19 17:24           ` Eric W. Biederman
  2020-06-19 21:56             ` Junxiao Bi
  0 siblings, 1 reply; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-19 17:24 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

Junxiao Bi <junxiao.bi@oracle.com> writes:

> Hi Eric,
>
> The patch didn't improve lock contention.

Which raises the question where is the lock contention coming from.

Especially with my first variant.  Only the last thread to be reaped
would free up anything in the cache.

Can you comment out the call to proc_flush_pid entirely?

That will rule out the proc_flush_pid in d_invalidate entirely.

The only candidate I can think of is d_invalidate (aka proc_flush_pid) vs ps.

Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 17:24           ` Eric W. Biederman
@ 2020-06-19 21:56             ` Junxiao Bi
  2020-06-19 22:42               ` Eric W. Biederman
  0 siblings, 1 reply; 20+ messages in thread
From: Junxiao Bi @ 2020-06-19 21:56 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

On 6/19/20 10:24 AM, ebiederm@xmission.com wrote:

> Junxiao Bi <junxiao.bi@oracle.com> writes:
>
>> Hi Eric,
>>
>> The patch didn't improve lock contention.
> Which raises the question where is the lock contention coming from.
>
> Especially with my first variant.  Only the last thread to be reaped
> would free up anything in the cache.
>
> Can you comment out the call to proc_flush_pid entirely?

Still high lock contention. Collect the following hot path.

     74.90%     0.01%  proc_race 
[kernel.kallsyms]                               [k] 
entry_SYSCALL_64_after_hwframe
             |
              --74.89%--entry_SYSCALL_64_after_hwframe
                        |
                         --74.88%--do_syscall_64
                                   |
                                   |--69.70%--exit_to_usermode_loop
                                   |          |
                                   |           --69.70%--do_signal
                                   |                     |
                                   | --69.69%--get_signal
                                   | |
                                   | |--56.30%--do_group_exit
                                   | |          |
                                   | |           --56.30%--do_exit
                                   | |                     |
                                   | |                     
|--27.50%--_raw_write_lock_irq
                                   | |                     |          |
                                   | |                     | 
--27.47%--queued_write_lock_slowpath
                                   | |                     
|                     |
                                   | |                     | 
--27.18%--native_queued_spin_lock_slowpath
                                   | |                     |
                                   | |                     
|--26.10%--release_task.part.20
                                   | |                     |          |
                                   | |                     |           
--25.60%--_raw_write_lock_irq
                                   | |                     
|                     |
                                   | |                     | 
--25.56%--queued_write_lock_slowpath
                                   | |                     
|                                |
                                   | |                     | 
--25.23%--native_queued_spin_lock_slowpath
                                   | |                     |
                                   | |                      --0.56%--mmput
                                   | |                                |
                                   | |                                 
--0.55%--exit_mmap
                                   | |
|                                 --13.31%--_raw_spin_lock_irq
|                                           |
| --13.28%--native_queued_spin_lock_slowpath
                                   |

Thanks,

Junxiao.

>
> That will rule out the proc_flush_pid in d_invalidate entirely.
>
> The only candidate I can think of d_invalidate aka (proc_flush_pid) vs ps.
>
> Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 21:56             ` Junxiao Bi
@ 2020-06-19 22:42               ` Eric W. Biederman
  2020-06-20 16:27                 ` Matthew Wilcox
  0 siblings, 1 reply; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-19 22:42 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

Junxiao Bi <junxiao.bi@oracle.com> writes:

> On 6/19/20 10:24 AM, ebiederm@xmission.com wrote:
>
>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>>
>>> Hi Eric,
>>>
>>> The patch didn't improve lock contention.
>> Which raises the question where is the lock contention coming from.
>>
>> Especially with my first variant.  Only the last thread to be reaped
>> would free up anything in the cache.
>>
>> Can you comment out the call to proc_flush_pid entirely?
>
> Still high lock contention. Collect the following hot path.

A different location this time.

I know of at least exit_signal and exit_notify that take thread wide
locks, and it looks like exit_mm is another.  Those don't use the same
locks as flushing proc.


So I think you are simply seeing a result of the thundering herd of
threads shutting down at once.  Given that thread shutdown is fundamentally
a slow path there is only so much that can be done.

If you are up for a project of working through this thundering herd I
expect I can help some.  It will be a long process of cleaning up
the entire thread exit process with an eye to performance.

To make incremental progress we are going to need a way to understand
the impact of various changes.  Such as my change to reduce the dentry
lock contention when a process is removed from proc.  I have the feeling
that made a real world improvement on your test (as the lock no longer
shows up in your profile). Unfortunately whatever that improvement was
it was not reflected in how you read your numbers.

Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 22:42               ` Eric W. Biederman
@ 2020-06-20 16:27                 ` Matthew Wilcox
  2020-06-22  5:15                   ` Junxiao Bi
  0 siblings, 1 reply; 20+ messages in thread
From: Matthew Wilcox @ 2020-06-20 16:27 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Junxiao Bi, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
> Junxiao Bi <junxiao.bi@oracle.com> writes:
> > Still high lock contention. Collect the following hot path.
> 
> A different location this time.
> 
> I know of at least exit_signal and exit_notify that take thread wide
> locks, and it looks like exit_mm is another.  Those don't use the same
> locks as flushing proc.
> 
> 
> So I think you are simply seeing a result of the thundering herd of
> threads shutting down at once.  Given that thread shutdown is fundamentally
> a slow path there is only so much that can be done.
> 
> If you are up for a project to working through this thundering herd I
> expect I can help some.  It will be a long process of cleaning up
> the entire thread exit process with an eye to performance.

Wengang had some tests which produced wall-clock values for this problem,
which I agree is more informative.

I'm not entirely sure what the customer workload is that requires a
highly threaded workload to also shut down quickly.  To my mind, an
overall workload is normally composed of highly-threaded tasks that run
for a long time and only shut down rarely (thus performance of shutdown
is not important) and single-threaded tasks that run for a short time.

Understanding this workload is important to my next suggestion, which
is that rather than searching for all the places in the exit path which
contend on a single spinlock, we simply set the allowed CPUs for an
exiting task to include only the CPU that this thread is running on.
It will probably run faster to take the threads down in series on one
CPU rather than take them down in parallel across many CPUs (or am I
mistaken?  Is there inherently a lot of parallelism in the thread
exiting process?)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-20 16:27                 ` Matthew Wilcox
@ 2020-06-22  5:15                   ` Junxiao Bi
  2020-06-22 15:20                     ` Eric W. Biederman
  2020-06-23  0:47                     ` Matthew Wilcox
  0 siblings, 2 replies; 20+ messages in thread
From: Junxiao Bi @ 2020-06-22  5:15 UTC (permalink / raw)
  To: Matthew Wilcox, Eric W. Biederman
  Cc: linux-kernel, linux-fsdevel, Matthew Wilcox, Srinivas Eeda,
	joe.jin, Wengang Wang

On 6/20/20 9:27 AM, Matthew Wilcox wrote:

> On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>>> Still high lock contention. Collect the following hot path.
>> A different location this time.
>>
>> I know of at least exit_signal and exit_notify that take thread wide
>> locks, and it looks like exit_mm is another.  Those don't use the same
>> locks as flushing proc.
>>
>>
>> So I think you are simply seeing a result of the thundering herd of
>> threads shutting down at once.  Given that thread shutdown is fundamentally
>> a slow path there is only so much that can be done.
>>
>> If you are up for a project to working through this thundering herd I
>> expect I can help some.  It will be a long process of cleaning up
>> the entire thread exit process with an eye to performance.
> Wengang had some tests which produced wall-clock values for this problem,
> which I agree is more informative.
>
> I'm not entirely sure what the customer workload is that requires a
> highly threaded workload to also shut down quickly.  To my mind, an
> overall workload is normally composed of highly-threaded tasks that run
> for a long time and only shut down rarely (thus performance of shutdown
> is not important) and single-threaded tasks that run for a short time.

The real workload is a Java application working in server-agent mode; 
the issue happened on the agent side. All the agent does is wait for 
work dispatched from the server and execute it. To execute one work 
item, the agent starts lots of short-lived threads, so a lot of threads 
can exit at the same time if there is a lot of work to execute; the 
contention on the exit path caused a high %sys time which impacted 
other workloads.

Thanks,

Junxiao.

>
> Understanding this workload is important to my next suggestion, which
> is that rather than searching for all the places in the exit path which
> contend on a single spinlock, we simply set the allowed CPUs for an
> exiting task to include only the CPU that this thread is running on.
> It will probably run faster to take the threads down in series on one
> CPU rather than take them down in parallel across many CPUs (or am I
> mistaken?  Is there inherently a lot of parallelism in the thread
> exiting process?)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-19 14:09       ` [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries Eric W. Biederman
  2020-06-19 15:56         ` Junxiao Bi
@ 2020-06-22  5:33         ` Masahiro Yamada
  2020-06-22 15:13           ` Eric W. Biederman
  1 sibling, 1 reply; 20+ messages in thread
From: Masahiro Yamada @ 2020-06-22  5:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Junxiao Bi, Matthew Wilcox, Linux Kernel Mailing List,
	Linux FS-devel Mailing List, Matthew Wilcox, Srinivas Eeda,
	joe.jin

On Fri, Jun 19, 2020 at 11:14 PM Eric W. Biederman
<ebiederm@xmission.com> wrote:
>
>
> Junxiao Bi <junxiao.bi@oracle.com> reported:
> > When debugging some performance issue, i found that thousands of threads exit
> > around same time could cause a severe spin lock contention on proc dentry
> > "/proc/$parent_process_pid/task/", that's because threads needs to clean up
> > their pid file from that dir when exit.
>
> Matthew Wilcox <willy@infradead.org> reported:
> > We've looked at a few different ways of fixing this problem.
>
> > The flushing of the proc dentries from the dcache is an optimization,
> and is not necessary for correctness.  Eventually cache pressure will
> cause the dentries to be freed even if no flushing happens.  Some
> > light testing when I refactored the proc flushing[1] indicated that at
> least the memory footprint is easily measurable.
>
> An optimization that causes a performance problem due to a thundering
> herd of threads is no real optimization.
>
> Modify the code to only flush the /proc/<tgid>/ directory when all
> threads in a process are killed at once.  This continues to flush
> practically everything when the process is reaped as the threads live
> under /proc/<tgid>/task/<tid>.
>
> There is a rare possibility that a debugger will access /proc/<tid>/,
> which this change will no longer flush, but I believe such accesses
> are sufficiently rare to not be observed in practice.
>
> [1] 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
> Link: https://lkml.kernel.org/r/54091fc0-ca46-2186-97a8-d1f3c4f3877b@oracle.com


> Reported-by: Masahiro Yamada <masahiroy@kernel.org>

I did not report this.





> Reported-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> ---


-- 
Best Regards
Masahiro Yamada

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22  5:33         ` Masahiro Yamada
@ 2020-06-22 15:13           ` Eric W. Biederman
  0 siblings, 0 replies; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-22 15:13 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: Junxiao Bi, Matthew Wilcox, Linux Kernel Mailing List,
	Linux FS-devel Mailing List, Matthew Wilcox, Srinivas Eeda,
	joe.jin

Masahiro Yamada <masahiroy@kernel.org> writes:

> On Fri, Jun 19, 2020 at 11:14 PM Eric W. Biederman
> <ebiederm@xmission.com> wrote:
>>
>>
>> Junxiao Bi <junxiao.bi@oracle.com> reported:
>> > When debugging some performance issue, i found that thousands of threads exit
>> > around same time could cause a severe spin lock contention on proc dentry
>> > "/proc/$parent_process_pid/task/", that's because threads needs to clean up
>> > their pid file from that dir when exit.
>>
>> Matthew Wilcox <willy@infradead.org> reported:
>> > We've looked at a few different ways of fixing this problem.
>>
>> The flushing of the proc dentries from the dcache is an optimization,
>> and is not necessary for correctness.  Eventually cache pressure will
>> cause the dentries to be freed even if no flushing happens.  Some
>> light testing when I refactored the proc flushing[1] indicated that at
>> least the memory footprint is easily measurable.
>>
>> An optimization that causes a performance problem due to a thundering
>> herd of threads is no real optimization.
>>
>> Modify the code to only flush the /proc/<tgid>/ directory when all
>> threads in a process are killed at once.  This continues to flush
>> practically everything when the process is reaped as the threads live
>> under /proc/<tgid>/task/<tid>.
>>
>> There is a rare possibility that a debugger will access /proc/<tid>/,
>> which this change will no longer flush, but I believe such accesses
>> are sufficiently rare to not be observed in practice.
>>
>> [1] 7bc3e6e55acf ("proc: Use a list of inodes to flush from proc")
>> Link: https://lkml.kernel.org/r/54091fc0-ca46-2186-97a8-d1f3c4f3877b@oracle.com
>
>
>> Reported-by: Masahiro Yamada <masahiroy@kernel.org>
>
> I did not report this.

Thank you for catching this.

I must have cut&pasted the wrong email address by mistake.

My apologies.

Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22  5:15                   ` Junxiao Bi
@ 2020-06-22 15:20                     ` Eric W. Biederman
  2020-06-22 15:48                       ` willy
  2020-06-22 17:16                       ` Junxiao Bi
  2020-06-23  0:47                     ` Matthew Wilcox
  1 sibling, 2 replies; 20+ messages in thread
From: Eric W. Biederman @ 2020-06-22 15:20 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

Junxiao Bi <junxiao.bi@oracle.com> writes:

> On 6/20/20 9:27 AM, Matthew Wilcox wrote:
>
>> On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
>>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>>>> Still high lock contention. Collect the following hot path.
>>> A different location this time.
>>>
>>> I know of at least exit_signal and exit_notify that take thread wide
>>> locks, and it looks like exit_mm is another.  Those don't use the same
>>> locks as flushing proc.
>>>
>>>
>>> So I think you are simply seeing a result of the thundering herd of
>>> threads shutting down at once.  Given that thread shutdown is fundamentally
>>> a slow path there is only so much that can be done.
>>>
>>> If you are up for a project to working through this thundering herd I
>>> expect I can help some.  It will be a long process of cleaning up
>>> the entire thread exit process with an eye to performance.
>> Wengang had some tests which produced wall-clock values for this problem,
>> which I agree is more informative.
>>
>> I'm not entirely sure what the customer workload is that requires a
>> highly threaded workload to also shut down quickly.  To my mind, an
>> overall workload is normally composed of highly-threaded tasks that run
>> for a long time and only shut down rarely (thus performance of shutdown
>> is not important) and single-threaded tasks that run for a short time.
>
> The real workload is a Java application working in server-agent mode, issue
> happened in agent side, all it do is waiting works dispatching from server and
> execute. To execute one work, agent will start lots of short live threads, there
> could be a lot of threads exit same time if there were a lots of work to
> execute, the contention on the exit path caused a high %sys time which impacted
> other workload.

If I understand correctly, the Java VM is not exiting.  Just some of
it's threads.

That is a very different problem to deal with.  There are many
optimizations that are possible when _all_ of the threads are exiting
that are not possible when _many_ threads are exiting.

Do you know if it is simply the cpu time or if it is the lock contention
that is the problem?  If it is simply the cpu time we should consider if
some of the locks that can be highly contended should become mutexes.
Or perhaps something like Matthew's cpu pinning idea.

Eric



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22 15:20                     ` Eric W. Biederman
@ 2020-06-22 15:48                       ` willy
  2020-08-17 12:19                         ` Eric W. Biederman
  2020-06-22 17:16                       ` Junxiao Bi
  1 sibling, 1 reply; 20+ messages in thread
From: willy @ 2020-06-22 15:48 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Junxiao Bi, Matthew Wilcox, linux-kernel, linux-fsdevel,
	Matthew Wilcox, Srinivas Eeda, joe.jin, Wengang Wang

On Mon, Jun 22, 2020 at 10:20:40AM -0500, Eric W. Biederman wrote:
> Junxiao Bi <junxiao.bi@oracle.com> writes:
> > On 6/20/20 9:27 AM, Matthew Wilcox wrote:
> >> On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
> >>> Junxiao Bi <junxiao.bi@oracle.com> writes:
> >>>> Still high lock contention. Collect the following hot path.
> >>> A different location this time.
> >>>
> >>> I know of at least exit_signal and exit_notify that take thread wide
> >>> locks, and it looks like exit_mm is another.  Those don't use the same
> >>> locks as flushing proc.
> >>>
> >>>
> >>> So I think you are simply seeing a result of the thundering herd of
> >>> threads shutting down at once.  Given that thread shutdown is fundamentally
> >>> a slow path there is only so much that can be done.
> >>>
> >>> If you are up for a project to working through this thundering herd I
> >>> expect I can help some.  It will be a long process of cleaning up
> >>> the entire thread exit process with an eye to performance.
> >> Wengang had some tests which produced wall-clock values for this problem,
> >> which I agree is more informative.
> >>
> >> I'm not entirely sure what the customer workload is that requires a
> >> highly threaded workload to also shut down quickly.  To my mind, an
> >> overall workload is normally composed of highly-threaded tasks that run
> >> for a long time and only shut down rarely (thus performance of shutdown
> >> is not important) and single-threaded tasks that run for a short time.
> >
> > The real workload is a Java application working in server-agent mode, issue
> > happened in agent side, all it do is waiting works dispatching from server and
> > execute. To execute one work, agent will start lots of short live threads, there
> > could be a lot of threads exit same time if there were a lots of work to
> > execute, the contention on the exit path caused a high %sys time which impacted
> > other workload.
> 
> If I understand correctly, the Java VM is not exiting.  Just some of
> it's threads.
> 
> That is a very different problem to deal with.  That are many
> optimizations that are possible when _all_ of the threads are exiting
> that are not possible when _many_ threads are exiting.

Ah!  Now I get it.  This explains why the dput() lock contention was
so important.  A new thread starting would block on that lock as it
tried to create its new /proc/$pid/task/ directory.

Terminating thousands of threads but not the entire process isn't going
to hit many of the locks (eg exit_signal() and exit_mm() aren't going
to be called).  So we need a more sophisticated micro benchmark that is
continually starting threads and asking dozens-to-thousands of them to
stop at the same time.  Otherwise we'll try to fix lots of scalability
problems that our customer doesn't care about.

> Do you know if it is simply the cpu time or if it is the lock contention
> that is the problem?  If it is simply the cpu time we should consider if
> some of the locks that can be highly contended should become mutexes.
> Or perhaps something like Matthew's cpu pinning idea.

If we're not trying to optimise for the entire process going down, then
we definitely don't want my CPU pinning idea.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22 15:20                     ` Eric W. Biederman
  2020-06-22 15:48                       ` willy
@ 2020-06-22 17:16                       ` Junxiao Bi
  1 sibling, 0 replies; 20+ messages in thread
From: Junxiao Bi @ 2020-06-22 17:16 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Matthew Wilcox, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

On 6/22/20 8:20 AM, ebiederm@xmission.com wrote:

> If I understand correctly, the Java VM is not exiting.  Just some of
> it's threads.
>
> That is a very different problem to deal with.  There are many
> optimizations that are possible when _all_ of the threads are exiting
> that are not possible when _many_ threads are exiting.
>
> Do you know if it is simply the cpu time or if it is the lock contention
> that is the problem?  If it is simply the cpu time we should consider if
> some of the locks that can be highly contended should become mutexes.
> Or perhaps something like Matthew's cpu pinning idea.

The problem is high %sys time.

Thanks,

Junxiao.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22  5:15                   ` Junxiao Bi
  2020-06-22 15:20                     ` Eric W. Biederman
@ 2020-06-23  0:47                     ` Matthew Wilcox
  2020-06-25 22:11                       ` Junxiao Bi
  1 sibling, 1 reply; 20+ messages in thread
From: Matthew Wilcox @ 2020-06-23  0:47 UTC (permalink / raw)
  To: Junxiao Bi
  Cc: Eric W. Biederman, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

On Sun, Jun 21, 2020 at 10:15:39PM -0700, Junxiao Bi wrote:
> On 6/20/20 9:27 AM, Matthew Wilcox wrote:
> > On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
> > > Junxiao Bi <junxiao.bi@oracle.com> writes:
> > > > Still high lock contention. Collect the following hot path.
> > > A different location this time.
> > > 
> > > I know of at least exit_signal and exit_notify that take thread wide
> > > locks, and it looks like exit_mm is another.  Those don't use the same
> > > locks as flushing proc.
> > > 
> > > 
> > > So I think you are simply seeing a result of the thundering herd of
> > > threads shutting down at once.  Given that thread shutdown is fundamentally
> > > a slow path there is only so much that can be done.
> > > 
> > > If you are up for a project to working through this thundering herd I
> > > expect I can help some.  It will be a long process of cleaning up
> > > the entire thread exit process with an eye to performance.
> > Wengang had some tests which produced wall-clock values for this problem,
> > which I agree is more informative.
> > 
> > I'm not entirely sure what the customer workload is that requires a
> > highly threaded workload to also shut down quickly.  To my mind, an
> > overall workload is normally composed of highly-threaded tasks that run
> > for a long time and only shut down rarely (thus performance of shutdown
> > is not important) and single-threaded tasks that run for a short time.
> 
> The real workload is a Java application working in server-agent mode, issue
> happened in agent side, all it do is waiting works dispatching from server
> and execute. To execute one work, agent will start lots of short live
> threads, there could be a lot of threads exit same time if there were a lots
> of work to execute, the contention on the exit path caused a high %sys time
> which impacted other workload.

How about this for a micro?  Executes in about ten seconds on my laptop.
You might need to tweak it a bit to get better timing on a server.

// gcc -pthread -O2 -g -W -Wall
#include <pthread.h>
#include <unistd.h>

void *worker(void *arg)
{
	int i = 0;
	int *p = arg;

	for (;;) {
		while (i < 1000 * 1000) {
			i += *p;
		}
		sleep(1);
	}
}

int main(int argc, char **argv)
{
	pthread_t threads[20][100];
	int i, j, one = 1;

	for (i = 0; i < 1000; i++) {
		for (j = 0; j < 100; j++)
			pthread_create(&threads[i % 20][j], NULL, worker, &one);
		if (i < 5)
			continue;
		for (j = 0; j < 100; j++)
			pthread_cancel(threads[(i - 5) %20][j]);
	}

	return 0;
}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-23  0:47                     ` Matthew Wilcox
@ 2020-06-25 22:11                       ` Junxiao Bi
  0 siblings, 0 replies; 20+ messages in thread
From: Junxiao Bi @ 2020-06-25 22:11 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Eric W. Biederman, linux-kernel, linux-fsdevel, Matthew Wilcox,
	Srinivas Eeda, joe.jin, Wengang Wang

On 6/22/20 5:47 PM, Matthew Wilcox wrote:

> On Sun, Jun 21, 2020 at 10:15:39PM -0700, Junxiao Bi wrote:
>> On 6/20/20 9:27 AM, Matthew Wilcox wrote:
>>> On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
>>>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>>>>> Still high lock contention. Collect the following hot path.
>>>> A different location this time.
>>>>
>>>> I know of at least exit_signal and exit_notify that take thread wide
>>>> locks, and it looks like exit_mm is another.  Those don't use the same
>>>> locks as flushing proc.
>>>>
>>>>
>>>> So I think you are simply seeing a result of the thundering herd of
>>>> threads shutting down at once.  Given that thread shutdown is fundamentally
>>>> a slow path there is only so much that can be done.
>>>>
>>>> If you are up for a project to working through this thundering herd I
>>>> expect I can help some.  It will be a long process of cleaning up
>>>> the entire thread exit process with an eye to performance.
>>> Wengang had some tests which produced wall-clock values for this problem,
>>> which I agree is more informative.
>>>
>>> I'm not entirely sure what the customer workload is that requires a
>>> highly threaded workload to also shut down quickly.  To my mind, an
>>> overall workload is normally composed of highly-threaded tasks that run
>>> for a long time and only shut down rarely (thus performance of shutdown
>>> is not important) and single-threaded tasks that run for a short time.
>> The real workload is a Java application working in server-agent mode, issue
>> happened in agent side, all it do is waiting works dispatching from server
>> and execute. To execute one work, agent will start lots of short live
>> threads, there could be a lot of threads exit same time if there were a lots
>> of work to execute, the contention on the exit path caused a high %sys time
>> which impacted other workload.
> How about this for a micro?  Executes in about ten seconds on my laptop.
> You might need to tweak it a bit to get better timing on a server.
>
> // gcc -pthread -O2 -g -W -Wall
> #include <pthread.h>
> #include <unistd.h>
>
> void *worker(void *arg)
> {
> 	int i = 0;
> 	int *p = arg;
>
> 	for (;;) {
> 		while (i < 1000 * 1000) {
> 			i += *p;
> 		}
> 		sleep(1);
> 	}
> }
>
> int main(int argc, char **argv)
> {
> 	pthread_t threads[20][100];

Tuned the 100 up to 1000 here, and likewise in the following two loops.

Tested on a 2-socket server with 104 CPUs. Performance is similar on v5.7
and on v5.7 with Eric's fix: the spin lock contention shifted to the spin
lock taken in the futex code, so the fix didn't help.


     46.41%     0.11%  perf_test        [kernel.kallsyms] [k] 
entry_SYSCALL_64_after_hwframe
             |
              --46.30%--entry_SYSCALL_64_after_hwframe
                        |
                         --46.12%--do_syscall_64
                                   |
                                   |--30.47%--__x64_sys_futex
                                   |          |
                                   |           --30.45%--do_futex
                                   |                     |
                                   | |--18.04%--futex_wait
                                   |                     | |
                                   |                     | 
|--16.94%--futex_wait_setup
                                   |                     | |          |
                                   |                     | |           
--16.61%--_raw_spin_lock
                                   |                     | 
|                     |
                                   |                     | 
|                      --16.30%--native_queued_spin_lock_slowpath
                                   |                     | 
|                                |
                                   |                     | 
|                                 --0.81%--call_function_interrupt
                                   |                     | 
|                                           |
                                   |                     | | 
--0.79%--smp_call_function_interrupt
                                   |                     | 
|                                                      |
                                   |                     | | 
--0.62%--generic_smp_call_function_single_interrupt
                                   |                     | |
                                   | |           
--1.04%--futex_wait_queue_me
                                   | |                     |
                                   | |                      
--0.96%--schedule
                                   | |                                |
                                   | |                                 
--0.94%--__schedule
                                   | 
|                                           |
                                   | | --0.51%--pick_next_task_fair
                                   |                     |
                                   | --12.38%--futex_wake
                                   | |
                                   | |--11.00%--_raw_spin_lock
                                   | |          |
                                   | |           
--10.76%--native_queued_spin_lock_slowpath
                                   | |                     |
                                   | |                      
--0.55%--call_function_interrupt
                                   | |                                |
                                   | | --0.53%--smp_call_function_interrupt
                                   | |
|                                 --1.11%--wake_up_q
|                                           |
| --1.10%--try_to_wake_up
                                   |


Result of v5.7

=========

[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.850s
user    0m14.499s
sys    0m12.116s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.949s
user    0m14.285s
sys    0m18.408s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.885s
user    0m14.193s
sys    0m17.888s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.872s
user    0m14.451s
sys    0m18.717s
[root@jubi-bm-ol8 upstream]# uname -a
Linux jubi-bm-ol8 5.7.0-1700.20200601.el8uek.base.x86_64 #1 SMP Fri Jun 
19 07:41:06 PDT 2020 x86_64 x86_64 x86_64 GNU/Linux


Result of v5.7 with Eric's fix

=================

[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.889s
user    0m14.215s
sys    0m16.203s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.872s
user    0m14.431s
sys    0m17.737s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.908s
user    0m14.274s
sys    0m15.377s
[root@jubi-bm-ol8 upstream]# time ./perf_test

real    0m4.937s
user    0m14.632s
sys    0m16.255s
[root@jubi-bm-ol8 upstream]# uname -a
Linux jubi-bm-ol8 5.7.0-1700.20200601.el8uek.procfix.x86_64 #1 SMP Fri 
Jun 19 07:42:16 PDT 2020 x86_64 x86_64 x86_64 GNU/Linux

Thanks,

Junxiao.

> 	int i, j, one = 1;
>
> 	for (i = 0; i < 1000; i++) {
> 		for (j = 0; j < 100; j++)
> 			pthread_create(&threads[i % 20][j], NULL, worker, &one);
> 		if (i < 5)
> 			continue;
> 		for (j = 0; j < 100; j++)
> 			pthread_cancel(threads[(i - 5) %20][j]);
> 	}
>
> 	return 0;
> }

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries
  2020-06-22 15:48                       ` willy
@ 2020-08-17 12:19                         ` Eric W. Biederman
  0 siblings, 0 replies; 20+ messages in thread
From: Eric W. Biederman @ 2020-08-17 12:19 UTC (permalink / raw)
  To: willy
  Cc: Junxiao Bi, Matthew Wilcox, linux-kernel, linux-fsdevel,
	Matthew Wilcox, Srinivas Eeda, joe.jin, Wengang Wang

willy@casper.infradead.org writes:

> On Mon, Jun 22, 2020 at 10:20:40AM -0500, Eric W. Biederman wrote:
>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>> > On 6/20/20 9:27 AM, Matthew Wilcox wrote:
>> >> On Fri, Jun 19, 2020 at 05:42:45PM -0500, Eric W. Biederman wrote:
>> >>> Junxiao Bi <junxiao.bi@oracle.com> writes:
>> >>>> Still high lock contention. Collect the following hot path.
>> >>> A different location this time.
>> >>>
>> >>> I know of at least exit_signal and exit_notify that take thread wide
>> >>> locks, and it looks like exit_mm is another.  Those don't use the same
>> >>> locks as flushing proc.
>> >>>
>> >>>
>> >>> So I think you are simply seeing a result of the thundering herd of
>> >>> threads shutting down at once.  Given that thread shutdown is fundamentally
>> >>> a slow path there is only so much that can be done.
>> >>>
>> >>> If you are up for a project to working through this thundering herd I
>> >>> expect I can help some.  It will be a long process of cleaning up
>> >>> the entire thread exit process with an eye to performance.
>> >> Wengang had some tests which produced wall-clock values for this problem,
>> >> which I agree is more informative.
>> >>
>> >> I'm not entirely sure what the customer workload is that requires a
>> >> highly threaded workload to also shut down quickly.  To my mind, an
>> >> overall workload is normally composed of highly-threaded tasks that run
>> >> for a long time and only shut down rarely (thus performance of shutdown
>> >> is not important) and single-threaded tasks that run for a short time.
>> >
>> > The real workload is a Java application working in server-agent mode, issue
>> > happened in agent side, all it do is waiting works dispatching from server and
>> > execute. To execute one work, agent will start lots of short live threads, there
>> > could be a lot of threads exit same time if there were a lots of work to
>> > execute, the contention on the exit path caused a high %sys time which impacted
>> > other workload.
>> 
>> If I understand correctly, the Java VM is not exiting.  Just some of
>> it's threads.
>> 
>> That is a very different problem to deal with.  That are many
>> optimizations that are possible when _all_ of the threads are exiting
>> that are not possible when _many_ threads are exiting.
>
> Ah!  Now I get it.  This explains why the dput() lock contention was
> so important.  A new thread starting would block on that lock as it
> tried to create its new /proc/$pid/task/ directory.
>
> Terminating thousands of threads but not the entire process isn't going
> to hit many of the locks (eg exit_signal() and exit_mm() aren't going
> to be called).  So we need a more sophisticated micro benchmark that is
> continually starting threads and asking dozens-to-thousands of them to
> stop at the same time.  Otherwise we'll try to fix lots of scalability
> problems that our customer doesn't care about.

Has anyone come up with a more sophisticated microbenchmark or otherwise
made any progress in tracking this down further?

Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2020-08-17 12:23 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-18 22:17 severe proc dentry lock contention Junxiao Bi
2020-06-18 23:39 ` Matthew Wilcox
2020-06-19  0:02   ` Eric W. Biederman
2020-06-19  0:27     ` Junxiao Bi
2020-06-19  3:30       ` Eric W. Biederman
2020-06-19 14:09       ` [PATCH] proc: Avoid a thundering herd of threads freeing proc dentries Eric W. Biederman
2020-06-19 15:56         ` Junxiao Bi
2020-06-19 17:24           ` Eric W. Biederman
2020-06-19 21:56             ` Junxiao Bi
2020-06-19 22:42               ` Eric W. Biederman
2020-06-20 16:27                 ` Matthew Wilcox
2020-06-22  5:15                   ` Junxiao Bi
2020-06-22 15:20                     ` Eric W. Biederman
2020-06-22 15:48                       ` willy
2020-08-17 12:19                         ` Eric W. Biederman
2020-06-22 17:16                       ` Junxiao Bi
2020-06-23  0:47                     ` Matthew Wilcox
2020-06-25 22:11                       ` Junxiao Bi
2020-06-22  5:33         ` Masahiro Yamada
2020-06-22 15:13           ` Eric W. Biederman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).