LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC][PATCH] page reclaim throttle take2 
@ 2008-02-26  2:32 KOSAKI Motohiro
  2008-02-26 21:18 ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-26  2:32 UTC (permalink / raw)
  To: linux-kernel, linux-mm, KAMEZAWA Hiroyuki, Balbir Singh,
	Rik van Riel, Lee Schermerhorn, Nick Piggin
  Cc: kosaki.motohiro

Hi

This patch is a page reclaim improvement.

o previous discussion:
	http://marc.info/?l=linux-mm&m=120339997125985&w=2

o test method
  $ ./hackbench 120 process 1000

o test result (average of 5 measurements)

limit   hackbench     sys-time     major-fault   max-spent-time 
        time(s)       (s)                        in shrink_zone()
                                                 (jiffies)
--------------------------------------------------------------------
3       42.06         378.70       5336          6306


o reason for restricting parallel reclaim to 3 tasks per zone

We tested various limit values.
  - a limit of 1 gives the fewest major faults,
    but the worst max spent time.
  - a limit of 3 gives the best max spent reclaim time and hackbench result.

I think a limit of 3 gives the best overall behavior.


limit      hackbench     sys-time     major-fault   max-spent-time 
           time(s)       (s)                        in shrink_zone()
                                                    (jiffies)
--------------------------------------------------------------------
1          48.50         283.89       3690          9057
2          44.43         350.94       5245          7159
3          42.06         378.70       5336          6306
4          48.84         401.87       5474          6669
unlimited  282.30        1248.47      29026          -



Any comments are welcome!



Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Balbir Singh <balbir@linux.vnet.ibm.com>
CC: Rik van Riel <riel@redhat.com>
CC: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
CC: Nick Piggin <npiggin@suse.de>


---
 include/linux/mmzone.h |    3 +
 mm/page_alloc.c        |    4 +
 mm/vmscan.c            |  101 ++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 99 insertions(+), 9 deletions(-)

Index: b/include/linux/mmzone.h
===================================================================
--- a/include/linux/mmzone.h	2008-02-25 21:37:49.000000000 +0900
+++ b/include/linux/mmzone.h	2008-02-26 10:12:12.000000000 +0900
@@ -335,6 +335,9 @@ struct zone {
 	unsigned long		spanned_pages;	/* total size, including holes */
 	unsigned long		present_pages;	/* amount of memory (excluding holes) */
 
+
+	atomic_t		nr_reclaimers;
+	wait_queue_head_t	reclaim_throttle_waitq;
 	/*
 	 * rarely used fields:
 	 */
Index: b/mm/page_alloc.c
===================================================================
--- a/mm/page_alloc.c	2008-02-25 21:37:49.000000000 +0900
+++ b/mm/page_alloc.c	2008-02-26 10:12:12.000000000 +0900
@@ -3466,6 +3466,10 @@ static void __meminit free_area_init_cor
 		zone->nr_scan_inactive = 0;
 		zap_zone_vm_stats(zone);
 		zone->flags = 0;
+
+		zone->nr_reclaimers = ATOMIC_INIT(0);
+		init_waitqueue_head(&zone->reclaim_throttle_waitq);
+
 		if (!size)
 			continue;
 
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c	2008-02-25 21:37:49.000000000 +0900
+++ b/mm/vmscan.c	2008-02-26 10:59:38.000000000 +0900
@@ -1252,6 +1252,55 @@ static unsigned long shrink_zone(int pri
 	return nr_reclaimed;
 }
 
+
+#define RECLAIM_LIMIT (3)
+
+static int do_shrink_zone_throttled(int priority, struct zone *zone,
+				    struct scan_control *sc,
+				    unsigned long *ret_reclaimed)
+{
+	u64 start_time;
+	int ret = 0;
+
+	start_time = jiffies_64;
+
+	wait_event(zone->reclaim_throttle_waitq,
+		   atomic_add_unless(&zone->nr_reclaimers, 1, RECLAIM_LIMIT));
+
+	/* more reclaim until needed? */
+	if (scan_global_lru(sc) &&
+	    !(current->flags & PF_KSWAPD) &&
+	    time_after64(jiffies, start_time + HZ/10)) {
+		if (zone_watermark_ok(zone, sc->order, 4*zone->pages_high,
+				      MAX_NR_ZONES-1, 0)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
+
+	*ret_reclaimed += shrink_zone(priority, zone, sc);
+
+out:
+	atomic_dec(&zone->nr_reclaimers);
+	wake_up_all(&zone->reclaim_throttle_waitq);
+
+	return ret;
+}
+
+static unsigned long shrink_zone_throttled(int priority, struct zone *zone,
+					   struct scan_control *sc)
+{
+	unsigned long nr_reclaimed = 0;
+	int ret;
+
+	ret = do_shrink_zone_throttled(priority, zone, sc, &nr_reclaimed);
+
+	if (ret == -EAGAIN)
+		nr_reclaimed = 1;
+
+	return nr_reclaimed;
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1268,12 +1317,11 @@ static unsigned long shrink_zone(int pri
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zone **zones,
-					struct scan_control *sc)
+static int shrink_zones(int priority, struct zone **zones,
+			struct scan_control *sc, unsigned long *ret_reclaimed)
 {
-	unsigned long nr_reclaimed = 0;
 	int i;
-
+	int ret;
 
 	sc->all_unreclaimable = 1;
 	for (i = 0; zones[i] != NULL; i++) {
@@ -1304,10 +1352,15 @@ static unsigned long shrink_zones(int pr
 							priority);
 		}
 
-		nr_reclaimed += shrink_zone(priority, zone, sc);
+		ret = do_shrink_zone_throttled(priority, zone, sc,
+					       ret_reclaimed);
+		if (ret == -EAGAIN)
+			goto out;
 	}
+	ret = 0;
 
-	return nr_reclaimed;
+out:
+	return ret;
 }
  
 /*
@@ -1333,6 +1386,9 @@ static unsigned long do_try_to_free_page
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
 	int i;
+	unsigned long start_time = jiffies;
+	unsigned long last_check_time = jiffies;
+	int err;
 
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
@@ -1356,7 +1412,12 @@ static unsigned long do_try_to_free_page
 		sc->nr_io_pages = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zones, sc);
+		err = shrink_zones(priority, zones, sc, &nr_reclaimed);
+		if (err == -EAGAIN) {
+			ret = 1;
+			goto out;
+		}
+
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
@@ -1389,8 +1450,28 @@ static unsigned long do_try_to_free_page
 
 		/* Take a nap, wait for some writeback to complete */
 		if (sc->nr_scanned && priority < DEF_PRIORITY - 2 &&
-				sc->nr_io_pages > sc->swap_cluster_max)
+		    sc->nr_io_pages > sc->swap_cluster_max) {
 			congestion_wait(WRITE, HZ/10);
+
+		}
+
+		if (scan_global_lru(sc) &&
+		    time_after(jiffies, start_time+HZ) &&
+		    time_after(jiffies, last_check_time+HZ/10)) {
+			last_check_time = jiffies;
+
+			/* more reclaim until needed? */
+			for (i = 0; zones[i] != NULL; i++) {
+				struct zone *zone = zones[i];
+
+				if (zone_watermark_ok(zone, sc->order,
+						      4*zone->pages_high,
+						      zone_idx(zones[0]), 0)) {
+					ret = 1;
+					goto out;
+				}
+			}
+		}
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
 	if (!sc->all_unreclaimable && scan_global_lru(sc))
@@ -1588,7 +1669,9 @@ loop_again:
 			 */
 			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
 						end_zone, 0))
-				nr_reclaimed += shrink_zone(priority, zone, &sc);
+				nr_reclaimed += shrink_zone_throttled(priority,
+								      zone,
+								      &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);





^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-26  2:32 [RFC][PATCH] page reclaim throttle take2 KOSAKI Motohiro
@ 2008-02-26 21:18 ` Peter Zijlstra
  2008-02-27  0:50   ` KAMEZAWA Hiroyuki
  2008-02-27  4:26   ` KOSAKI Motohiro
  0 siblings, 2 replies; 26+ messages in thread
From: Peter Zijlstra @ 2008-02-26 21:18 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-kernel, linux-mm, KAMEZAWA Hiroyuki, Balbir Singh,
	Rik van Riel, Lee Schermerhorn, Nick Piggin


On Tue, 2008-02-26 at 11:32 +0900, KOSAKI Motohiro wrote:

> Index: b/include/linux/mmzone.h
> ===================================================================
> --- a/include/linux/mmzone.h	2008-02-25 21:37:49.000000000 +0900
> +++ b/include/linux/mmzone.h	2008-02-26 10:12:12.000000000 +0900
> @@ -335,6 +335,9 @@ struct zone {
>  	unsigned long		spanned_pages;	/* total size, including holes */
>  	unsigned long		present_pages;	/* amount of memory (excluding holes) */
>  
> +
> +	atomic_t		nr_reclaimers;
> +	wait_queue_head_t	reclaim_throttle_waitq;
>  	/*
>  	 * rarely used fields:
>  	 */

Small nit, that extra blank line seems at the wrong end of the text
block :-)

> Index: b/mm/vmscan.c
> ===================================================================
> --- a/mm/vmscan.c	2008-02-25 21:37:49.000000000 +0900
> +++ b/mm/vmscan.c	2008-02-26 10:59:38.000000000 +0900
> @@ -1252,6 +1252,55 @@ static unsigned long shrink_zone(int pri
>  	return nr_reclaimed;
>  }
>  
> +
> +#define RECLAIM_LIMIT (3)
> +
> +static int do_shrink_zone_throttled(int priority, struct zone *zone,
> +				    struct scan_control *sc,
> +				    unsigned long *ret_reclaimed)
> +{
> +	u64 start_time;
> +	int ret = 0;
> +
> +	start_time = jiffies_64;
> +
> +	wait_event(zone->reclaim_throttle_waitq,
> +		   atomic_add_unless(&zone->nr_reclaimers, 1, RECLAIM_LIMIT));
> +
> +	/* more reclaim until needed? */
> +	if (scan_global_lru(sc) &&
> +	    !(current->flags & PF_KSWAPD) &&
> +	    time_after64(jiffies, start_time + HZ/10)) {
> +		if (zone_watermark_ok(zone, sc->order, 4*zone->pages_high,
> +				      MAX_NR_ZONES-1, 0)) {
> +			ret = -EAGAIN;
> +			goto out;
> +		}
> +	}
> +
> +	*ret_reclaimed += shrink_zone(priority, zone, sc);
> +
> +out:
> +	atomic_dec(&zone->nr_reclaimers);
> +	wake_up_all(&zone->reclaim_throttle_waitq);
> +
> +	return ret;
> +}

Would it be possible - and worthwhile - to make this FIFO fair?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-26 21:18 ` Peter Zijlstra
@ 2008-02-27  0:50   ` KAMEZAWA Hiroyuki
  2008-02-27  4:26   ` KOSAKI Motohiro
  1 sibling, 0 replies; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-02-27  0:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: KOSAKI Motohiro, linux-kernel, linux-mm, Balbir Singh,
	Rik van Riel, Lee Schermerhorn, Nick Piggin

On Tue, 26 Feb 2008 22:18:38 +0100
Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> > +out:
> > +	atomic_dec(&zone->nr_reclaimers);
> > +	wake_up_all(&zone->reclaim_throttle_waitq);
> > +
> > +	return ret;
> > +}
> 
> Would it be possible - and worthwhile - to make this FIFO fair?
> 
I don't think enforcing fairness makes sense here.

IMHO, this functionality is an unfair one in nature. While someone is
reclaiming pages, other processes can get a newly reclaimed page without
calling try_to_free_page.

For high-priority processes,

1. avoiding diving into try_to_free_pages if it's congested, and
2. just waiting for someone else to reclaim pages and grabbing one ASAP

may be good for getting work done quickly.

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-26 21:18 ` Peter Zijlstra
  2008-02-27  0:50   ` KAMEZAWA Hiroyuki
@ 2008-02-27  4:26   ` KOSAKI Motohiro
  2008-02-27  4:27     ` Balbir Singh
  1 sibling, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  4:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: kosaki.motohiro, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Balbir Singh, Rik van Riel, Lee Schermerhorn, Nick Piggin

Hi Peter,

> > +
> > +	atomic_t		nr_reclaimers;
> > +	wait_queue_head_t	reclaim_throttle_waitq;
> >  	/*
> >  	 * rarely used fields:
> >  	 */
> 
> Small nit, that extra blank line seems at the wrong end of the text
> block :-)

Agghhh, sorry ;-)
I'll fix at next post.

> > +out:
> > +	atomic_dec(&zone->nr_reclaimers);
> > +	wake_up_all(&zone->reclaim_throttle_waitq);
> > +
> > +	return ret;
> > +}
> 
> Would it be possible - and worthwhile - to make this FIFO fair?

Hmmm,
maybe we don't need perfect fairness,
because try_to_free_pages() is an unfair mechanism.

But I will test using wake_up() instead of wake_up_all().
That gives a roughly fair order, provided no performance regression happens.

Thanks for the very useful comment.


- kosaki




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  4:26   ` KOSAKI Motohiro
@ 2008-02-27  4:27     ` Balbir Singh
  2008-02-27  4:45       ` KOSAKI Motohiro
  0 siblings, 1 reply; 26+ messages in thread
From: Balbir Singh @ 2008-02-27  4:27 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Peter Zijlstra, linux-kernel, linux-mm, KAMEZAWA Hiroyuki,
	Rik van Riel, Lee Schermerhorn, Nick Piggin

KOSAKI Motohiro wrote:
> Hi Peter,
> 
>>> +
>>> +	atomic_t		nr_reclaimers;
>>> +	wait_queue_head_t	reclaim_throttle_waitq;
>>>  	/*
>>>  	 * rarely used fields:
>>>  	 */
>> Small nit, that extra blank line seems at the wrong end of the text
>> block :-)
> 
> Agghhh, sorry ;-)
> I'll fix at next post.
> 
>>> +out:
>>> +	atomic_dec(&zone->nr_reclaimers);
>>> +	wake_up_all(&zone->reclaim_throttle_waitq);
>>> +
>>> +	return ret;
>>> +}
>> Would it be possible - and worthwhile - to make this FIFO fair?
> 
> Hmmm
> may be, we don't need perfectly fair.
> because try_to_free_page() is unfair mechanism.
> 
> but I will test use wake_up() instead wake_up_all().
> it makes so so fair order if no performance regression happend.
> 
> Thanks very useful comment.

One more thing, I would request you to add default heuristics (number of
reclaimers), based on the number of cpus in the system. Letting people tuning it
is fine, but defaults should be related to number of cpus, nodes and zones on
the system. Zones can be reaped in parallel per node and cpus allow threads to
run in parallel. So please use that to come up with good defaults, instead of a
number like "3".

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  4:27     ` Balbir Singh
@ 2008-02-27  4:45       ` KOSAKI Motohiro
  2008-02-27  5:00         ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  4:45 UTC (permalink / raw)
  To: balbir
  Cc: kosaki.motohiro, Peter Zijlstra, linux-kernel, linux-mm,
	KAMEZAWA Hiroyuki, Rik van Riel, Lee Schermerhorn, Nick Piggin

Hi

> One more thing, I would request you to add default heuristics (number of
> reclaimers), based on the number of cpus in the system. Letting people tuning it
> is fine, but defaults should be related to number of cpus, nodes and zones on
> the system. Zones can be reaped in parallel per node and cpus allow threads to
> run in parallel. So please use that to come up with good defaults, instead of a
> number like "3".

I don't think so.
All modern many-CPU machines are built on NUMA.
That means the following:
 - if the number of CPUs increases, then the number of zones increases, too.

If the default value increases with #cpus, lock contention increases
dramatically on large NUMA machines.

Have I overlooked anything?


That said, I am afraid that 3 may be too small a value.
If you have other test results from a large machine, please show me.

- kosaki



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  4:45       ` KOSAKI Motohiro
@ 2008-02-27  5:00         ` KAMEZAWA Hiroyuki
  2008-02-27  5:04           ` KOSAKI Motohiro
  0 siblings, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-02-27  5:00 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: balbir, Peter Zijlstra, linux-kernel, linux-mm, Rik van Riel,
	Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008 13:45:18 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> Hi
> 
> > One more thing, I would request you to add default heuristics (number of
> > reclaimers), based on the number of cpus in the system. Letting people tuning it
> > is fine, but defaults should be related to number of cpus, nodes and zones on
> > the system. Zones can be reaped in parallel per node and cpus allow threads to
> > run in parallel. So please use that to come up with good defaults, instead of a
> > number like "3".
> 
> I don't think so.
> all modern many cpu machine stand on NUMA.
> it mean following,
>  - if cpu increases, then zone increases, too.
> 
> if default value increase by #cpus, lock contension dramatically increase
> on large numa.
> 
> Have I overlooked anything?
> 
> 
How about adding something like..
== 
CONFIG_SIMULTANEOUS_PAGE_RECLAIMERS 
int
default 3
depends on DEBUG
help
  This value determines the number of threads which can do page reclaim
  in a zone simultaneously. If this is too big, performance under heavy memory
  pressure will decrease.
  If unsure, use default.
==

Then, you can get performance reports from people interested in this
feature in test cycle.

Thanks,
-Kame



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:04           ` KOSAKI Motohiro
@ 2008-02-27  5:03             ` Balbir Singh
  2008-02-27  5:13               ` KOSAKI Motohiro
  2008-02-27  5:19               ` David Rientjes
  0 siblings, 2 replies; 26+ messages in thread
From: Balbir Singh @ 2008-02-27  5:03 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel, linux-mm,
	Rik van Riel, Lee Schermerhorn, Nick Piggin

KOSAKI Motohiro wrote:
> Hi
> 
>>> I don't think so.
>>> all modern many cpu machine stand on NUMA.
>>> it mean following,
>>>  - if cpu increases, then zone increases, too.
>>>
>>> if default value increase by #cpus, lock contension dramatically increase
>>> on large numa.
>>>
>>> Have I overlooked anything?
>>>
>> How about adding something like..
>> == 
>> CONFIG_SIMULTANEOUS_PAGE_RECLAIMERS 
>> int
>> default 3
>> depends on DEBUG
>> help
>>   This value determines the number of threads which can do page reclaim
>>   in a zone simultaneously. If this is too big, performance under heavy memory
>>   pressure will decrease.
>>   If unsure, use default.
>> ==
>>
>> Then, you can get performance reports from people interested in this
>> feature in test cycle.
> 
> hm, intersting.
> but sysctl parameter is more better, i think.
> 
> OK, I'll add it at next post.

I think sysctl should be interesting. The config option provides good
documentation, but it is static in nature (requires reboot to change). I wish we
could have the best of both worlds.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:00         ` KAMEZAWA Hiroyuki
@ 2008-02-27  5:04           ` KOSAKI Motohiro
  2008-02-27  5:03             ` Balbir Singh
  0 siblings, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  5:04 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: kosaki.motohiro, balbir, Peter Zijlstra, linux-kernel, linux-mm,
	Rik van Riel, Lee Schermerhorn, Nick Piggin

Hi

> > I don't think so.
> > all modern many cpu machine stand on NUMA.
> > it mean following,
> >  - if cpu increases, then zone increases, too.
> > 
> > if default value increase by #cpus, lock contension dramatically increase
> > on large numa.
> > 
> > Have I overlooked anything?
> > 
> How about adding something like..
> == 
> CONFIG_SIMULTANEOUS_PAGE_RECLAIMERS 
> int
> default 3
> depends on DEBUG
> help
>   This value determines the number of threads which can do page reclaim
>   in a zone simultaneously. If this is too big, performance under heavy memory
>   pressure will decrease.
>   If unsure, use default.
> ==
> 
> Then, you can get performance reports from people interested in this
> feature in test cycle.

Hm, interesting.
But a sysctl parameter would be better, I think.

OK, I'll add it at next post.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:03             ` Balbir Singh
@ 2008-02-27  5:13               ` KOSAKI Motohiro
  2008-02-27  5:19               ` David Rientjes
  1 sibling, 0 replies; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  5:13 UTC (permalink / raw)
  To: balbir
  Cc: kosaki.motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

Hi balbir-san,

> >> CONFIG_SIMULTANEOUS_PAGE_RECLAIMERS 
> >> int
> >> default 3
> >> depends on DEBUG
> >> help
> >>   This value determines the number of threads which can do page reclaim
> >>   in a zone simultaneously. If this is too big, performance under heavy memory
> >>   pressure will decrease.
> >>   If unsure, use default.
> >> ==
> >>
> >> Then, you can get performance reports from people interested in this
> >> feature in test cycle.
> > 
> > hm, intersting.
> > but sysctl parameter is more better, i think.
> > 
> > OK, I'll add it at next post.
> 
> I think sysctl should be interesting. The config option provides good
> documentation, but it is static in nature (requires reboot to change). I wish we
> could have the best of both worlds.

OK, I will follow your suggestion.


- kosaki



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:03             ` Balbir Singh
  2008-02-27  5:13               ` KOSAKI Motohiro
@ 2008-02-27  5:19               ` David Rientjes
  2008-02-27  5:33                 ` KOSAKI Motohiro
  1 sibling, 1 reply; 26+ messages in thread
From: David Rientjes @ 2008-02-27  5:19 UTC (permalink / raw)
  To: Balbir Singh
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, Balbir Singh wrote:

> >> CONFIG_SIMULTANEOUS_PAGE_RECLAIMERS 
> >> int
> >> default 3
> >> depends on DEBUG
> >> help
> >>   This value determines the number of threads which can do page reclaim
> >>   in a zone simultaneously. If this is too big, performance under heavy memory
> >>   pressure will decrease.
> >>   If unsure, use default.
> >> ==
> >>
> >> Then, you can get performance reports from people interested in this
> >> feature in test cycle.
> > 
> > hm, intersting.
> > but sysctl parameter is more better, i think.
> > 
> > OK, I'll add it at next post.
> 
> I think sysctl should be interesting. The config option provides good
> documentation, but it is static in nature (requires reboot to change). I wish we
> could have the best of both worlds.
> 

I disagree, the config option is indeed static but so is the NUMA topology 
of the machine.  It represents the maximum number of page reclaim threads 
that should be allowed for that specific topology; a maximum should not 
need to be redefined with yet another sysctl and should remain independent 
of various workloads.

However, I would recommend adding the word "MAX" to the config option.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:19               ` David Rientjes
@ 2008-02-27  5:33                 ` KOSAKI Motohiro
  2008-02-27  5:47                   ` David Rientjes
  0 siblings, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  5:33 UTC (permalink / raw)
  To: David Rientjes
  Cc: kosaki.motohiro, Balbir Singh, KAMEZAWA Hiroyuki, Peter Zijlstra,
	linux-kernel, linux-mm, Rik van Riel, Lee Schermerhorn,
	Nick Piggin

Hi David,

> I disagree, the config option is indeed static but so is the NUMA topology 
> of the machine.  It represents the maximum number of page reclaim threads 
> that should be allowed for that specific topology; a maximum should not 
> need to be redefined with yet another sysctl and should remain independent 
> of various workloads.

ok.

> However, I would recommend adding the word "MAX" to the config option.

Would MAX_PARALLEL_RECLAIM_TASK be a good name?

- kosaki


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:33                 ` KOSAKI Motohiro
@ 2008-02-27  5:47                   ` David Rientjes
  2008-02-27  5:48                     ` Balbir Singh
  0 siblings, 1 reply; 26+ messages in thread
From: David Rientjes @ 2008-02-27  5:47 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Balbir Singh, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, KOSAKI Motohiro wrote:

> > I disagree, the config option is indeed static but so is the NUMA topology 
> > of the machine.  It represents the maximum number of page reclaim threads 
> > that should be allowed for that specific topology; a maximum should not 
> > need to be redefined with yet another sysctl and should remain independent 
> > of various workloads.
> 
> ok.
> 
> > However, I would recommend adding the word "MAX" to the config option.
> 
> MAX_PARALLEL_RECLAIM_TASK is good word?
> 

I'd use _THREAD instead of _TASK, but I'd also wait for Balbir's input 
because perhaps I missed something in my original analysis that this 
config option represents only the maximum number of concurrent reclaim 
threads and other heuristics are used in addition to this that determine 
the exact number of threads depending on VM strain.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:47                   ` David Rientjes
@ 2008-02-27  5:48                     ` Balbir Singh
  2008-02-27  6:09                       ` David Rientjes
  2008-02-27  6:52                       ` KOSAKI Motohiro
  0 siblings, 2 replies; 26+ messages in thread
From: Balbir Singh @ 2008-02-27  5:48 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

David Rientjes wrote:
> On Wed, 27 Feb 2008, KOSAKI Motohiro wrote:
> 
>>> I disagree, the config option is indeed static but so is the NUMA topology 
>>> of the machine.  It represents the maximum number of page reclaim threads 
>>> that should be allowed for that specific topology; a maximum should not 
>>> need to be redefined with yet another sysctl and should remain independent 
>>> of various workloads.
>> ok.
>>
>>> However, I would recommend adding the word "MAX" to the config option.
>> MAX_PARALLEL_RECLAIM_TASK is good word?
>>
> 
> I'd use _THREAD instead of _TASK, but I'd also wait for Balbir's input 
> because perhaps I missed something in my original analysis that this 
> config option represents only the maximum number of concurrent reclaim 
> threads and other heuristics are used in addition to this that determine 
> the exact number of threads depending on VM strain.
> 


Things are changing: with memory hot-add/remove and CPU hotplug, the topology can
change and is no longer static. One can create fake NUMA nodes on the fly using
a boot option as well.

Since we're talking of parallel reclaims, I think it's a function of CPUs and
Nodes. I'd rather keep it as a sysctl with a good default value based on the
topology. If we end up getting it wrong, the system administrator has a choice.
That is better than expecting him/her to recompile the kernel and boot it. A
sysctl does not create problems w.r.t. changing the number of threads either: there
are no hard-to-solve race conditions - it is fairly straightforward.




-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:48                     ` Balbir Singh
@ 2008-02-27  6:09                       ` David Rientjes
  2008-02-27  7:10                         ` KOSAKI Motohiro
  2008-02-27  7:59                         ` Balbir Singh
  2008-02-27  6:52                       ` KOSAKI Motohiro
  1 sibling, 2 replies; 26+ messages in thread
From: David Rientjes @ 2008-02-27  6:09 UTC (permalink / raw)
  To: Balbir Singh
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, Balbir Singh wrote:

> Since we're talking of parallel reclaims, I think it's a function of CPUs and
> Nodes. I'd rather keep it as a sysctl with a good default value based on the
> topology. If we end up getting it wrong, the system administrator has a choice.
> That is better than expecting him/her to recompile the kernel and boot that. A
> sysctl does not create problems either w.r.t changing the number of threads, no
> hard to solve race-conditions - it is fairly straight forward
> 

We lack node hotplug, so the dependence on the number of system nodes in 
the equation is static and can easily be defined at compile-time.

I agree that the maximum number of parallel reclaim threads should be a 
function of cpus, so you can easily make it that by adding callback 
functions for cpu hotplug events.

Perhaps a better alternative than creating a set of heuristics and setting 
a user-defined maximum on the number of concurrent reclaim threads is to 
configure the number of threads to be used for each online cpu called 
CONFIG_NUM_RECLAIM_THREADS_PER_CPU.  This solves the lock contention 
problem if configured properly that was mentioned earlier.

Adding yet another sysctl for this functionality seems unnecessary, unless 
it is attempting to address other VM problems where page reclaim needs to 
be throttled when it is being stressed.  Those issues need to be addressed 
directly, in my opinion, instead of attempting to workaround it by 
limiting the number of concurrent reclaim threads.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  5:48                     ` Balbir Singh
  2008-02-27  6:09                       ` David Rientjes
@ 2008-02-27  6:52                       ` KOSAKI Motohiro
  1 sibling, 0 replies; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  6:52 UTC (permalink / raw)
  To: balbir
  Cc: kosaki.motohiro, David Rientjes, KAMEZAWA Hiroyuki,
	Peter Zijlstra, linux-kernel, linux-mm, Rik van Riel,
	Lee Schermerhorn, Nick Piggin

Hi

> Things are changing, with memory hot-add remove, CPU hotplug , the topology can
> change and is no longer static. One can create fake NUMA nodes on the fly using
> a boot option as well.

agreed.

> Since we're talking of parallel reclaims, I think it's a function of CPUs and
> Nodes. I'd rather keep it as a sysctl with a good default value based on the
> topology. If we end up getting it wrong, the system administrator has a choice.
> That is better than expecting him/her to recompile the kernel and boot that. A
> sysctl does not create problems either w.r.t changing the number of threads, no
> hard to solve race-conditions - it is fairly straight forward

Sorry, I don't understand yet.
I think my patch is already a function of CPUs and nodes:
a per-zone limit is implicitly proportional to #cpus and #nodes.

Please tell me a topology on which a per-zone limit doesn't work well.

I think a boot option or sysctl should be used only while the patch is in -mm,
to gather various feedback.
In the end, we should select a better default and remove the sysctl.


- kosaki



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  6:09                       ` David Rientjes
@ 2008-02-27  7:10                         ` KOSAKI Motohiro
  2008-02-27  7:19                           ` David Rientjes
  2008-02-27  7:59                         ` Balbir Singh
  1 sibling, 1 reply; 26+ messages in thread
From: KOSAKI Motohiro @ 2008-02-27  7:10 UTC (permalink / raw)
  To: David Rientjes
  Cc: kosaki.motohiro, Balbir Singh, KAMEZAWA Hiroyuki, Peter Zijlstra,
	linux-kernel, linux-mm, Rik van Riel, Lee Schermerhorn,
	Nick Piggin

Hi

> Adding yet another sysctl for this functionality seems unnecessary, unless 
> it is attempting to address other VM problems where page reclaim needs to 
> be throttled when it is being stressed.  Those issues need to be addressed 
> directly, in my opinion, instead of attempting to workaround it by 
> limiting the number of concurrent reclaim threads.

hm,

could you post another patch?
I hope to avoid discussion without an implementation,
and I hope we can compare by benchmark results.


-kosaki


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:10                         ` KOSAKI Motohiro
@ 2008-02-27  7:19                           ` David Rientjes
  2008-02-27  7:51                             ` KAMEZAWA Hiroyuki
  2008-02-27 15:30                             ` Rik van Riel
  0 siblings, 2 replies; 26+ messages in thread
From: David Rientjes @ 2008-02-27  7:19 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Balbir Singh, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, KOSAKI Motohiro wrote:

> > Adding yet another sysctl for this functionality seems unnecessary, unless 
> > it is attempting to address other VM problems where page reclaim needs to 
> > be throttled when it is being stressed.  Those issues need to be addressed 
> > directly, in my opinion, instead of attempting to workaround it by 
> > limiting the number of concurrent reclaim threads.
> 
> hm,
> 
> could you post another patch?
> I hope avoid implementless discussion.
> and I hope compare by benchmark result.
> 

My suggestion is merely to make the number of concurrent page reclaim 
threads be a function of how many online cpus there are.  Threads can 
easily be added or removed for cpu hotplug events by callback functions.

That's different than allowing users to change the number of threads with 
yet another sysctl.  Unless there are situations that can be presented 
where tuning the number of threads is advantageous to reduce lock 
contention, for example, and not simply working around other VM problems, 
then I see no point for an additional sysctl.

So my suggestion is to implement this in terms of 
CONFIG_NUM_RECLAIM_THREADS_PER_CPU and add callback functions for cpu 
hotplug events that add or remove this number of threads.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:19                           ` David Rientjes
@ 2008-02-27  7:51                             ` KAMEZAWA Hiroyuki
  2008-02-27  7:56                               ` David Rientjes
  2008-02-27 15:30                             ` Rik van Riel
  1 sibling, 1 reply; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-02-27  7:51 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, Balbir Singh, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Tue, 26 Feb 2008 23:19:08 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> My suggestion is merely to make the number of concurrent page reclaim 
> threads be a function of how many online cpus there are.  Threads can 
> easily be added or removed for cpu hotplug events by callback functions.
> 
> That's different than allowing users to change the number of threads with 
> yet another sysctl.  Unless there are situations that can be presented 
> where tuning the number of threads is advantageous to reduce lock 
> contention, for example, and not simply working around other VM problems, 
> then I see no point for an additional sysctl.
> 
> So my suggestion is to implement this in terms of 
> CONFIG_NUM_RECLAIM_THREADS_PER_CPU and add callback functions for cpu 
> hotplug events that add or remove this number of threads.
> 

Hmm, but kswapd, which is main worker of page reclaiming, is per-node.
And reclaim is done based on zone.
per-zone/per-node throttling seems to make sense.

I know his environment has 4cpus per node but throttle to 3 was the best
number in his measurement. Then it seems num-per-cpu is excessive.
(At least, ratio(%) is better.)
When zone-reclaiming is improved to scale well, we'll have to change
this throttle.

BTW, could someone try his patch on x86_64/ppc ? 
I'd like to see how heavy the contention is on other machines.

Thanks,
-kame
 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:51                             ` KAMEZAWA Hiroyuki
@ 2008-02-27  7:56                               ` David Rientjes
  2008-02-27  8:09                                 ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 26+ messages in thread
From: David Rientjes @ 2008-02-27  7:56 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: KOSAKI Motohiro, Balbir Singh, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, KAMEZAWA Hiroyuki wrote:

> Hmm, but kswapd, which is main worker of page reclaiming, is per-node.
> And reclaim is done based on zone.
> per-zone/per-node throttling seems to make sense.
> 

That's another argument for not introducing the sysctl; the number of 
nodes and zones are a static property of the machine that cannot change 
without a reboot (numa=fake, mem=, introducing movable zones, etc).  We 
don't have node hotplug that can suddenly introduce additional zones from 
which to reclaim.

My point was that there doesn't appear to be any use case for tuning this 
via a sysctl that isn't simply attempting to workaround some other reclaim 
problem when the VM is stressed.  If that's agreed upon, then deciding 
between a config option that is either per-cpu or per-node should be based 
on the benchmarks that you've run.  At this time, it appears that per-node 
is the more advantageous.

> I know his environment has 4cpus per node but throttle to 3 was the best
> number in his measurement. Then it seems num-per-cpu is excessive.
> (At least, ratio(%) is better.)

That seems to indicate that the NUMA topology is more important than lock 
contention for the reclaim throttle.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  6:09                       ` David Rientjes
  2008-02-27  7:10                         ` KOSAKI Motohiro
@ 2008-02-27  7:59                         ` Balbir Singh
  2008-02-27  8:47                           ` David Rientjes
  1 sibling, 1 reply; 26+ messages in thread
From: Balbir Singh @ 2008-02-27  7:59 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

David Rientjes wrote:
> On Wed, 27 Feb 2008, Balbir Singh wrote:
> 
>> Since we're talking of parallel reclaims, I think it's a function of CPUs and
>> Nodes. I'd rather keep it as a sysctl with a good default value based on the
>> topology. If we end up getting it wrong, the system administrator has a choice.
>> That is better than expecting him/her to recompile the kernel and boot that. A
>> sysctl does not create problems either w.r.t changing the number of threads, no
>> hard to solve race-conditions - it is fairly straight forward
>>
> 
> We lack node hotplug, so the dependence on the number of system nodes in 
> the equation is static and can easily be defined at compile-time.
> 

Let's forget node hotplug for the moment, but what if someone

1. Changes the machine configuration and adds more nodes, do we expect the
kernel to be recompiled? Or is it easier to update /etc/sysctl.conf?
2. Uses fake NUMA nodes and increases/decreases the number of nodes across
reboots. Should the kernel be recompiled?

> I agree that the maximum number of parallel reclaim threads should be a 
> function of cpus, so you can easily make it that by adding callback 
> functions for cpu hotplug events.
> 
> Perhaps a better alternative than creating a set of heuristics and setting 
> a user-defined maximum on the number of concurrent reclaim threads is to 
> configure the number of threads to be used for each online cpu called 
> CONFIG_NUM_RECLAIM_THREADS_PER_CPU.  This solves the lock contention 
> problem if configured properly that was mentioned earlier.
> 

I am afraid it doesn't. Consider as you scale number of CPU's with the same
amount of memory, we'll end up making the reclaim problem worse.

> Adding yet another sysctl for this functionality seems unnecessary, unless 
> it is attempting to address other VM problems where page reclaim needs to 
> be throttled when it is being stressed.  Those issues need to be addressed 
> directly, in my opinion, instead of attempting to workaround it by 
> limiting the number of concurrent reclaim threads.

We are providing a solution with a good default value, allowing the
administrator to change them when our defaults don't work well.

-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:56                               ` David Rientjes
@ 2008-02-27  8:09                                 ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 26+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-02-27  8:09 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, Balbir Singh, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Tue, 26 Feb 2008 23:56:39 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:

> On Wed, 27 Feb 2008, KAMEZAWA Hiroyuki wrote:
> 
> > Hmm, but kswapd, which is main worker of page reclaiming, is per-node.
> > And reclaim is done based on zone.
> > per-zone/per-node throttling seems to make sense.
> > 
> 
> That's another argument for not introducing the sysctl; the number of 
> nodes and zones are a static property of the machine that cannot change 
> without a reboot (numa=fake, mem=, introducing movable zones, etc).  We 
> don't have node hotplug that can suddenly introduce additional zones from 
> which to reclaim.

Hmm, do you know there is already zone-hotplug ? ;)
(That is, onlining new memory in a new zone increases the # of zones.
 Now, in our system, possible nodes can turn into online nodes.)

> My point was that there doesn't appear to be any use case for tuning this 
> via a sysctl that isn't simply attempting to workaround some other reclaim 
> problem when the VM is stressed.  If that's agreed upon, then deciding 
> between a config option that is either per-cpu or per-node should be based 
> on the benchmarks that you've run.  At this time, it appears that per-node 
> is the more advantageous.
>
I agree that what is best should be decided by benchmarks.
I like per-node, now.
I believe there will be some change when RvR's split-LRU patches are applied.
 
> > I know his environment has 4cpus per node but throttle to 3 was the best
> > number in his measurement. Then it seems num-per-cpu is excessive.
> > (At least, ratio(%) is better.)
> 
> That seems to indicate that the NUMA topology is more important than lock 
> contention for the reclaim throttle.
> 
I hear that, in the end, there is also an I/O bottleneck for page reclaiming.


Thanks,
-Kame


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:59                         ` Balbir Singh
@ 2008-02-27  8:47                           ` David Rientjes
  2008-02-27  9:01                             ` Balbir Singh
  0 siblings, 1 reply; 26+ messages in thread
From: David Rientjes @ 2008-02-27  8:47 UTC (permalink / raw)
  To: Balbir Singh
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

On Wed, 27 Feb 2008, Balbir Singh wrote:

> Let's forget node hotplug for the moment, but what if someone
> 
> 1. Changes the machine configuration and adds more nodes, do we expect the
> kernel to be recompiled? Or is it easier to update /etc/sysctl.conf?
> 2. Uses fake NUMA nodes and increases/decreases the number of nodes across
> reboots. Should the kernel be recompiled?
> 

That is why the proposal was made to make this a static configuration 
option, such as CONFIG_NUM_RECLAIM_THREADS_PER_NODE, that will handle both 
situations.

> I am afraid it doesn't. Consider as you scale number of CPU's with the same
> amount of memory, we'll end up making the reclaim problem worse.
> 

The benchmarks that have been posted suggest that memory locality is more 
important than lock contention, as I've already mentioned.

		David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  8:47                           ` David Rientjes
@ 2008-02-27  9:01                             ` Balbir Singh
  2008-02-27  9:44                               ` Peter Zijlstra
  0 siblings, 1 reply; 26+ messages in thread
From: Balbir Singh @ 2008-02-27  9:01 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, KAMEZAWA Hiroyuki, Peter Zijlstra, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin

David Rientjes wrote:
> On Wed, 27 Feb 2008, Balbir Singh wrote:
> 
>> Let's forget node hotplug for the moment, but what if someone
>>
>> 1. Changes the machine configuration and adds more nodes, do we expect the
>> kernel to be recompiled? Or is it easier to update /etc/sysctl.conf?
>> 2. Uses fake NUMA nodes and increases/decreases the number of nodes across
>> reboots. Should the kernel be recompiled?
>>
> 
> That is why the proposal was made to make this a static configuration 
> option, such as CONFIG_NUM_RECLAIM_THREADS_PER_NODE, that will handle both 
> situations.
> 

You mentioned CONFIG_NUM_RECLAIM_THREADS_PER_CPU and not
CONFIG_NUM_RECLAIM_THREADS_PER_NODE. The advantage with sysctls is that even if
we get the thing wrong, the system administrator has an alternative. Please look
through the existing sysctl's and you'll see what I mean. What is wrong with
providing the flexibility that comes with sysctl? We cannot possibly think of
all situations and come up with the right answer for a heuristic. Why not come
up with a default and let everyone use what works for them?


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  9:01                             ` Balbir Singh
@ 2008-02-27  9:44                               ` Peter Zijlstra
  0 siblings, 0 replies; 26+ messages in thread
From: Peter Zijlstra @ 2008-02-27  9:44 UTC (permalink / raw)
  To: balbir
  Cc: David Rientjes, KOSAKI Motohiro, KAMEZAWA Hiroyuki, linux-kernel,
	linux-mm, Rik van Riel, Lee Schermerhorn, Nick Piggin


On Wed, 2008-02-27 at 14:31 +0530, Balbir Singh wrote:

> You mentioned CONFIG_NUM_RECLAIM_THREADS_PER_CPU and not
> CONFIG_NUM_RECLAIM_THREADS_PER_NODE. The advantage with sysctls is that even if
> we get the thing wrong, the system administrator has an alternative. Please look
> through the existing sysctl's and you'll see what I mean. What is wrong with
> providing the flexibility that comes with sysctl? We cannot possibly think of
> all situations and come up with the right answer for a heuristic. Why not come
> up with a default and let everyone use what works for them?

I agree with Balbir, just turn it into a sysctl, it's easy enough to do,
and those who need it will thank you for it instead of curse you for
hard coding it.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC][PATCH] page reclaim throttle take2
  2008-02-27  7:19                           ` David Rientjes
  2008-02-27  7:51                             ` KAMEZAWA Hiroyuki
@ 2008-02-27 15:30                             ` Rik van Riel
  1 sibling, 0 replies; 26+ messages in thread
From: Rik van Riel @ 2008-02-27 15:30 UTC (permalink / raw)
  To: David Rientjes
  Cc: KOSAKI Motohiro, Balbir Singh, KAMEZAWA Hiroyuki, Peter Zijlstra,
	linux-kernel, linux-mm, Lee Schermerhorn, Nick Piggin

On Tue, 26 Feb 2008 23:19:08 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:

> My suggestion is merely to make the number of concurrent page reclaim 
> threads be a function of how many online cpus there are.

The more CPUs there are, the more lock contention you want?

Somehow that seems backwards :)

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-02-27 15:33 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-02-26  2:32 [RFC][PATCH] page reclaim throttle take2 KOSAKI Motohiro
2008-02-26 21:18 ` Peter Zijlstra
2008-02-27  0:50   ` KAMEZAWA Hiroyuki
2008-02-27  4:26   ` KOSAKI Motohiro
2008-02-27  4:27     ` Balbir Singh
2008-02-27  4:45       ` KOSAKI Motohiro
2008-02-27  5:00         ` KAMEZAWA Hiroyuki
2008-02-27  5:04           ` KOSAKI Motohiro
2008-02-27  5:03             ` Balbir Singh
2008-02-27  5:13               ` KOSAKI Motohiro
2008-02-27  5:19               ` David Rientjes
2008-02-27  5:33                 ` KOSAKI Motohiro
2008-02-27  5:47                   ` David Rientjes
2008-02-27  5:48                     ` Balbir Singh
2008-02-27  6:09                       ` David Rientjes
2008-02-27  7:10                         ` KOSAKI Motohiro
2008-02-27  7:19                           ` David Rientjes
2008-02-27  7:51                             ` KAMEZAWA Hiroyuki
2008-02-27  7:56                               ` David Rientjes
2008-02-27  8:09                                 ` KAMEZAWA Hiroyuki
2008-02-27 15:30                             ` Rik van Riel
2008-02-27  7:59                         ` Balbir Singh
2008-02-27  8:47                           ` David Rientjes
2008-02-27  9:01                             ` Balbir Singh
2008-02-27  9:44                               ` Peter Zijlstra
2008-02-27  6:52                       ` KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).