LKML Archive on lore.kernel.org help / color / mirror / Atom feed
* [PATCH] Provide an interface to limit total page cache. @ 2007-01-15 9:39 Roy Huang 2007-01-15 11:01 ` Balbir Singh ` (3 more replies) 0 siblings, 4 replies; 9+ messages in thread From: Roy Huang @ 2007-01-15 9:39 UTC (permalink / raw) To: linux-kernel; +Cc: aubreylee, nickpiggin, torvalds A patch provide a interface to limit total page cache in /proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any feedback is appreciated. -Roy diff -urp a/include/linux/pagemap.h b/include/linux/pagemap.h --- a/include/linux/pagemap.h 2006-11-30 05:57:37.000000000 +0800 +++ b/include/linux/pagemap.h 2007-01-15 17:03:09.000000000 +0800 @@ -12,6 +12,12 @@ #include <asm/uaccess.h> #include <linux/gfp.h> +extern int pagecache_ratio; +extern long pagecache_limit; + +int pagecache_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); + /* * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page * allocation mode flags. diff -urp a/include/linux/sysctl.h b/include/linux/sysctl.h --- a/include/linux/sysctl.h 2007-01-15 17:18:46.000000000 +0800 +++ b/include/linux/sysctl.h 2007-01-15 17:03:09.000000000 +0800 @@ -202,6 +202,7 @@ enum VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ + VM_PAGECACHE_RATIO=36, /* Percent memory is used as page cache */ }; diff -urp a/kernel/sysctl.c b/kernel/sysctl.c --- a/kernel/sysctl.c 2007-01-15 17:18:46.000000000 +0800 +++ b/kernel/sysctl.c 2007-01-15 17:03:09.000000000 +0800 @@ -1035,6 +1035,15 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif + { + .ctl_name = VM_PAGECACHE_RATIO, + .procname = "pagecache_ratio", + .data = &pagecache_ratio, + .maxlen = sizeof(pagecache_ratio), + .mode = 0644, + .proc_handler = &pagecache_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + }, { .ctl_name = 0 } }; diff -urp a/mm/filemap.c b/mm/filemap.c --- a/mm/filemap.c 2007-01-15 17:18:46.000000000 +0800 +++ b/mm/filemap.c 2007-01-15 17:03:09.000000000 +0800 @@ -30,6 +30,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/cpuset.h> +#include <linux/sysctl.h> #include "filemap.h" #include "internal.h" @@ -108,6 +109,48 @@ generic_file_direct_IO(int rw, struct ki */ /* + * Start release pagecache (via kswapd) at the percentage. + */ +int pagecache_ratio __read_mostly = 90; + +long pagecache_limit = 0; + +int setup_pagecache_limit(void) +{ + pagecache_limit = pagecache_ratio * nr_free_pagecache_pages() / 100; + return 0; +} + +int pagecache_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec_minmax(table, write, file, buffer, length, ppos); + setup_pagecache_limit(); + return 0; +} + +static inline int balance_pagecache(void) +{ + if (global_page_state(NR_FILE_PAGES) > pagecache_limit) { + int nid, j; + pg_data_t *pgdat; + struct zone *zone; + + for_each_online_node(nid) { + pgdat = NODE_DATA(nid); + for (j = 0; j < MAX_NR_ZONES; j++) { + zone = pgdat->node_zones + j; + wakeup_kswapd(zone, 0); + } + } + } + + return 0; +} + +module_init(setup_pagecache_limit) + +/* * Remove a page from the page cache and free it. 
Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold a write_lock on the mapping's tree_lock. @@ -1085,6 +1128,8 @@ out: page_cache_release(cached_page); if (filp) file_accessed(filp); + + balance_pagecache(); } EXPORT_SYMBOL(do_generic_mapping_read); @@ -2212,6 +2257,8 @@ zero_length_segment: status = filemap_write_and_wait(mapping); pagevec_lru_add(&lru_pvec); + balance_pagecache(); + return written ? written : status; } EXPORT_SYMBOL(generic_file_buffered_write); diff -urp a/mm/vmscan.c b/mm/vmscan.c --- a/mm/vmscan.c 2007-01-15 17:18:46.000000000 +0800 +++ b/mm/vmscan.c 2007-01-15 17:03:09.000000000 +0800 @@ -1316,6 +1316,7 @@ static int kswapd(void *p) order = 0; for ( ; ; ) { unsigned long new_order; + long over_limit; try_to_freeze(); @@ -1335,6 +1336,9 @@ static int kswapd(void *p) finish_wait(&pgdat->kswapd_wait, &wait); balance_pgdat(pgdat, order); + over_limit = global_page_state(NR_FILE_PAGES) - pagecache_limit; + if (over_limit > 0) + shrink_all_memory(over_limit); } return 0; } @@ -1350,8 +1354,10 @@ void wakeup_kswapd(struct zone *zone, in return; pgdat = zone->zone_pgdat; - if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) - return; + if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) { + if (global_page_state(NR_FILE_PAGES) < pagecache_limit) + return; + } if (pgdat->kswapd_max_order < order) pgdat->kswapd_max_order = order; if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) @@ -1361,7 +1367,6 @@ void wakeup_kswapd(struct zone *zone, in wake_up_interruptible(&pgdat->kswapd_wait); } -#ifdef CONFIG_PM /* * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages * from LRU lists system-wide, for given pass and priority, and returns the @@ -1510,7 +1515,6 @@ out: return ret; } -#endif /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. 
So if the last cpu in a node goes ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 9:39 [PATCH] Provide an interface to limit total page cache Roy Huang @ 2007-01-15 11:01 ` Balbir Singh 2007-01-16 2:34 ` Roy Huang 2007-01-15 11:57 ` Vaidyanathan Srinivasan ` (2 subsequent siblings) 3 siblings, 1 reply; 9+ messages in thread From: Balbir Singh @ 2007-01-15 11:01 UTC (permalink / raw) To: Roy Huang; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds On 1/15/07, Roy Huang <royhuang9@gmail.com> wrote: > A patch provide a interface to limit total page cache in > /proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any > feedback is appreciated. > [snip] wakeup_kswapd and shrink_all_memory use swappiness to determine what to reclaim (mapped pages or page cache). This patch does not ensure that only page cache is reclaimed/limited. If the swappiness value is high, mapped pages will be hit. One could get similar functionality by implementing resource management. Resource management splits tasks into groups and does management of resources for the groups rather than the whole system. Such a facility will come with a resource controller for memory (split into finer grain rss/page cache/mlock'ed memory, etc), one for cpu, etc. Balbir ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 11:01 ` Balbir Singh @ 2007-01-16 2:34 ` Roy Huang 2007-01-16 9:57 ` Balbir Singh 0 siblings, 1 reply; 9+ messages in thread From: Roy Huang @ 2007-01-16 2:34 UTC (permalink / raw) To: balbir; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds Hi Balbir, Thanks for your comment. On 1/15/07, Balbir Singh <balbir@in.ibm.com> wrote: > wakeup_kswapd and shrink_all_memory use swappiness to determine what to reclaim > (mapped pages or page cache). This patch does not ensure that only > page cache is > reclaimed/limited. If the swappiness value is high, mapped pages will be hit. > You are right, it is possible to release mapped pages. It can be avoided by adding a field in "struct scan_control" to determine whether mapped pages will be released. > One could get similar functionality by implementing resource management. > > Resource management splits tasks into groups and does management of > resources for the > groups rather than the whole system. Such a facility will come with a > resource controller for > memory (split into finer grain rss/page cache/mlock'ed memory, etc), > one for cpu, etc. Is there any more detailed information about the resource controller? Even if there is a resource controller for tasks, it is still possible for all memory to be eaten up by the page cache. > > Balbir > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-16 2:34 ` Roy Huang @ 2007-01-16 9:57 ` Balbir Singh 0 siblings, 0 replies; 9+ messages in thread From: Balbir Singh @ 2007-01-16 9:57 UTC (permalink / raw) To: Roy Huang; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds Roy Huang wrote: > Hi Balbir, > > Thanks for your comment. > > On 1/15/07, Balbir Singh <balbir@in.ibm.com> wrote: > >> wakeup_kswapd and shrink_all_memory use swappiness to determine what to reclaim >> (mapped pages or page cache). This patch does not ensure that only >> page cache is >> reclaimed/limited. If the swappiness value is high, mapped pages will be hit. >> > You are right, it is possible to release mapped pages. It can be > avoided by add a field in "struct scan_control" to determine whether > mapped pages will be released. > Yes that could be done. I have been trying to figure out if there is a good reason why the LRU is common for both mapped and pagecache. Does it make sense to split them up? I am still digging through lkml archives to see if I can find something. >> One could get similar functionality by implementing resource management. >> >> Resource management splits tasks into groups and does management of >> resources for the >> groups rather than the whole system. Such a facility will come with a >> resource controller for >> memory (split into finer grain rss/page cache/mlock'ed memory, etc), >> one for cpu, etc. > I s there any more information in detail about resource controller? > Even there is a resource controller for tasks, all memory is also > possbile to be eaten up by page cache. Yes, please see the discussions on lkml on resource management, ckrm, beancounters and containers. http://lwn.net/Articles/206697/ RFC for memory controller, might be a good starting point -- Balbir Singh, Linux Technology Center, IBM Software Labs ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 9:39 [PATCH] Provide an interface to limit total page cache Roy Huang 2007-01-15 11:01 ` Balbir Singh @ 2007-01-15 11:57 ` Vaidyanathan Srinivasan 2007-01-16 2:40 ` Roy Huang 2007-01-18 7:56 ` Eric W. Biederman 2007-01-18 14:00 ` Pavel Machek 3 siblings, 1 reply; 9+ messages in thread From: Vaidyanathan Srinivasan @ 2007-01-15 11:57 UTC (permalink / raw) To: Roy Huang; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds Roy Huang wrote: > A patch provide a interface to limit total page cache in > /proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any > feedback is appreciated. [snip] I tried to run your patch on PPC64 SMP machine, unfortunately kswapd crashes the kernel when the pagecache limit is exceeded! ->dd if=/dev/zero of=/tmp/foo bs=1M count=1200 cpu 0x0: Vector: 300 (Data Access) at [c0000000012d7ad0] pc: c0000000000976ac: .kswapd+0x3a4/0x4f0 lr: c0000000000976ac: .kswapd+0x3a4/0x4f0 sp: c0000000012d7d50 msr: 8000000000009032 dar: 0 dsisr: 42000000 current = 0xc00000000fed7040 paca = 0xc00000000063fb80 pid = 134, comm = kswapd0 ------------[ cut here ]------------ enter ? 
for help [c0000000012d7ee0] c000000000069150 .kthread+0x124/0x174 [c0000000012d7f90] c0000000000247b4 .kernel_thread+0x4c/0x68 0:mon> Steps to recreate fail: # sync # echo 1 > /proc/sys/vm/drop_caches MemTotal: 1014584 kB MemFree: 905536 kB Buffers: 3232 kB Cached: 57628 kB SwapCached: 0 kB Active: 47664 kB Inactive: 33160 kB SwapTotal: 1526164 kB SwapFree: 1526164 kB Dirty: 108 kB Writeback: 0 kB AnonPages: 19976 kB Mapped: 15084 kB Slab: 19724 kB SReclaimable: 8536 kB SUnreclaim: 11188 kB PageTables: 972 kB NFS_Unstable: 0 kB Bounce: 0 kB CommitLimit: 2033456 kB Committed_AS: 87884 kB VmallocTotal: 8589934592 kB VmallocUsed: 2440 kB VmallocChunk: 8589932152 kB HugePages_Total: 0 HugePages_Free: 0 HugePages_Rsvd: 0 Hugepagesize: 16384 kB # echo 50 > /proc/sys/vm/pagecache_ratio # dd if=/dev/zero of=/tmp/foo bs=1M count=1200 Basically fill pagecache with overlimit dirty file pages and check if the reclaim happened and the limit was not exceeded. --Vaidy ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 11:57 ` Vaidyanathan Srinivasan @ 2007-01-16 2:40 ` Roy Huang 2007-01-17 14:55 ` Vaidyanathan Srinivasan 0 siblings, 1 reply; 9+ messages in thread From: Roy Huang @ 2007-01-16 2:40 UTC (permalink / raw) To: Vaidyanathan Srinivasan; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds The possible cause is a bug in kswapd thread, or shrink_all_memory cannot be called in kswapd thread. On 1/15/07, Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote: > > Roy Huang wrote: > > A patch provide a interface to limit total page cache in > > /proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any > > feedback is appreciated. > > [snip] > > I tried to run your patch on PPC64 SMP machine, unfortunately kswapd > crashes the kernel when the pagecache limit is exceeded! > > ->dd if=/dev/zero of=/tmp/foo bs=1M count=1200 > cpu 0x0: Vector: 300 (Data Access) at [c0000000012d7ad0] > pc: c0000000000976ac: .kswapd+0x3a4/0x4f0 > lr: c0000000000976ac: .kswapd+0x3a4/0x4f0 > sp: c0000000012d7d50 > msr: 8000000000009032 > dar: 0 > dsisr: 42000000 > current = 0xc00000000fed7040 > paca = 0xc00000000063fb80 > pid = 134, comm = kswapd0 > ------------[ cut here ]------------ > enter ? 
for help > [c0000000012d7ee0] c000000000069150 .kthread+0x124/0x174 > [c0000000012d7f90] c0000000000247b4 .kernel_thread+0x4c/0x68 > 0:mon> > > Steps to recreate fail: > > # sync > # echo 1 > /proc/sys/vm/drop_caches > MemTotal: 1014584 kB > MemFree: 905536 kB > Buffers: 3232 kB > Cached: 57628 kB > SwapCached: 0 kB > Active: 47664 kB > Inactive: 33160 kB > SwapTotal: 1526164 kB > SwapFree: 1526164 kB > Dirty: 108 kB > Writeback: 0 kB > AnonPages: 19976 kB > Mapped: 15084 kB > Slab: 19724 kB > SReclaimable: 8536 kB > SUnreclaim: 11188 kB > PageTables: 972 kB > NFS_Unstable: 0 kB > Bounce: 0 kB > CommitLimit: 2033456 kB > Committed_AS: 87884 kB > VmallocTotal: 8589934592 kB > VmallocUsed: 2440 kB > VmallocChunk: 8589932152 kB > HugePages_Total: 0 > HugePages_Free: 0 > HugePages_Rsvd: 0 > Hugepagesize: 16384 kB > > # echo 50 > /proc/sys/vm/pagecache_ratio > # dd if=/dev/zero of=/tmp/foo bs=1M count=1200 > > Basically fill pagecache with overlimit dirty file pages and check > if the reclaim happened and the limit was not exceeded. > > --Vaidy > > > > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-16 2:40 ` Roy Huang @ 2007-01-17 14:55 ` Vaidyanathan Srinivasan 0 siblings, 0 replies; 9+ messages in thread From: Vaidyanathan Srinivasan @ 2007-01-17 14:55 UTC (permalink / raw) To: Roy Huang Cc: Vaidyanathan Srinivasan, linux-kernel, aubreylee, nickpiggin, torvalds Hi Roy, I have added a different pagecache reclaim logic around your sysctl interface. This would ensure that only pagecache pages are reclaimed if the limit is exceeded. --Vaidy Pagecache pages in memory can be limited to a percentage of total RAM using this patch. New sysctl entry /proc/sys/vm/pagecache_ratio has been added that holds the total percentage of RAM that the user wants as pagecache. The default percentage is 90. Depending on the work load, any percentage value can be set to derive optimum overall performance. Minimum is 5 and max is 100. balance_pagecache() routine is called on file backed access and the current pagecache_limit is checked against utilisation. If the limit is exceeded, then shrink_all_pagecache_memory() is called that will walk the LRU list and remove unmapped pagecache pages. New scancontrol fields have been added to make decisions in shrink_page_list() and shrink_active_list(). Pages counted under pagecache limit are file pages that are not mapped. Shared memory is mapped and not counted in the limit. Test: echo 40 > /proc/sys/vm/pagecache_ratio (that is around 400MB on a 1GB RAM machine) dd if=/dev/zero of=/tmp/foo bs=1M count=1024 cat /proc/meminfo The "Cached: xxx" count should hit the set limit and not consume all available memory. Any feedback is appreciated. 
Signed-off-by: Roy Huang <royhuang9@gmail.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> --- include/linux/pagemap.h | 6 +++ include/linux/sysctl.h | 1 kernel/sysctl.c | 9 +++++ mm/filemap.c | 65 +++++++++++++++++++++++++++++++++++++++ mm/vmscan.c | 79 +++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 156 insertions(+), 4 deletions(-) --- linux-2.6.20-rc5.orig/include/linux/pagemap.h +++ linux-2.6.20-rc5/include/linux/pagemap.h @@ -12,6 +12,12 @@ #include <asm/uaccess.h> #include <linux/gfp.h> +extern int pagecache_ratio; +extern unsigned int pagecache_limit; + +extern int pagecache_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); + /* * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page * allocation mode flags. --- linux-2.6.20-rc5.orig/include/linux/sysctl.h +++ linux-2.6.20-rc5/include/linux/sysctl.h @@ -202,6 +202,7 @@ enum VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ + VM_PAGECACHE_RATIO=36, /* Percent memory is used as page cache */ }; --- linux-2.6.20-rc5.orig/kernel/sysctl.c +++ linux-2.6.20-rc5/kernel/sysctl.c @@ -1035,6 +1035,15 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif + { + .ctl_name = VM_PAGECACHE_RATIO, + .procname = "pagecache_ratio", + .data = &pagecache_ratio, + .maxlen = sizeof(pagecache_ratio), + .mode = 0644, + .proc_handler = &pagecache_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + }, { .ctl_name = 0 } }; --- linux-2.6.20-rc5.orig/mm/filemap.c +++ linux-2.6.20-rc5/mm/filemap.c @@ -30,6 +30,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/cpuset.h> +#include <linux/sysctl.h> #include "filemap.h" #include "internal.h" @@ -108,6 +109,66 @@ generic_file_direct_IO(int rw, struct ki */ /* + * Start release pagecache (via kswapd) at the percentage. 
+ */ +int pagecache_ratio __read_mostly = 90; + +unsigned int pagecache_limit = 0; + +#define PAGECACHE_RECLAIM_THRESHOLD 64 /* Call reclaim after exceeding + the limit by this threshold */ + +int setup_pagecache_limit(void) +{ + if (pagecache_ratio > 100) + pagecache_ratio = 100; + if (pagecache_ratio < 5) + pagecache_ratio = 5; + pagecache_limit = pagecache_ratio * nr_free_pagecache_pages() / 100; + return 0; +} + +int pagecache_ratio_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec_minmax(table, write, file, buffer, length, ppos); + setup_pagecache_limit(); + return 0; +} + +extern unsigned long shrink_all_pagecache_memory(unsigned long nr_pages); + +int check_pagecache_overlimit(void) +{ + unsigned long current_pagecache; + int nr_pages = 0; + + current_pagecache = global_page_state(NR_FILE_PAGES) - + global_page_state(NR_FILE_MAPPED); + /* NR_FILE_PAGES includes shared memory, swap cache and + * buffers. Hence exclude NR_FILE_MAPPED, since we would + * not reclaim mapped pages. Unmapped pagecache pages + * is what we really want to target */ + if ( pagecache_limit && current_pagecache > pagecache_limit) + nr_pages = current_pagecache - pagecache_limit; + + return nr_pages; +} + +static inline int balance_pagecache(void) +{ + unsigned long nr_pages; + unsigned long ret; + nr_pages = check_pagecache_overlimit(); + /* Don't call reclaim for each page */ + if (nr_pages > PAGECACHE_RECLAIM_THRESHOLD) + ret = shrink_all_pagecache_memory(nr_pages); + return 0; +} + +__initcall(setup_pagecache_limit); + +/* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold a write_lock on the mapping's tree_lock. 
@@ -1085,6 +1146,8 @@ out: page_cache_release(cached_page); if (filp) file_accessed(filp); + + balance_pagecache(); } EXPORT_SYMBOL(do_generic_mapping_read); @@ -2212,6 +2275,8 @@ zero_length_segment: status = filemap_write_and_wait(mapping); pagevec_lru_add(&lru_pvec); + balance_pagecache(); + return written ? written : status; } EXPORT_SYMBOL(generic_file_buffered_write); --- linux-2.6.20-rc5.orig/mm/vmscan.c +++ linux-2.6.20-rc5/mm/vmscan.c @@ -45,6 +45,9 @@ #include "internal.h" + +extern int check_pagecache_overlimit(void); + struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; @@ -66,6 +69,10 @@ struct scan_control { int swappiness; int all_unreclaimable; + + int reclaim_pagecache_only; /* Set when called from + pagecache controller */ + }; /* @@ -470,7 +477,15 @@ static unsigned long shrink_page_list(st goto keep; VM_BUG_ON(PageActive(page)); - + /* Take it easy if we are doing only pagecache pages */ + if (sc->reclaim_pagecache_only) { + /* Check if this is a pagecache page they are not mapped */ + if (page_mapped(page)) + goto keep_locked; + /* Check if pagecache limit is exceeded */ + if (!check_pagecache_overlimit()) + goto keep_locked; + } sc->nr_scanned++; if (!sc->may_swap && page_mapped(page)) @@ -518,7 +533,8 @@ static unsigned long shrink_page_list(st } if (PageDirty(page)) { - if (referenced) + /* Reclaim even referenced pagecache pages if over limit */ + if (!check_pagecache_overlimit() && referenced) goto keep_locked; if (!may_enter_fs) goto keep_locked; @@ -832,6 +848,14 @@ force_reclaim_mapped: cond_resched(); page = lru_to_page(&l_hold); list_del(&page->lru); + /* While reclaiming pagecache make it easy */ + if (sc->reclaim_pagecache_only) { + if (page_mapped(page) || !check_pagecache_overlimit()) { + list_add(&page->lru, &l_active); + continue; + } + } + if (page_mapped(page)) { if (!reclaim_mapped || (total_swap_pages == 0 && PageAnon(page)) || @@ -1027,6 +1051,7 @@ unsigned 
long try_to_free_pages(struct z .swap_cluster_max = SWAP_CLUSTER_MAX, .may_swap = 1, .swappiness = vm_swappiness, + .reclaim_pagecache_only = 0, }; count_vm_event(ALLOCSTALL); @@ -1131,6 +1156,7 @@ static unsigned long balance_pgdat(pg_da .may_swap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = vm_swappiness, + .reclaim_pagecache_only = 0, }; /* * temp_priority is used to remember the scanning priority at which @@ -1361,7 +1387,6 @@ void wakeup_kswapd(struct zone *zone, in wake_up_interruptible(&pgdat->kswapd_wait); } -#ifdef CONFIG_PM /* * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages * from LRU lists system-wide, for given pass and priority, and returns the @@ -1436,6 +1461,7 @@ unsigned long shrink_all_memory(unsigned .swap_cluster_max = nr_pages, .may_writepage = 1, .swappiness = vm_swappiness, + .reclaim_pagecache_only = 0, }; current->reclaim_state = &reclaim_state; @@ -1510,7 +1536,52 @@ out: return ret; } -#endif + +unsigned long shrink_all_pagecache_memory(unsigned long nr_pages) +{ + unsigned long ret = 0; + int pass; + struct reclaim_state reclaim_state; + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .may_swap = 0, + .swap_cluster_max = nr_pages, + .may_writepage = 1, + .swappiness = 0, /* Do not swap, only pagecache reclaim */ + .reclaim_pagecache_only = 1, /* Flag it */ + }; + + current->reclaim_state = &reclaim_state; + + /* + * We try to shrink LRUs in 5 passes: + * 0 = Reclaim from inactive_list only + * 1 = Reclaim from active list but don't reclaim mapped + * 2 = 2nd pass of type 1 + * 3 = Reclaim mapped (normal reclaim) + * 4 = 2nd pass of type 3 + */ + for (pass = 0; pass < 5; pass++) { + int prio; + + for (prio = DEF_PRIORITY; prio >= 0; prio--) { + unsigned long nr_to_scan = nr_pages - ret; + sc.nr_scanned = 0; + ret += shrink_all_zones(nr_to_scan, prio, pass, &sc); + if (ret >= nr_pages) + goto out; + + if (sc.nr_scanned && prio < DEF_PRIORITY - 2) + congestion_wait(WRITE, HZ / 10); + } + } + + 
+out: + current->reclaim_state = NULL; + + return ret; +} /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 9:39 [PATCH] Provide an interface to limit total page cache Roy Huang 2007-01-15 11:01 ` Balbir Singh 2007-01-15 11:57 ` Vaidyanathan Srinivasan @ 2007-01-18 7:56 ` Eric W. Biederman 2007-01-18 14:00 ` Pavel Machek 3 siblings, 0 replies; 9+ messages in thread From: Eric W. Biederman @ 2007-01-18 7:56 UTC (permalink / raw) To: Roy Huang; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds "Roy Huang" <royhuang9@gmail.com> writes: > A patch provide a interface to limit total page cache in > /proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any > feedback is appreciated. Anything except a default value of 100% will change the behavior and probably reduce the performance on most systems. > -Roy > > diff -urp a/include/linux/sysctl.h b/include/linux/sysctl.h > --- a/include/linux/sysctl.h 2007-01-15 17:18:46.000000000 +0800 > +++ b/include/linux/sysctl.h 2007-01-15 17:03:09.000000000 +0800 > @@ -202,6 +202,7 @@ enum > VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ > VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ > VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ > + VM_PAGECACHE_RATIO=36, /* Percent memory is used as page cache */ > }; > > > diff -urp a/kernel/sysctl.c b/kernel/sysctl.c > --- a/kernel/sysctl.c 2007-01-15 17:18:46.000000000 +0800 > +++ b/kernel/sysctl.c 2007-01-15 17:03:09.000000000 +0800 > @@ -1035,6 +1035,15 @@ static ctl_table vm_table[] = { > .extra1 = &zero, > }, > #endif > + { > + .ctl_name = VM_PAGECACHE_RATIO, > + .procname = "pagecache_ratio", > + .data = &pagecache_ratio, > + .maxlen = sizeof(pagecache_ratio), > + .mode = 0644, > + .proc_handler = &pagecache_ratio_sysctl_handler, > + .strategy = &sysctl_intvec, > + }, > { .ctl_name = 0 } > }; This is broken. You have allocated a binary number for use with sys_sysctl but did not test it. 
If you need a special proc_handler to take action when the value is changed you need a special strategy routine. So since you aren't going to test the binary interface and don't care about it please don't allocate a number for it and just use CTL_UNNUMBERED. And of course please read the top of linux/sysctl.h Thank you. Eric ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Provide an interface to limit total page cache. 2007-01-15 9:39 [PATCH] Provide an interface to limit total page cache Roy Huang ` (2 preceding siblings ...) 2007-01-18 7:56 ` Eric W. Biederman @ 2007-01-18 14:00 ` Pavel Machek 3 siblings, 0 replies; 9+ messages in thread From: Pavel Machek @ 2007-01-18 14:00 UTC (permalink / raw) To: Roy Huang; +Cc: linux-kernel, aubreylee, nickpiggin, torvalds Hi! > A patch provide a interface to limit total page cache in > /proc/sys/vm/pagecache_ratio. The default value is 90 > percent. Any > feedback is appreciated. Are you sure a percentage is the right thing to use? 1% of a 200GB machine is 2GB... the granularity seems too coarse here. KB? Parts per million? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2007-01-18 14:00 UTC | newest] Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2007-01-15 9:39 [PATCH] Provide an interface to limit total page cache Roy Huang 2007-01-15 11:01 ` Balbir Singh 2007-01-16 2:34 ` Roy Huang 2007-01-16 9:57 ` Balbir Singh 2007-01-15 11:57 ` Vaidyanathan Srinivasan 2007-01-16 2:40 ` Roy Huang 2007-01-17 14:55 ` Vaidyanathan Srinivasan 2007-01-18 7:56 ` Eric W. Biederman 2007-01-18 14:00 ` Pavel Machek
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).