LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: "Roy Huang" <royhuang9@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: aubreylee@gmail.com, nickpiggin@yahoo.com.au, torvalds@osdl.org
Subject: [PATCH] Provide an interface to limit total page cache.
Date: Mon, 15 Jan 2007 17:39:46 +0800 [thread overview]
Message-ID: <afe668f90701150139q26e41720lf06d6ee445a917b0@mail.gmail.com> (raw)
This patch provides an interface to limit the total page cache via
/proc/sys/vm/pagecache_ratio. The default value is 90 percent. Any
feedback is appreciated.
-Roy
diff -urp a/include/linux/pagemap.h b/include/linux/pagemap.h
--- a/include/linux/pagemap.h 2006-11-30 05:57:37.000000000 +0800
+++ b/include/linux/pagemap.h 2007-01-15 17:03:09.000000000 +0800
@@ -12,6 +12,12 @@
#include <asm/uaccess.h>
#include <linux/gfp.h>
+extern int pagecache_ratio;
+extern long pagecache_limit;
+
+int pagecache_ratio_sysctl_handler(struct ctl_table *, int,
+ struct file *, void __user *, size_t *, loff_t *);
+
/*
* Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
* allocation mode flags.
diff -urp a/include/linux/sysctl.h b/include/linux/sysctl.h
--- a/include/linux/sysctl.h 2007-01-15 17:18:46.000000000 +0800
+++ b/include/linux/sysctl.h 2007-01-15 17:03:09.000000000 +0800
@@ -202,6 +202,7 @@ enum
VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
+ VM_PAGECACHE_RATIO=36, /* Percent memory is used as page cache */
};
diff -urp a/kernel/sysctl.c b/kernel/sysctl.c
--- a/kernel/sysctl.c 2007-01-15 17:18:46.000000000 +0800
+++ b/kernel/sysctl.c 2007-01-15 17:03:09.000000000 +0800
@@ -1035,6 +1035,15 @@ static ctl_table vm_table[] = {
.extra1 = &zero,
},
#endif
+ {
+ .ctl_name = VM_PAGECACHE_RATIO,
+ .procname = "pagecache_ratio",
+ .data = &pagecache_ratio,
+ .maxlen = sizeof(pagecache_ratio),
+ .mode = 0644,
+ .proc_handler = &pagecache_ratio_sysctl_handler,
+ .strategy = &sysctl_intvec,
+ },
{ .ctl_name = 0 }
};
diff -urp a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c 2007-01-15 17:18:46.000000000 +0800
+++ b/mm/filemap.c 2007-01-15 17:03:09.000000000 +0800
@@ -30,6 +30,7 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/cpuset.h>
+#include <linux/sysctl.h>
#include "filemap.h"
#include "internal.h"
@@ -108,6 +109,48 @@ generic_file_direct_IO(int rw, struct ki
*/
/*
+ * Start releasing page cache (via kswapd) once it exceeds this percentage.
+ */
+int pagecache_ratio __read_mostly = 90;
+
+long pagecache_limit = 0;
+
+int setup_pagecache_limit(void)
+{
+ pagecache_limit = pagecache_ratio * nr_free_pagecache_pages() / 100;
+ return 0;
+}
+
+int pagecache_ratio_sysctl_handler(ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+ proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+ setup_pagecache_limit();
+ return 0;
+}
+
+static inline int balance_pagecache(void)
+{
+ if (global_page_state(NR_FILE_PAGES) > pagecache_limit) {
+ int nid, j;
+ pg_data_t *pgdat;
+ struct zone *zone;
+
+ for_each_online_node(nid) {
+ pgdat = NODE_DATA(nid);
+ for (j = 0; j < MAX_NR_ZONES; j++) {
+ zone = pgdat->node_zones + j;
+ wakeup_kswapd(zone, 0);
+ }
+ }
+ }
+
+ return 0;
+}
+
+module_init(setup_pagecache_limit)
+
+/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
* is safe. The caller must hold a write_lock on the mapping's tree_lock.
@@ -1085,6 +1128,8 @@ out:
page_cache_release(cached_page);
if (filp)
file_accessed(filp);
+
+ balance_pagecache();
}
EXPORT_SYMBOL(do_generic_mapping_read);
@@ -2212,6 +2257,8 @@ zero_length_segment:
status = filemap_write_and_wait(mapping);
pagevec_lru_add(&lru_pvec);
+ balance_pagecache();
+
return written ? written : status;
}
EXPORT_SYMBOL(generic_file_buffered_write);
diff -urp a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c 2007-01-15 17:18:46.000000000 +0800
+++ b/mm/vmscan.c 2007-01-15 17:03:09.000000000 +0800
@@ -1316,6 +1316,7 @@ static int kswapd(void *p)
order = 0;
for ( ; ; ) {
unsigned long new_order;
+ long over_limit;
try_to_freeze();
@@ -1335,6 +1336,9 @@ static int kswapd(void *p)
finish_wait(&pgdat->kswapd_wait, &wait);
balance_pgdat(pgdat, order);
+ over_limit = global_page_state(NR_FILE_PAGES) - pagecache_limit;
+ if (over_limit > 0)
+ shrink_all_memory(over_limit);
}
return 0;
}
@@ -1350,8 +1354,10 @@ void wakeup_kswapd(struct zone *zone, in
return;
pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
- return;
+ if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) {
+ if (global_page_state(NR_FILE_PAGES) < pagecache_limit)
+ return;
+ }
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1361,7 +1367,6 @@ void wakeup_kswapd(struct zone *zone, in
wake_up_interruptible(&pgdat->kswapd_wait);
}
-#ifdef CONFIG_PM
/*
* Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
* from LRU lists system-wide, for given pass and priority, and returns the
@@ -1510,7 +1515,6 @@ out:
return ret;
}
-#endif
/* It's optimal to keep kswapds on the same CPUs as their memory, but
not required for correctness. So if the last cpu in a node goes
next reply other threads:[~2007-01-15 9:39 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-15 9:39 Roy Huang [this message]
2007-01-15 11:01 ` Balbir Singh
2007-01-16 2:34 ` Roy Huang
2007-01-16 9:57 ` Balbir Singh
2007-01-15 11:57 ` Vaidyanathan Srinivasan
2007-01-16 2:40 ` Roy Huang
2007-01-17 14:55 ` Vaidyanathan Srinivasan
2007-01-18 7:56 ` Eric W. Biederman
2007-01-18 14:00 ` Pavel Machek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=afe668f90701150139q26e41720lf06d6ee445a917b0@mail.gmail.com \
--to=royhuang9@gmail.com \
--cc=aubreylee@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=nickpiggin@yahoo.com.au \
--cc=torvalds@osdl.org \
--subject='Re: [PATCH] Provide an interface to limit total page cache.' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).