LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [RFC][PATCH 0/5] mem notifications v4
@ 2008-01-15  0:52 KOSAKI Motohiro
  2008-01-15  0:59 ` [RFC][PATCH 1/5] introduce poll_wait_exclusive() new API KOSAKI Motohiro
                   ` (4 more replies)
  0 siblings, 5 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  0:52 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton

Hi!

/dev/mem_notify is a low-memory notification device.
It can avoid swapping and OOM by cooperating with user processes.

You need not be annoyed by OOM any longer :)
Any comments are welcome!


related discussion:
--------------------------------------------------------------
  LKML OOM notifications requirement discussion
     http://www.gossamer-threads.com/lists/linux/kernel/832802?nohighlight=1#832802
  OOM notifications patch [Marcelo Tosatti]
     http://marc.info/?l=linux-kernel&m=119273914027743&w=2
  mem notifications v3 [Marcelo Tosatti]
     http://marc.info/?l=linux-mm&m=119852828327044&w=2
  Thrashing notification patch  [Daniel Spang]
     http://marc.info/?l=linux-mm&m=119427416315676&w=2


Changelog
-------------------------------------------------
  v3 -> v4 (by KOSAKI Motohiro)
    o rebase to 2.6.24-rc6-mm1
    o avoid wake up all.
    o add judgement point to __free_one_page().
    o add zone awareness.

  v2 -> v3 (by Marcelo Tosatti)
    o changes the notification point to happen whenever
      the VM moves an anonymous page to the inactive list.
    o implement notification rate limit.

  v1(oom notify) -> v2 (by Marcelo Tosatti)
    o name change
    o notify timing change from just swap thrashing to
      just before thrashing.
    o also works with swapless device.





^ permalink raw reply	[flat|nested] 40+ messages in thread

* [RFC][PATCH 1/5] introduce poll_wait_exclusive() new API 
  2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
@ 2008-01-15  0:59 ` KOSAKI Motohiro
  2008-01-15  1:00 ` [RFC][PATCH 2/5] introduce wake_up_locked_nr() " KOSAKI Motohiro
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  0:59 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton

There are two ways of adding an item to a wait queue:
  1. add_wait_queue()
  2. add_wait_queue_exclusive()
and add_wait_queue_exclusive() is a very useful API.

Unfortunately, there is no poll_wait_exclusive() counterpart to poll_wait().
This means there is no way to wake up only one of the processes waiting in poll:
wake_up() wakes up every process that went to sleep via poll_wait(), not just one.

This patch introduces a new poll_wait_exclusive() API that allows waking up only one process.

<example of usage>
/*
 * Example poll handler: queue the caller on kosaki_wait_queue through
 * poll_wait_exclusive(), then report the file as readable
 * (POLLIN | POLLRDNORM) when data_exist is set, or 0 otherwise.
 */
unsigned int kosaki_poll(struct file *file,
			 struct poll_table_struct *wait)
{
	unsigned int mask = 0;

	poll_wait_exclusive(file, &kosaki_wait_queue, wait);
	if (data_exist)
		mask = POLLIN | POLLRDNORM;

	return mask;
}


Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 fs/eventpoll.c       |    7 +++++--
 fs/select.c          |    9 ++++++---
 include/linux/poll.h |   11 +++++++++--
 3 files changed, 20 insertions(+), 7 deletions(-)



Index: linux-2.6.24-rc6-memnotify/fs/eventpoll.c
===================================================================
--- linux-2.6.24-rc6-memnotify.orig/fs/eventpoll.c	2007-12-30 02:08:58.000000000 +0900
+++ linux-2.6.24-rc6-memnotify/fs/eventpoll.c	2007-12-30 07:10:46.000000000 +0900
@@ -676,7 +676,7 @@ out_unlock:
  * target file wakeup lists.
  */
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
-				 poll_table *pt)
+				 poll_table *pt, int exclusive)
 {
 	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
@@ -685,7 +685,10 @@ static void ep_ptable_queue_proc(struct 
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
-		add_wait_queue(whead, &pwq->wait);
+		if (exclusive)
+			add_wait_queue_exclusive(whead, &pwq->wait);
+		else
+			add_wait_queue(whead, &pwq->wait);
 		list_add_tail(&pwq->llink, &epi->pwqlist);
 		epi->nwait++;
 	} else {
Index: linux-2.6.24-rc6-memnotify/fs/select.c
===================================================================
--- linux-2.6.24-rc6-memnotify.orig/fs/select.c	2007-12-30 02:09:00.000000000 +0900
+++ linux-2.6.24-rc6-memnotify/fs/select.c	2007-12-30 02:34:05.000000000 +0900
@@ -48,7 +48,7 @@ struct poll_table_page {
  * poll table.
  */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-		       poll_table *p);
+		       poll_table *p, int exclusive);
 
 void poll_initwait(struct poll_wqueues *pwq)
 {
@@ -117,7 +117,7 @@ static struct poll_table_entry *poll_get
 
 /* Add a new entry */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-				poll_table *p)
+		       poll_table *p, int exclusive)
 {
 	struct poll_table_entry *entry = poll_get_entry(p);
 	if (!entry)
@@ -126,7 +126,10 @@ static void __pollwait(struct file *filp
 	entry->filp = filp;
 	entry->wait_address = wait_address;
 	init_waitqueue_entry(&entry->wait, current);
-	add_wait_queue(wait_address, &entry->wait);
+	if (exclusive)
+		add_wait_queue_exclusive(wait_address, &entry->wait);
+	else
+		add_wait_queue(wait_address, &entry->wait);
 }
 
 #define FDS_IN(fds, n)		(fds->in + n)
Index: linux-2.6.24-rc6-memnotify/include/linux/poll.h
===================================================================
--- linux-2.6.24-rc6-memnotify.orig/include/linux/poll.h	2007-12-30 02:09:16.000000000 +0900
+++ linux-2.6.24-rc6-memnotify/include/linux/poll.h	2007-12-30 02:41:35.000000000 +0900
@@ -28,7 +28,8 @@ struct poll_table_struct;
 /* 
  * structures and helpers for f_op->poll implementations
  */
-typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
+typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *,
+				struct poll_table_struct *, int);
 
 typedef struct poll_table_struct {
 	poll_queue_proc qproc;
@@ -37,7 +38,13 @@ typedef struct poll_table_struct {
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
 	if (p && wait_address)
-		p->qproc(filp, wait_address, p);
+		p->qproc(filp, wait_address, p, 0);
+}
+
+static inline void poll_wait_exclusive(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
+{
+	if (p && wait_address)
+		p->qproc(filp, wait_address, p, 1);
 }
 
 static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)




^ permalink raw reply	[flat|nested] 40+ messages in thread

* [RFC][PATCH 2/5] introduce wake_up_locked_nr() new API
  2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
  2008-01-15  0:59 ` [RFC][PATCH 1/5] introduce poll_wait_exclusive() new API KOSAKI Motohiro
@ 2008-01-15  1:00 ` KOSAKI Motohiro
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:00 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton


Introduce the new APIs wake_up_locked_nr() and wake_up_locked_all().
They are similar to wake_up_nr() and wake_up_all(), but they do not take the
wait-queue lock themselves; the caller must already hold it.

Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 include/linux/wait.h |    7 +++++--
 kernel/sched.c       |    5 +++--
 2 files changed, 8 insertions(+), 4 deletions(-)

Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/wait.h
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/wait.h	2008-01-13 16:43:04.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/include/linux/wait.h	2008-01-13 16:52:21.000000000 +0900
@@ -142,7 +142,7 @@ static inline void __remove_wait_queue(w
 }
 
 void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
-extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
+void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
 void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int));
 int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned));
@@ -155,7 +155,10 @@ wait_queue_head_t *FASTCALL(bit_waitqueu
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
 #define wake_up_all(x)			__wake_up(x, TASK_NORMAL, 0, NULL)
-#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL)
+
+#define wake_up_locked(x)		__wake_up_locked((x), TASK_NORMAL, 1, NULL)
+#define wake_up_locked_nr(x, nr)	__wake_up_locked((x), TASK_NORMAL, nr, NULL)
+#define wake_up_locked_all(x)		__wake_up_locked((x), TASK_NORMAL, 0, NULL)
 
 #define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_interruptible_nr(x, nr)	__wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
Index: linux-2.6.24-rc6-mm1-memnotify/kernel/sched.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/kernel/sched.c	2008-01-13 16:42:22.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/kernel/sched.c	2008-01-13 16:53:28.000000000 +0900
@@ -3837,9 +3837,10 @@ EXPORT_SYMBOL(__wake_up);
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
-void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+void __wake_up_locked(wait_queue_head_t *q, unsigned int mode,
+		      int nr_exclusive, void *key)
 {
-	__wake_up_common(q, mode, 1, 0, NULL);
+	__wake_up_common(q, mode, nr_exclusive, 0, key);
 }
 
 /**



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
  2008-01-15  0:59 ` [RFC][PATCH 1/5] introduce poll_wait_exclusive() new API KOSAKI Motohiro
  2008-01-15  1:00 ` [RFC][PATCH 2/5] introduce wake_up_locked_nr() " KOSAKI Motohiro
@ 2008-01-15  1:01 ` KOSAKI Motohiro
  2008-01-15  1:08   ` Randy Dunlap
                     ` (3 more replies)
  2008-01-15  1:02 ` [RFC][PATCH 4/5] memory_pressure_notify() caller KOSAKI Motohiro
  2008-01-15  1:03 ` [RFC][PATCH 5/5] /proc/zoneinfo enhancement KOSAKI Motohiro
  4 siblings, 4 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:01 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton

This is the core of the patch series.
Add the /dev/mem_notify device, which notifies user processes of low memory.

<usage example>

        fd = open("/dev/mem_notify", O_RDONLY);
        if (fd < 0) {
                exit(1);
        }
        pollfds.fd = fd;
        pollfds.events = POLLIN;
        pollfds.revents = 0;
	err = poll(&pollfds, 1, -1); // wake up at low memory

        ...
</usage example>

Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 drivers/char/mem.c         |    6 ++
 include/linux/mem_notify.h |   41 ++++++++++++++++
 include/linux/mmzone.h     |    1 
 mm/Makefile                |    2 
 mm/mem_notify.c            |  109 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c            |    1 
 6 files changed, 159 insertions(+), 1 deletion(-)

Index: linux-2.6.24-rc6-mm1-memnotify/drivers/char/mem.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/drivers/char/mem.c	2008-01-13 16:56:54.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/drivers/char/mem.c	2008-01-13 16:57:10.000000000 +0900
@@ -34,6 +34,8 @@
 # include <linux/efi.h>
 #endif
 
+extern struct file_operations mem_notify_fops;
+
 /*
  * Architectures vary in how they handle caching for addresses
  * outside of main memory.
@@ -854,6 +856,9 @@ static int memory_open(struct inode * in
 			filp->f_op = &oldmem_fops;
 			break;
 #endif
+		case 13:
+			filp->f_op = &mem_notify_fops;
+			break;
 		default:
 			return -ENXIO;
 	}
@@ -886,6 +891,7 @@ static const struct {
 #ifdef CONFIG_CRASH_DUMP
 	{12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
 #endif
+	{13,"mem_notify", S_IRUGO, &mem_notify_fops},
 };
 
 static struct class *mem_class;
Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h	2008-01-13 16:57:10.000000000 +0900
@@ -0,0 +1,42 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ *
+ * Copyright (C) 2008 Marcelo Tosatti <marcelo@kvack.org>,
+ *                    KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * Released under the GPL, see the file COPYING for details.
+ */
+
+#ifndef _LINUX_MEM_NOTIFY_H
+#define _LINUX_MEM_NOTIFY_H
+
+#define MEM_NOTIFY_FREQ (HZ/5)
+
+extern atomic_long_t last_mem_notify;
+
+extern void __memory_pressure_notify(struct zone *zone, int pressure);
+
+
+static inline void memory_pressure_notify(struct zone *zone, int pressure)
+{
+	unsigned long target;
+	unsigned long pages_high, pages_free, pages_reserve;
+
+	if (pressure) {
+		target = atomic_long_read(&last_mem_notify) + MEM_NOTIFY_FREQ;
+		if (likely(time_before(jiffies, target)))
+			return;
+
+		pages_high = zone->pages_high;
+		pages_free = zone_page_state(zone, NR_FREE_PAGES);
+		pages_reserve = zone->lowmem_reserve[MAX_NR_ZONES-1];
+		if (unlikely(pages_free > (pages_high+pages_reserve)*2))
+			return;
+
+	} else if (likely(!zone->mem_notify_status))
+		return;
+
+	__memory_pressure_notify(zone, pressure);
+}
+
+#endif /* _LINUX_MEM_NOTIFY_H */
Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/mmzone.h
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/mmzone.h	2008-01-13 16:56:54.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/include/linux/mmzone.h	2008-01-13 16:57:10.000000000 +0900
@@ -283,6 +283,7 @@ struct zone {
 	 */
 	int prev_priority;
 
+	int mem_notify_status;
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
Index: linux-2.6.24-rc6-mm1-memnotify/mm/Makefile
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/Makefile	2008-01-13 16:56:54.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/Makefile	2008-01-13 16:57:10.000000000 +0900
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o $(mmu-y)
+			   page_isolation.o mem_notify.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
Index: linux-2.6.24-rc6-mm1-memnotify/mm/mem_notify.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.24-rc6-mm1-memnotify/mm/mem_notify.c	2008-01-13 17:25:39.000000000 +0900
@@ -0,0 +1,109 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ *
+ * Copyright (C) 2008 Marcelo Tosatti <marcelo@kvack.org>,
+ *                    KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * Released under the GPL, see the file COPYING for details.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+
+#include <asm/atomic.h>
+
+#define PROC_WAKEUP_GUARD  (10*HZ)
+
+struct mem_notify_file_info {
+	unsigned long last_proc_notify;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(mem_wait);
+static atomic_long_t nr_under_memory_pressure_zones = ATOMIC_LONG_INIT(0);
+static atomic_t nr_watcher_task = ATOMIC_INIT(0);
+
+atomic_long_t last_mem_notify = ATOMIC_LONG_INIT(INITIAL_JIFFIES);
+
+void __memory_pressure_notify(struct zone* zone, int pressure)
+{
+	int nr_wakeup;
+	int flags;
+
+	spin_lock_irqsave(&mem_wait.lock, flags);
+
+	if (pressure != zone->mem_notify_status) {
+		long val = pressure ? 1 : -1;
+		atomic_long_add(val, &nr_under_memory_pressure_zones);
+		zone->mem_notify_status = pressure;
+	}
+
+	if (pressure) {
+		nr_wakeup = max_t(int, atomic_read(&nr_watcher_task)>>4, 100);
+		atomic_long_set(&last_mem_notify, jiffies);
+		wake_up_locked_nr(&mem_wait, nr_wakeup);
+	}
+
+	spin_unlock_irqrestore(&mem_wait.lock, flags);
+}
+
+static int mem_notify_open(struct inode *inode, struct file *file)
+{
+	struct mem_notify_file_info *info;
+	int    err = 0;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+        if (!info) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	info->last_proc_notify = INITIAL_JIFFIES;
+	file->private_data = info;
+	atomic_inc(&nr_watcher_task);
+out:
+        return err;
+}
+
+static int mem_notify_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	atomic_dec(&nr_watcher_task);
+	return 0;
+}
+
+static unsigned int mem_notify_poll(struct file *file, poll_table *wait)
+{
+	struct mem_notify_file_info *info = file->private_data;
+	unsigned long now = jiffies;
+	unsigned long timeout;
+	unsigned int retval = 0;
+
+	poll_wait_exclusive(file, &mem_wait, wait);
+
+	timeout = info->last_proc_notify + PROC_WAKEUP_GUARD;
+	if (time_before(now, timeout))
+		goto out;
+
+	if (atomic_long_read(&nr_under_memory_pressure_zones) != 0) {
+		info->last_proc_notify = now;
+		retval = POLLIN;
+	}
+
+out:
+	return retval;
+}
+
+struct file_operations mem_notify_fops = {
+	.open = mem_notify_open,
+	.release = mem_notify_release,
+	.poll = mem_notify_poll,
+};
+EXPORT_SYMBOL(mem_notify_fops);
Index: linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/page_alloc.c	2008-01-13 16:56:54.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c	2008-01-13 17:25:15.000000000 +0900
@@ -3456,6 +3456,7 @@ static void __meminit free_area_init_cor
 		zone->zone_pgdat = pgdat;
 
 		zone->prev_priority = DEF_PRIORITY;
+		zone->mem_notify_status = 0;
 
 		zone_pcp_init(zone);
 		INIT_LIST_HEAD(&zone->active_list);
Index: linux-2.6.24-rc6-mm1-memnotify/Documentation/devices.txt
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/Documentation/devices.txt	2008-01-13 16:42:57.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/Documentation/devices.txt	2008-01-13 17:07:05.000000000 +0900
@@ -96,6 +96,7 @@ Your cooperation is appreciated.
 		 11 = /dev/kmsg		Writes to this come out as printk's
 		 12 = /dev/oldmem	Used by crashdump kernels to access
 					the memory of the kernel that crashed.
+		 13 = /dev/mem_notify   Low memory notification.
 
   1 block	RAM disk
 		  0 = /dev/ram0		First RAM disk



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
                   ` (2 preceding siblings ...)
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
@ 2008-01-15  1:02 ` KOSAKI Motohiro
  2008-01-15  2:06   ` KAMEZAWA Hiroyuki
  2008-01-15 22:55   ` Daniel Spång
  2008-01-15  1:03 ` [RFC][PATCH 5/5] /proc/zoneinfo enhancement KOSAKI Motohiro
  4 siblings, 2 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:02 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton

The notification happens whenever the VM moves an
anonymous page to the inactive list - this is a pretty good indication
that there are unused anonymous pages present which will very likely be
swapped out soon.

The zone is judged to be out of trouble in the following situations:
 o memory pressure decreases and the VM stops moving anonymous pages to the inactive list.
 o free pages rise above (pages_high+lowmem_reserve)*2.


Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 mm/vmscan.c |   15 +++++++++++++++
 1 file changed, 15 insertions(+)

Index: linux-2.6.24-rc6-mm1-memnotify/mm/vmscan.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/vmscan.c	2008-01-13 16:59:28.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/vmscan.c	2008-01-13 17:03:58.000000000 +0900
@@ -963,6 +963,7 @@ static int calc_reclaim_mapped(struct sc
 	long distress;
 	long swap_tendency;
 	long imbalance;
+	int reclaim_mapped = 0;
 	int prev_priority;
 
 	if (scan_global_lru(sc) && zone_is_near_oom(zone))
@@ -1089,10 +1090,14 @@ static void shrink_active_list(unsigned 
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
+	bool inactivated_anon = 0;
 
 	if (sc->may_swap)
 		reclaim_mapped = calc_reclaim_mapped(sc, zone, priority);
 
+	if (!reclaim_mapped)
+		memory_pressure_notify(zone, 0);
+
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
@@ -1116,6 +1121,13 @@ static void shrink_active_list(unsigned 
 			if (!reclaim_mapped ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
 			    page_referenced(page, 0, sc->mem_cgroup)) {
+				/* deal with the case where there is no
+				 * swap but an anonymous page would be
+				 * moved to the inactive list.
+				 */
+				if (!total_swap_pages && reclaim_mapped &&
+				    PageAnon(page))
+					inactivated_anon = 1;
 				list_add(&page->lru, &l_active);
 				continue;
 			}
@@ -1123,8 +1135,12 @@ static void shrink_active_list(unsigned 
 			list_add(&page->lru, &l_active);
 			continue;
 		}
+		if (PageAnon(page))
+			inactivated_anon = 1;
 		list_add(&page->lru, &l_inactive);
 	}
+	if (inactivated_anon)
+		memory_pressure_notify(zone, 1);
 
 	pagevec_init(&pvec, 1);
 	pgmoved = 0;
@@ -1158,6 +1174,8 @@ static void shrink_active_list(unsigned 
 		pagevec_strip(&pvec);
 		spin_lock_irq(&zone->lru_lock);
 	}
+	if (!reclaim_mapped)
+		memory_pressure_notify(zone, 0);
 
 	pgmoved = 0;
 	while (!list_empty(&l_active)) {
Index: linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/page_alloc.c	2008-01-13 16:57:10.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c	2008-01-13 17:04:34.000000000 +0900
@@ -44,6 +44,7 @@
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
 #include <linux/memcontrol.h>
+#include <linux/mem_notify.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -435,6 +436,8 @@ static inline void __free_one_page(struc
 	unsigned long page_idx;
 	int order_size = 1 << order;
 	int migratetype = get_pageblock_migratetype(page);
+	unsigned long prev_free;
+	unsigned long notify_threshold;
 
 	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
@@ -444,6 +447,7 @@ static inline void __free_one_page(struc
 	VM_BUG_ON(page_idx & (order_size - 1));
 	VM_BUG_ON(bad_range(zone, page));
 
+	prev_free = zone_page_state(zone, NR_FREE_PAGES);
 	__mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
 	while (order < MAX_ORDER-1) {
 		unsigned long combined_idx;
@@ -465,6 +469,13 @@ static inline void __free_one_page(struc
 	list_add(&page->lru,
 		&zone->free_area[order].free_list[migratetype]);
 	zone->free_area[order].nr_free++;
+
+	notify_threshold = (zone->pages_high +
+			    zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2;
+
+	if (unlikely((prev_free <= notify_threshold) &&
+		     (zone_page_state(zone, NR_FREE_PAGES) > notify_threshold)))
+		memory_pressure_notify(zone, 0);
 }
 
 static inline int free_pages_check(struct page *page)



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [RFC][PATCH 5/5] /proc/zoneinfo enhancement
  2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
                   ` (3 preceding siblings ...)
  2008-01-15  1:02 ` [RFC][PATCH 4/5] memory_pressure_notify() caller KOSAKI Motohiro
@ 2008-01-15  1:03 ` KOSAKI Motohiro
  2008-01-15 10:44   ` Alan Cox
  4 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:03 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: kosaki.motohiro, Marcelo Tosatti, Daniel Spang, Rik van Riel,
	Andrew Morton

Show the new zone struct member (mem_notify_status) in /proc/zoneinfo.

Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 mm/vmstat.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Index: linux-2.6.24-rc6-mm1-memnotify/mm/vmstat.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/vmstat.c	2008-01-13 16:42:54.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/vmstat.c	2008-01-13 17:07:43.000000000 +0900
@@ -795,9 +795,11 @@ static void zoneinfo_show_print(struct s
 	seq_printf(m,
 		   "\n  all_unreclaimable: %u"
 		   "\n  prev_priority:     %i"
+		   "\n  mem_notify_status: %i"
 		   "\n  start_pfn:         %lu",
-			   zone_is_all_unreclaimable(zone),
+		   zone_is_all_unreclaimable(zone),
 		   zone->prev_priority,
+		   zone->mem_notify_status,
 		   zone->zone_start_pfn);
 	seq_putc(m, '\n');
 }



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
@ 2008-01-15  1:08   ` Randy Dunlap
  2008-01-15  1:20     ` KOSAKI Motohiro
  2008-01-15  2:10   ` KAMEZAWA Hiroyuki
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 40+ messages in thread
From: Randy Dunlap @ 2008-01-15  1:08 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Daniel Spang,
	Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 10:01:21 +0900 KOSAKI Motohiro wrote:

> the core of this patch series.
> add /dev/mem_notify device for notification low memory to user process.
> 
> <usage examle>
> 
>         fd = open("/dev/mem_notify", O_RDONLY);
>         if (fd < 0) {
>                 exit(1);
>         }
>         pollfds.fd = fd;
>         pollfds.events = POLLIN;
>         pollfds.revents = 0;
> 	err = poll(&pollfds, 1, -1); // wake up at low memory
> 
>         ...
> </usage example>
> 
> Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> ---
>  drivers/char/mem.c         |    6 ++
>  include/linux/mem_notify.h |   41 ++++++++++++++++
>  include/linux/mmzone.h     |    1 
>  mm/Makefile                |    2 
>  mm/mem_notify.c            |  109 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/page_alloc.c            |    1 
>  6 files changed, 159 insertions(+), 1 deletion(-)
> 

Hi,

1/ I don't see the file below listed in the diffstat above...

2/ Where is the userspace interface information for the syscall?

> Index: linux-2.6.24-rc6-mm1-memnotify/Documentation/devices.txt
> ===================================================================
> --- linux-2.6.24-rc6-mm1-memnotify.orig/Documentation/devices.txt	2008-01-13 16:42:57.000000000 +0900
> +++ linux-2.6.24-rc6-mm1-memnotify/Documentation/devices.txt	2008-01-13 17:07:05.000000000 +0900
> @@ -96,6 +96,7 @@ Your cooperation is appreciated.
>  		 11 = /dev/kmsg		Writes to this come out as printk's
>  		 12 = /dev/oldmem	Used by crashdump kernels to access
>  					the memory of the kernel that crashed.
> +		 13 = /dev/mem_notify   Low memory notification.
>  
>    1 block	RAM disk
>  		  0 = /dev/ram0		First RAM disk


---
~Randy

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:08   ` Randy Dunlap
@ 2008-01-15  1:20     ` KOSAKI Motohiro
  2008-01-15  1:24       ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:20 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi randy

> Hi,
> 
> 1/ I don't see the file below listed in the diffstat above...

Agghh...
Sorry, that is a mistake.
I will repost soon.

thanks.


> 2/ Where is the userspace interface information for the syscall?

There is none: no new syscall is added.
The only userspace interface is poll(2).




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:20     ` KOSAKI Motohiro
@ 2008-01-15  1:24       ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  1:24 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, Randy Dunlap, linux-mm, linux-kernel,
	Marcelo Tosatti, Daniel Spang, Rik van Riel, Andrew Morton

Hi

> > 1/ I don't see the file below listed in the diffstat above...
> 
> Agghh...
> sorry, it is mistake.
> I repost soon. 
> 
> thanks.

The diffstat below is the correct one.
thanks!

------------------------------
 Documentation/devices.txt  |    1
 drivers/char/mem.c         |    6 ++
 include/linux/mem_notify.h |   42 +++++++++++++++++
 include/linux/mmzone.h     |    1
 mm/Makefile                |    2
 mm/mem_notify.c            |  109 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c            |    1
 7 files changed, 161 insertions(+), 1 deletion(-)



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  1:02 ` [RFC][PATCH 4/5] memory_pressure_notify() caller KOSAKI Motohiro
@ 2008-01-15  2:06   ` KAMEZAWA Hiroyuki
  2008-01-15  2:37     ` KOSAKI Motohiro
  2008-01-15 22:55   ` Daniel Spång
  1 sibling, 1 reply; 40+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-01-15  2:06 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Daniel Spang,
	Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 10:02:30 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> +
> +	notify_threshold = (zone->pages_high +
> +			    zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2;
> +
Why MAX_NR_ZONES-1 ?


> +	if (unlikely((prev_free <= notify_threshold) &&
> +		     (zone_page_state(zone, NR_FREE_PAGES) > notify_threshold)))
> +		memory_pressure_notify(zone, 0);
>  }

How about this
==
if (unlikely(zone->mem_notify_status && ...) 


Thanks,
-Kame


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
  2008-01-15  1:08   ` Randy Dunlap
@ 2008-01-15  2:10   ` KAMEZAWA Hiroyuki
  2008-01-15  2:20     ` KOSAKI Motohiro
  2008-01-15 10:46   ` Alan Cox
  2008-01-15 22:16   ` Pavel Machek
  3 siblings, 1 reply; 40+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-01-15  2:10 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Daniel Spang,
	Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 10:01:21 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> +	if (pressure) {
> +		nr_wakeup = max_t(int, atomic_read(&nr_watcher_task)>>4, 100);
> +		atomic_long_set(&last_mem_notify, jiffies);
> +		wake_up_locked_nr(&mem_wait, nr_wakeup);
> +	}
What is this for ? and Why ?
Are there too many waiters ?

Thanks
-Kame


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  2:10   ` KAMEZAWA Hiroyuki
@ 2008-01-15  2:20     ` KOSAKI Motohiro
  2008-01-15  2:56       ` Rik van Riel
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  2:20 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi Kame

> > +	if (pressure) {
> > +		nr_wakeup = max_t(int, atomic_read(&nr_watcher_task)>>4, 100);
> > +		atomic_long_set(&last_mem_notify, jiffies);
> > +		wake_up_locked_nr(&mem_wait, nr_wakeup);
> > +	}
> What is this for ? and Why ?
> Are there too many waiters ?

my intent is for avoid thundering herd.
100 is heuristic value.

and too many wakeup cause too much memory freed.
I don't want it.

of course, if any problem happened, I will change.
Do you dislike it?



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  2:06   ` KAMEZAWA Hiroyuki
@ 2008-01-15  2:37     ` KOSAKI Motohiro
  2008-01-15  3:00       ` KAMEZAWA Hiroyuki
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  2:37 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi KAME, 

> > +	notify_threshold = (zone->pages_high +
> > +			    zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2;
> > +
> Why MAX_NR_ZONES-1 ?

this is intent to max lowmem_reserve.

in normal case, 
shrink_active_list isn't called when free_pages > pages_high.
but just after memory freed, it happened rarely.

I don't want incorrect notify at system enough free memory.

related discussion
  http://marc.info/?l=linux-mm&m=119878630211348&w=2


> > +	if (unlikely((prev_free <= notify_threshold) &&
> > +		     (zone_page_state(zone, NR_FREE_PAGES) > notify_threshold)))
> > +		memory_pressure_notify(zone, 0);
> >  }
> 
> How about this
> ==
> if (unlikely(zone->mem_notify_status && ...) 

Nice idea.
I will applied it at next post.

thank you!





^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  2:20     ` KOSAKI Motohiro
@ 2008-01-15  2:56       ` Rik van Riel
  0 siblings, 0 replies; 40+ messages in thread
From: Rik van Riel @ 2008-01-15  2:56 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: KAMEZAWA Hiroyuki, kosaki.motohiro, linux-mm, linux-kernel,
	Marcelo Tosatti, Daniel Spang, Andrew Morton

On Tue, 15 Jan 2008 11:20:56 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> Hi Kame
> 
> > > +	if (pressure) {
> > > +		nr_wakeup = max_t(int, atomic_read(&nr_watcher_task)>>4, 100);
> > > +		atomic_long_set(&last_mem_notify, jiffies);
> > > +		wake_up_locked_nr(&mem_wait, nr_wakeup);
> > > +	}
> > What is this for ? and Why ?
> > Are there too many waiters ?
> 
> my intent is for avoid thundering herd.
> 100 is heuristic value.
> 
> and too many wakeup cause too much memory freed.
> I don't want it.
> 
> of course, if any problem happened, I will change.

I agree with you.  Your code looks like it could be a reasonable
heuristic, but the only way to really find that out is to test
the code on live systems under varying workloads.

Maybe we need to wake up fewer tasks more often, maybe we are
better off waking up more tasks but fewer times.  Either way,
at this time we simply do not know and can stick with your current 
code.

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  2:37     ` KOSAKI Motohiro
@ 2008-01-15  3:00       ` KAMEZAWA Hiroyuki
  2008-01-15  3:08         ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-01-15  3:00 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Daniel Spang,
	Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 11:37:48 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> Hi KAME, 
> 
> > > +	notify_threshold = (zone->pages_high +
> > > +			    zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2;
> > > +
> > Why MAX_NR_ZONES-1 ?
> 
> this is intent to max lowmem_reserve.
> 
Ah, my point is.. how about this ?
==
if (page_zoneid(page) != ZONE_DMA)
	notify_threshold = zone->pages_high +
                   	zone->lowmem_reserve[page_zoneid(page) - 1] * 2;
==

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  3:00       ` KAMEZAWA Hiroyuki
@ 2008-01-15  3:08         ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15  3:08 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi Kame,

> > > > +	notify_threshold = (zone->pages_high +
> > > > +			    zone->lowmem_reserve[MAX_NR_ZONES-1]) * 2;
> > > > +
> > > Why MAX_NR_ZONES-1 ?
> > 
> > this is intent to max lowmem_reserve.
> > 
> Ah, my point is.. how about this ?
> ==
> if (page_zoneid(page) != ZONE_DMA)
> 	notify_threshold = zone->pages_high +
>                    	zone->lowmem_reserve[page_zoneid(page) - 1] * 2;

your point out is very good point.

but judged by zone size is more better, may be.
on some 64bit system, ZONE_DMA is 4GB.
small memory system can't ignore it. 

fortunately, zone size check can at free_area_init_core().


- kosaki




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 5/5] /proc/zoneinfo enhancement
  2008-01-15  1:03 ` [RFC][PATCH 5/5] /proc/zoneinfo enhancement KOSAKI Motohiro
@ 2008-01-15 10:44   ` Alan Cox
  2008-01-15 10:49     ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Alan Cox @ 2008-01-15 10:44 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, kosaki.motohiro, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 10:03:23 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> show new member of zone struct by /proc/zoneinfo.
> 
> Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

Minor NAK - Please put new fields at the end - it makes it less likely to
break badly written tools.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
  2008-01-15  1:08   ` Randy Dunlap
  2008-01-15  2:10   ` KAMEZAWA Hiroyuki
@ 2008-01-15 10:46   ` Alan Cox
  2008-01-15 10:59     ` KOSAKI Motohiro
  2008-01-15 22:16   ` Pavel Machek
  3 siblings, 1 reply; 40+ messages in thread
From: Alan Cox @ 2008-01-15 10:46 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, kosaki.motohiro, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 10:01:21 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> the core of this patch series.
> add /dev/mem_notify device for notification low memory to user process.

As you only wake one process how would you use this API from processes
which want to monitor and can free memory under load. Also what fairness
guarantees are there...

Alan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 5/5] /proc/zoneinfo enhancement
  2008-01-15 10:44   ` Alan Cox
@ 2008-01-15 10:49     ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15 10:49 UTC (permalink / raw)
  To: Alan Cox
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi alan

> > show new member of zone struct by /proc/zoneinfo.
> > 
> > Signed-off-by: Marcelo Tosatti <marcelo@kvack.org>
> > Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> 
> Minor NAK - Please put new fields at the end - it makes it less likely to
> break badly written tools.

Oh I see.
I applied your opinion at next post.

Thanks!


- kosaki


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 10:46   ` Alan Cox
@ 2008-01-15 10:59     ` KOSAKI Motohiro
  2008-01-15 11:20       ` Alan Cox
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15 10:59 UTC (permalink / raw)
  To: Alan Cox
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton


> > the core of this patch series.
> > add /dev/mem_notify device for notification low memory to user process.
> 
> As you only wake one process how would you use this API from processes
> which want to monitor and can free memory under load. Also what fairness
> guarantees are there...

Sorry, I don't make sense what you mean fairness.
Could you tell more?





^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 10:59     ` KOSAKI Motohiro
@ 2008-01-15 11:20       ` Alan Cox
  2008-01-15 11:48         ` KOSAKI Motohiro
  2008-01-15 12:05         ` Marcelo Tosatti
  0 siblings, 2 replies; 40+ messages in thread
From: Alan Cox @ 2008-01-15 11:20 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

On Tue, 15 Jan 2008 19:59:02 +0900
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:

> 
> > > the core of this patch series.
> > > add /dev/mem_notify device for notification low memory to user process.
> > 
> > As you only wake one process how would you use this API from processes
> > which want to monitor and can free memory under load. Also what fairness
> > guarantees are there...
> 
> Sorry, I don't make sense what you mean fairness.
> Could you tell more?

If you have two processes each waiting on mem_notify is it not possible
that one of them will keep being the one woken up and the other will
remain stuck ?

It also appears there is no way to wait for memory shortages (processes
that can free memory easily) only for memory to start appearing.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 11:20       ` Alan Cox
@ 2008-01-15 11:48         ` KOSAKI Motohiro
  2008-01-15 13:42           ` Alan Cox
  2008-01-15 12:05         ` Marcelo Tosatti
  1 sibling, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-15 11:48 UTC (permalink / raw)
  To: Alan Cox
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi Alan

thank you for kindfull explain.

> > > > the core of this patch series.
> > > > add /dev/mem_notify device for notification low memory to user process.
> > > 
> > > As you only wake one process how would you use this API from processes
> > > which want to monitor and can free memory under load. Also what fairness
> > > guarantees are there...
> > 
> > Sorry, I don't make sense what you mean fairness.
> > Could you tell more?
> 
> If you have two processes each waiting on mem_notify is it not possible
> that one of them will keep being the one woken up and the other will
> remain stuck ?

current wake up order is simply FIFO by poll(2) called.
because the VM cannot know how much amount each process can do in free.
the process rss and freeable memory is not proportional.

thus I adopt wake up one after another until restoration memory shortage.


> It also appears there is no way to wait for memory shortages (processes
> that can free memory easily) only for memory to start appearing.

poll() with never timeout don't fill your requirement?
to be honest, maybe I don't understand your afraid yet. sorry.


-kosaki



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 11:20       ` Alan Cox
  2008-01-15 11:48         ` KOSAKI Motohiro
@ 2008-01-15 12:05         ` Marcelo Tosatti
  2008-01-15 13:42           ` Alan Cox
  1 sibling, 1 reply; 40+ messages in thread
From: Marcelo Tosatti @ 2008-01-15 12:05 UTC (permalink / raw)
  To: Alan Cox
  Cc: KOSAKI Motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi Alan,

On Tue, Jan 15, 2008 at 11:20:27AM +0000, Alan Cox wrote:
> On Tue, 15 Jan 2008 19:59:02 +0900
> KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> 
> > 
> > > > the core of this patch series.
> > > > add /dev/mem_notify device for notification low memory to user process.
> > > 
> > > As you only wake one process how would you use this API from processes
> > > which want to monitor and can free memory under load. Also what fairness
> > > guarantees are there...
> > 
> > Sorry, I don't make sense what you mean fairness.
> > Could you tell more?
> 
> If you have two processes each waiting on mem_notify is it not possible
> that one of them will keep being the one woken up and the other will
> remain stuck ?

Tasks are added to the end of waitqueue->task_list through
add_wait_queue_exclusive, and waken up from the start of the list. So
I don't think that can happen (its FIFO).

> It also appears there is no way to wait for memory shortages (processes
> that can free memory easily) only for memory to start appearing.

The notification is sent once the VM starts moving anonymous pages to
the inactive list (meaning there is memory shortage). So polling on the
device is all about waiting for memory shortage.

Or do you mean something else?


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 11:48         ` KOSAKI Motohiro
@ 2008-01-15 13:42           ` Alan Cox
  2008-01-16  2:43             ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Alan Cox @ 2008-01-15 13:42 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

> current wake up order is simply FIFO by poll(2) called.
> because the VM cannot know how much amount each process can do in free.
> the process rss and freeable memory is not proportional.

Ok this makes sense.
> 
> thus I adopt wake up one after another until restoration memory shortage.
> 
> 
> > It also appears there is no way to wait for memory shortages (processes
> > that can free memory easily) only for memory to start appearing.
> 
> poll() with never timeout don't fill your requirement?
> to be honest, maybe I don't understand your afraid yet. sorry.

My misunderstanding. There is in fact no way to wait for memory to become
available. The poll() method you provide works nicely waiting for
shortages and responding to them by freeing memory.

It would be interesting to add FASYNC support to this. Some users have
asked for a signal when memory shortage occurs (as IBM AIX provides
this). FASYNC support would allow a SIGIO to be delivered from this
device when memory shortages occurred. Poll as you have implemented is of
course the easier way for a program to monitor memory and a better
interface.

Alan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 12:05         ` Marcelo Tosatti
@ 2008-01-15 13:42           ` Alan Cox
  0 siblings, 0 replies; 40+ messages in thread
From: Alan Cox @ 2008-01-15 13:42 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: KOSAKI Motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

> Tasks are added to the end of waitqueue->task_list through
> add_wait_queue_exclusive, and waken up from the start of the list. So
> I don't think that can happen (its FIFO).

Agreed

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
                     ` (2 preceding siblings ...)
  2008-01-15 10:46   ` Alan Cox
@ 2008-01-15 22:16   ` Pavel Machek
  2008-01-16  1:57     ` KOSAKI Motohiro
  3 siblings, 1 reply; 40+ messages in thread
From: Pavel Machek @ 2008-01-15 22:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Daniel Spang,
	Rik van Riel, Andrew Morton

Hi!

> the core of this patch series.
> add /dev/mem_notify device for notification low memory to user process.
> 
> <usage example>
> 
>         fd = open("/dev/mem_notify", O_RDONLY);
>         if (fd < 0) {
>                 exit(1);
>         }
>         pollfds.fd = fd;
>         pollfds.events = POLLIN;
>         pollfds.revents = 0;
> 	err = poll(&pollfds, 1, -1); // wake up at low memory
> 
>         ...
> </usage example>

Nice, this is really needed for openmoko, zaurus, etc....

But this changelog needs to go into Documentation/...

...and /dev/mem_notify is really a bad name. /dev/memory_low?
/dev/oom?

									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15  1:02 ` [RFC][PATCH 4/5] memory_pressure_notify() caller KOSAKI Motohiro
  2008-01-15  2:06   ` KAMEZAWA Hiroyuki
@ 2008-01-15 22:55   ` Daniel Spång
  2008-01-15 22:59     ` Rik van Riel
  1 sibling, 1 reply; 40+ messages in thread
From: Daniel Spång @ 2008-01-15 22:55 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: linux-mm, linux-kernel, Marcelo Tosatti, Rik van Riel, Andrew Morton

Hi,

On 1/15/08, KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> the notification point to happen whenever the VM moves an
> anonymous page to the inactive list - this is a pretty good indication
> that there are unused anonymous pages present which will be very likely
> swapped out soon.

> +                               /* deal with the case where there is no
> +                                * swap but an anonymous page would be
> +                                * moved to the inactive list.
> +                                */
> +                               if (!total_swap_pages && reclaim_mapped &&
> +                                   PageAnon(page))
> +                                       inactivated_anon = 1;

As you know I have had some concerns regarding a too early
notification in a swapless system.

I did a test with a populated page cache in a swapless system:

$ cat /bigfile > /dev/null # populate page cache
$ cat /proc/meminfo
MemTotal:      1037040 kB
MemFree:        113976 kB
Buffers:          1068 kB
Cached:         907552 kB
SwapCached:          0 kB
Active:          11116 kB
Inactive:       903968 kB
HighTotal:      130992 kB
HighFree:          252 kB
LowTotal:       906048 kB
LowFree:        113724 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:              36 kB
Writeback:           0 kB
AnonPages:        6484 kB
Mapped:           1216 kB
Slab:             4024 kB
SReclaimable:      864 kB
SUnreclaim:       3160 kB
PageTables:        444 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    518520 kB
Committed_AS:    18816 kB
VmallocTotal:   114680 kB
VmallocUsed:       904 kB
VmallocChunk:   113672 kB

Start to allocate memory, 10 MB every second, exit on notification.

$ cat /proc/meminfo # just after notification
MemTotal:      1037040 kB
MemFree:        123468 kB
Buffers:           876 kB
Cached:         897976 kB
SwapCached:          0 kB
Active:          12984 kB
Inactive:       892332 kB
HighTotal:      130992 kB
HighFree:         1064 kB
LowTotal:       906048 kB
LowFree:        122404 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:               0 kB
Writeback:           0 kB
AnonPages:        6484 kB
Mapped:           1220 kB
Slab:             4012 kB
SReclaimable:      864 kB
SUnreclaim:       3148 kB
PageTables:        448 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    518520 kB
Committed_AS:    18816 kB
VmallocTotal:   114680 kB
VmallocUsed:       904 kB
VmallocChunk:   113672 kB

The notification fires after only ~100 MB allocated, i.e., when page
reclaim is beginning to nag from page cache. Isn't this a bit early?
Repeating the test with swap enabled results in a notification after
~600 MB allocated, which is more reasonable and just before the system
starts to swap.

Cheers,
Daniel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15 22:55   ` Daniel Spång
@ 2008-01-15 22:59     ` Rik van Riel
  2008-01-15 23:39       ` Daniel Spång
  0 siblings, 1 reply; 40+ messages in thread
From: Rik van Riel @ 2008-01-15 22:59 UTC (permalink / raw)
  To: Daniel Spång
  Cc: KOSAKI Motohiro, linux-mm, linux-kernel, Marcelo Tosatti, Andrew Morton

On Tue, 15 Jan 2008 23:55:17 +0100
"Daniel Spång" <daniel.spang@gmail.com> wrote:

> The notification fires after only ~100 MB allocated, i.e., when page
> reclaim is beginning to nag from page cache. Isn't this a bit early?
> Repeating the test with swap enabled results in a notification after
> ~600 MB allocated, which is more reasonable and just before the system
> starts to swap.

Your issue may have more to do with the fact that the
highmem zone is 128MB in size and some balancing issues
between __alloc_pages and try_to_free_pages.

-- 
All rights reversed.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15 22:59     ` Rik van Riel
@ 2008-01-15 23:39       ` Daniel Spång
  2008-01-16  1:48         ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Spång @ 2008-01-15 23:39 UTC (permalink / raw)
  To: Rik van Riel
  Cc: KOSAKI Motohiro, linux-mm, linux-kernel, Marcelo Tosatti, Andrew Morton

On 1/15/08, Rik van Riel <riel@redhat.com> wrote:
>
> On Tue, 15 Jan 2008 23:55:17 +0100
> "Daniel Spång" <daniel.spang@gmail.com> wrote:
>
> > The notification fires after only ~100 MB allocated, i.e., when page
> > reclaim is beginning to nag from page cache. Isn't this a bit early?
> > Repeating the test with swap enabled results in a notification after
> > ~600 MB allocated, which is more reasonable and just before the system
> > starts to swap.
>
> Your issue may have more to do with the fact that the
> highmem zone is 128MB in size and some balancing issues
> between __alloc_pages and try_to_free_pages.

I don't think so. I ran the test again without highmem and noticed the
same behaviour:

$ cat /proc/meminfo
MemTotal:       895876 kB
MemFree:        111292 kB
Buffers:           924 kB
Cached:         768664 kB
SwapCached:          0 kB
Active:           9196 kB
Inactive:       767480 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       895876 kB
LowFree:        111292 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:              32 kB
Writeback:           0 kB
AnonPages:        7108 kB
Mapped:           1224 kB
Slab:             4288 kB
SReclaimable:     1316 kB
SUnreclaim:       2972 kB
PageTables:        448 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    447936 kB
Committed_AS:    19676 kB
VmallocTotal:   122872 kB
VmallocUsed:       904 kB
VmallocChunk:   121864 kB

Start to allocate memory, 10 MB every second, exit on notification
which happened after 110 MB.

$ cat /proc/meminfo #after
MemTotal:       895876 kB
MemFree:        116748 kB
Buffers:           904 kB
Cached:         762944 kB
SwapCached:          0 kB
Active:          12864 kB
Inactive:       758064 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       895876 kB
LowFree:        116748 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:               4 kB
Writeback:           0 kB
AnonPages:        7108 kB
Mapped:           1224 kB
Slab:             4284 kB
SReclaimable:     1316 kB
SUnreclaim:       2968 kB
PageTables:        448 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    447936 kB
Committed_AS:    19676 kB
VmallocTotal:   122872 kB
VmallocUsed:       904 kB
VmallocChunk:   121864 kB

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-15 23:39       ` Daniel Spång
@ 2008-01-16  1:48         ` KOSAKI Motohiro
  2008-01-16 11:03           ` Daniel Spång
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-16  1:48 UTC (permalink / raw)
  To: "Daniel Spång"
  Cc: kosaki.motohiro, Rik van Riel, linux-mm, linux-kernel,
	Marcelo Tosatti, Andrew Morton

Hi Daniel

> > > The notification fires after only ~100 MB allocated, i.e., when page
> > > reclaim is beginning to nag from page cache. Isn't this a bit early?
> > > Repeating the test with swap enabled results in a notification after
> > > ~600 MB allocated, which is more reasonable and just before the system
> > > starts to swap.
> >
> > Your issue may have more to do with the fact that the
> > highmem zone is 128MB in size and some balancing issues
> > between __alloc_pages and try_to_free_pages.
> 
> I don't think so. I ran the test again without highmem and noticed the
> same behaviour:

Thank you for good point out!
Could you please post your test program and reproduced method?

unfortunately,
my simple test is so good works in swapless system ;-)

thanks.




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 22:16   ` Pavel Machek
@ 2008-01-16  1:57     ` KOSAKI Motohiro
  2008-01-16  4:13       ` Marcelo Tosatti
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-16  1:57 UTC (permalink / raw)
  To: Pavel Machek, Marcelo Tosatti
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Daniel Spang,
	Rik van Riel, Andrew Morton

Hi Pavel

> > 	err = poll(&pollfds, 1, -1); // wake up at low memory
> > 
> >         ...
> > </usage example>
> 
> Nice, this is really needed for openmoko, zaurus, etc....
> 
> But this changelog needs to go into Documentation/...
> 
> ...and /dev/mem_notify is really a bad name. /dev/memory_low?
> /dev/oom?

thank you for your kindful advise.

but..

to be honest, my english is very limited.
I can't make judgments name is good or not.

Marcelo, What do you think his idea?





^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-15 13:42           ` Alan Cox
@ 2008-01-16  2:43             ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-16  2:43 UTC (permalink / raw)
  To: Alan Cox
  Cc: kosaki.motohiro, linux-mm, linux-kernel, Marcelo Tosatti,
	Daniel Spang, Rik van Riel, Andrew Morton

Hi Alan

> > > It also appears there is no way to wait for memory shortages (processes
> > > that can free memory easily) only for memory to start appearing.
> > 
> > poll() with never timeout don't fill your requirement?
> > to be honest, maybe I don't understand your afraid yet. sorry.
> 
> My misunderstanding. There is in fact no way to wait for memory to become
> available. The poll() method you provide works nicely waiting for
> shortages and responding to them by freeing memory.
> 
> It would be interesting to add FASYNC support to this. Some users have
> asked for a signal when memory shortage occurs (as IBM AIX provides
> this). FASYNC support would allow a SIGIO to be delivered from this
> device when memory shortages occurred. Poll as you have implemented is of
> course the easier way for a program to monitor memory and a better
> interface.

OK.
I will challenge implement at mem_notify v5.


- kosaki



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-16  1:57     ` KOSAKI Motohiro
@ 2008-01-16  4:13       ` Marcelo Tosatti
  2008-01-16 11:42         ` Pavel Machek
  0 siblings, 1 reply; 40+ messages in thread
From: Marcelo Tosatti @ 2008-01-16  4:13 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Pavel Machek, Marcelo Tosatti, linux-mm, linux-kernel,
	Daniel Spang, Rik van Riel, Andrew Morton

On Wed, Jan 16, 2008 at 10:57:16AM +0900, KOSAKI Motohiro wrote:
> Hi Pavel
> 
> > > 	err = poll(&pollfds, 1, -1); // wake up at low memory
> > > 
> > >         ...
> > > </usage example>
> > 
> > Nice, this is really needed for openmoko, zaurus, etc....
> > 
> > But this changelog needs to go into Documentation/...
> > 
> > ...and /dev/mem_notify is really a bad name. /dev/memory_low?
> > /dev/oom?
> 
> thank you for your kindful advise.
> 
> but..
> 
> to be honest, my english is very limited.
> I can't make judgments name is good or not.
> 
> Marcelo, What do you think his idea?

"mem_notify" sounds alright, but I don't really care.

Notify:

To give notice to; inform: notified the citizens of the curfew by
posting signs.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-16  1:48         ` KOSAKI Motohiro
@ 2008-01-16 11:03           ` Daniel Spång
  2008-01-17  3:26             ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Spång @ 2008-01-16 11:03 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Rik van Riel, linux-mm, linux-kernel, Marcelo Tosatti, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 2560 bytes --]

On 1/16/08, KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> Hi Daniel
>
> > > > The notification fires after only ~100 MB allocated, i.e., when page
> > > > reclaim is beginning to nag from page cache. Isn't this a bit early?
> > > > Repeating the test with swap enabled results in a notification after
> > > > ~600 MB allocated, which is more reasonable and just before the system
> > > > starts to swap.
> > >
> > > Your issue may have more to do with the fact that the
> > > highmem zone is 128MB in size and some balancing issues
> > > between __alloc_pages and try_to_free_pages.
> >
> > I don't think so. I ran the test again without highmem and noticed the
> > same behaviour:
>
> Thank you for good point out!
> Could you please post your test program and reproduced method?

Sure:

1. Fill almost all available memory with page cache in a system without swap.
2. Run attached alloc-test program.
3. Notification fires when page cache is reclaimed.

Example:

$ cat /bigfile > /dev/null
$ cat /proc/meminfo
MemTotal:       895876 kB
MemFree:         94272 kB
Buffers:           884 kB
Cached:         782868 kB
SwapCached:          0 kB
Active:          15356 kB
Inactive:       778000 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       895876 kB
LowFree:         94272 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:               0 kB
Writeback:           0 kB
AnonPages:        9624 kB
Mapped:           1352 kB
Slab:             4220 kB
SReclaimable:     1168 kB
SUnreclaim:       3052 kB
PageTables:        528 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    447936 kB
Committed_AS:    28988 kB
VmallocTotal:   122872 kB
VmallocUsed:       904 kB
VmallocChunk:   121864 kB
$ ./test-alloc
---------
Got notification, allocated 90 MB
$ cat /proc/meminfo
MemTotal:       895876 kB
MemFree:        101960 kB
Buffers:           888 kB
Cached:         775200 kB
SwapCached:          0 kB
Active:          15356 kB
Inactive:       770336 kB
HighTotal:           0 kB
HighFree:            0 kB
LowTotal:       895876 kB
LowFree:        101960 kB
SwapTotal:           0 kB
SwapFree:            0 kB
Dirty:              28 kB
Writeback:           0 kB
AnonPages:        9624 kB
Mapped:           1352 kB
Slab:             4224 kB
SReclaimable:     1168 kB
SUnreclaim:       3056 kB
PageTables:        532 kB
NFS_Unstable:        0 kB
Bounce:              0 kB
CommitLimit:    447936 kB
Committed_AS:    28988 kB
VmallocTotal:   122872 kB
VmallocUsed:       904 kB
VmallocChunk:   121864 kB

[-- Attachment #2: alloc-test.c --]
[-- Type: application/octet-stream, Size: 1738 bytes --]

/*
 * Allocate 10 MB each second. Exit on notification.
 */

#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <poll.h>
#include <pthread.h>
#include <errno.h>

/* Number of allocations completed so far; incremented by do_alloc(). */
int count = 0;
/* Size of each allocation in MB (multiplied by 1024*1024 where it is used). */
int size = 10;

/*
 * Allocator thread body.
 *
 * Once per second, maps `size` MB of anonymous private memory, writes to
 * every byte of it, prints a '-' progress mark and increments `count`.
 * The mappings are never unmapped, so memory use grows on every pass.
 *
 * The signature is the one pthread_create() requires of a start routine;
 * `arg` is unused.  The function never returns: it loops forever, or
 * terminates the whole process if mmap() fails.
 */
void *do_alloc(void *arg)
{
        (void)arg;

        for(;;) {
                /* Computed in size_t, the type mmap() and memset() take. */
                size_t len = (size_t)size * 1024 * 1024;
                int *buffer;

                buffer = mmap(NULL, len,
                              PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (buffer == MAP_FAILED) {
                        perror("mmap");
                        exit(EXIT_FAILURE);
                }
                /* Write to the whole mapping, not just reserve address space. */
                memset(buffer, 1, len);

                printf("-");
                fflush(stdout);

                count++;
                sleep(1);
        }
}

/*
 * Wait, with no timeout, for an event on the single descriptor in *pfd.
 *
 * A zero-length read() is issued on the descriptor first; its result is
 * ignored.  Returns whatever poll() returned, which is -1 when the wait
 * was interrupted by a signal (errno == EINTR).  Any other poll() failure
 * is reported with perror() and terminates the process.
 */
int wait_for_notification(struct pollfd *pfd)
{
        int nready;

        (void)read(pfd->fd, NULL, 0);

        nready = poll(pfd, 1, -1);
        if (nready != -1 || errno == EINTR)
                return nready;

        perror("poll");
        exit(EXIT_FAILURE);
}

/*
 * Open /dev/mem_notify read-only and wait on it for POLLIN.
 *
 * Keeps calling wait_for_notification() until it reports a positive
 * result, then prints how many MB have been allocated (size * count)
 * and exits the process successfully.  Exits with failure if the device
 * cannot be opened.  This function does not return to its caller.
 */
void do_free() 
{
        struct pollfd pfd;
        int fd;

        fd = open("/dev/mem_notify", O_RDONLY);
        if (fd == -1) {
                perror("open");
                exit(EXIT_FAILURE);
        }

        pfd.fd = fd;
        pfd.events = POLLIN;

        /* Retry while the wait reports nothing ready (e.g. interrupted). */
        while (wait_for_notification(&pfd) <= 0)
                ;

        printf("\nGot notification, allocated %d MB\n", size * count);
        exit(EXIT_SUCCESS);
}

/*
 * Start the allocator thread, then wait in do_free() for the
 * notification.  do_free() ends the process itself, so the final
 * return is only reached if that ever changes.
 *
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
        pthread_t allocator;
        int err;

        (void)argc;
        (void)argv;

        /* pthread_create() returns an error number; it does not set errno. */
        err = pthread_create(&allocator, NULL, do_alloc, NULL);
        if (err != 0) {
                fprintf(stderr, "pthread_create: %s\n", strerror(err));
                exit(EXIT_FAILURE);
        }

        do_free();
        return EXIT_SUCCESS;
}

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-16  4:13       ` Marcelo Tosatti
@ 2008-01-16 11:42         ` Pavel Machek
  2008-01-16 11:51           ` Daniel Spång
  0 siblings, 1 reply; 40+ messages in thread
From: Pavel Machek @ 2008-01-16 11:42 UTC (permalink / raw)
  To: Marcelo Tosatti
  Cc: KOSAKI Motohiro, linux-mm, linux-kernel, Daniel Spang,
	Rik van Riel, Andrew Morton

On Wed 2008-01-16 02:13:32, Marcelo Tosatti wrote:
> On Wed, Jan 16, 2008 at 10:57:16AM +0900, KOSAKI Motohiro wrote:
> > Hi Pavel
> > 
> > > > 	err = poll(&pollfds, 1, -1); // wake up at low memory
> > > > 
> > > >         ...
> > > > </usage example>
> > > 
> > > Nice, this is really needed for openmoko, zaurus, etc....
> > > 
> > > But this changelog needs to go into Documentation/...
> > > 
> > > ...and /dev/mem_notify is really a bad name. /dev/memory_low?
> > > /dev/oom?
> > 
> > thank you for your kindful advise.
> > 
> > but..
> > 
> > to be honest, my english is very limited.
> > I can't make judgments name is good or not.
> > 
> > Marcelo, What do you think his idea?
> 
> "mem_notify" sounds alright, but I don't really care.
> 
> Notify:
> 
> To give notice to; inform: notified the citizens of the curfew by
> posting signs.

I'd read mem_notify as "tell me when new memory is unplugged" or
something. /dev/oom_notify? Plus, /dev/ names usually do not have "_"
in them.
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-16 11:42         ` Pavel Machek
@ 2008-01-16 11:51           ` Daniel Spång
  2008-01-17  3:04             ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Spång @ 2008-01-16 11:51 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Marcelo Tosatti, KOSAKI Motohiro, linux-mm, linux-kernel,
	Rik van Riel, Andrew Morton

On 1/16/08, Pavel Machek <pavel@ucw.cz> wrote:
> On Wed 2008-01-16 02:13:32, Marcelo Tosatti wrote:
> > On Wed, Jan 16, 2008 at 10:57:16AM +0900, KOSAKI Motohiro wrote:
> > > Hi Pavel
> > >
> > > > >         err = poll(&pollfds, 1, -1); // wake up at low memory
> > > > >
> > > > >         ...
> > > > > </usage example>
> > > >
> > > > Nice, this is really needed for openmoko, zaurus, etc....
> > > >
> > > > But this changelog needs to go into Documentation/...
> > > >
> > > > ...and /dev/mem_notify is really a bad name. /dev/memory_low?
> > > > /dev/oom?
> > >
> > > thank you for your kindful advise.
> > >
> > > but..
> > >
> > > to be honest, my english is very limited.
> > > I can't make judgments name is good or not.
> > >
> > > Marcelo, What do you think his idea?
> >
> > "mem_notify" sounds alright, but I don't really care.
> >
> > Notify:
> >
> > To give notice to; inform: notified the citizens of the curfew by
> > posting signs.
>
> I'd read mem_notify as "tell me when new memory is unplugged" or
> something. /dev/oom_notify? Plus, /dev/ names usually do not have "_"
> in them.

I don't think we should use oom in the name, since the notification is
sent long before oom.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 3/5] add /dev/mem_notify device
  2008-01-16 11:51           ` Daniel Spång
@ 2008-01-17  3:04             ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-17  3:04 UTC (permalink / raw)
  To: "Daniel Spång"
  Cc: kosaki.motohiro, Pavel Machek, Marcelo Tosatti, linux-mm,
	linux-kernel, Rik van Riel, Andrew Morton

Hi

> > I'd read mem_notify as "tell me when new memory is unplugged" or
> > something. /dev/oom_notify? Plus, /dev/ names usually do not have "_"
> > in them.
> 
> I don't think we should use oom in the name, since the notification is
> sent long before oom.

OK, I won't change the name.
Of course, I will change it promptly if anyone proposes a better name.

thanks

- kosaki



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-16 11:03           ` Daniel Spång
@ 2008-01-17  3:26             ` KOSAKI Motohiro
  2008-01-18 10:24               ` Daniel Spång
  0 siblings, 1 reply; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-17  3:26 UTC (permalink / raw)
  To: "Daniel Spång"
  Cc: kosaki.motohiro, Rik van Riel, linux-mm, linux-kernel,
	Marcelo Tosatti, Andrew Morton

Hi Daniel

> > Thank you for good point out!
> > Could you please post your test program and reproduced method?
> 
> Sure:
> 
> 1. Fill almost all available memory with page cache in a system without swap.
> 2. Run attached alloc-test program.
> 3. Notification fires when page cache is reclaimed.

Unfortunately, I can't reproduce it.

my machine
	CPU:    Pentium4 2.8GHz with HT
	memory: 512M


1. I suspect ZONE_DMA; please try the patch below, which ignores ZONE_DMA.
2. Could you please send your .config and /etc/sysctl.conf?
   I would like to try harder to reproduce it.

thanks.

- kosaki




Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>

---
 include/linux/mem_notify.h |    3 +++
 mm/page_alloc.c            |    6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/mem_notify.h
 2008-01-16 21:31:09.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h
2008-01-16 21:34:24.000000000 +0900
@@ -22,6 +22,9 @@ static inline void memory_pressure_notif
        unsigned long target;
        unsigned long pages_high, pages_free, pages_reserve;

+       if (unlikely(zone->mem_notify_status == -1))
+               return;
+
        if (pressure) {
                target = atomic_long_read(&last_mem_notify) + MEM_NOTIFY_FREQ;
                if (likely(time_before(jiffies, target)))
Index: linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c
===================================================================
--- linux-2.6.24-rc6-mm1-memnotify.orig/mm/page_alloc.c 2008-01-13
19:50:27.000000000 +0900
+++ linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c      2008-01-16
21:41:58.000000000 +0900
@@ -3467,7 +3467,11 @@ static void __meminit free_area_init_cor
                zone->zone_pgdat = pgdat;

                zone->prev_priority = DEF_PRIORITY;
-               zone->mem_notify_status = 0;
+
+               if (zone->present_pages < (pgdat->node_present_pages / 10))
+                       zone->mem_notify_status = -1;
+               else
+                       zone->mem_notify_status = 0;

                zone_pcp_init(zone);
                INIT_LIST_HEAD(&zone->active_list);




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-17  3:26             ` KOSAKI Motohiro
@ 2008-01-18 10:24               ` Daniel Spång
  2008-01-18 10:30                 ` KOSAKI Motohiro
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Spång @ 2008-01-18 10:24 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Rik van Riel, linux-mm, linux-kernel, Marcelo Tosatti, Andrew Morton

On 1/17/08, KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote:
> Hi Daniel
>
> > > Thank you for good point out!
> > > Could you please post your test program and reproduced method?
> >
> > Sure:
> >
> > 1. Fill almost all available memory with page cache in a system without swap.
> > 2. Run attached alloc-test program.
> > 3. Notification fires when page cache is reclaimed.
>
> Unfortunately, I can't reproduce it.
>
> my machine
>         CPU:    Pentium4 2.8GHz with HT
>         memory: 512M
>
>
> 1. I doubt ZONE_DMA, please shipment ignore zone_dma patch(below).
> 2. Could you please send your .config and /etc/sysctl.conf?
>    I hope more reproduce challenge.
>
> thanks.
>
> - kosaki
>
>
>
>
> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
>
> ---
>  include/linux/mem_notify.h |    3 +++
>  mm/page_alloc.c            |    6 +++++-
>  2 files changed, 8 insertions(+), 1 deletion(-)
>
> Index: linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h
> ===================================================================
> --- linux-2.6.24-rc6-mm1-memnotify.orig/include/linux/mem_notify.h
>  2008-01-16 21:31:09.000000000 +0900
> +++ linux-2.6.24-rc6-mm1-memnotify/include/linux/mem_notify.h
> 2008-01-16 21:34:24.000000000 +0900
> @@ -22,6 +22,9 @@ static inline void memory_pressure_notif
>         unsigned long target;
>         unsigned long pages_high, pages_free, pages_reserve;
>
> +       if (unlikely(zone->mem_notify_status == -1))
> +               return;
> +
>         if (pressure) {
>                 target = atomic_long_read(&last_mem_notify) + MEM_NOTIFY_FREQ;
>                 if (likely(time_before(jiffies, target)))
> Index: linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.24-rc6-mm1-memnotify.orig/mm/page_alloc.c 2008-01-13
> 19:50:27.000000000 +0900
> +++ linux-2.6.24-rc6-mm1-memnotify/mm/page_alloc.c      2008-01-16
> 21:41:58.000000000 +0900
> @@ -3467,7 +3467,11 @@ static void __meminit free_area_init_cor
>                 zone->zone_pgdat = pgdat;
>
>                 zone->prev_priority = DEF_PRIORITY;
> -               zone->mem_notify_status = 0;
> +
> +               if (zone->present_pages < (pgdat->node_present_pages / 10))
> +                       zone->mem_notify_status = -1;
> +               else
> +                       zone->mem_notify_status = 0;
>
>                 zone_pcp_init(zone);
>                 INIT_LIST_HEAD(&zone->active_list);

Your patch above solves the problem I had with early notification.

Cheers,
Daniel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC][PATCH 4/5] memory_pressure_notify() caller
  2008-01-18 10:24               ` Daniel Spång
@ 2008-01-18 10:30                 ` KOSAKI Motohiro
  0 siblings, 0 replies; 40+ messages in thread
From: KOSAKI Motohiro @ 2008-01-18 10:30 UTC (permalink / raw)
  To: "Daniel Spång"
  Cc: kosaki.motohiro, Rik van Riel, linux-mm, linux-kernel,
	Marcelo Tosatti, Andrew Morton

Hi!

> > 1. I doubt ZONE_DMA, please shipment ignore zone_dma patch(below).
>
> Your patch above solves the problem I had with early notification.

really!?
I am really happy!!

Thank you.


- kosaki



^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2008-01-18 10:31 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-15  0:52 [RFC][PATCH 0/5] mem notifications v4 KOSAKI Motohiro
2008-01-15  0:59 ` [RFC][PATCH 1/5] introduce poll_wait_exclusive() new API KOSAKI Motohiro
2008-01-15  1:00 ` [RFC][PATCH 2/5] introduce wake_up_locked_nr() " KOSAKI Motohiro
2008-01-15  1:01 ` [RFC][PATCH 3/5] add /dev/mem_notify device KOSAKI Motohiro
2008-01-15  1:08   ` Randy Dunlap
2008-01-15  1:20     ` KOSAKI Motohiro
2008-01-15  1:24       ` KOSAKI Motohiro
2008-01-15  2:10   ` KAMEZAWA Hiroyuki
2008-01-15  2:20     ` KOSAKI Motohiro
2008-01-15  2:56       ` Rik van Riel
2008-01-15 10:46   ` Alan Cox
2008-01-15 10:59     ` KOSAKI Motohiro
2008-01-15 11:20       ` Alan Cox
2008-01-15 11:48         ` KOSAKI Motohiro
2008-01-15 13:42           ` Alan Cox
2008-01-16  2:43             ` KOSAKI Motohiro
2008-01-15 12:05         ` Marcelo Tosatti
2008-01-15 13:42           ` Alan Cox
2008-01-15 22:16   ` Pavel Machek
2008-01-16  1:57     ` KOSAKI Motohiro
2008-01-16  4:13       ` Marcelo Tosatti
2008-01-16 11:42         ` Pavel Machek
2008-01-16 11:51           ` Daniel Spång
2008-01-17  3:04             ` KOSAKI Motohiro
2008-01-15  1:02 ` [RFC][PATCH 4/5] memory_pressure_notify() caller KOSAKI Motohiro
2008-01-15  2:06   ` KAMEZAWA Hiroyuki
2008-01-15  2:37     ` KOSAKI Motohiro
2008-01-15  3:00       ` KAMEZAWA Hiroyuki
2008-01-15  3:08         ` KOSAKI Motohiro
2008-01-15 22:55   ` Daniel Spång
2008-01-15 22:59     ` Rik van Riel
2008-01-15 23:39       ` Daniel Spång
2008-01-16  1:48         ` KOSAKI Motohiro
2008-01-16 11:03           ` Daniel Spång
2008-01-17  3:26             ` KOSAKI Motohiro
2008-01-18 10:24               ` Daniel Spång
2008-01-18 10:30                 ` KOSAKI Motohiro
2008-01-15  1:03 ` [RFC][PATCH 5/5] /proc/zoneinfo enhancement KOSAKI Motohiro
2008-01-15 10:44   ` Alan Cox
2008-01-15 10:49     ` KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).